# dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/logger.py

import dbt.flags
import dbt.ui
import json
import logging
import os
import sys
import time
import warnings
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List, ContextManager, Callable, Dict, Any, Set
import colorama
import logbook
from dbt.dataclass_schema import dbtClassMixin

# Colorama needs some help on Windows because we're using logger.info
# instead of print(). If the Windows env doesn't have a TERM var set,
# then we should override the logging stream to use the colorama
# converter. If the TERM var is set (as with Git Bash), then it's safe
# to send escape characters and no log handler injection is needed.
colorama_stdout = sys.stdout
colorama_wrap = True

colorama.init(wrap=colorama_wrap)

if sys.platform == 'win32' and not os.getenv('TERM'):
    colorama_wrap = False
    colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream
elif sys.platform == 'win32':
    colorama_wrap = False

colorama.init(wrap=colorama_wrap)

STDOUT_LOG_FORMAT = '{record.message}'
DEBUG_LOG_FORMAT = (
    '{record.time:%Y-%m-%d %H:%M:%S.%f%z} '
    '({record.thread_name}): '
    '{record.message}'
)

SECRET_ENV_PREFIX = 'DBT_ENV_SECRET_'


def get_secret_env() -> List[str]:
    return [
        v for k, v in os.environ.items()
        if k.startswith(SECRET_ENV_PREFIX)
    ]


ExceptionInformation = str


@dataclass
class LogMessage(dbtClassMixin):
    timestamp: datetime
    message: str
    channel: str
    level: int
    levelname: str
    thread_name: str
    process: int
    extra: Optional[Dict[str, Any]] = None
    exc_info: Optional[ExceptionInformation] = None

    @classmethod
    def from_record_formatted(cls, record: logbook.LogRecord, message: str):
        extra = dict(record.extra)
        log_message = LogMessage(
            timestamp=record.time,
            message=message,
            channel=record.channel,
            level=record.level,
            levelname=logbook.get_level_name(record.level),
            extra=extra,
            thread_name=record.thread_name,
            process=record.process,
            exc_info=record.formatted_exception,
        )
        return log_message


class LogMessageFormatter(logbook.StringFormatter):
    def __call__(self, record, handler):
        data = self.format_record(record, handler)
        exc = self.format_exception(record)
        if exc:
            data.exc_info = exc
        return data

    def format_record(self, record, handler):
        message = super().format_record(record, handler)
        return LogMessage.from_record_formatted(record, message)


class JsonFormatter(LogMessageFormatter):
    def __call__(self, record, handler):
        """Return the record converted to LogMessage's JSON form"""
        # utils imports exceptions which imports logger...
        import dbt.utils
        log_message = super().__call__(record, handler)
        dct = log_message.to_dict(omit_none=True)
        return json.dumps(dct, cls=dbt.utils.JSONEncoder)
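
# Output sketch (illustrative, not exercised by this module): once a handler
# is in JSON mode, each record is serialized as one JSON object per line,
# built from the LogMessage dataclass above. logger.info('hello') would land
# roughly as:
#
#     {"timestamp": "2021-01-01T00:00:00", "message": "hello",
#      "channel": "dbt", "level": 11, "levelname": "INFO",
#      "thread_name": "MainThread", "process": 12345}
#
# The field names come from LogMessage; the values here are invented.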


class FormatterMixin:
    def __init__(self, format_string):
        self._text_format_string = format_string
        self.formatter_class = logbook.StringFormatter
        # triggers a formatter update via logbook.StreamHandler
        self.format_string = self._text_format_string

    def format_json(self):
        # set our formatter to the json formatter
        self.formatter_class = JsonFormatter
        self.format_string = STDOUT_LOG_FORMAT

    def format_text(self):
        # set our formatter to the regular stdout/stderr handler
        self.formatter_class = logbook.StringFormatter
        self.format_string = self._text_format_string

    def reset(self):
        raise NotImplementedError(
            'reset() not implemented in FormatterMixin subclass'
        )


class OutputHandler(logbook.StreamHandler, FormatterMixin):
    """Output handler.

    The `format_string` parameter only changes the default text output, not
    debug mode or json.
    """
    def __init__(
        self,
        stream,
        level=logbook.INFO,
        format_string=STDOUT_LOG_FORMAT,
        bubble=True,
    ) -> None:
        self._default_format = format_string
        logbook.StreamHandler.__init__(
            self,
            stream=stream,
            level=level,
            format_string=format_string,
            bubble=bubble,
        )
        FormatterMixin.__init__(self, format_string)

    def set_text_format(self, format_string: str):
        """Set the text format to format_string. In JSON output mode, this is
        a noop.
        """
        if self.formatter_class is logbook.StringFormatter:
            # reset text format
            self._text_format_string = format_string
            self.format_text()

    def reset(self):
        self.level = logbook.INFO
        self._text_format_string = self._default_format
        self.format_text()

    def should_handle(self, record):
        if record.level < self.level:
            return False
        text_mode = self.formatter_class is logbook.StringFormatter
        if text_mode and record.extra.get('json_only', False):
            return False
        elif not text_mode and record.extra.get('text_only', False):
            return False
        else:
            return True
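
# Minimal usage sketch (assumed, not taken from dbt itself): an OutputHandler
# can be bound on its own to route records to any stream, e.g. in a test:
#
#     import io
#     buf = io.StringIO()
#     handler = OutputHandler(buf, level=logbook.INFO)
#     with handler.applicationbound():
#         logger.info('captured')
#     assert 'captured' in buf.getvalue()
#
# applicationbound() is standard logbook handler API; the buffer-based test
# is our illustration, not something this module does.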


def _redirect_std_logging():
    logbook.compat.redirect_logging()


def _root_channel(record: logbook.LogRecord) -> str:
    return record.channel.split('.')[0]


class Relevel(logbook.Processor):
    def __init__(
        self,
        allowed: List[str],
        min_level=logbook.WARNING,
        target_level=logbook.DEBUG,
    ) -> None:
        self.allowed: Set[str] = set(allowed)
        self.min_level = min_level
        self.target_level = target_level
        super().__init__()

    def process(self, record):
        if _root_channel(record) in self.allowed:
            return
        record.extra['old_level'] = record.level
        # suppress logs at/below our min level by lowering them to NOTSET
        if record.level < self.min_level:
            record.level = logbook.NOTSET
        # if we didn't mess with it, then lower all logs above our level to
        # our target level.
        else:
            record.level = self.target_level
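
# Behavior sketch (channel names assumed for illustration): Relevel leaves
# channels in `allowed` alone and demotes everything else, so chatty
# third-party libraries only surface in debug output:
#
#     with Relevel(allowed=['dbt']):
#         logbook.Logger('urllib3').warning('retrying')  # re-leveled to DEBUG
#         logbook.Logger('urllib3').info('connecting')   # dropped to NOTSET
#         logbook.Logger('dbt').info('untouched')        # passes through
#
# logbook processors can be used as context managers this way.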


class JsonOnly(logbook.Processor):
    def process(self, record):
        record.extra['json_only'] = True


class TextOnly(logbook.Processor):
    def process(self, record):
        record.extra['text_only'] = True
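
# Usage sketch (hedged; mirrors how logbook processors are typically applied):
# these markers are consulted by OutputHandler.should_handle above, so
# wrapping a log call selects which output mode it may appear in:
#
#     with TextOnly():
#         logger.info('only shown in text mode')
#     with JsonOnly():
#         logger.info('only shown in JSON mode')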


class TimingProcessor(logbook.Processor):
    def __init__(self, timing_info: Optional[dbtClassMixin] = None):
        self.timing_info = timing_info
        super().__init__()

    def process(self, record):
        if self.timing_info is not None:
            record.extra['timing_info'] = self.timing_info.to_dict(
                omit_none=True)


class DbtProcessState(logbook.Processor):
    def __init__(self, value: str):
        self.value = value
        super().__init__()

    def process(self, record):
        overwrite = (
            'run_state' not in record.extra or
            record.extra['run_state'] == 'internal'
        )
        if overwrite:
            record.extra['run_state'] = self.value


class DbtModelState(logbook.Processor):
    def __init__(self, state: Dict[str, str]):
        self.state = state
        super().__init__()

    def process(self, record):
        record.extra.update(self.state)


class DbtStatusMessage(logbook.Processor):
    def process(self, record):
        record.extra['is_status_message'] = True


class UniqueID(logbook.Processor):
    def __init__(self, unique_id: str):
        self.unique_id = unique_id
        super().__init__()

    def process(self, record):
        record.extra['unique_id'] = self.unique_id


class NodeCount(logbook.Processor):
    def __init__(self, node_count: int):
        self.node_count = node_count
        super().__init__()

    def process(self, record):
        record.extra['node_count'] = self.node_count


class NodeMetadata(logbook.Processor):
    def __init__(self, node, index):
        self.node = node
        self.index = index
        super().__init__()

    def mapping_keys(self):
        return []

    def process_keys(self, record):
        for attr, key in self.mapping_keys():
            value = getattr(self.node, attr, None)
            if value is not None:
                record.extra[key] = value

    def process(self, record):
        self.process_keys(record)
        record.extra['node_index'] = self.index


class ModelMetadata(NodeMetadata):
    def mapping_keys(self):
        return [
            ('alias', 'node_alias'),
            ('schema', 'node_schema'),
            ('database', 'node_database'),
            ('original_file_path', 'node_path'),
            ('name', 'node_name'),
            ('resource_type', 'resource_type'),
            ('depends_on_nodes', 'depends_on'),
        ]

    def process_config(self, record):
        if hasattr(self.node, 'config'):
            materialized = getattr(self.node.config, 'materialized', None)
            if materialized is not None:
                record.extra['node_materialized'] = materialized

    def process(self, record):
        super().process(record)
        self.process_config(record)


class HookMetadata(NodeMetadata):
    def mapping_keys(self):
        return [
            ('name', 'node_name'),
            ('resource_type', 'resource_type'),
        ]
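
# Sketch of the resulting record fields (values made up for illustration):
# given a model node, ModelMetadata copies the mapped attributes into
# record.extra, so a downstream handler would see something like:
#
#     record.extra == {
#         'node_alias': 'my_model', 'node_schema': 'analytics',
#         'node_database': 'prod', 'node_path': 'models/my_model.sql',
#         'node_name': 'my_model', 'resource_type': 'model',
#         'node_index': 1, 'node_materialized': 'table',
#     }
#
# The keys come from mapping_keys() and process_config() above.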


class TimestampNamed(logbook.Processor):
    def __init__(self, name: str):
        self.name = name
        super().__init__()

    def process(self, record):
        super().process(record)
        record.extra[self.name] = datetime.utcnow().isoformat()


class ScrubSecrets(logbook.Processor):
    def process(self, record):
        for secret in get_secret_env():
            record.message = str(record.message).replace(secret, "*****")
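
# Scrubbing sketch (env var and value assumed for illustration): the value of
# any env var named with the DBT_ENV_SECRET_ prefix is masked in every record
# message, using get_secret_env() above to enumerate the values:
#
#     os.environ['DBT_ENV_SECRET_TOKEN'] = 'hunter2'
#     with ScrubSecrets():
#         logger.info('token is hunter2')  # emitted as 'token is *****'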


logger = logbook.Logger('dbt')

# provide this for the cache, disabled by default
CACHE_LOGGER = logbook.Logger('dbt.cache')
CACHE_LOGGER.disable()

warnings.filterwarnings("ignore", category=ResourceWarning,
                        message="unclosed.*<socket.socket.*>")

initialized = False


def make_log_dir_if_missing(log_dir):
    import dbt.clients.system
    dbt.clients.system.make_directory(log_dir)


class DebugWarnings(logbook.compat.redirected_warnings):
    """Log warnings, except send them to 'debug' instead of 'warning' level.
    """
    def make_record(self, message, exception, filename, lineno):
        rv = super().make_record(message, exception, filename, lineno)
        rv.level = logbook.DEBUG
        rv.extra['from_warnings'] = True
        return rv


# push Python warnings to debug level logs. This will suppress all import-time
# warnings.
DebugWarnings().__enter__()
# redirect stdlib logging to logbook
_redirect_std_logging()


class DelayedFileHandler(logbook.RotatingFileHandler, FormatterMixin):
    def __init__(
        self,
        log_dir: Optional[str] = None,
        level=logbook.DEBUG,
        filter=None,
        bubble=True,
        max_size=10 * 1024 * 1024,  # 10 mb
        backup_count=5,
    ) -> None:
        self.disabled = False
        self._msg_buffer: Optional[List[logbook.LogRecord]] = []
        # if we get 1k messages without a logfile being set, something is
        # wrong
        self._bufmax = 1000
        self._log_path: Optional[str] = None
        # we need the base handler class' __init__ to run so handling works
        logbook.Handler.__init__(self, level, filter, bubble)
        if log_dir is not None:
            self.set_path(log_dir)
        self._text_format_string = None
        self._max_size = max_size
        self._backup_count = backup_count

    def reset(self):
        if self.initialized:
            self.close()
        self._log_path = None
        self._msg_buffer = []
        self.disabled = False

    @property
    def initialized(self):
        return self._log_path is not None

    def set_path(self, log_dir):
        """log_dir can be the path to a log directory, or `None` to avoid
        writing to a file (for `dbt debug`).
        """
        if self.disabled:
            return
        assert not self.initialized, 'set_path called after being set'
        if log_dir is None:
            self.disabled = True
            return
        make_log_dir_if_missing(log_dir)
        log_path = os.path.join(log_dir, 'dbt.log')
        self._super_init(log_path)
        self._replay_buffered()
        self._log_path = log_path

    def _super_init(self, log_path):
        logbook.RotatingFileHandler.__init__(
            self,
            filename=log_path,
            level=self.level,
            filter=self.filter,
            delay=True,
            max_size=self._max_size,
            backup_count=self._backup_count,
            bubble=self.bubble,
            format_string=DEBUG_LOG_FORMAT,
        )
        FormatterMixin.__init__(self, DEBUG_LOG_FORMAT)

    def _replay_buffered(self):
        assert self._msg_buffer is not None, \
            '_msg_buffer should never be None in _replay_buffered'
        for record in self._msg_buffer:
            super().emit(record)
        self._msg_buffer = None

    def format(self, record: logbook.LogRecord) -> str:
        msg = super().format(record)
        subbed = str(msg)
        for escape_sequence in dbt.ui.COLORS.values():
            subbed = subbed.replace(escape_sequence, '')
        return subbed

    def emit(self, record: logbook.LogRecord):
        """emit is not thread-safe with set_path, but it is thread-safe with
        itself
        """
        if self.disabled:
            return
        elif self.initialized:
            super().emit(record)
        else:
            assert self._msg_buffer is not None, \
                '_msg_buffer should never be None if _log_path is unset'
            self._msg_buffer.append(record)
            assert len(self._msg_buffer) < self._bufmax, \
                'too many messages received before initialization!'
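
# Buffering sketch (paths invented for illustration): records emitted before
# set_path() are held in memory and replayed into dbt.log once the log
# directory is known, so early startup messages are not lost:
#
#     handler = DelayedFileHandler()
#     with handler.applicationbound():
#         logger.debug('buffered until a path exists')
#         handler.set_path('logs/')  # replays the buffer into logs/dbt.log
#
# Passing set_path(None) instead disables file logging entirely, as the
# docstring above notes for `dbt debug`.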


class LogManager(logbook.NestedSetup):
    def __init__(self, stdout=colorama_stdout, stderr=sys.stderr):
        self.stdout = stdout
        self.stderr = stderr
        self._null_handler = logbook.NullHandler()
        self._output_handler = OutputHandler(self.stdout)
        self._file_handler = DelayedFileHandler()
        self._relevel_processor = Relevel(allowed=['dbt', 'werkzeug'])
        self._state_processor = DbtProcessState('internal')
        self._scrub_processor = ScrubSecrets()
        # keep track of whether we've already entered to decide if we should
        # be actually pushing. This allows us to log in main() and also
        # support entering dbt execution via handle_and_check.
        self._stack_depth = 0
        super().__init__([
            self._null_handler,
            self._output_handler,
            self._file_handler,
            self._relevel_processor,
            self._state_processor,
            self._scrub_processor,
        ])

    def push_application(self):
        self._stack_depth += 1
        if self._stack_depth == 1:
            super().push_application()

    def pop_application(self):
        self._stack_depth -= 1
        if self._stack_depth == 0:
            super().pop_application()

    def disable(self):
        self.add_handler(logbook.NullHandler())

    def add_handler(self, handler):
        """add a handler to the log manager that runs before the file handler.
        """
        self.objects.append(handler)

    # this is used by `dbt ls` to allow piping stdout to jq, etc
    def stderr_console(self):
        """Output to stderr at WARNING level instead of stdout"""
        self._output_handler.stream = self.stderr
        self._output_handler.level = logbook.WARNING

    def stdout_console(self):
        """enable stdout and disable stderr"""
        self._output_handler.stream = self.stdout
        self._output_handler.level = logbook.INFO

    def set_debug(self):
        self._output_handler.set_text_format(DEBUG_LOG_FORMAT)
        self._output_handler.level = logbook.DEBUG

    def set_path(self, path):
        self._file_handler.set_path(path)

    def initialized(self):
        return self._file_handler.initialized

    def format_json(self):
        for handler in self.objects:
            if isinstance(handler, FormatterMixin):
                handler.format_json()

    def format_text(self):
        for handler in self.objects:
            if isinstance(handler, FormatterMixin):
                handler.format_text()

    def reset_handlers(self):
        """Reset the handlers to their defaults. This is nice in testing!"""
        self.stdout_console()
        for handler in self.objects:
            if isinstance(handler, FormatterMixin):
                handler.reset()

    def set_output_stream(self, stream, error=None):
        if error is None:
            error = stream
        if self._output_handler.stream is self.stdout:
            self._output_handler.stream = stream
        elif self._output_handler.stream is self.stderr:
            self._output_handler.stream = error
        self.stdout = stream
        self.stderr = error


log_manager = LogManager()
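
# Lifecycle sketch (the call sites are assumptions about how dbt's entry
# points use this singleton, not verified here): push the manager once,
# configure it from CLI flags, and pop it on exit:
#
#     log_manager.push_application()
#     log_manager.set_path('logs/')  # or None, as for `dbt debug`
#     log_manager.format_json()      # e.g. with a JSON log-format flag
#     ...
#     log_manager.pop_application()
#
# The _stack_depth counter makes nested push/pop pairs safe to call from
# both main() and handle_and_check.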


def log_cache_events(flag):
    """Set the cache logger to propagate its messages based on the given flag.
    """
    # the flag is True if we should log, and False if we shouldn't, so
    # disabled is the inverse.
    CACHE_LOGGER.disabled = not flag


GLOBAL_LOGGER = logger


class LogMessageHandler(logbook.Handler):
    formatter_class = LogMessageFormatter

    def format_logmessage(self, record):
        """Format a LogRecord into a LogMessage"""
        message = self.format(record)
        return LogMessage.from_record_formatted(record, message)


class ListLogHandler(LogMessageHandler):
    def __init__(
        self,
        level: int = logbook.NOTSET,
        filter: Callable = None,
        bubble: bool = False,
        lst: Optional[List[LogMessage]] = None,
    ) -> None:
        super().__init__(level, filter, bubble)
        if lst is None:
            lst = []
        self.records: List[LogMessage] = lst

    def should_handle(self, record):
        """Only ever emit dbt-sourced log messages to the ListLogHandler."""
        if _root_channel(record) != 'dbt':
            return False
        return super().should_handle(record)

    def emit(self, record: logbook.LogRecord):
        as_dict = self.format_logmessage(record)
        self.records.append(as_dict)


def _env_log_level(var_name: str) -> int:
    # convert debugging environment variable name to a log level
    if dbt.flags.env_set_truthy(var_name):
        return logging.DEBUG
    else:
        return logging.ERROR


LOG_LEVEL_GOOGLE = _env_log_level('DBT_GOOGLE_DEBUG_LOGGING')
LOG_LEVEL_SNOWFLAKE = _env_log_level('DBT_SNOWFLAKE_CONNECTOR_DEBUG_LOGGING')
LOG_LEVEL_BOTOCORE = _env_log_level('DBT_BOTOCORE_DEBUG_LOGGING')
LOG_LEVEL_HTTP = _env_log_level('DBT_HTTP_DEBUG_LOGGING')
LOG_LEVEL_WERKZEUG = _env_log_level('DBT_WERKZEUG_DEBUG_LOGGING')

logging.getLogger('botocore').setLevel(LOG_LEVEL_BOTOCORE)
logging.getLogger('requests').setLevel(LOG_LEVEL_HTTP)
logging.getLogger('urllib3').setLevel(LOG_LEVEL_HTTP)
logging.getLogger('google').setLevel(LOG_LEVEL_GOOGLE)
logging.getLogger('snowflake.connector').setLevel(LOG_LEVEL_SNOWFLAKE)
logging.getLogger('parsedatetime').setLevel(logging.ERROR)
logging.getLogger('werkzeug').setLevel(LOG_LEVEL_WERKZEUG)


def list_handler(
    lst: Optional[List[LogMessage]],
    level=logbook.NOTSET,
) -> ContextManager:
    """Return a context manager that temporarily attaches a list to the
    logger.
    """
    return ListLogHandler(lst=lst, level=level, bubble=True)
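
# Capture sketch (a plausible test-style usage, not from this module): attach
# a list to collect structured LogMessages while a block runs; logbook
# handlers act as context managers:
#
#     msgs: List[LogMessage] = []
#     with list_handler(msgs):
#         logger.info('hello')
#     assert msgs and msgs[0].message == 'hello'
#
# Only records on the 'dbt' channel are captured, per
# ListLogHandler.should_handle.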


def get_timestamp():
    return time.strftime("%H:%M:%S")


def print_timestamped_line(msg: str, use_color: Optional[str] = None):
    if use_color is not None:
        msg = dbt.ui.color(msg, use_color)
    GLOBAL_LOGGER.info("{} | {}".format(get_timestamp(), msg))
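
# Output sketch (illustrative): print_timestamped_line prefixes the message
# with an HH:MM:SS timestamp, optionally colorized via dbt.ui.color:
#
#     print_timestamped_line('Done.')              # -> "12:34:56 | Done."
#     print_timestamped_line('Done.', some_color)  # wrapped via dbt.ui.color
#
# Here some_color stands for whatever dbt.ui.color accepts as its second
# argument; the exact accepted values are an assumption, not shown in this
# module.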