import dbt.flags import dbt.ui import json import logging import os import sys import time import warnings from dataclasses import dataclass from datetime import datetime from typing import Optional, List, ContextManager, Callable, Dict, Any, Set import colorama import logbook from dbt.dataclass_schema import dbtClassMixin # Colorama needs some help on windows because we're using logger.info # intead of print(). If the Windows env doesn't have a TERM var set, # then we should override the logging stream to use the colorama # converter. If the TERM var is set (as with Git Bash), then it's safe # to send escape characters and no log handler injection is needed. colorama_stdout = sys.stdout colorama_wrap = True colorama.init(wrap=colorama_wrap) if sys.platform == 'win32' and not os.getenv('TERM'): colorama_wrap = False colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream elif sys.platform == 'win32': colorama_wrap = False colorama.init(wrap=colorama_wrap) STDOUT_LOG_FORMAT = '{record.message}' DEBUG_LOG_FORMAT = ( '{record.time:%Y-%m-%d %H:%M:%S.%f%z} ' '({record.thread_name}): ' '{record.message}' ) SECRET_ENV_PREFIX = 'DBT_ENV_SECRET_' def get_secret_env() -> List[str]: return [ v for k, v in os.environ.items() if k.startswith(SECRET_ENV_PREFIX) ] ExceptionInformation = str @dataclass class LogMessage(dbtClassMixin): timestamp: datetime message: str channel: str level: int levelname: str thread_name: str process: int extra: Optional[Dict[str, Any]] = None exc_info: Optional[ExceptionInformation] = None @classmethod def from_record_formatted(cls, record: logbook.LogRecord, message: str): extra = dict(record.extra) log_message = LogMessage( timestamp=record.time, message=message, channel=record.channel, level=record.level, levelname=logbook.get_level_name(record.level), extra=extra, thread_name=record.thread_name, process=record.process, exc_info=record.formatted_exception, ) return log_message class LogMessageFormatter(logbook.StringFormatter): def __call__(self, record, handler): data = self.format_record(record, handler) exc = self.format_exception(record) if exc: data.exc_info = exc return data def format_record(self, record, handler): message = super().format_record(record, handler) return LogMessage.from_record_formatted(record, message) class JsonFormatter(LogMessageFormatter): def __call__(self, record, handler): """Return a the record converted to LogMessage's JSON form""" # utils imports exceptions which imports logger... import dbt.utils log_message = super().__call__(record, handler) dct = log_message.to_dict(omit_none=True) return json.dumps(dct, cls=dbt.utils.JSONEncoder) class FormatterMixin: def __init__(self, format_string): self._text_format_string = format_string self.formatter_class = logbook.StringFormatter # triggers a formatter update via logbook.StreamHandler self.format_string = self._text_format_string def format_json(self): # set our formatter to the json formatter self.formatter_class = JsonFormatter self.format_string = STDOUT_LOG_FORMAT def format_text(self): # set our formatter to the regular stdout/stderr handler self.formatter_class = logbook.StringFormatter self.format_string = self._text_format_string def reset(self): raise NotImplementedError( 'reset() not implemented in FormatterMixin subclass' ) class OutputHandler(logbook.StreamHandler, FormatterMixin): """Output handler. The `format_string` parameter only changes the default text output, not debug mode or json. """ def __init__( self, stream, level=logbook.INFO, format_string=STDOUT_LOG_FORMAT, bubble=True, ) -> None: self._default_format = format_string logbook.StreamHandler.__init__( self, stream=stream, level=level, format_string=format_string, bubble=bubble, ) FormatterMixin.__init__(self, format_string) def set_text_format(self, format_string: str): """Set the text format to format_string. In JSON output mode, this is a noop. """ if self.formatter_class is logbook.StringFormatter: # reset text format self._text_format_string = format_string self.format_text() def reset(self): self.level = logbook.INFO self._text_format_string = self._default_format self.format_text() def should_handle(self, record): if record.level < self.level: return False text_mode = self.formatter_class is logbook.StringFormatter if text_mode and record.extra.get('json_only', False): return False elif not text_mode and record.extra.get('text_only', False): return False else: return True def _redirect_std_logging(): logbook.compat.redirect_logging() def _root_channel(record: logbook.LogRecord) -> str: return record.channel.split('.')[0] class Relevel(logbook.Processor): def __init__( self, allowed: List[str], min_level=logbook.WARNING, target_level=logbook.DEBUG, ) -> None: self.allowed: Set[str] = set(allowed) self.min_level = min_level self.target_level = target_level super().__init__() def process(self, record): if _root_channel(record) in self.allowed: return record.extra['old_level'] = record.level # suppress logs at/below our min level by lowering them to NOTSET if record.level < self.min_level: record.level = logbook.NOTSET # if we didn't mess with it, then lower all logs above our level to # our target level. else: record.level = self.target_level class JsonOnly(logbook.Processor): def process(self, record): record.extra['json_only'] = True class TextOnly(logbook.Processor): def process(self, record): record.extra['text_only'] = True class TimingProcessor(logbook.Processor): def __init__(self, timing_info: Optional[dbtClassMixin] = None): self.timing_info = timing_info super().__init__() def process(self, record): if self.timing_info is not None: record.extra['timing_info'] = self.timing_info.to_dict( omit_none=True) class DbtProcessState(logbook.Processor): def __init__(self, value: str): self.value = value super().__init__() def process(self, record): overwrite = ( 'run_state' not in record.extra or record.extra['run_state'] == 'internal' ) if overwrite: record.extra['run_state'] = self.value class DbtModelState(logbook.Processor): def __init__(self, state: Dict[str, str]): self.state = state super().__init__() def process(self, record): record.extra.update(self.state) class DbtStatusMessage(logbook.Processor): def process(self, record): record.extra['is_status_message'] = True class UniqueID(logbook.Processor): def __init__(self, unique_id: str): self.unique_id = unique_id super().__init__() def process(self, record): record.extra['unique_id'] = self.unique_id class NodeCount(logbook.Processor): def __init__(self, node_count: int): self.node_count = node_count super().__init__() def process(self, record): record.extra['node_count'] = self.node_count class NodeMetadata(logbook.Processor): def __init__(self, node, index): self.node = node self.index = index super().__init__() def mapping_keys(self): return [] def process_keys(self, record): for attr, key in self.mapping_keys(): value = getattr(self.node, attr, None) if value is not None: record.extra[key] = value def process(self, record): self.process_keys(record) record.extra['node_index'] = self.index class ModelMetadata(NodeMetadata): def mapping_keys(self): return [ ('alias', 'node_alias'), ('schema', 'node_schema'), ('database', 'node_database'), ('original_file_path', 'node_path'), ('name', 'node_name'), ('resource_type', 'resource_type'), ('depends_on_nodes', 'depends_on'), ] def process_config(self, record): if hasattr(self.node, 'config'): materialized = getattr(self.node.config, 'materialized', None) if materialized is not None: record.extra['node_materialized'] = materialized def process(self, record): super().process(record) self.process_config(record) class HookMetadata(NodeMetadata): def mapping_keys(self): return [ ('name', 'node_name'), ('resource_type', 'resource_type'), ] class TimestampNamed(logbook.Processor): def __init__(self, name: str): self.name = name super().__init__() def process(self, record): super().process(record) record.extra[self.name] = datetime.utcnow().isoformat() class ScrubSecrets(logbook.Processor): def process(self, record): for secret in get_secret_env(): record.message = str(record.message).replace(secret, "*****") logger = logbook.Logger('dbt') # provide this for the cache, disabled by default CACHE_LOGGER = logbook.Logger('dbt.cache') CACHE_LOGGER.disable() warnings.filterwarnings("ignore", category=ResourceWarning, message="unclosed.*") initialized = False def make_log_dir_if_missing(log_dir): import dbt.clients.system dbt.clients.system.make_directory(log_dir) class DebugWarnings(logbook.compat.redirected_warnings): """Log warnings, except send them to 'debug' instead of 'warning' level. """ def make_record(self, message, exception, filename, lineno): rv = super().make_record(message, exception, filename, lineno) rv.level = logbook.DEBUG rv.extra['from_warnings'] = True return rv # push Python warnings to debug level logs. This will suppress all import-time # warnings. DebugWarnings().__enter__() # redirect stdlib logging to logbook _redirect_std_logging() class DelayedFileHandler(logbook.RotatingFileHandler, FormatterMixin): def __init__( self, log_dir: Optional[str] = None, level=logbook.DEBUG, filter=None, bubble=True, max_size=10 * 1024 * 1024, # 10 mb backup_count=5, ) -> None: self.disabled = False self._msg_buffer: Optional[List[logbook.LogRecord]] = [] # if we get 1k messages without a logfile being set, something is wrong self._bufmax = 1000 self._log_path: Optional[str] = None # we need the base handler class' __init__ to run so handling works logbook.Handler.__init__(self, level, filter, bubble) if log_dir is not None: self.set_path(log_dir) self._text_format_string = None self._max_size = max_size self._backup_count = backup_count def reset(self): if self.initialized: self.close() self._log_path = None self._msg_buffer = [] self.disabled = False @property def initialized(self): return self._log_path is not None def set_path(self, log_dir): """log_dir can be the path to a log directory, or `None` to avoid writing to a file (for `dbt debug`). """ if self.disabled: return assert not self.initialized, 'set_path called after being set' if log_dir is None: self.disabled = True return make_log_dir_if_missing(log_dir) log_path = os.path.join(log_dir, 'dbt.log') self._super_init(log_path) self._replay_buffered() self._log_path = log_path def _super_init(self, log_path): logbook.RotatingFileHandler.__init__( self, filename=log_path, level=self.level, filter=self.filter, delay=True, max_size=self._max_size, backup_count=self._backup_count, bubble=self.bubble, format_string=DEBUG_LOG_FORMAT, ) FormatterMixin.__init__(self, DEBUG_LOG_FORMAT) def _replay_buffered(self): assert self._msg_buffer is not None, \ '_msg_buffer should never be None in _replay_buffered' for record in self._msg_buffer: super().emit(record) self._msg_buffer = None def format(self, record: logbook.LogRecord) -> str: msg = super().format(record) subbed = str(msg) for escape_sequence in dbt.ui.COLORS.values(): subbed = subbed.replace(escape_sequence, '') return subbed def emit(self, record: logbook.LogRecord): """emit is not thread-safe with set_path, but it is thread-safe with itself """ if self.disabled: return elif self.initialized: super().emit(record) else: assert self._msg_buffer is not None, \ '_msg_buffer should never be None if _log_path is set' self._msg_buffer.append(record) assert len(self._msg_buffer) < self._bufmax, \ 'too many messages received before initilization!' class LogManager(logbook.NestedSetup): def __init__(self, stdout=colorama_stdout, stderr=sys.stderr): self.stdout = stdout self.stderr = stderr self._null_handler = logbook.NullHandler() self._output_handler = OutputHandler(self.stdout) self._file_handler = DelayedFileHandler() self._relevel_processor = Relevel(allowed=['dbt', 'werkzeug']) self._state_processor = DbtProcessState('internal') self._scrub_processor = ScrubSecrets() # keep track of whether we've already entered to decide if we should # be actually pushing. This allows us to log in main() and also # support entering dbt execution via handle_and_check. self._stack_depth = 0 super().__init__([ self._null_handler, self._output_handler, self._file_handler, self._relevel_processor, self._state_processor, self._scrub_processor ]) def push_application(self): self._stack_depth += 1 if self._stack_depth == 1: super().push_application() def pop_application(self): self._stack_depth -= 1 if self._stack_depth == 0: super().pop_application() def disable(self): self.add_handler(logbook.NullHandler()) def add_handler(self, handler): """add an handler to the log manager that runs before the file handler. """ self.objects.append(handler) # this is used by `dbt ls` to allow piping stdout to jq, etc def stderr_console(self): """Output to stderr at WARNING level instead of stdout""" self._output_handler.stream = self.stderr self._output_handler.level = logbook.WARNING def stdout_console(self): """enable stdout and disable stderr""" self._output_handler.stream = self.stdout self._output_handler.level = logbook.INFO def set_debug(self): self._output_handler.set_text_format(DEBUG_LOG_FORMAT) self._output_handler.level = logbook.DEBUG def set_path(self, path): self._file_handler.set_path(path) def initialized(self): return self._file_handler.initialized def format_json(self): for handler in self.objects: if isinstance(handler, FormatterMixin): handler.format_json() def format_text(self): for handler in self.objects: if isinstance(handler, FormatterMixin): handler.format_text() def reset_handlers(self): """Reset the handlers to their defaults. This is nice in testing!""" self.stdout_console() for handler in self.objects: if isinstance(handler, FormatterMixin): handler.reset() def set_output_stream(self, stream, error=None): if error is None: error = stream if self._output_handler.stream is self.stdout: self._output_handler.stream = stream elif self._output_handler.stream is self.stderr: self._output_handler.stream = error self.stdout = stream self.stderr = error log_manager = LogManager() def log_cache_events(flag): """Set the cache logger to propagate its messages based on the given flag. """ # the flag is True if we should log, and False if we shouldn't, so disabled # is the inverse. CACHE_LOGGER.disabled = not flag GLOBAL_LOGGER = logger class LogMessageHandler(logbook.Handler): formatter_class = LogMessageFormatter def format_logmessage(self, record): """Format a LogRecord into a LogMessage""" message = self.format(record) return LogMessage.from_record_formatted(record, message) class ListLogHandler(LogMessageHandler): def __init__( self, level: int = logbook.NOTSET, filter: Callable = None, bubble: bool = False, lst: Optional[List[LogMessage]] = None ) -> None: super().__init__(level, filter, bubble) if lst is None: lst = [] self.records: List[LogMessage] = lst def should_handle(self, record): """Only ever emit dbt-sourced log messages to the ListHandler.""" if _root_channel(record) != 'dbt': return False return super().should_handle(record) def emit(self, record: logbook.LogRecord): as_dict = self.format_logmessage(record) self.records.append(as_dict) def _env_log_level(var_name: str) -> int: # convert debugging environment variable name to a log level if dbt.flags.env_set_truthy(var_name): return logging.DEBUG else: return logging.ERROR LOG_LEVEL_GOOGLE = _env_log_level('DBT_GOOGLE_DEBUG_LOGGING') LOG_LEVEL_SNOWFLAKE = _env_log_level('DBT_SNOWFLAKE_CONNECTOR_DEBUG_LOGGING') LOG_LEVEL_BOTOCORE = _env_log_level('DBT_BOTOCORE_DEBUG_LOGGING') LOG_LEVEL_HTTP = _env_log_level('DBT_HTTP_DEBUG_LOGGING') LOG_LEVEL_WERKZEUG = _env_log_level('DBT_WERKZEUG_DEBUG_LOGGING') logging.getLogger('botocore').setLevel(LOG_LEVEL_BOTOCORE) logging.getLogger('requests').setLevel(LOG_LEVEL_HTTP) logging.getLogger('urllib3').setLevel(LOG_LEVEL_HTTP) logging.getLogger('google').setLevel(LOG_LEVEL_GOOGLE) logging.getLogger('snowflake.connector').setLevel(LOG_LEVEL_SNOWFLAKE) logging.getLogger('parsedatetime').setLevel(logging.ERROR) logging.getLogger('werkzeug').setLevel(LOG_LEVEL_WERKZEUG) def list_handler( lst: Optional[List[LogMessage]], level=logbook.NOTSET, ) -> ContextManager: """Return a context manager that temporarly attaches a list to the logger. """ return ListLogHandler(lst=lst, level=level, bubble=True) def get_timestamp(): return time.strftime("%H:%M:%S") def print_timestamped_line(msg: str, use_color: Optional[str] = None): if use_color is not None: msg = dbt.ui.color(msg, use_color) GLOBAL_LOGGER.info("{} | {}".format(get_timestamp(), msg))