"""Usage-event tracking for dbt, sent through the Snowplow tracker."""
from typing import Optional

from dbt.clients.yaml_helper import (  # noqa:F401
    yaml, safe_load, Loader, Dumper,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import version as dbt_version
from snowplow_tracker import Subject, Tracker, Emitter, logger as sp_logger
from snowplow_tracker import SelfDescribingJson
from datetime import datetime

import logbook
import pytz
import platform
import uuid
import requests
import os

# NOTE(review): presumably silences the snowplow tracker's own logger
# (100 is above logging.CRITICAL) -- confirm sp_logger is a stdlib logger.
sp_logger.setLevel(100)

COLLECTOR_URL = "fishtownanalytics.sinter-collect.com"
COLLECTOR_PROTOCOL = "https"

# Schema identifiers attached to the self-describing JSON contexts below.
INVOCATION_SPEC = 'iglu:com.dbt/invocation/jsonschema/1-0-2'
PLATFORM_SPEC = 'iglu:com.dbt/platform/jsonschema/1-0-0'
RUN_MODEL_SPEC = 'iglu:com.dbt/run_model/jsonschema/1-0-1'
INVOCATION_ENV_SPEC = 'iglu:com.dbt/invocation_env/jsonschema/1-0-0'
PACKAGE_INSTALL_SPEC = 'iglu:com.dbt/package_install/jsonschema/1-0-0'
RPC_REQUEST_SPEC = 'iglu:com.dbt/rpc_request/jsonschema/1-0-1'
DEPRECATION_WARN_SPEC = 'iglu:com.dbt/deprecation_warn/jsonschema/1-0-0'
LOAD_ALL_TIMING_SPEC = 'iglu:com.dbt/load_all_timing/jsonschema/1-0-3'
RESOURCE_COUNTS = 'iglu:com.dbt/resource_counts/jsonschema/1-0-0'
EXPERIMENTAL_PARSER = 'iglu:com.dbt/experimental_parser/jsonschema/1-0-0'
PARTIAL_PARSER = 'iglu:com.dbt/partial_parser/jsonschema/1-0-1'

# Name of the environment variable read by get_dbt_env_context().
DBT_INVOCATION_ENV = 'DBT_INVOCATION_ENV'


class TimeoutEmitter(Emitter):
    """Emitter whose HTTP requests use a 5 second timeout.

    Any reported delivery failure disables tracking (see handle_failure).
    """

    def __init__(self):
        super().__init__(
            COLLECTOR_URL,
            protocol=COLLECTOR_PROTOCOL,
            buffer_size=30,
            on_failure=self.handle_failure,
            method='post',
            # don't set this.
            byte_limit=None,
        )

    @staticmethod
    def handle_failure(num_ok, unsent):
        """Failure callback: warn once and turn tracking off.

        Neither argument is inspected.
        """
        # NOTE(review): an earlier comment here claimed num_ok is always 0 and
        # unsent always has 1 entry "because the buffer is length 1", but the
        # emitter is constructed with buffer_size=30 -- confirm which is
        # intended.
        logger.warning('Error sending message, disabling tracking')
        disable_tracking()

    def _log_request(self, request, payload):
        """Log an outgoing request of the given HTTP method and its payload."""
        sp_logger.info(f"Sending {request} request to {self.endpoint}...")
        sp_logger.debug(f"Payload: {payload}")

    def _log_result(self, request, status_code):
        """Log the status code; info for good codes, warning otherwise."""
        msg = f"{request} request finished with status code: {status_code}"
        if self.is_good_status_code(status_code):
            sp_logger.info(msg)
        else:
            sp_logger.warning(msg)

    def http_post(self, payload):
        """POST ``payload`` as JSON to the endpoint and return the response."""
        self._log_request('POST', payload)

        r = requests.post(
            self.endpoint,
            data=payload,
            headers={'content-type': 'application/json; charset=utf-8'},
            timeout=5.0
        )

        # Label the result with the method actually used (was 'GET').
        self._log_result('POST', r.status_code)
        return r

    def http_get(self, payload):
        """GET the endpoint with ``payload`` as query params; return response."""
        self._log_request('GET', payload)

        r = requests.get(self.endpoint, params=payload, timeout=5.0)

        self._log_result('GET', r.status_code)
        return r


emitter = TimeoutEmitter()
tracker = Tracker(
    emitter,
    namespace="cf",
    app_id="dbt",
)


class User:
    """Tracking identity and per-invocation metadata.

    A new User starts with tracking disabled; call initialize() to load (or
    create) the cookie id and enable tracking.
    """

    def __init__(self, cookie_dir):
        self.do_not_track = True
        self.cookie_dir = cookie_dir

        self.id = None
        self.invocation_id = str(uuid.uuid4())
        self.run_started_at = datetime.now(tz=pytz.utc)

    def state(self):
        """Return "do not track" or "tracking" according to do_not_track."""
        return "do not track" if self.do_not_track else "tracking"

    @property
    def cookie_path(self):
        """Path of the ``.user.yml`` cookie file inside cookie_dir."""
        return os.path.join(self.cookie_dir, '.user.yml')

    def initialize(self):
        """Enable tracking and register the cookie id as the tracker subject."""
        self.do_not_track = False

        cookie = self.get_cookie()
        self.id = cookie.get('id')

        subject = Subject()
        subject.set_user_id(self.id)
        tracker.set_subject(subject)

    def disable_tracking(self):
        """Turn tracking off and clear the id, cookie dir and tracker subject."""
        self.do_not_track = True
        self.id = None
        self.cookie_dir = None
        tracker.set_subject(None)

    def set_cookie(self):
        """Generate a fresh user id and persist it when possible.

        Returns the new ``{"id": ...}`` dict whether or not it was written.
        """
        # If the user points dbt to a profile directory which exists AND
        # contains a profiles.yml file, then we can set a cookie. If the
        # specified folder does not exist, or if there is not a profiles.yml
        # file in this folder, then the cookie is not written, so the id
        # changes on every dbt invocation until the user points to a profile
        # dir which contains a valid profiles.yml file.
        #
        # See: https://github.com/dbt-labs/dbt/issues/1645

        user = {"id": str(uuid.uuid4())}

        cookie_path = os.path.abspath(self.cookie_dir)
        profiles_file = os.path.join(cookie_path, 'profiles.yml')
        if os.path.exists(cookie_path) and os.path.exists(profiles_file):
            with open(self.cookie_path, "w") as fh:
                yaml.dump(user, fh)

        return user

    def get_cookie(self):
        """Load the cookie from disk, regenerating it when absent or unusable.

        A missing file, an empty document, or a YAML ReaderError all fall
        back to set_cookie().
        """
        if not os.path.isfile(self.cookie_path):
            user = self.set_cookie()
        else:
            with open(self.cookie_path, "r") as fh:
                try:
                    user = safe_load(fh)
                    if user is None:
                        user = self.set_cookie()
                except yaml.reader.ReaderError:
                    user = self.set_cookie()
        return user


active_user: Optional[User] = None


def get_run_type(args):
    """Return the run type reported in invocation events (always 'regular')."""
    return 'regular'


def get_invocation_context(user, config, args):
    """Build the plain-dict fields shared by every invocation event.

    ``config`` may be None; ``user`` and ``args`` must not be, because
    ``user.id``, ``user.invocation_id`` and ``args.which`` are read directly.
    """
    # this adapter might not have implemented the type or unique_field
    # properties (this also covers config being None)
    try:
        adapter_type = config.credentials.type
    except Exception:
        adapter_type = None
    try:
        adapter_unique_id = config.credentials.hashed_unique_field()
    except Exception:
        adapter_unique_id = None

    return {
        "project_id": None if config is None else config.hashed_name(),
        "user_id": user.id,
        "invocation_id": user.invocation_id,

        "command": args.which,
        "options": None,
        "version": str(dbt_version.installed),

        "run_type": get_run_type(args),
        "adapter_type": adapter_type,
        "adapter_unique_id": adapter_unique_id,
    }


def get_invocation_start_context(user, config, args):
    """Return the invocation context marked with progress "start"."""
    data = get_invocation_context(user, config, args)

    start_data = {
        "progress": "start",
        "result_type": None,
        "result": None
    }

    data.update(start_data)
    return SelfDescribingJson(INVOCATION_SPEC, data)


def get_invocation_end_context(user, config, args, result_type):
    """Return the invocation context marked with progress "end"."""
    data = get_invocation_context(user, config, args)

    end_data = {
        "progress": "end",
        "result_type": result_type,
        "result": None
    }

    data.update(end_data)
    return SelfDescribingJson(INVOCATION_SPEC, data)


def get_invocation_invalid_context(user, config, args, result_type):
    """Return the invocation context marked with progress "invalid"."""
    data = get_invocation_context(user, config, args)

    invalid_data = {
        "progress": "invalid",
        "result_type": result_type,
        "result": None
    }

    data.update(invalid_data)
    return SelfDescribingJson(INVOCATION_SPEC, data)


def get_platform_context():
    """Return a context describing the OS platform and Python runtime."""
    # NOTE(review): "python" carries the version string and "python_version"
    # carries the implementation name; this looks swapped but is kept as-is
    # because the keys are presumably fixed by PLATFORM_SPEC -- confirm.
    data = {
        "platform": platform.platform(),
        "python": platform.python_version(),
        "python_version": platform.python_implementation(),
    }

    return SelfDescribingJson(PLATFORM_SPEC, data)


def get_dbt_env_context():
    """Return a context holding $DBT_INVOCATION_ENV, or 'manual' if unset/empty."""
    default = 'manual'

    dbt_invocation_env = os.getenv(DBT_INVOCATION_ENV, default)
    if dbt_invocation_env == '':
        dbt_invocation_env = default

    data = {
        "environment": dbt_invocation_env,
    }

    return SelfDescribingJson(INVOCATION_ENV_SPEC, data)


def track(user, *args, **kwargs):
    """Send a structured event unless ``user`` has tracking disabled.

    Errors raised while sending are logged at debug level and swallowed so
    that tracking never interrupts the caller.
    """
    if user.do_not_track:
        return

    logger.debug("Sending event: {}".format(kwargs))
    try:
        tracker.track_struct_event(*args, **kwargs)
    except Exception:
        logger.debug(
            "An error was encountered while trying to send an event"
        )


def track_invocation_start(config=None, args=None):
    """Track the start of an invocation for the active user."""
    context = [
        get_invocation_start_context(active_user, config, args),
        get_platform_context(),
        get_dbt_env_context()
    ]

    track(
        active_user,
        category="dbt",
        action='invocation',
        label='start',
        context=context
    )


def track_project_load(options):
    """Track project load timing; ``options`` is the event payload."""
    context = [SelfDescribingJson(LOAD_ALL_TIMING_SPEC, options)]
    assert active_user is not None, \
        'Cannot track project loading time when active user is None'

    track(
        active_user,
        category='dbt',
        action='load_project',
        label=active_user.invocation_id,
        context=context
    )


def track_resource_counts(resource_counts):
    """Track resource counts; ``resource_counts`` is the event payload."""
    context = [SelfDescribingJson(RESOURCE_COUNTS, resource_counts)]
    assert active_user is not None, \
        'Cannot track resource counts when active user is None'

    track(
        active_user,
        category='dbt',
        action='resource_counts',
        label=active_user.invocation_id,
        context=context
    )


def track_model_run(options):
    """Track a model run; ``options`` is the event payload."""
    context = [SelfDescribingJson(RUN_MODEL_SPEC, options)]
    assert active_user is not None, \
        'Cannot track model runs when active user is None'

    track(
        active_user,
        category="dbt",
        action='run_model',
        label=active_user.invocation_id,
        context=context
    )


def track_rpc_request(options):
    """Track an RPC request; ``options`` is the event payload."""
    context = [SelfDescribingJson(RPC_REQUEST_SPEC, options)]
    assert active_user is not None, \
        'Cannot track rpc requests when active user is None'

    track(
        active_user,
        category="dbt",
        action='rpc_request',
        label=active_user.invocation_id,
        context=context
    )


def track_package_install(config, args, options):
    """Track a package install together with the invocation context."""
    assert active_user is not None, \
        'Cannot track package installs when active user is None'

    invocation_data = get_invocation_context(active_user, config, args)

    context = [
        SelfDescribingJson(INVOCATION_SPEC, invocation_data),
        SelfDescribingJson(PACKAGE_INSTALL_SPEC, options)
    ]

    track(
        active_user,
        category="dbt",
        action='package',
        label=active_user.invocation_id,
        property_='install',
        context=context
    )


def track_deprecation_warn(options):
    """Track a deprecation warning; ``options`` is the event payload."""
    assert active_user is not None, \
        'Cannot track deprecation warnings when active user is None'

    context = [
        SelfDescribingJson(DEPRECATION_WARN_SPEC, options)
    ]

    track(
        active_user,
        category="dbt",
        action='deprecation',
        label=active_user.invocation_id,
        property_='warn',
        context=context
    )


def track_invocation_end(
    config=None, args=None, result_type=None
):
    """Track the end of an invocation for the active user."""
    # Check before building the context: the context reads attributes of the
    # user, so asserting afterwards could never report a missing user.
    assert active_user is not None, \
        'Cannot track invocation end when active user is None'

    context = [
        get_invocation_end_context(active_user, config, args, result_type),
        get_platform_context(),
        get_dbt_env_context()
    ]

    track(
        active_user,
        category="dbt",
        action='invocation',
        label='end',
        context=context
    )


def track_invalid_invocation(
    config=None, args=None, result_type=None
):
    """Track an invalid invocation for the active user."""
    assert active_user is not None, \
        'Cannot track invalid invocations when active user is None'

    invocation_context = get_invocation_invalid_context(
        active_user,
        config,
        args,
        result_type
    )

    context = [
        invocation_context,
        get_platform_context(),
        get_dbt_env_context()
    ]

    track(
        active_user,
        category="dbt",
        action='invocation',
        label='invalid',
        context=context
    )


def track_experimental_parser_sample(options):
    """Track an experimental parser sample; ``options`` is the payload."""
    context = [SelfDescribingJson(EXPERIMENTAL_PARSER, options)]
    assert active_user is not None, \
        'Cannot track experimental parser info when active user is None'

    track(
        active_user,
        category='dbt',
        action='experimental_parser',
        label=active_user.invocation_id,
        context=context
    )


def track_partial_parser(options):
    """Track partial parser info; ``options`` is the event payload."""
    context = [SelfDescribingJson(PARTIAL_PARSER, options)]
    assert active_user is not None, \
        'Cannot track partial parser info when active user is None'

    track(
        active_user,
        category='dbt',
        action='partial_parser',
        label=active_user.invocation_id,
        context=context
    )


def flush():
    """Flush the tracker; errors are logged at debug level and swallowed."""
    logger.debug("Flushing usage events")
    try:
        tracker.flush()
    except Exception:
        logger.debug(
            "An error was encountered while trying to flush usage events"
        )


def disable_tracking():
    """Disable tracking on the active user, creating a disabled one if unset."""
    global active_user
    if active_user is not None:
        active_user.disable_tracking()
    else:
        active_user = User(None)


def do_not_track():
    """Replace the active user with a fresh, tracking-disabled User."""
    global active_user
    active_user = User(None)


def initialize_tracking(cookie_dir):
    """Create and initialize the active user from ``cookie_dir``.

    Any exception during initialization is logged and leaves a
    tracking-disabled user in place instead of propagating.
    """
    global active_user
    active_user = User(cookie_dir)
    try:
        active_user.initialize()
    except Exception:
        logger.debug('Got an exception trying to initialize tracking',
                     exc_info=True)
        active_user = User(None)


def get_invocation_id() -> Optional[str]:
    """Return the active user's invocation id, or None without an active user."""
    if active_user is None:
        return None
    else:
        return active_user.invocation_id


class InvocationProcessor(logbook.Processor):
    """Logbook processor that stamps records with invocation metadata."""

    def __init__(self):
        super().__init__()

    def process(self, record):
        """Add run_started_at and invocation_id to ``record.extra``."""
        if active_user is not None:
            record.extra.update({
                "run_started_at": active_user.run_started_at.isoformat(),
                "invocation_id": active_user.invocation_id,
            })