import json
import os
from typing import (
    Any, Dict, NoReturn, Optional, Mapping
)

from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.yaml_helper import (  # noqa: F401
    yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.version import __version__ as dbt_version

# These modules are added to the context. Consider alternative
# approaches which will extend well to potentially many modules
import pytz
import datetime
import re


def get_pytz_module_context() -> Dict[str, Any]:
    """Return every name listed in `pytz.__all__`, keyed by name."""
    context_exports = pytz.__all__  # type: ignore

    return {
        name: getattr(pytz, name) for name in context_exports
    }


def get_datetime_module_context() -> Dict[str, Any]:
    """Return a fixed subset of the `datetime` module, keyed by name."""
    context_exports = [
        'date',
        'datetime',
        'time',
        'timedelta',
        'tzinfo'
    ]

    return {
        name: getattr(datetime, name) for name in context_exports
    }


def get_re_module_context() -> Dict[str, Any]:
    """Return every name listed in `re.__all__`, keyed by name."""
    context_exports = re.__all__

    return {
        name: getattr(re, name) for name in context_exports
    }


def get_context_modules() -> Dict[str, Dict[str, Any]]:
    """Build the mapping of module name -> exported names for the context."""
    return {
        'pytz': get_pytz_module_context(),
        'datetime': get_datetime_module_context(),
        're': get_re_module_context(),
    }


class ContextMember:
    """Marker wrapping a class attribute that ContextMeta should collect.

    :param value: The wrapped object, stored as `inner`.
    :param name: Optional explicit context key; when None, the attribute
        name the member is assigned to is used instead (see `key`).
    """
    def __init__(self, value, name=None):
        self.name = name
        self.inner = value

    def key(self, default):
        """Return the explicit name if one was given, else `default`."""
        if self.name is None:
            return default
        return self.name


def contextmember(value):
    """Decorator marking an attribute as a context member.

    Usable bare (`@contextmember`) or with a string argument
    (`@contextmember('return')`) that sets the context key explicitly.
    """
    if isinstance(value, str):
        # called with a name: return the actual decorator
        return lambda v: ContextMember(v, name=value)
    return ContextMember(value)


def contextproperty(value):
    """Like `contextmember`, but wraps the decorated function in `property`.

    Usable bare or with a string argument that sets the context key.
    """
    if isinstance(value, str):
        # called with a name: return the actual decorator
        return lambda v: ContextMember(property(v), name=value)
    return ContextMember(property(value))


class ContextMeta(type):
    """Metaclass that gathers ContextMember attributes at class creation.

    The resulting class gets two extra attributes:

    - `_context_members_`: context key -> unwrapped member
    - `_context_attrs_`: context key -> attribute name on the class

    Both start from the values found on the base classes, and each
    ContextMember in the class body is replaced by its unwrapped value.
    """
    def __new__(mcls, name, bases, dct):
        context_members = {}
        context_attrs = {}
        new_dct = {}

        # start from whatever the bases already collected; later bases and
        # the class body itself override earlier entries with the same key
        for base in bases:
            context_members.update(getattr(base, '_context_members_', {}))
            context_attrs.update(getattr(base, '_context_attrs_', {}))

        for key, value in dct.items():
            if isinstance(value, ContextMember):
                context_key = value.key(key)
                context_members[context_key] = value.inner
                context_attrs[context_key] = key
                # the class itself holds the unwrapped object
                value = value.inner
            new_dct[key] = value
        new_dct['_context_members_'] = context_members
        new_dct['_context_attrs_'] = context_attrs
        return type.__new__(mcls, name, bases, new_dct)


class Var:
    """Callable that looks up a variable by name, with an optional default.

    :param context: The context passed to `get_rendered` when a string
        variable value is rendered.
    :param cli_vars: The variables available for lookup.
    :param node: Optional node, used for the name in error messages and
        passed to `raise_compiler_error`.
    """
    UndefinedVarError = "Required var '{}' not found in config:\nVars "\
                        "supplied to {} = {}"
    # sentinel so that `None` can be passed as a real default value
    _VAR_NOTSET = object()

    def __init__(
        self,
        context: Mapping[str, Any],
        cli_vars: Mapping[str, Any],
        node: Optional[CompiledResource] = None
    ) -> None:
        self._context: Mapping[str, Any] = context
        self._cli_vars: Mapping[str, Any] = cli_vars
        self._node: Optional[CompiledResource] = node
        self._merged: Mapping[str, Any] = self._generate_merged()

    def _generate_merged(self) -> Mapping[str, Any]:
        """Return the mapping that lookups are served from (the cli vars)."""
        return self._cli_vars

    @property
    def node_name(self):
        """The node's name, or `''` when no node was supplied."""
        if self._node is not None:
            return self._node.name
        else:
            return '<Configuration>'

    def get_missing_var(self, var_name):
        """Report `var_name` as missing via `raise_compiler_error`.

        The message lists the available vars as pretty-printed json.
        """
        # copy into a plain dict so json.dumps can handle any Mapping
        dct = {k: self._merged[k] for k in self._merged}
        pretty_vars = json.dumps(dct, sort_keys=True, indent=4)
        msg = self.UndefinedVarError.format(
            var_name, self.node_name, pretty_vars
        )
        raise_compiler_error(msg, self._node)

    def has_var(self, var_name: str):
        """Return True if `var_name` is among the available vars."""
        return var_name in self._merged

    def get_rendered_var(self, var_name):
        """Return the value of `var_name`, rendering it if it is a string."""
        raw = self._merged[var_name]
        # if bool/int/float/etc are passed in, don't compile anything
        if not isinstance(raw, str):
            return raw

        return get_rendered(raw, self._context)

    def __call__(self, var_name, default=_VAR_NOTSET):
        """Look up `var_name`.

        Returns the (rendered) value when the var exists, otherwise
        `default` when one was passed, otherwise reports the missing var.
        """
        if self.has_var(var_name):
            return self.get_rendered_var(var_name)
        elif default is not self._VAR_NOTSET:
            return default
        else:
            return self.get_missing_var(var_name)


class BaseContext(metaclass=ContextMeta):
    """Base set of context members, collected by ContextMeta.

    :param cli_vars: The variables handed to `Var` by the `var` member.
    """
    def __init__(self, cli_vars):
        self._ctx = {}
        self.cli_vars = cli_vars

    def generate_builtins(self):
        """Return a dict of context key -> member value bound to `self`."""
        builtins: Dict[str, Any] = {}
        for key, value in self._context_members_.items():
            if hasattr(value, '__get__'):
                # handle properties, bound methods, etc
                value = value.__get__(self)
            builtins[key] = value
        return builtins

    # no dbtClassMixin so this is not an actual override
    def to_dict(self):
        """Populate and return the context dictionary.

        The dict contains itself under 'context', the builtins under
        'builtins', and every builtin again at the top level.
        """
        self._ctx['context'] = self._ctx
        builtins = self.generate_builtins()
        self._ctx['builtins'] = builtins
        self._ctx.update(builtins)
        return self._ctx

    @contextproperty
    def dbt_version(self) -> str:
        """The `dbt_version` variable returns the installed version of dbt that
        is currently running. It can be used for debugging or auditing
        purposes.

        > macros/get_version.sql

            {% macro get_version() %}
              {% set msg = "The installed version of dbt is: " ~ dbt_version %}
              {% do log(msg, info=true) %}
            {% endmacro %}

        Example output:

            $ dbt run-operation get_version
            The installed version of dbt is 0.16.0
        """
        return dbt_version

    @contextproperty
    def var(self) -> Var:
        """Variables can be passed from your `dbt_project.yml` file into models
        during compilation. These variables are useful for configuring packages
        for deployment in multiple environments, or defining values that should
        be used across multiple models within a package.

        To add a variable to a model, use the `var()` function:

        > my_model.sql:

            select * from events where event_type = '{{ var("event_type") }}'

        If you try to run this model without supplying an `event_type`
        variable, you'll receive a compilation error that looks like this:

            Encountered an error:
            ! Compilation error while compiling model package_name.my_model:
            ! Required var 'event_type' not found in config:
            Vars supplied to package_name.my_model = {
            }

        To supply a variable to a given model, add one or more `vars`
        dictionaries to the `models` config in your `dbt_project.yml` file.
        These `vars` are in-scope for all models at or below where they are
        defined, so place them where they make the most sense. Below are three
        different placements of the `vars` dict, all of which will make the
        `my_model` model compile.

        > dbt_project.yml:

            # 1) scoped at the model level
            models:
              package_name:
                my_model:
                  materialized: view
                  vars:
                    event_type: activation
            # 2) scoped at the package level
            models:
              package_name:
                vars:
                  event_type: activation
                my_model:
                  materialized: view
            # 3) scoped globally
            models:
              vars:
                event_type: activation
              package_name:
                my_model:
                  materialized: view


        ## Variable default values

        The `var()` function takes an optional second argument, `default`. If
        this argument is provided, then it will be the default value for the
        variable if one is not explicitly defined.

        > my_model.sql:

            -- Use 'activation' as the event_type if the variable is not
            -- defined.
            select *
            from events
            where event_type = '{{ var("event_type", "activation") }}'
        """
        return Var(self._ctx, self.cli_vars)

    @contextmember
    @staticmethod
    def env_var(var: str, default: Optional[str] = None) -> str:
        """The env_var() function. Return the environment variable named 'var'.
        If there is no such environment variable set, return the default.

        If the default is None, raise an exception for an undefined variable.
        """
        if var in os.environ:
            return os.environ[var]
        elif default is not None:
            return default
        else:
            msg = f"Env var required but not provided: '{var}'"
            undefined_error(msg)

    # the debug member only exists when DBT_MACRO_DEBUGGING is set at the
    # time this class body is executed
    if os.environ.get('DBT_MACRO_DEBUGGING'):
        @contextmember
        @staticmethod
        def debug():
            """Enter a debugger at this line in the compiled jinja code."""
            import sys
            import ipdb  # type: ignore
            frame = sys._getframe(3)
            ipdb.set_trace(frame)
            return ''

    @contextmember('return')
    @staticmethod
    def _return(data: Any) -> NoReturn:
        """The `return` function can be used in macros to return data to the
        caller. The type of the data (`dict`, `list`, `int`, etc) will be
        preserved through the return call.

        :param data: The data to return to the caller


        > macros/example.sql:

            {% macro get_data() %}
              {{ return([1,2,3]) }}
            {% endmacro %}

        > models/my_model.sql:

            select
              -- get_data() returns a list!
              {% for i in get_data() %}
                {{ i }}
                {% if not loop.last %},{% endif %}
              {% endfor %}

        """
        raise MacroReturn(data)

    @contextmember
    @staticmethod
    def fromjson(string: str, default: Any = None) -> Any:
        """The `fromjson` context method can be used to deserialize a json
        string into a Python object primitive, eg. a `dict` or `list`.

        :param string: The json string to deserialize
        :param default: A default value to return if the `string` argument
            cannot be deserialized (optional)

        Usage:

            {% set my_json_str = '{"abc": 123}' %}
            {% set my_dict = fromjson(my_json_str) %}
            {% do log(my_dict['abc']) %}
        """
        try:
            return json.loads(string)
        # ValueError covers malformed json (JSONDecodeError); TypeError is
        # what json.loads raises for input that is not str/bytes/bytearray
        except (ValueError, TypeError):
            return default

    @contextmember
    @staticmethod
    def tojson(
        value: Any, default: Any = None, sort_keys: bool = False
    ) -> Any:
        """The `tojson` context method can be used to serialize a Python
        object primitive, eg. a `dict` or `list` to a json string.

        :param value: The value to serialize to json
        :param default: A default value to return if the `value` argument
            cannot be serialized
        :param sort_keys: If True, sort the keys.


        Usage:

            {% set my_dict = {"abc": 123} %}
            {% set my_json_string = tojson(my_dict) %}
            {% do log(my_json_string) %}
        """
        try:
            return json.dumps(value, sort_keys=sort_keys)
        # json.dumps raises TypeError for objects it cannot serialize and
        # ValueError for e.g. circular references
        except (ValueError, TypeError):
            return default

    @contextmember
    @staticmethod
    def fromyaml(value: str, default: Any = None) -> Any:
        """The fromyaml context method can be used to deserialize a yaml string
        into a Python object primitive, eg. a `dict` or `list`.

        :param value: The yaml string to deserialize
        :param default: A default value to return if the `value` argument
            cannot be deserialized (optional)

        Usage:

            {% set my_yml_str -%}
            dogs:
             - good
             - bad
            {%- endset %}
            {% set my_dict = fromyaml(my_yml_str) %}
            {% do log(my_dict['dogs'], info=true) %}
            -- ["good", "bad"]
            {% do my_dict['dogs'].pop() %}
            {% do log(my_dict['dogs'], info=true) %}
            -- ["good"]
        """
        try:
            return safe_load(value)
        except (AttributeError, ValueError, yaml.YAMLError):
            return default

    # safe_dump defaults to sort_keys=True, but we act like json.dumps (the
    # opposite)
    @contextmember
    @staticmethod
    def toyaml(
        value: Any, default: Optional[str] = None, sort_keys: bool = False
    ) -> Optional[str]:
        """The `toyaml` context method can be used to serialize a Python
        object primitive, eg. a `dict` or `list` to a yaml string.

        :param value: The value to serialize to yaml
        :param default: A default value to return if the `value` argument
            cannot be serialized
        :param sort_keys: If True, sort the keys.


        Usage:

            {% set my_dict = {"abc": 123} %}
            {% set my_yaml_string = toyaml(my_dict) %}
            {% do log(my_yaml_string) %}
        """
        try:
            return yaml.safe_dump(data=value, sort_keys=sort_keys)
        except (ValueError, yaml.YAMLError):
            return default

    @contextmember
    @staticmethod
    def log(msg: str, info: bool = False) -> str:
        """Logs a line to either the log file or stdout.

        :param msg: The message to log
        :param info: If `False`, write to the log file. If `True`, write to
            both the log file and stdout.

        > macros/my_log_macro.sql

            {% macro some_macro(arg1, arg2) %}
              {{ log("Running some_macro: " ~ arg1 ~ ", " ~ arg2) }}
            {% endmacro %}
        """
        if info:
            logger.info(msg)
        else:
            logger.debug(msg)
        # return an empty string so `{{ log(...) }}` renders nothing
        return ''

    @contextproperty
    def run_started_at(self) -> Optional[datetime.datetime]:
        """`run_started_at` outputs the timestamp that this run started, e.g.
        `2017-04-21 01:23:45.678`. The `run_started_at` variable is a Python
        `datetime` object. As of 0.9.1, the timezone of this variable defaults
        to UTC.

        > run_started_at_example.sql

            select
                '{{ run_started_at.strftime("%Y-%m-%d") }}' as date_day
            from ...


        To modify the timezone of this variable, use the `pytz` module:

        > run_started_at_utc.sql

            {% set est = modules.pytz.timezone("America/New_York") %}
            select
                '{{ run_started_at.astimezone(est) }}' as run_started_est
            from ...
        """
        if tracking.active_user is not None:
            return tracking.active_user.run_started_at
        else:
            return None

    @contextproperty
    def invocation_id(self) -> Optional[str]:
        """invocation_id outputs a UUID generated for this dbt run (useful for
        auditing)
        """
        if tracking.active_user is not None:
            return tracking.active_user.invocation_id
        else:
            return None

    @contextproperty
    def modules(self) -> Dict[str, Any]:
        """The `modules` variable in the Jinja context contains useful Python
        modules for operating on data.

        # datetime

        This variable is a pointer to the Python datetime module.

        Usage:

            {% set dt = modules.datetime.datetime.now() %}

        # pytz

        This variable is a pointer to the Python pytz module.

        Usage:

            {% set dt = modules.datetime.datetime(2002, 10, 27, 6, 0, 0) %}
            {% set dt_local = modules.pytz.timezone('US/Eastern').localize(dt) %}
            {{ dt_local }}
        """  # noqa
        return get_context_modules()

    @contextproperty
    def flags(self) -> Any:
        """The `flags` variable contains true/false values for flags provided
        on the command line.

        > flags.sql:

            {% if flags.FULL_REFRESH %}
            drop table ...
            {% else %}
            -- no-op
            {% endif %}

        The list of valid flags are:

        - `flags.STRICT_MODE`: True if `--strict` (or `-S`) was provided on the
            command line
        - `flags.FULL_REFRESH`: True if `--full-refresh` was provided on the
            command line
        - `flags.NON_DESTRUCTIVE`: True if `--non-destructive` was provided on
            the command line
        """
        return flags


def generate_base_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
    """Build a BaseContext from `cli_vars` and return it as a dict."""
    ctx = BaseContext(cli_vars)
    # This is not a Mashumaro to_dict call
    return ctx.to_dict()