import itertools import os from copy import deepcopy from dataclasses import dataclass, fields from pathlib import Path from typing import ( Dict, Any, Optional, Mapping, Iterator, Iterable, Tuple, List, MutableSet, Type ) from .profile import Profile from .project import Project from .renderer import DbtProjectYamlRenderer, ProfileRenderer from .utils import parse_cli_vars from dbt import tracking from dbt.adapters.factory import get_relation_class_by_name, get_include_paths from dbt.helper_types import FQNPath, PathSet from dbt.context.base import generate_base_context from dbt.context.target import generate_target_context from dbt.contracts.connection import AdapterRequiredConfig, Credentials from dbt.contracts.graph.manifest import ManifestMetadata from dbt.contracts.relation import ComponentName from dbt.logger import GLOBAL_LOGGER as logger from dbt.ui import warning_tag from dbt.contracts.project import Configuration, UserConfig from dbt.exceptions import ( RuntimeException, DbtProfileError, DbtProjectError, validator_error_message, warn_or_error, raise_compiler_error ) from dbt.dataclass_schema import ValidationError def _project_quoting_dict( proj: Project, profile: Profile ) -> Dict[ComponentName, bool]: src: Dict[str, Any] = profile.credentials.translate_aliases(proj.quoting) result: Dict[ComponentName, bool] = {} for key in ComponentName: if key in src: value = src[key] if isinstance(value, bool): result[key] = value return result @dataclass class RuntimeConfig(Project, Profile, AdapterRequiredConfig): args: Any profile_name: str cli_vars: Dict[str, Any] dependencies: Optional[Mapping[str, 'RuntimeConfig']] = None def __post_init__(self): self.validate() @classmethod def from_parts( cls, project: Project, profile: Profile, args: Any, dependencies: Optional[Mapping[str, 'RuntimeConfig']] = None, ) -> 'RuntimeConfig': """Instantiate a RuntimeConfig from its components. :param profile: A parsed dbt Profile. :param project: A parsed dbt Project. :param args: The parsed command-line arguments. :returns RuntimeConfig: The new configuration. """ quoting: Dict[str, Any] = ( get_relation_class_by_name(profile.credentials.type) .get_default_quote_policy() .replace_dict(_project_quoting_dict(project, profile)) ).to_dict(omit_none=True) cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}')) return cls( project_name=project.project_name, version=project.version, project_root=project.project_root, source_paths=project.source_paths, macro_paths=project.macro_paths, data_paths=project.data_paths, test_paths=project.test_paths, analysis_paths=project.analysis_paths, docs_paths=project.docs_paths, asset_paths=project.asset_paths, target_path=project.target_path, snapshot_paths=project.snapshot_paths, clean_targets=project.clean_targets, log_path=project.log_path, modules_path=project.modules_path, quoting=quoting, models=project.models, on_run_start=project.on_run_start, on_run_end=project.on_run_end, dispatch=project.dispatch, seeds=project.seeds, snapshots=project.snapshots, dbt_version=project.dbt_version, packages=project.packages, manifest_selectors=project.manifest_selectors, selectors=project.selectors, query_comment=project.query_comment, sources=project.sources, tests=project.tests, vars=project.vars, config_version=project.config_version, unrendered=project.unrendered, profile_name=profile.profile_name, target_name=profile.target_name, config=profile.config, threads=profile.threads, credentials=profile.credentials, args=args, cli_vars=cli_vars, dependencies=dependencies, ) def new_project(self, project_root: str) -> 'RuntimeConfig': """Given a new project root, read in its project dictionary, supply the existing project's profile info, and create a new project file. :param project_root: A filepath to a dbt project. :raises DbtProfileError: If the profile is invalid. :raises DbtProjectError: If project is missing or invalid. :returns: The new configuration. """ # copy profile profile = Profile(**self.to_profile_info()) profile.validate() # load the new project and its packages. Don't pass cli variables. renderer = DbtProjectYamlRenderer(generate_target_context(profile, {})) project = Project.from_project_root( project_root, renderer, verify_version=getattr(self.args, 'version_check', False), ) cfg = self.from_parts( project=project, profile=profile, args=deepcopy(self.args), ) # force our quoting back onto the new project. cfg.quoting = deepcopy(self.quoting) return cfg def serialize(self) -> Dict[str, Any]: """Serialize the full configuration to a single dictionary. For any instance that has passed validate() (which happens in __init__), it matches the Configuration contract. Note that args are not serialized. :returns dict: The serialized configuration. """ result = self.to_project_config(with_packages=True) result.update(self.to_profile_info(serialize_credentials=True)) result['cli_vars'] = deepcopy(self.cli_vars) return result def validate(self): """Validate the configuration against its contract. :raises DbtProjectError: If the configuration fails validation. """ try: Configuration.validate(self.serialize()) except ValidationError as e: raise DbtProjectError(validator_error_message(e)) from e @classmethod def _get_rendered_profile( cls, args: Any, profile_renderer: ProfileRenderer, profile_name: Optional[str], ) -> Profile: return Profile.render_from_args( args, profile_renderer, profile_name ) @classmethod def collect_parts( cls: Type['RuntimeConfig'], args: Any ) -> Tuple[Project, Profile]: # profile_name from the project project_root = args.project_dir if args.project_dir else os.getcwd() version_check = getattr(args, 'version_check', False) partial = Project.partial_load( project_root, verify_version=version_check ) # build the profile using the base renderer and the one fact we know cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}')) profile_renderer = ProfileRenderer(generate_base_context(cli_vars)) profile_name = partial.render_profile_name(profile_renderer) profile = cls._get_rendered_profile( args, profile_renderer, profile_name ) # get a new renderer using our target information and render the # project ctx = generate_target_context(profile, cli_vars) project_renderer = DbtProjectYamlRenderer(ctx) project = partial.render(project_renderer) return (project, profile) @classmethod def from_args(cls, args: Any) -> 'RuntimeConfig': """Given arguments, read in dbt_project.yml from the current directory, read in packages.yml if it exists, and use them to find the profile to load. :param args: The arguments as parsed from the cli. :raises DbtProjectError: If the project is invalid or missing. :raises DbtProfileError: If the profile is invalid or missing. :raises ValidationException: If the cli variables are invalid. """ project, profile = cls.collect_parts(args) return cls.from_parts( project=project, profile=profile, args=args, ) def get_metadata(self) -> ManifestMetadata: return ManifestMetadata( project_id=self.hashed_name(), adapter_type=self.credentials.type ) def _get_v2_config_paths( self, config, path: FQNPath, paths: MutableSet[FQNPath], ) -> PathSet: for key, value in config.items(): if isinstance(value, dict) and not key.startswith('+'): self._get_v2_config_paths(value, path + (key,), paths) else: paths.add(path) return frozenset(paths) def _get_config_paths( self, config: Dict[str, Any], path: FQNPath = (), paths: Optional[MutableSet[FQNPath]] = None, ) -> PathSet: if paths is None: paths = set() for key, value in config.items(): if isinstance(value, dict) and not key.startswith('+'): self._get_v2_config_paths(value, path + (key,), paths) else: paths.add(path) return frozenset(paths) def get_resource_config_paths(self) -> Dict[str, PathSet]: """Return a dictionary with resource type keys whose values are lists of lists of strings, where each inner list of strings represents a configured path in the resource. """ return { 'models': self._get_config_paths(self.models), 'seeds': self._get_config_paths(self.seeds), 'snapshots': self._get_config_paths(self.snapshots), 'sources': self._get_config_paths(self.sources), 'tests': self._get_config_paths(self.tests), } def get_unused_resource_config_paths( self, resource_fqns: Mapping[str, PathSet], disabled: PathSet, ) -> List[FQNPath]: """Return a list of lists of strings, where each inner list of strings represents a type + FQN path of a resource configuration that is not used. """ disabled_fqns = frozenset(tuple(fqn) for fqn in disabled) resource_config_paths = self.get_resource_config_paths() unused_resource_config_paths = [] for resource_type, config_paths in resource_config_paths.items(): used_fqns = resource_fqns.get(resource_type, frozenset()) fqns = used_fqns | disabled_fqns for config_path in config_paths: if not _is_config_used(config_path, fqns): unused_resource_config_paths.append( (resource_type,) + config_path ) return unused_resource_config_paths def warn_for_unused_resource_config_paths( self, resource_fqns: Mapping[str, PathSet], disabled: PathSet, ) -> None: unused = self.get_unused_resource_config_paths(resource_fqns, disabled) if len(unused) == 0: return msg = UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE.format( len(unused), '\n'.join('- {}'.format('.'.join(u)) for u in unused) ) warn_or_error(msg, log_fmt=warning_tag('{}')) def load_dependencies(self) -> Mapping[str, 'RuntimeConfig']: if self.dependencies is None: all_projects = {self.project_name: self} internal_packages = get_include_paths(self.credentials.type) # raise exception if fewer installed packages than in packages.yml count_packages_specified = len(self.packages.packages) # type: ignore count_packages_installed = len(tuple(self._get_project_directories())) if count_packages_specified > count_packages_installed: raise_compiler_error( f'dbt found {count_packages_specified} package(s) ' f'specified in packages.yml, but only ' f'{count_packages_installed} package(s) installed ' f'in {self.modules_path}. Run "dbt deps" to ' f'install package dependencies.' ) project_paths = itertools.chain( internal_packages, self._get_project_directories() ) for project_name, project in self.load_projects(project_paths): if project_name in all_projects: raise_compiler_error( f'dbt found more than one package with the name ' f'"{project_name}" included in this project. Package ' f'names must be unique in a project. Please rename ' f'one of these packages.' ) all_projects[project_name] = project self.dependencies = all_projects return self.dependencies def clear_dependencies(self): self.dependencies = None def load_projects( self, paths: Iterable[Path] ) -> Iterator[Tuple[str, 'RuntimeConfig']]: for path in paths: try: project = self.new_project(str(path)) except DbtProjectError as e: raise DbtProjectError( f'Failed to read package: {e}', result_type='invalid_project', path=path, ) from e else: yield project.project_name, project def _get_project_directories(self) -> Iterator[Path]: root = Path(self.project_root) / self.modules_path if root.exists(): for path in root.iterdir(): if path.is_dir() and not path.name.startswith('__'): yield path class UnsetCredentials(Credentials): def __init__(self): super().__init__('', '') @property def type(self): return None @property def unique_field(self): return None def connection_info(self, *args, **kwargs): return {} def _connection_keys(self): return () class UnsetConfig(UserConfig): def __getattribute__(self, name): if name in {f.name for f in fields(UserConfig)}: raise AttributeError( f"'UnsetConfig' object has no attribute {name}" ) def __post_serialize__(self, dct): return {} class UnsetProfile(Profile): def __init__(self): self.credentials = UnsetCredentials() self.config = UnsetConfig() self.profile_name = '' self.target_name = '' self.threads = -1 def to_target_dict(self): return {} def __getattribute__(self, name): if name in {'profile_name', 'target_name', 'threads'}: raise RuntimeException( f'Error: disallowed attribute "{name}" - no profile!' ) return Profile.__getattribute__(self, name) @dataclass class UnsetProfileConfig(RuntimeConfig): """This class acts a lot _like_ a RuntimeConfig, except if your profile is missing, any access to profile members results in an exception. """ def __post_init__(self): # instead of futzing with InitVar overrides or rewriting __init__, just # `del` the attrs we don't want users touching. del self.profile_name del self.target_name # don't call super().__post_init__(), as that calls validate(), and # this object isn't very valid def __getattribute__(self, name): # Override __getattribute__ to check that the attribute isn't 'banned'. if name in {'profile_name', 'target_name'}: raise RuntimeException( f'Error: disallowed attribute "{name}" - no profile!' ) # avoid every attribute access triggering infinite recursion return RuntimeConfig.__getattribute__(self, name) def to_target_dict(self): # re-override the poisoned profile behavior return {} @classmethod def from_parts( cls, project: Project, profile: Profile, args: Any, dependencies: Optional[Mapping[str, 'RuntimeConfig']] = None, ) -> 'RuntimeConfig': """Instantiate a RuntimeConfig from its components. :param profile: Ignored. :param project: A parsed dbt Project. :param args: The parsed command-line arguments. :returns RuntimeConfig: The new configuration. """ cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}')) return cls( project_name=project.project_name, version=project.version, project_root=project.project_root, source_paths=project.source_paths, macro_paths=project.macro_paths, data_paths=project.data_paths, test_paths=project.test_paths, analysis_paths=project.analysis_paths, docs_paths=project.docs_paths, asset_paths=project.asset_paths, target_path=project.target_path, snapshot_paths=project.snapshot_paths, clean_targets=project.clean_targets, log_path=project.log_path, modules_path=project.modules_path, quoting=project.quoting, # we never use this anyway. models=project.models, on_run_start=project.on_run_start, on_run_end=project.on_run_end, dispatch=project.dispatch, seeds=project.seeds, snapshots=project.snapshots, dbt_version=project.dbt_version, packages=project.packages, manifest_selectors=project.manifest_selectors, selectors=project.selectors, query_comment=project.query_comment, sources=project.sources, tests=project.tests, vars=project.vars, config_version=project.config_version, unrendered=project.unrendered, profile_name='', target_name='', config=UnsetConfig(), threads=getattr(args, 'threads', 1), credentials=UnsetCredentials(), args=args, cli_vars=cli_vars, dependencies=dependencies, ) @classmethod def _get_rendered_profile( cls, args: Any, profile_renderer: ProfileRenderer, profile_name: Optional[str], ) -> Profile: try: profile = Profile.render_from_args( args, profile_renderer, profile_name ) except (DbtProjectError, DbtProfileError) as exc: logger.debug( 'Profile not loaded due to error: {}', exc, exc_info=True ) logger.info( 'No profile "{}" found, continuing with no target', profile_name ) # return the poisoned form profile = UnsetProfile() # disable anonymous usage statistics tracking.disable_tracking() return profile @classmethod def from_args(cls: Type[RuntimeConfig], args: Any) -> 'RuntimeConfig': """Given arguments, read in dbt_project.yml from the current directory, read in packages.yml if it exists, and use them to find the profile to load. :param args: The arguments as parsed from the cli. :raises DbtProjectError: If the project is invalid or missing. :raises DbtProfileError: If the profile is invalid or missing. :raises ValidationException: If the cli variables are invalid. """ project, profile = cls.collect_parts(args) if not isinstance(profile, UnsetProfile): # if it's a real profile, return a real config cls = RuntimeConfig return cls.from_parts( project=project, profile=profile, args=args ) UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE = """\ Configuration paths exist in your dbt_project.yml file which do not \ apply to any resources. There are {} unused configuration paths: {} """ def _is_config_used(path, fqns): if fqns: for fqn in fqns: if len(path) <= len(fqn) and fqn[:len(path)] == path: return True return False