import dataclasses
import os
from datetime import datetime
from typing import (
    List, Tuple, ClassVar, Type, TypeVar, Dict, Any, Optional
)

from dbt.clients.system import write_json, read_json
from dbt.exceptions import (
    InternalException,
    RuntimeException,
)
from dbt.version import __version__
from dbt.tracking import get_invocation_id
from dbt.dataclass_schema import dbtClassMixin

SourceKey = Tuple[str, str]


def list_str() -> List[str]:
    """Mypy gets upset about stuff like:

    from dataclasses import dataclass, field
    from typing import Optional, List

    @dataclass
    class Foo:
        x: Optional[List[str]] = field(default_factory=list)


    Because `list` could be any kind of list, I guess
    """
    return []


class Replaceable:
    """Mixin adding a `replace` method to dataclasses."""

    def replace(self, **kwargs):
        """Return a copy of this dataclass with `kwargs` fields overridden."""
        return dataclasses.replace(self, **kwargs)


class Mergeable(Replaceable):
    """Mixin adding a shallow, None-skipping merge to dataclasses."""

    def merged(self, *args):
        """Perform a shallow merge, where the last non-None write wins. This is
        intended to merge dataclasses that are a collection of optional values.
        """
        replacements = {}
        cls = type(self)
        for arg in args:
            # Only the fields declared on this class are considered; every
            # arg is expected to expose an attribute for each of them.
            for field in dataclasses.fields(cls):
                value = getattr(arg, field.name)
                if value is not None:
                    replacements[field.name] = value

        return self.replace(**replacements)


class Writable:
    """Mixin that writes the object's dict form to a JSON file."""

    def write(self, path: str):
        """Serialize `self.to_dict(omit_none=False)` to `path` as JSON."""
        write_json(
            path, self.to_dict(omit_none=False)  # type: ignore
        )


class AdditionalPropertiesMixin:
    """Make this class an extensible property.

    The underlying class definition must include a type definition for a field
    named '_extra' that is of type `Dict[str, Any]`.
    """
    ADDITIONAL_PROPERTIES = True

    # This takes attributes in the dictionary that are
    # not in the class definitions and puts them in an
    # _extra dict in the class
    @classmethod
    def __pre_deserialize__(cls, data):
        # dir() did not work because fields with
        # metadata settings are not found
        # The original version of this would create the
        # object first and then update extra with the
        # extra keys, but that won't work here, so
        # we're copying the dict so we don't insert the
        # _extra in the original data. This also requires
        # that Mashumaro actually build the '_extra' field
        cls_keys = cls._get_field_names()
        new_dict = {}
        for key, value in data.items():
            if key not in cls_keys and key != '_extra':
                # Unknown key: collect it under '_extra', creating the
                # bucket on first use.
                new_dict.setdefault('_extra', {})[key] = value
            else:
                new_dict[key] = value
        data = new_dict
        data = super().__pre_deserialize__(data)
        return data

    def __post_serialize__(self, dct):
        """Flatten the extra properties back into the serialized dict."""
        data = super().__post_serialize__(dct)
        data.update(self.extra)
        if '_extra' in data:
            del data['_extra']
        return data

    def replace(self, **kwargs):
        """Return a new instance built from this one's dict plus `kwargs`."""
        dct = self.to_dict(omit_none=False)
        dct.update(kwargs)
        return self.from_dict(dct)

    @property
    def extra(self):
        """The dict of additional (non-field) properties."""
        return self._extra


class Readable:
    """Mixin that builds an instance from a JSON file."""

    @classmethod
    def read(cls, path: str):
        """Read `path` as JSON and return `cls.from_dict` of its contents.

        Raises RuntimeException if the file cannot be read or parsed
        (EnvironmentError or ValueError from `read_json`).
        """
        try:
            data = read_json(path)
        except (EnvironmentError, ValueError) as exc:
            raise RuntimeException(
                f'Could not read {cls.__name__} at "{path}" as JSON: {exc}'
            ) from exc

        return cls.from_dict(data)  # type: ignore


BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
SCHEMA_PATH = 'dbt/{name}/v{version}.json'


# Identifies a named, versioned schema; str() yields its full URL.
@dataclasses.dataclass
class SchemaVersion:
    name: str
    version: int

    @property
    def path(self) -> str:
        # Relative path of the schema document, built from SCHEMA_PATH.
        return SCHEMA_PATH.format(
            name=self.name,
            version=self.version
        )

    def __str__(self) -> str:
        return BASE_SCHEMAS_URL + self.path


SCHEMA_VERSION_KEY = 'dbt_schema_version'


METADATA_ENV_PREFIX = 'DBT_ENV_CUSTOM_ENV_'


def get_metadata_env() -> Dict[str, str]:
    """Return the environment variables whose names start with
    METADATA_ENV_PREFIX, keyed by the name with that prefix stripped.
    """
    return {
        k[len(METADATA_ENV_PREFIX):]: v for k, v in os.environ.items()
        if k.startswith(METADATA_ENV_PREFIX)
    }


# Metadata common to artifacts. Every field except dbt_schema_version has a
# default: the dbt version, the current UTC time, the invocation id and the
# custom metadata environment variables.
@dataclasses.dataclass
class BaseArtifactMetadata(dbtClassMixin):
    dbt_schema_version: str
    dbt_version: str = __version__
    generated_at: datetime = dataclasses.field(
        default_factory=datetime.utcnow
    )
    invocation_id: Optional[str] = dataclasses.field(
        default_factory=get_invocation_id
    )
    env: Dict[str, str] = dataclasses.field(default_factory=get_metadata_env)


def schema_version(name: str, version: int):
    """Class decorator factory that sets `dbt_schema_version` on the
    decorated class to `SchemaVersion(name, version)`.
    """
    def inner(cls: Type[VersionedSchema]):
        cls.dbt_schema_version = SchemaVersion(
            name=name,
            version=version,
        )
        return cls
    return inner


# A schema class carrying a class-level SchemaVersion. The attribute is only
# declared here; the `schema_version` decorator is what assigns it.
@dataclasses.dataclass
class VersionedSchema(dbtClassMixin):
    dbt_schema_version: ClassVar[SchemaVersion]

    @classmethod
    def json_schema(cls, embeddable: bool = False) -> Dict[str, Any]:
        result = super().json_schema(embeddable=embeddable)
        # Only a standalone (non-embeddable) schema gets an '$id'.
        if not embeddable:
            result['$id'] = str(cls.dbt_schema_version)
        return result


T = TypeVar('T', bound='ArtifactMixin')


# metadata should really be a Generic[T_M] where T_M is a TypeVar bound to
# BaseArtifactMetadata. Unfortunately this isn't possible due to a mypy issue:
# https://github.com/python/mypy/issues/7520
@dataclasses.dataclass(init=False)
class ArtifactMixin(VersionedSchema, Writable, Readable):
    metadata: BaseArtifactMetadata

    @classmethod
    def validate(cls, data):
        super().validate(data)
        # `dbt_schema_version` is a bare ClassVar annotation, so on a class
        # that was never decorated with `schema_version` the attribute does
        # not exist at all. Look it up with a default so that case raises
        # the intended InternalException rather than an AttributeError.
        if getattr(cls, 'dbt_schema_version', None) is None:
            raise InternalException(
                'Cannot call from_dict with no schema version!'
            )