import dataclasses
import os
from datetime import datetime
from typing import (
    List, Tuple, ClassVar, Type, TypeVar, Dict, Any, Optional
)

from dbt.clients.system import write_json, read_json
from dbt.exceptions import (
    InternalException,
    RuntimeException,
)
from dbt.version import __version__
from dbt.tracking import get_invocation_id
from dbt.dataclass_schema import dbtClassMixin

SourceKey = Tuple[str, str]


def list_str() -> List[str]:
    """Mypy gets upset about stuff like:

    from dataclasses import dataclass, field
    from typing import Optional, List

    @dataclass
    class Foo:
        x: Optional[List[str]] = field(default_factory=list)


    Because `list` could be any kind of list, I guess
    """
    return []


class Replaceable:
    def replace(self, **kwargs):
        return dataclasses.replace(self, **kwargs)


class Mergeable(Replaceable):
    def merged(self, *args):
        """Perform a shallow merge, where the last non-None write wins. This is
        intended to merge dataclasses that are a collection of optional values.
        """
        replacements = {}
        cls = type(self)
        for arg in args:
            for field in dataclasses.fields(cls):
                value = getattr(arg, field.name)
                if value is not None:
                    replacements[field.name] = value

        return self.replace(**replacements)


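# Usage sketch: a hypothetical Creds dataclass mixing in Mergeable, showing
# that the last non-None value wins on a per-field basis.
#
#     @dataclasses.dataclass
#     class Creds(Mergeable):
#         host: Optional[str] = None
#         port: Optional[int] = None
#
#     Creds(host='localhost').merged(Creds(port=5432))
#     # -> Creds(host='localhost', port=5432)

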
class Writable:
    def write(self, path: str):
        write_json(
            path, self.to_dict(omit_none=False)  # type: ignore
        )


class AdditionalPropertiesMixin:
    """Make this class an extensible property.

    The underlying class definition must include a type definition for a field
    named '_extra' that is of type `Dict[str, Any]`.
    """
    ADDITIONAL_PROPERTIES = True

    # This takes attributes in the dictionary that are
    # not in the class definitions and puts them in an
    # _extra dict in the class
    @classmethod
    def __pre_deserialize__(cls, data):
        # dir() did not work because fields with
        # metadata settings are not found
        # The original version of this would create the
        # object first and then update extra with the
        # extra keys, but that won't work here, so
        # we're copying the dict so we don't insert the
        # _extra in the original data. This also requires
        # that Mashumaro actually build the '_extra' field
        cls_keys = cls._get_field_names()
        new_dict = {}
        for key, value in data.items():
            if key not in cls_keys and key != '_extra':
                if '_extra' not in new_dict:
                    new_dict['_extra'] = {}
                new_dict['_extra'][key] = value
            else:
                new_dict[key] = value
        data = new_dict
        data = super().__pre_deserialize__(data)
        return data

    def __post_serialize__(self, dct):
        data = super().__post_serialize__(dct)
        data.update(self.extra)
        if '_extra' in data:
            del data['_extra']
        return data

    def replace(self, **kwargs):
        dct = self.to_dict(omit_none=False)
        dct.update(kwargs)
        return self.from_dict(dct)

    @property
    def extra(self):
        return self._extra


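# Usage sketch: for a hypothetical dbtClassMixin subclass (here called
# HasExtras) that mixes in AdditionalPropertiesMixin and declares
# `_extra: Dict[str, Any]`, keys that are not declared fields are routed into
# '_extra' on deserialization and merged back into the top-level dict on
# serialization:
#
#     obj = HasExtras.from_dict({'name': 'a', 'custom_key': 1})
#     obj.extra      # -> {'custom_key': 1}
#     obj.to_dict()  # -> {'name': 'a', 'custom_key': 1}

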
class Readable:
    @classmethod
    def read(cls, path: str):
        try:
            data = read_json(path)
        except (EnvironmentError, ValueError) as exc:
            raise RuntimeException(
                f'Could not read {cls.__name__} at "{path}" as JSON: {exc}'
            ) from exc

        return cls.from_dict(data)  # type: ignore


BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
SCHEMA_PATH = 'dbt/{name}/v{version}.json'


@dataclasses.dataclass
class SchemaVersion:
    name: str
    version: int

    @property
    def path(self) -> str:
        return SCHEMA_PATH.format(
            name=self.name,
            version=self.version
        )

    def __str__(self) -> str:
        return BASE_SCHEMAS_URL + self.path


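# Usage sketch: how a SchemaVersion renders its path and URL.
#
#     v = SchemaVersion(name='manifest', version=1)
#     v.path   # -> 'dbt/manifest/v1.json'
#     str(v)   # -> 'https://schemas.getdbt.com/dbt/manifest/v1.json'

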
SCHEMA_VERSION_KEY = 'dbt_schema_version'


METADATA_ENV_PREFIX = 'DBT_ENV_CUSTOM_ENV_'


def get_metadata_env() -> Dict[str, str]:
    return {
        k[len(METADATA_ENV_PREFIX):]: v for k, v in os.environ.items()
        if k.startswith(METADATA_ENV_PREFIX)
    }


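# Usage sketch: with DBT_ENV_CUSTOM_ENV_team=analytics set in the environment,
# get_metadata_env() returns {'team': 'analytics'}; the prefix is stripped and
# variables without the prefix are ignored.

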
@dataclasses.dataclass
class BaseArtifactMetadata(dbtClassMixin):
    dbt_schema_version: str
    dbt_version: str = __version__
    generated_at: datetime = dataclasses.field(
        default_factory=datetime.utcnow
    )
    invocation_id: Optional[str] = dataclasses.field(
        default_factory=get_invocation_id
    )
    env: Dict[str, str] = dataclasses.field(default_factory=get_metadata_env)


def schema_version(name: str, version: int):
    def inner(cls: Type[VersionedSchema]):
        cls.dbt_schema_version = SchemaVersion(
            name=name,
            version=version,
        )
        return cls
    return inner


@dataclasses.dataclass
class VersionedSchema(dbtClassMixin):
    dbt_schema_version: ClassVar[SchemaVersion]

    @classmethod
    def json_schema(cls, embeddable: bool = False) -> Dict[str, Any]:
        result = super().json_schema(embeddable=embeddable)
        if not embeddable:
            result['$id'] = str(cls.dbt_schema_version)
        return result


T = TypeVar('T', bound='ArtifactMixin')


# metadata should really be a Generic[T_M] where T_M is a TypeVar bound to
# BaseArtifactMetadata. Unfortunately this isn't possible due to a mypy issue:
# https://github.com/python/mypy/issues/7520
@dataclasses.dataclass(init=False)
class ArtifactMixin(VersionedSchema, Writable, Readable):
    metadata: BaseArtifactMetadata

    @classmethod
    def validate(cls, data):
        super().validate(data)
        if cls.dbt_schema_version is None:
            raise InternalException(
                'Cannot call from_dict with no schema version!'
            )
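

# Usage sketch: a hypothetical artifact combining the pieces above. The
# schema_version decorator stamps a class-level SchemaVersion onto the class,
# which validate() requires and json_schema() advertises as the '$id'.
#
#     @dataclasses.dataclass(init=False)
#     @schema_version('my-artifact', 1)
#     class MyArtifact(ArtifactMixin):
#         metadata: BaseArtifactMetadata
#
#     str(MyArtifact.dbt_schema_version)
#     # -> 'https://schemas.getdbt.com/dbt/my-artifact/v1.json'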