from dbt.contracts.graph.manifest import CompileResultNode
from dbt.contracts.graph.unparsed import (
    FreshnessThreshold
)
from dbt.contracts.graph.parsed import ParsedSourceDefinition
from dbt.contracts.util import (
    BaseArtifactMetadata,
    ArtifactMixin,
    VersionedSchema,
    Replaceable,
    schema_version,
)
from dbt.exceptions import InternalException
from dbt.logger import (
    TimingProcessor,
    JsonOnly,
    GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from dbt.dataclass_schema import dbtClassMixin, StrEnum

import agate

from dataclasses import dataclass, field
from datetime import datetime
from typing import (
    Union, Dict, List, Optional, Any, NamedTuple, Sequence,
)

from dbt.clients.system import write_json


@dataclass
class TimingInfo(dbtClassMixin):
    name: str
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    def begin(self):
        self.started_at = datetime.utcnow()

    def end(self):
        self.completed_at = datetime.utcnow()


class collect_timing_info:
    def __init__(self, name: str):
        self.timing_info = TimingInfo(name=name)

    def __enter__(self):
        self.timing_info.begin()
        return self.timing_info

    def __exit__(self, exc_type, exc_value, traceback):
        self.timing_info.end()
        with JsonOnly(), TimingProcessor(self.timing_info):
            logger.debug('finished collecting timing info')


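# Example (illustrative sketch, not part of the original module): a TimingInfo
# is collected around a unit of work via the context manager above; the names
# used here are arbitrary.
#
#     with collect_timing_info('execute') as timing:
#         run_the_node()          # hypothetical workload
#     # timing.started_at and timing.completed_at are now populated, and the
#     # TimingInfo is attached to the debug log record emitted under
#     # JsonOnly/TimingProcessor.

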
class NodeStatus(StrEnum):
    Success = "success"
    Error = "error"
    Fail = "fail"
    Warn = "warn"
    Skipped = "skipped"
    Pass = "pass"
    RuntimeErr = "runtime error"


class RunStatus(StrEnum):
    Success = NodeStatus.Success
    Error = NodeStatus.Error
    Skipped = NodeStatus.Skipped


class TestStatus(StrEnum):
    Pass = NodeStatus.Pass
    Error = NodeStatus.Error
    Fail = NodeStatus.Fail
    Warn = NodeStatus.Warn
    Skipped = NodeStatus.Skipped


class FreshnessStatus(StrEnum):
    Pass = NodeStatus.Pass
    Warn = NodeStatus.Warn
    Error = NodeStatus.Error
    RuntimeErr = NodeStatus.RuntimeErr


@dataclass
class BaseResult(dbtClassMixin):
    status: Union[RunStatus, TestStatus, FreshnessStatus]
    timing: List[TimingInfo]
    thread_id: str
    execution_time: float
    adapter_response: Dict[str, Any]
    message: Optional[str]
    failures: Optional[int]

    @classmethod
    def __pre_deserialize__(cls, data):
        data = super().__pre_deserialize__(data)
        if 'message' not in data:
            data['message'] = None
        if 'failures' not in data:
            data['failures'] = None
        return data


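# Example (illustrative sketch, not part of the original module): older result
# payloads may omit 'message' and 'failures'; __pre_deserialize__ backfills
# them with None so deserialization does not fail. Assuming dbtClassMixin
# exposes from_dict():
#
#     data = {
#         'status': 'success', 'timing': [], 'thread_id': 'Thread-1',
#         'execution_time': 0.1, 'adapter_response': {},
#     }
#     result = BaseResult.from_dict(data)
#     assert result.message is None and result.failures is None

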
@dataclass
class NodeResult(BaseResult):
    node: CompileResultNode


@dataclass
class RunResult(NodeResult):
    agate_table: Optional[agate.Table] = field(
        default=None, metadata={
            'serialize': lambda x: None, 'deserialize': lambda x: None
        }
    )

    @property
    def skipped(self):
        return self.status == RunStatus.Skipped


@dataclass
class ExecutionResult(dbtClassMixin):
    results: Sequence[BaseResult]
    elapsed_time: float

    def __len__(self):
        return len(self.results)

    def __iter__(self):
        return iter(self.results)

    def __getitem__(self, idx):
        return self.results[idx]


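# Example (illustrative sketch, not part of the original module): the dunder
# methods above make any ExecutionResult behave like a read-only sequence of
# its node results; 'run_results' here is a hypothetical instance.
#
#     failed = [r for r in run_results if r.status == RunStatus.Error]
#     first = run_results[0]
#     total = len(run_results)

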
@dataclass
class RunResultsMetadata(BaseArtifactMetadata):
    dbt_schema_version: str = field(
        default_factory=lambda: str(RunResultsArtifact.dbt_schema_version)
    )


@dataclass
class RunResultOutput(BaseResult):
    unique_id: str


def process_run_result(result: RunResult) -> RunResultOutput:
    return RunResultOutput(
        unique_id=result.node.unique_id,
        status=result.status,
        timing=result.timing,
        thread_id=result.thread_id,
        execution_time=result.execution_time,
        message=result.message,
        adapter_response=result.adapter_response,
        failures=result.failures
    )


@dataclass
class RunExecutionResult(
    ExecutionResult,
):
    results: Sequence[RunResult]
    args: Dict[str, Any] = field(default_factory=dict)
    generated_at: datetime = field(default_factory=datetime.utcnow)

    def write(self, path: str):
        writable = RunResultsArtifact.from_execution_results(
            results=self.results,
            elapsed_time=self.elapsed_time,
            generated_at=self.generated_at,
            args=self.args,
        )
        writable.write(path)


@dataclass
@schema_version('run-results', 3)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
    results: Sequence[RunResultOutput]
    args: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_execution_results(
        cls,
        results: Sequence[RunResult],
        elapsed_time: float,
        generated_at: datetime,
        args: Dict,
    ):
        processed_results = [process_run_result(result) for result in results]
        meta = RunResultsMetadata(
            dbt_schema_version=str(cls.dbt_schema_version),
            generated_at=generated_at,
        )
        return cls(
            metadata=meta,
            results=processed_results,
            elapsed_time=elapsed_time,
            args=args
        )

    def write(self, path: str):
        write_json(path, self.to_dict(omit_none=False))


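# Example (illustrative sketch, not part of the original module): converting
# in-memory run results into the versioned run_results.json artifact; the
# variable names and args payload below are arbitrary.
#
#     artifact = RunResultsArtifact.from_execution_results(
#         results=run_results,            # Sequence[RunResult]
#         elapsed_time=elapsed,
#         generated_at=datetime.utcnow(),
#         args={'which': 'run'},
#     )
#     artifact.write('target/run_results.json')

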
@dataclass
class RunOperationResult(ExecutionResult):
    success: bool


@dataclass
class RunOperationResultMetadata(BaseArtifactMetadata):
    dbt_schema_version: str = field(default_factory=lambda: str(
        RunOperationResultsArtifact.dbt_schema_version
    ))


@dataclass
@schema_version('run-operation-result', 1)
class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):

    @classmethod
    def from_success(
        cls,
        success: bool,
        elapsed_time: float,
        generated_at: datetime,
    ):
        meta = RunOperationResultMetadata(
            dbt_schema_version=str(cls.dbt_schema_version),
            generated_at=generated_at,
        )
        return cls(
            metadata=meta,
            results=[],
            elapsed_time=elapsed_time,
            success=success,
        )


# due to issues with typing.Union collapsing subclasses, this can't subclass
# PartialResult

@dataclass
class SourceFreshnessResult(NodeResult):
    node: ParsedSourceDefinition
    status: FreshnessStatus
    max_loaded_at: datetime
    snapshotted_at: datetime
    age: float

    @property
    def skipped(self):
        return False


class FreshnessErrorEnum(StrEnum):
    runtime_error = 'runtime error'


@dataclass
class SourceFreshnessRuntimeError(dbtClassMixin):
    unique_id: str
    error: Optional[Union[str, int]]
    status: FreshnessErrorEnum


@dataclass
class SourceFreshnessOutput(dbtClassMixin):
    unique_id: str
    max_loaded_at: datetime
    snapshotted_at: datetime
    max_loaded_at_time_ago_in_s: float
    status: FreshnessStatus
    criteria: FreshnessThreshold
    adapter_response: Dict[str, Any]
    timing: List[TimingInfo]
    thread_id: str
    execution_time: float


@dataclass
class PartialSourceFreshnessResult(NodeResult):
    status: FreshnessStatus

    @property
    def skipped(self):
        return False


FreshnessNodeResult = Union[PartialSourceFreshnessResult,
                            SourceFreshnessResult]
FreshnessNodeOutput = Union[SourceFreshnessRuntimeError, SourceFreshnessOutput]


def process_freshness_result(
    result: FreshnessNodeResult
) -> FreshnessNodeOutput:
    unique_id = result.node.unique_id
    if result.status == FreshnessStatus.RuntimeErr:
        return SourceFreshnessRuntimeError(
            unique_id=unique_id,
            error=result.message,
            status=FreshnessErrorEnum.runtime_error,
        )

    # we know that this must be a SourceFreshnessResult
    if not isinstance(result, SourceFreshnessResult):
        raise InternalException(
            'Got {} instead of a SourceFreshnessResult for a '
            'non-error result in freshness execution!'
            .format(type(result))
        )
    # if we're here, we must have a non-None freshness threshold
    criteria = result.node.freshness
    if criteria is None:
        raise InternalException(
            'Somehow evaluated a freshness result for a source '
            'that has no freshness criteria!'
        )
    return SourceFreshnessOutput(
        unique_id=unique_id,
        max_loaded_at=result.max_loaded_at,
        snapshotted_at=result.snapshotted_at,
        max_loaded_at_time_ago_in_s=result.age,
        status=result.status,
        criteria=criteria,
        adapter_response=result.adapter_response,
        timing=result.timing,
        thread_id=result.thread_id,
        execution_time=result.execution_time,
    )


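# Example (illustrative sketch, not part of the original module): a result with
# status FreshnessStatus.RuntimeErr is emitted as a SourceFreshnessRuntimeError
# carrying the error message, while a successful SourceFreshnessResult becomes
# a SourceFreshnessOutput with its source's freshness criteria attached;
# 'freshness_results' is a hypothetical list of FreshnessNodeResult.
#
#     outputs = [process_freshness_result(r) for r in freshness_results]

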
@dataclass
class FreshnessMetadata(BaseArtifactMetadata):
    dbt_schema_version: str = field(
        default_factory=lambda: str(
            FreshnessExecutionResultArtifact.dbt_schema_version
        )
    )


@dataclass
class FreshnessResult(ExecutionResult):
    metadata: FreshnessMetadata
    results: Sequence[FreshnessNodeResult]

    @classmethod
    def from_node_results(
        cls,
        results: List[FreshnessNodeResult],
        elapsed_time: float,
        generated_at: datetime,
    ):
        meta = FreshnessMetadata(generated_at=generated_at)
        return cls(metadata=meta, results=results, elapsed_time=elapsed_time)


@dataclass
@schema_version('sources', 2)
class FreshnessExecutionResultArtifact(
    ArtifactMixin,
    VersionedSchema,
):
    metadata: FreshnessMetadata
    results: Sequence[FreshnessNodeOutput]
    elapsed_time: float

    @classmethod
    def from_result(cls, base: FreshnessResult):
        processed = [process_freshness_result(r) for r in base.results]
        return cls(
            metadata=base.metadata,
            results=processed,
            elapsed_time=base.elapsed_time,
        )


Primitive = Union[bool, str, float, None]
PrimitiveDict = Dict[str, Primitive]

CatalogKey = NamedTuple(
    'CatalogKey',
    [('database', Optional[str]), ('schema', str), ('name', str)]
)


@dataclass
class StatsItem(dbtClassMixin):
    id: str
    label: str
    value: Primitive
    include: bool
    description: Optional[str] = None


StatsDict = Dict[str, StatsItem]


@dataclass
class ColumnMetadata(dbtClassMixin):
    type: str
    index: int
    name: str
    comment: Optional[str] = None


ColumnMap = Dict[str, ColumnMetadata]


@dataclass
class TableMetadata(dbtClassMixin):
    type: str
    schema: str
    name: str
    database: Optional[str] = None
    comment: Optional[str] = None
    owner: Optional[str] = None


@dataclass
class CatalogTable(dbtClassMixin, Replaceable):
    metadata: TableMetadata
    columns: ColumnMap
    stats: StatsDict
    # the same table with two unique IDs will just be listed two times
    unique_id: Optional[str] = None

    def key(self) -> CatalogKey:
        return CatalogKey(
            lowercase(self.metadata.database),
            self.metadata.schema.lower(),
            self.metadata.name.lower(),
        )


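# Example (illustrative sketch, not part of the original module): key()
# normalizes identifiers so catalog entries can be matched case-insensitively;
# 'table' here is a hypothetical CatalogTable whose metadata has
# database='ANALYTICS', schema='Jaffle_Shop', name='ORDERS'.
#
#     table.key() == CatalogKey('analytics', 'jaffle_shop', 'orders')

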
@dataclass
class CatalogMetadata(BaseArtifactMetadata):
    dbt_schema_version: str = field(
        default_factory=lambda: str(CatalogArtifact.dbt_schema_version)
    )


@dataclass
class CatalogResults(dbtClassMixin):
    nodes: Dict[str, CatalogTable]
    sources: Dict[str, CatalogTable]
    errors: Optional[List[str]] = None
    _compile_results: Optional[Any] = None

    def __post_serialize__(self, dct):
        dct = super().__post_serialize__(dct)
        if '_compile_results' in dct:
            del dct['_compile_results']
        return dct


@dataclass
@schema_version('catalog', 1)
class CatalogArtifact(CatalogResults, ArtifactMixin):
    metadata: CatalogMetadata

    @classmethod
    def from_results(
        cls,
        generated_at: datetime,
        nodes: Dict[str, CatalogTable],
        sources: Dict[str, CatalogTable],
        compile_results: Optional[Any],
        errors: Optional[List[str]]
    ) -> 'CatalogArtifact':
        meta = CatalogMetadata(generated_at=generated_at)
        return cls(
            metadata=meta,
            nodes=nodes,
            sources=sources,
            errors=errors,
            _compile_results=compile_results,
        )
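

# Example (illustrative sketch, not part of the original module): assembling a
# catalog artifact from already-built node and source tables; the internal
# _compile_results reference is dropped again by __post_serialize__ when the
# artifact is serialized. 'node_tables' and 'source_tables' are hypothetical
# Dict[str, CatalogTable] values.
#
#     catalog = CatalogArtifact.from_results(
#         generated_at=datetime.utcnow(),
#         nodes=node_tables,
#         sources=source_tables,
#         compile_results=None,
#         errors=None,
#     )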