dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/contracts/results.py

492 lines
12 KiB
Python

from dbt.contracts.graph.manifest import CompileResultNode
from dbt.contracts.graph.unparsed import (
FreshnessThreshold
)
from dbt.contracts.graph.parsed import ParsedSourceDefinition
from dbt.contracts.util import (
BaseArtifactMetadata,
ArtifactMixin,
VersionedSchema,
Replaceable,
schema_version,
)
from dbt.exceptions import InternalException
from dbt.logger import (
TimingProcessor,
JsonOnly,
GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from dbt.dataclass_schema import dbtClassMixin, StrEnum
import agate
from dataclasses import dataclass, field
from datetime import datetime
from typing import (
Union, Dict, List, Optional, Any, NamedTuple, Sequence,
)
from dbt.clients.system import write_json
@dataclass
class TimingInfo(dbtClassMixin):
name: str
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
def begin(self):
self.started_at = datetime.utcnow()
def end(self):
self.completed_at = datetime.utcnow()
class collect_timing_info:
def __init__(self, name: str):
self.timing_info = TimingInfo(name=name)
def __enter__(self):
self.timing_info.begin()
return self.timing_info
def __exit__(self, exc_type, exc_value, traceback):
self.timing_info.end()
with JsonOnly(), TimingProcessor(self.timing_info):
logger.debug('finished collecting timing info')
class NodeStatus(StrEnum):
Success = "success"
Error = "error"
Fail = "fail"
Warn = "warn"
Skipped = "skipped"
Pass = "pass"
RuntimeErr = "runtime error"
class RunStatus(StrEnum):
Success = NodeStatus.Success
Error = NodeStatus.Error
Skipped = NodeStatus.Skipped
class TestStatus(StrEnum):
Pass = NodeStatus.Pass
Error = NodeStatus.Error
Fail = NodeStatus.Fail
Warn = NodeStatus.Warn
Skipped = NodeStatus.Skipped
class FreshnessStatus(StrEnum):
Pass = NodeStatus.Pass
Warn = NodeStatus.Warn
Error = NodeStatus.Error
RuntimeErr = NodeStatus.RuntimeErr
@dataclass
class BaseResult(dbtClassMixin):
status: Union[RunStatus, TestStatus, FreshnessStatus]
timing: List[TimingInfo]
thread_id: str
execution_time: float
adapter_response: Dict[str, Any]
message: Optional[str]
failures: Optional[int]
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
if 'message' not in data:
data['message'] = None
if 'failures' not in data:
data['failures'] = None
return data
@dataclass
class NodeResult(BaseResult):
node: CompileResultNode
@dataclass
class RunResult(NodeResult):
agate_table: Optional[agate.Table] = field(
default=None, metadata={
'serialize': lambda x: None, 'deserialize': lambda x: None
}
)
@property
def skipped(self):
return self.status == RunStatus.Skipped
@dataclass
class ExecutionResult(dbtClassMixin):
results: Sequence[BaseResult]
elapsed_time: float
def __len__(self):
return len(self.results)
def __iter__(self):
return iter(self.results)
def __getitem__(self, idx):
return self.results[idx]
@dataclass
class RunResultsMetadata(BaseArtifactMetadata):
dbt_schema_version: str = field(
default_factory=lambda: str(RunResultsArtifact.dbt_schema_version)
)
@dataclass
class RunResultOutput(BaseResult):
unique_id: str
def process_run_result(result: RunResult) -> RunResultOutput:
return RunResultOutput(
unique_id=result.node.unique_id,
status=result.status,
timing=result.timing,
thread_id=result.thread_id,
execution_time=result.execution_time,
message=result.message,
adapter_response=result.adapter_response,
failures=result.failures
)
@dataclass
class RunExecutionResult(
ExecutionResult,
):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
results=self.results,
elapsed_time=self.elapsed_time,
generated_at=self.generated_at,
args=self.args,
)
writable.write(path)
@dataclass
@schema_version('run-results', 3)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@classmethod
def from_execution_results(
cls,
results: Sequence[RunResult],
elapsed_time: float,
generated_at: datetime,
args: Dict,
):
processed_results = [process_run_result(result) for result in results]
meta = RunResultsMetadata(
dbt_schema_version=str(cls.dbt_schema_version),
generated_at=generated_at,
)
return cls(
metadata=meta,
results=processed_results,
elapsed_time=elapsed_time,
args=args
)
def write(self, path: str):
write_json(path, self.to_dict(omit_none=False))
@dataclass
class RunOperationResult(ExecutionResult):
success: bool
@dataclass
class RunOperationResultMetadata(BaseArtifactMetadata):
dbt_schema_version: str = field(default_factory=lambda: str(
RunOperationResultsArtifact.dbt_schema_version
))
@dataclass
@schema_version('run-operation-result', 1)
class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):
@classmethod
def from_success(
cls,
success: bool,
elapsed_time: float,
generated_at: datetime,
):
meta = RunOperationResultMetadata(
dbt_schema_version=str(cls.dbt_schema_version),
generated_at=generated_at,
)
return cls(
metadata=meta,
results=[],
elapsed_time=elapsed_time,
success=success,
)
# due to issues with typing.Union collapsing subclasses, this can't subclass
# PartialResult
@dataclass
class SourceFreshnessResult(NodeResult):
node: ParsedSourceDefinition
status: FreshnessStatus
max_loaded_at: datetime
snapshotted_at: datetime
age: float
@property
def skipped(self):
return False
class FreshnessErrorEnum(StrEnum):
runtime_error = 'runtime error'
@dataclass
class SourceFreshnessRuntimeError(dbtClassMixin):
unique_id: str
error: Optional[Union[str, int]]
status: FreshnessErrorEnum
@dataclass
class SourceFreshnessOutput(dbtClassMixin):
unique_id: str
max_loaded_at: datetime
snapshotted_at: datetime
max_loaded_at_time_ago_in_s: float
status: FreshnessStatus
criteria: FreshnessThreshold
adapter_response: Dict[str, Any]
timing: List[TimingInfo]
thread_id: str
execution_time: float
@dataclass
class PartialSourceFreshnessResult(NodeResult):
status: FreshnessStatus
@property
def skipped(self):
return False
FreshnessNodeResult = Union[PartialSourceFreshnessResult,
SourceFreshnessResult]
FreshnessNodeOutput = Union[SourceFreshnessRuntimeError, SourceFreshnessOutput]
def process_freshness_result(
result: FreshnessNodeResult
) -> FreshnessNodeOutput:
unique_id = result.node.unique_id
if result.status == FreshnessStatus.RuntimeErr:
return SourceFreshnessRuntimeError(
unique_id=unique_id,
error=result.message,
status=FreshnessErrorEnum.runtime_error,
)
# we know that this must be a SourceFreshnessResult
if not isinstance(result, SourceFreshnessResult):
raise InternalException(
'Got {} instead of a SourceFreshnessResult for a '
'non-error result in freshness execution!'
.format(type(result))
)
# if we're here, we must have a non-None freshness threshold
criteria = result.node.freshness
if criteria is None:
raise InternalException(
'Somehow evaluated a freshness result for a source '
'that has no freshness criteria!'
)
return SourceFreshnessOutput(
unique_id=unique_id,
max_loaded_at=result.max_loaded_at,
snapshotted_at=result.snapshotted_at,
max_loaded_at_time_ago_in_s=result.age,
status=result.status,
criteria=criteria,
adapter_response=result.adapter_response,
timing=result.timing,
thread_id=result.thread_id,
execution_time=result.execution_time,
)
@dataclass
class FreshnessMetadata(BaseArtifactMetadata):
dbt_schema_version: str = field(
default_factory=lambda: str(
FreshnessExecutionResultArtifact.dbt_schema_version
)
)
@dataclass
class FreshnessResult(ExecutionResult):
metadata: FreshnessMetadata
results: Sequence[FreshnessNodeResult]
@classmethod
def from_node_results(
cls,
results: List[FreshnessNodeResult],
elapsed_time: float,
generated_at: datetime,
):
meta = FreshnessMetadata(generated_at=generated_at)
return cls(metadata=meta, results=results, elapsed_time=elapsed_time)
@dataclass
@schema_version('sources', 2)
class FreshnessExecutionResultArtifact(
ArtifactMixin,
VersionedSchema,
):
metadata: FreshnessMetadata
results: Sequence[FreshnessNodeOutput]
elapsed_time: float
@classmethod
def from_result(cls, base: FreshnessResult):
processed = [process_freshness_result(r) for r in base.results]
return cls(
metadata=base.metadata,
results=processed,
elapsed_time=base.elapsed_time,
)
Primitive = Union[bool, str, float, None]
PrimitiveDict = Dict[str, Primitive]
CatalogKey = NamedTuple(
'CatalogKey',
[('database', Optional[str]), ('schema', str), ('name', str)]
)
@dataclass
class StatsItem(dbtClassMixin):
id: str
label: str
value: Primitive
include: bool
description: Optional[str] = None
StatsDict = Dict[str, StatsItem]
@dataclass
class ColumnMetadata(dbtClassMixin):
type: str
index: int
name: str
comment: Optional[str] = None
ColumnMap = Dict[str, ColumnMetadata]
@dataclass
class TableMetadata(dbtClassMixin):
type: str
schema: str
name: str
database: Optional[str] = None
comment: Optional[str] = None
owner: Optional[str] = None
@dataclass
class CatalogTable(dbtClassMixin, Replaceable):
metadata: TableMetadata
columns: ColumnMap
stats: StatsDict
# the same table with two unique IDs will just be listed two times
unique_id: Optional[str] = None
def key(self) -> CatalogKey:
return CatalogKey(
lowercase(self.metadata.database),
self.metadata.schema.lower(),
self.metadata.name.lower(),
)
@dataclass
class CatalogMetadata(BaseArtifactMetadata):
dbt_schema_version: str = field(
default_factory=lambda: str(CatalogArtifact.dbt_schema_version)
)
@dataclass
class CatalogResults(dbtClassMixin):
nodes: Dict[str, CatalogTable]
sources: Dict[str, CatalogTable]
errors: Optional[List[str]] = None
_compile_results: Optional[Any] = None
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if '_compile_results' in dct:
del dct['_compile_results']
return dct
@dataclass
@schema_version('catalog', 1)
class CatalogArtifact(CatalogResults, ArtifactMixin):
metadata: CatalogMetadata
@classmethod
def from_results(
cls,
generated_at: datetime,
nodes: Dict[str, CatalogTable],
sources: Dict[str, CatalogTable],
compile_results: Optional[Any],
errors: Optional[List[str]]
) -> 'CatalogArtifact':
meta = CatalogMetadata(generated_at=generated_at)
return cls(
metadata=meta,
nodes=nodes,
sources=sources,
errors=errors,
_compile_results=compile_results,
)