import enum
from dataclasses import dataclass, field
from itertools import chain, islice
from mashumaro import DataClassMessagePackMixin
from multiprocessing.synchronize import Lock
from typing import (
    Dict, List, Optional, Union, Mapping, MutableMapping, Any, Set, Tuple,
    TypeVar, Callable, Iterable, Generic, cast, AbstractSet, ClassVar
)
from typing_extensions import Protocol
from uuid import UUID

from dbt.contracts.graph.compiled import (
    CompileResultNode, ManifestNode, NonSourceCompiledNode, GraphMemberNode
)
from dbt.contracts.graph.parsed import (
    ParsedMacro, ParsedDocumentation,
    ParsedSourceDefinition, ParsedExposure, HasUniqueID,
    UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.util import (
    BaseArtifactMetadata, SourceKey, ArtifactMixin, schema_version
)
from dbt.dataclass_schema import dbtClassMixin
from dbt.exceptions import (
    CompilationException,
    raise_duplicate_resource_name, raise_compiler_error,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import flags
from dbt import tracking
import dbt.utils


NodeEdgeMap = Dict[str, List[str]]
PackageName = str
DocName = str
RefName = str
UniqueID = str

def find_unique_id_for_package(storage, key, package: Optional[PackageName]):
    if key not in storage:
        return None

    pkg_dct: Mapping[PackageName, UniqueID] = storage[key]

    if package is None:
        if not pkg_dct:
            return None
        else:
            return next(iter(pkg_dct.values()))
    elif package in pkg_dct:
        return pkg_dct[package]
    else:
        return None

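
# A minimal sketch (not part of the original module) of the two-level storage
# layout the lookup classes below build and pass to this helper: the outer key
# is the search key (e.g. a doc or node name) and the inner dict maps package
# names to unique IDs.
#
#   storage = {'my_model': {'my_project': 'model.my_project.my_model'}}
#   find_unique_id_for_package(storage, 'my_model', 'my_project')
#   # -> 'model.my_project.my_model'
#   find_unique_id_for_package(storage, 'my_model', None)
#   # -> the first value found, i.e. 'model.my_project.my_model'
#   find_unique_id_for_package(storage, 'missing', None)
#   # -> None
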
class DocLookup(dbtClassMixin):
    def __init__(self, manifest: 'Manifest'):
        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, key, package: Optional[PackageName]):
        return find_unique_id_for_package(self.storage, key, package)

    def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
        unique_id = self.get_unique_id(key, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_doc(self, doc: ParsedDocumentation):
        if doc.name not in self.storage:
            self.storage[doc.name] = {}
        self.storage[doc.name][doc.package_name] = doc.unique_id

    def populate(self, manifest):
        for doc in manifest.docs.values():
            self.add_doc(doc)

    def perform_lookup(
        self, unique_id: UniqueID, manifest
    ) -> ParsedDocumentation:
        if unique_id not in manifest.docs:
            raise dbt.exceptions.InternalException(
                f'Doc {unique_id} found in cache but not found in manifest'
            )
        return manifest.docs[unique_id]


class SourceLookup(dbtClassMixin):
    def __init__(self, manifest: 'Manifest'):
        self.storage: Dict[Tuple[str, str], Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, key, package: Optional[PackageName]):
        return find_unique_id_for_package(self.storage, key, package)

    def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
        unique_id = self.get_unique_id(key, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_source(self, source: ParsedSourceDefinition):
        key = (source.source_name, source.name)
        if key not in self.storage:
            self.storage[key] = {}

        self.storage[key][source.package_name] = source.unique_id

    def populate(self, manifest):
        for source in manifest.sources.values():
            if hasattr(source, 'source_name'):
                self.add_source(source)

    def perform_lookup(
        self, unique_id: UniqueID, manifest: 'Manifest'
    ) -> ParsedSourceDefinition:
        if unique_id not in manifest.sources:
            raise dbt.exceptions.InternalException(
                f'Source {unique_id} found in cache but not found in manifest'
            )
        return manifest.sources[unique_id]


class RefableLookup(dbtClassMixin):
    # model, seed, snapshot
    _lookup_types: ClassVar[set] = set(NodeType.refable())

    # refables are actually unique, so the Dict[PackageName, UniqueID] will
    # only ever have exactly one value, but doing 3 dict lookups instead of 1
    # is not a big deal at all and retains consistency
    def __init__(self, manifest: 'Manifest'):
        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, key, package: Optional[PackageName]):
        return find_unique_id_for_package(self.storage, key, package)

    def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
        unique_id = self.get_unique_id(key, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_node(self, node: ManifestNode):
        if node.resource_type in self._lookup_types:
            if node.name not in self.storage:
                self.storage[node.name] = {}
            self.storage[node.name][node.package_name] = node.unique_id

    def populate(self, manifest):
        for node in manifest.nodes.values():
            self.add_node(node)

    def perform_lookup(
        self, unique_id: UniqueID, manifest
    ) -> ManifestNode:
        if unique_id not in manifest.nodes:
            raise dbt.exceptions.InternalException(
                f'Node {unique_id} found in cache but not found in manifest'
            )
        return manifest.nodes[unique_id]


class DisabledLookup(dbtClassMixin):
    # model, seed, snapshot
    _lookup_types: ClassVar[set] = set(NodeType.refable())

    def __init__(self, manifest: 'Manifest'):
        self.storage: Dict[str, Dict[PackageName, List[Any]]] = {}
        self.populate(manifest)

    def populate(self, manifest):
        for node in manifest.disabled:
            self.add_node(node)
        for node in list(chain.from_iterable(manifest._disabled.values())):
            self.add_node(node)

    def add_node(self, node: ManifestNode):
        if node.resource_type in self._lookup_types:
            if node.name not in self.storage:
                self.storage[node.name] = {}
            if node.package_name not in self.storage[node.name]:
                self.storage[node.name][node.package_name] = []
            self.storage[node.name][node.package_name].append(node)

    # This should return a list of disabled nodes
    def find(self, key, package: PackageName):
        if key not in self.storage:
            return None

        pkg_dct: Mapping[PackageName, List[ManifestNode]] = self.storage[key]

        if not pkg_dct:
            return None
        elif package in pkg_dct:
            return pkg_dct[package]
        else:
            return None


class AnalysisLookup(RefableLookup):
    _lookup_types: ClassVar[set] = set([NodeType.Analysis])

def _search_packages(
    current_project: str,
    node_package: str,
    target_package: Optional[str] = None,
) -> List[Optional[str]]:
    if target_package is not None:
        return [target_package]
    elif current_project == node_package:
        return [current_project, None]
    else:
        return [current_project, node_package, None]

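
# An illustration (not in the original source) of the package search order
# produced above. None means "any package", which the lookup helpers treat as
# "take whatever single match exists".
#
#   _search_packages('root', 'root')             # -> ['root', None]
#   _search_packages('root', 'dep_pkg')          # -> ['root', 'dep_pkg', None]
#   _search_packages('root', 'dep_pkg', 'util')  # -> ['util']  (explicit package wins)
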
@dataclass
class ManifestMetadata(BaseArtifactMetadata):
    """Metadata for the manifest."""
    dbt_schema_version: str = field(
        default_factory=lambda: str(WritableManifest.dbt_schema_version)
    )
    project_id: Optional[str] = field(
        default=None,
        metadata={
            'description': 'A unique identifier for the project',
        },
    )
    user_id: Optional[UUID] = field(
        default=None,
        metadata={
            'description': 'A unique identifier for the user',
        },
    )
    send_anonymous_usage_stats: Optional[bool] = field(
        default=None,
        metadata=dict(description=(
            'Whether dbt is configured to send anonymous usage statistics'
        )),
    )
    adapter_type: Optional[str] = field(
        default=None,
        metadata=dict(description='The type name of the adapter'),
    )

    def __post_init__(self):
        if tracking.active_user is None:
            return

        if self.user_id is None:
            self.user_id = tracking.active_user.id

        if self.send_anonymous_usage_stats is None:
            self.send_anonymous_usage_stats = (
                not tracking.active_user.do_not_track
            )

    @classmethod
    def default(cls):
        return cls(
            dbt_schema_version=str(WritableManifest.dbt_schema_version),
        )


def _sort_values(dct):
    """Given a dictionary, sort each value. This makes output deterministic,
    which helps for tests.
    """
    return {k: sorted(v) for k, v in dct.items()}

def build_node_edges(nodes: List[ManifestNode]):
    """Build the forward and backward edges on the given list of ParsedNodes
    and return them as two separate dictionaries, each mapping unique IDs to
    lists of edges.
    """
    backward_edges: Dict[str, List[str]] = {}
    # pre-populate the forward edge dict for simplicity
    forward_edges: Dict[str, List[str]] = {n.unique_id: [] for n in nodes}
    for node in nodes:
        backward_edges[node.unique_id] = node.depends_on_nodes[:]
        for unique_id in node.depends_on_nodes:
            if unique_id in forward_edges.keys():
                forward_edges[unique_id].append(node.unique_id)
    return _sort_values(forward_edges), _sort_values(backward_edges)

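
# A small worked example (illustration only, not part of the original source):
# given a node B that depends on node A, build_node_edges returns
#
#   forward_edges  == {'A': ['B'], 'B': []}     # becomes child_map
#   backward_edges == {'A': [],    'B': ['A']}  # becomes parent_map
#
# where 'A' and 'B' stand in for the nodes' unique_ids.
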
# Build a map of children of macros
def build_macro_edges(nodes: List[Any]):
    forward_edges: Dict[str, List[str]] = {
        n.unique_id: [] for n in nodes if n.unique_id.startswith('macro') or n.depends_on.macros
    }
    for node in nodes:
        for unique_id in node.depends_on.macros:
            if unique_id in forward_edges.keys():
                forward_edges[unique_id].append(node.unique_id)
    return _sort_values(forward_edges)


def _deepcopy(value):
    return value.from_dict(value.to_dict(omit_none=True))


class Locality(enum.IntEnum):
    Core = 1
    Imported = 2
    Root = 3


class Specificity(enum.IntEnum):
    Default = 1
    Adapter = 2


@dataclass
class MacroCandidate:
    locality: Locality
    macro: ParsedMacro

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, MacroCandidate):
            return NotImplemented
        return self.locality == other.locality

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, MacroCandidate):
            return NotImplemented
        if self.locality < other.locality:
            return True
        if self.locality > other.locality:
            return False
        return False


@dataclass
class MaterializationCandidate(MacroCandidate):
    specificity: Specificity

    @classmethod
    def from_macro(
        cls, candidate: MacroCandidate, specificity: Specificity
    ) -> 'MaterializationCandidate':
        return cls(
            locality=candidate.locality,
            macro=candidate.macro,
            specificity=specificity,
        )

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, MaterializationCandidate):
            return NotImplemented
        equal = (
            self.specificity == other.specificity and
            self.locality == other.locality
        )
        if equal:
            raise_compiler_error(
                'Found two materializations with the name {} (packages {} and '
                '{}). dbt cannot resolve this ambiguity'
                .format(self.macro.name, self.macro.package_name,
                        other.macro.package_name)
            )

        return equal

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, MaterializationCandidate):
            return NotImplemented
        if self.specificity < other.specificity:
            return True
        if self.specificity > other.specificity:
            return False
        if self.locality < other.locality:
            return True
        if self.locality > other.locality:
            return False
        return False


M = TypeVar('M', bound=MacroCandidate)


class CandidateList(List[M]):
    def last(self) -> Optional[ParsedMacro]:
        if not self:
            return None
        self.sort()
        return self[-1].macro

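
# Illustration (not part of the original source) of how candidate priority
# falls out of the IntEnum ordering above: CandidateList.last() sorts the
# candidates ascending and returns the final (largest) one, so
#
#   Locality:    Root (3) beats Imported (2) beats Core (1)
#   Specificity: Adapter (2) beats Default (1), and is compared before
#                locality for materialization candidates
#
# i.e. an adapter-specific materialization defined in the root project wins
# over a default one shipped with dbt core.
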
def _get_locality(
    macro: ParsedMacro, root_project_name: str, internal_packages: Set[str]
) -> Locality:
    if macro.package_name == root_project_name:
        return Locality.Root
    elif macro.package_name in internal_packages:
        return Locality.Core
    else:
        return Locality.Imported


class Searchable(Protocol):
    resource_type: NodeType
    package_name: str

    @property
    def search_name(self) -> str:
        raise NotImplementedError('search_name not implemented')


N = TypeVar('N', bound=Searchable)


@dataclass
class NameSearcher(Generic[N]):
    name: str
    package: Optional[str]
    nodetypes: List[NodeType]

    def _matches(self, model: N) -> bool:
        """Return True if the model matches the given name, package, and type.

        If package is None, any package is allowed.
        nodetypes should be a container of NodeTypes that implements the 'in'
        operator.
        """
        if model.resource_type not in self.nodetypes:
            return False

        if self.name != model.search_name:
            return False

        return self.package is None or self.package == model.package_name

    def search(self, haystack: Iterable[N]) -> Optional[N]:
        """Find an entry in the given iterable by name."""
        for model in haystack:
            if self._matches(model):
                return model
        return None


D = TypeVar('D')


@dataclass
class Disabled(Generic[D]):
    target: D


MaybeDocumentation = Optional[ParsedDocumentation]


MaybeParsedSource = Optional[Union[
    ParsedSourceDefinition,
    Disabled[ParsedSourceDefinition],
]]


MaybeNonSource = Optional[Union[
    ManifestNode,
    Disabled[ManifestNode]
]]


T = TypeVar('T', bound=GraphMemberNode)

def _update_into(dest: MutableMapping[str, T], new_item: T):
    """Update dest to overwrite whatever is at dest[new_item.unique_id] with
    new_item. There must be an existing value to overwrite, and the two nodes
    must have the same original file path.
    """
    unique_id = new_item.unique_id
    if unique_id not in dest:
        raise dbt.exceptions.RuntimeException(
            f'got an update_{new_item.resource_type} call with an '
            f'unrecognized {new_item.resource_type}: {new_item.unique_id}'
        )
    existing = dest[unique_id]
    if new_item.original_file_path != existing.original_file_path:
        raise dbt.exceptions.RuntimeException(
            f'cannot update a {new_item.resource_type} to have a new file '
            f'path!'
        )
    dest[unique_id] = new_item

# This contains macro methods that are in both the Manifest
# and the MacroManifest
class MacroMethods:
    # Just to make mypy happy. There must be a better way.
    def __init__(self):
        self.macros = []
        self.metadata = {}

    def find_macro_by_name(
        self, name: str, root_project_name: str, package: Optional[str]
    ) -> Optional[ParsedMacro]:
        """Find a macro in the graph by its name and package name, or None for
        any package. The root project name is used to determine priority:
         - macros defined in the root project come first
         - then macros from imported packages
         - then macros defined in dbt's internal (core) packages
        """
        filter: Optional[Callable[[MacroCandidate], bool]] = None
        if package is not None:
            def filter(candidate: MacroCandidate) -> bool:
                return package == candidate.macro.package_name

        candidates: CandidateList = self._find_macros_by_name(
            name=name,
            root_project_name=root_project_name,
            filter=filter,
        )

        return candidates.last()

    def find_generate_macro_by_name(
        self, component: str, root_project_name: str
    ) -> Optional[ParsedMacro]:
        """
        The `generate_X_name` macros are similar to regular ones, but ignore
        imported packages.
         - if there is a `generate_{component}_name` macro in the root
           project, return it
         - return the `generate_{component}_name` macro from the 'dbt'
           internal project
        """
        def filter(candidate: MacroCandidate) -> bool:
            return candidate.locality != Locality.Imported

        candidates: CandidateList = self._find_macros_by_name(
            name=f'generate_{component}_name',
            root_project_name=root_project_name,
            # filter out imported packages
            filter=filter,
        )
        return candidates.last()

    def _find_macros_by_name(
        self,
        name: str,
        root_project_name: str,
        filter: Optional[Callable[[MacroCandidate], bool]] = None
    ) -> CandidateList:
        """Find macros by their name.
        """
        # avoid an import cycle
        from dbt.adapters.factory import get_adapter_package_names
        candidates: CandidateList = CandidateList()
        packages = set(get_adapter_package_names(self.metadata.adapter_type))
        for unique_id, macro in self.macros.items():
            if macro.name != name:
                continue
            candidate = MacroCandidate(
                locality=_get_locality(macro, root_project_name, packages),
                macro=macro,
            )
            if filter is None or filter(candidate):
                candidates.append(candidate)

        return candidates

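
# A minimal usage sketch (illustrative; the project and macro names here are
# hypothetical, not from the original source). If both the root project and an
# installed package define a macro named 'generate_schema_name', the root
# project's definition is returned because Locality.Root sorts highest:
#
#   macro = manifest.find_macro_by_name(
#       'generate_schema_name', root_project_name='my_project', package=None
#   )
#   # macro.package_name == 'my_project'
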
@dataclass
class ParsingInfo:
    static_analysis_parsed_path_count: int = 0
    static_analysis_path_count: int = 0


@dataclass
class ManifestStateCheck(dbtClassMixin):
    vars_hash: FileHash = field(default_factory=FileHash.empty)
    profile_hash: FileHash = field(default_factory=FileHash.empty)
    project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)

@dataclass
class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
    """The manifest for the full graph, after parsing and during compilation.
    """
    # These attributes are both positional and by keyword. If an attribute
    # is added, it must also be added to the args tuple in the __reduce_ex__
    # method, in the right position.
    nodes: MutableMapping[str, ManifestNode] = field(default_factory=dict)
    sources: MutableMapping[str, ParsedSourceDefinition] = field(default_factory=dict)
    macros: MutableMapping[str, ParsedMacro] = field(default_factory=dict)
    docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
    exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
    selectors: MutableMapping[str, Any] = field(default_factory=dict)
    disabled: List[CompileResultNode] = field(default_factory=list)
    files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
    metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
    flat_graph: Dict[str, Any] = field(default_factory=dict)
    state_check: ManifestStateCheck = field(default_factory=ManifestStateCheck)
    # Moved from the ParseResult object
    source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
    # The following holds new disabled nodes until parsing is finished. This changes in 1.0.0
    _disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
    _doc_lookup: Optional[DocLookup] = field(
        default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _source_lookup: Optional[SourceLookup] = field(
        default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _ref_lookup: Optional[RefableLookup] = field(
        default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _disabled_lookup: Optional[DisabledLookup] = field(
        default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _analysis_lookup: Optional[AnalysisLookup] = field(
        default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _parsing_info: ParsingInfo = field(
        default_factory=ParsingInfo,
        metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )
    _lock: Lock = field(
        default_factory=flags.MP_CONTEXT.Lock,
        metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
    )

    def __pre_serialize__(self):
        # serialization won't work with anything except an empty source_patches because
        # tuple keys are not supported, so ensure it's empty
        self.source_patches = {}
        return self

    @classmethod
    def __post_deserialize__(cls, obj):
        obj._lock = flags.MP_CONTEXT.Lock()
        return obj

    def sync_update_node(
        self, new_node: NonSourceCompiledNode
    ) -> NonSourceCompiledNode:
        """update the node with a lock. The only time we should want to lock is
        when compiling an ephemeral ancestor of a node at runtime, because
        multiple threads could be just-in-time compiling the same ephemeral
        dependency, and we want them to have a consistent view of the manifest.

        If the existing node is not compiled, update it with the new node and
        return that. If the existing node is compiled, do not update the
        manifest and return the existing node.
        """
        with self._lock:
            existing = self.nodes[new_node.unique_id]
            if getattr(existing, 'compiled', False):
                # already compiled -> must be a NonSourceCompiledNode
                return cast(NonSourceCompiledNode, existing)
            _update_into(self.nodes, new_node)
            return new_node

    def update_exposure(self, new_exposure: ParsedExposure):
        _update_into(self.exposures, new_exposure)

    def update_node(self, new_node: ManifestNode):
        _update_into(self.nodes, new_node)

    def update_source(self, new_source: ParsedSourceDefinition):
        _update_into(self.sources, new_source)

    def build_flat_graph(self):
        """This attribute is used in context.common by each node, so we want to
        only build it once and avoid any concurrency issues around it.
        Make sure you don't call this until you're done with building your
        manifest!
        """
        self.flat_graph = {
            'exposures': {
                k: v.to_dict(omit_none=False)
                for k, v in self.exposures.items()
            },
            'nodes': {
                k: v.to_dict(omit_none=False)
                for k, v in self.nodes.items()
            },
            'sources': {
                k: v.to_dict(omit_none=False)
                for k, v in self.sources.items()
            }
        }

    def build_disabled_by_file_id(self):
        disabled_by_file_id = {}
        for node in self.disabled:
            disabled_by_file_id[node.file_id] = node
        for node_list in self._disabled.values():
            for node in node_list:
                disabled_by_file_id[node.file_id] = node
        return disabled_by_file_id

    def find_disabled_by_name(
        self, name: str, package: Optional[str] = None
    ) -> Optional[ManifestNode]:
        searcher: NameSearcher = NameSearcher(
            name, package, NodeType.refable()
        )
        result = searcher.search(self.disabled)
        return result

    def find_disabled_source_by_name(
        self, source_name: str, table_name: str, package: Optional[str] = None
    ) -> Optional[ParsedSourceDefinition]:
        search_name = f'{source_name}.{table_name}'
        searcher: NameSearcher = NameSearcher(
            search_name, package, [NodeType.Source]
        )
        result = searcher.search(self.disabled)
        if result is not None:
            assert isinstance(result, ParsedSourceDefinition)
        return result

    def _materialization_candidates_for(
        self, project_name: str,
        materialization_name: str,
        adapter_type: Optional[str],
    ) -> CandidateList:

        if adapter_type is None:
            specificity = Specificity.Default
        else:
            specificity = Specificity.Adapter

        full_name = dbt.utils.get_materialization_macro_name(
            materialization_name=materialization_name,
            adapter_type=adapter_type,
            with_prefix=False,
        )
        return CandidateList(
            MaterializationCandidate.from_macro(m, specificity)
            for m in self._find_macros_by_name(full_name, project_name)
        )

    def find_materialization_macro_by_name(
        self, project_name: str, materialization_name: str, adapter_type: str
    ) -> Optional[ParsedMacro]:
        candidates: CandidateList = CandidateList(chain.from_iterable(
            self._materialization_candidates_for(
                project_name=project_name,
                materialization_name=materialization_name,
                adapter_type=atype,
            ) for atype in (adapter_type, None)
        ))
        return candidates.last()

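
    # How materialization lookup resolves, sketched for illustration (the
    # exact macro naming is delegated to dbt.utils.get_materialization_macro_name
    # and is not spelled out here): candidates are gathered for both the
    # concrete adapter_type and the adapter-agnostic default, tagged with
    # Specificity.Adapter or Specificity.Default respectively, and
    # CandidateList.last() then prefers the adapter-specific macro, breaking
    # ties by Locality (root project > imported package > dbt core).
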
    def get_resource_fqns(self) -> Mapping[str, PathSet]:
        resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
        all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
        for resource in all_resources:
            resource_type_plural = resource.resource_type.pluralize()
            if resource_type_plural not in resource_fqns:
                resource_fqns[resource_type_plural] = set()
            resource_fqns[resource_type_plural].add(tuple(resource.fqn))
        return resource_fqns

    def get_used_schemas(self, resource_types=None):
        return frozenset({
            (node.database, node.schema) for node in
            chain(self.nodes.values(), self.sources.values())
            if not resource_types or node.resource_type in resource_types
        })

    def get_used_databases(self):
        return frozenset(
            x.database for x in
            chain(self.nodes.values(), self.sources.values())
        )

    # This is used in dbt.task.rpc.sql_commands 'add_new_refs'
    def deepcopy(self):
        return Manifest(
            nodes={k: _deepcopy(v) for k, v in self.nodes.items()},
            sources={k: _deepcopy(v) for k, v in self.sources.items()},
            macros={k: _deepcopy(v) for k, v in self.macros.items()},
            docs={k: _deepcopy(v) for k, v in self.docs.items()},
            exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
            selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
            metadata=self.metadata,
            disabled=[_deepcopy(n) for n in self.disabled],
            files={k: _deepcopy(v) for k, v in self.files.items()},
            state_check=_deepcopy(self.state_check),
        )

    def build_parent_and_child_maps(self):
        edge_members = list(chain(
            self.nodes.values(),
            self.sources.values(),
            self.exposures.values(),
        ))
        forward_edges, backward_edges = build_node_edges(edge_members)
        self.child_map = forward_edges
        self.parent_map = backward_edges

    def build_macro_child_map(self):
        edge_members = list(chain(
            self.nodes.values(),
            self.macros.values(),
        ))
        forward_edges = build_macro_edges(edge_members)
        return forward_edges

    def writable_manifest(self):
        self.build_parent_and_child_maps()
        return WritableManifest(
            nodes=self.nodes,
            sources=self.sources,
            macros=self.macros,
            docs=self.docs,
            exposures=self.exposures,
            selectors=self.selectors,
            metadata=self.metadata,
            disabled=self.disabled,
            child_map=self.child_map,
            parent_map=self.parent_map,
        )

    def write(self, path):
        self.writable_manifest().write(path)

    # Called in dbt.compilation.Linker.write_graph and
    # dbt.graph.queue.get and ._include_in_cost
    def expect(self, unique_id: str) -> GraphMemberNode:
        if unique_id in self.nodes:
            return self.nodes[unique_id]
        elif unique_id in self.sources:
            return self.sources[unique_id]
        elif unique_id in self.exposures:
            return self.exposures[unique_id]
        else:
            # something terrible has happened
            raise dbt.exceptions.InternalException(
                'Expected node {} not found in manifest'.format(unique_id)
            )

    @property
    def doc_lookup(self) -> DocLookup:
        if self._doc_lookup is None:
            self._doc_lookup = DocLookup(self)
        return self._doc_lookup

    def rebuild_doc_lookup(self):
        self._doc_lookup = DocLookup(self)

    @property
    def source_lookup(self) -> SourceLookup:
        if self._source_lookup is None:
            self._source_lookup = SourceLookup(self)
        return self._source_lookup

    def rebuild_source_lookup(self):
        self._source_lookup = SourceLookup(self)

    @property
    def ref_lookup(self) -> RefableLookup:
        if self._ref_lookup is None:
            self._ref_lookup = RefableLookup(self)
        return self._ref_lookup

    def rebuild_ref_lookup(self):
        self._ref_lookup = RefableLookup(self)

    @property
    def disabled_lookup(self) -> DisabledLookup:
        if self._disabled_lookup is None:
            self._disabled_lookup = DisabledLookup(self)
        return self._disabled_lookup

    def rebuild_disabled_lookup(self):
        self._disabled_lookup = DisabledLookup(self)

    @property
    def analysis_lookup(self) -> AnalysisLookup:
        if self._analysis_lookup is None:
            self._analysis_lookup = AnalysisLookup(self)
        return self._analysis_lookup

    # Called by dbt.parser.manifest._resolve_refs_for_exposure
    # and dbt.parser.manifest._process_refs_for_node
    def resolve_ref(
        self,
        target_model_name: str,
        target_model_package: Optional[str],
        current_project: str,
        node_package: str,
    ) -> MaybeNonSource:

        node: Optional[ManifestNode] = None
        disabled: Optional[ManifestNode] = None

        candidates = _search_packages(
            current_project, node_package, target_model_package
        )
        for pkg in candidates:
            node = self.ref_lookup.find(target_model_name, pkg, self)

            if node is not None and node.config.enabled:
                return node

            # it's possible that the node is disabled
            if disabled is None:
                disabled = self.find_disabled_by_name(
                    target_model_name, pkg
                )

        if disabled is not None:
            return Disabled(disabled)
        return None

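
    # Usage sketch (illustrative only; 'my_project' and 'orders' are
    # hypothetical names, not from the original source). Resolving
    # {{ ref('orders') }} from a node in the root project searches the root
    # project first and then any package:
    #
    #   result = manifest.resolve_ref('orders', None, 'my_project', 'my_project')
    #   # -> the enabled ManifestNode, a Disabled(...) wrapper if every match
    #   #    is disabled, or None if nothing matches.
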
    # Called by dbt.parser.manifest._resolve_sources_for_exposure
    # and dbt.parser.manifest._process_source_for_node
    def resolve_source(
        self,
        target_source_name: str,
        target_table_name: str,
        current_project: str,
        node_package: str
    ) -> MaybeParsedSource:
        key = (target_source_name, target_table_name)
        candidates = _search_packages(current_project, node_package)

        source: Optional[ParsedSourceDefinition] = None
        disabled: Optional[ParsedSourceDefinition] = None

        for pkg in candidates:
            source = self.source_lookup.find(key, pkg, self)
            if source is not None and source.config.enabled:
                return source

            if disabled is None:
                disabled = self.find_disabled_source_by_name(
                    target_source_name, target_table_name, pkg
                )

        if disabled is not None:
            return Disabled(disabled)
        return None

    # Called by DocsRuntimeContext.doc
    def resolve_doc(
        self,
        name: str,
        package: Optional[str],
        current_project: str,
        node_package: str,
    ) -> Optional[ParsedDocumentation]:
        """Resolve the given documentation. This follows the same algorithm as
        resolve_ref except the is_enabled checks are unnecessary as docs are
        always enabled.
        """
        candidates = _search_packages(
            current_project, node_package, package
        )

        for pkg in candidates:
            result = self.doc_lookup.find(name, pkg, self)
            if result is not None:
                return result
        return None

    # Called by RunTask.defer_to_manifest
    def merge_from_artifact(
        self,
        adapter,
        other: 'WritableManifest',
        selected: AbstractSet[UniqueID],
    ) -> None:
        """Given the selected unique IDs and a writable manifest, update this
        manifest by replacing any unselected nodes with their counterpart.

        Only non-ephemeral refable nodes are examined.
        """
        refables = set(NodeType.refable())
        merged = set()
        for unique_id, node in other.nodes.items():
            current = self.nodes.get(unique_id)
            if current and (
                node.resource_type in refables and
                not node.is_ephemeral and
                unique_id not in selected and
                not adapter.get_relation(
                    current.database, current.schema, current.identifier
                )
            ):
                merged.add(unique_id)
                self.nodes[unique_id] = node.replace(deferred=True)

        # log up to 5 items
        sample = list(islice(merged, 5))
        logger.debug(
            f'Merged {len(merged)} items from state (sample: {sample})'
        )

    # Methods that were formerly in ParseResult

    def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
        if macro.unique_id in self.macros:
            # detect that the macro exists and emit an error
            other_path = self.macros[macro.unique_id].original_file_path
            # subtract 2 for the "Compilation Error" indent
            # note that the line wrap eats newlines, so if you want newlines,
            # this is the result :(
            msg = line_wrap_message(
                f'''\
                dbt found two macros named "{macro.name}" in the project
                "{macro.package_name}".


                To fix this error, rename or remove one of the following
                macros:

                    - {macro.original_file_path}

                    - {other_path}
                ''',
                subtract=2
            )
            raise_compiler_error(msg)

        self.macros[macro.unique_id] = macro
        source_file.macros.append(macro.unique_id)

    def has_file(self, source_file: SourceFile) -> bool:
        key = source_file.file_id
        if key is None:
            return False
        if key not in self.files:
            return False
        my_checksum = self.files[key].checksum
        return my_checksum == source_file.checksum

    def add_source(
        self, source_file: SchemaSourceFile, source: UnpatchedSourceDefinition
    ):
        # sources can't be overwritten!
        _check_duplicates(source, self.sources)
        self.sources[source.unique_id] = source  # type: ignore
        source_file.sources.append(source.unique_id)

    def add_node_nofile(self, node: ManifestNodes):
        # nodes can't be overwritten!
        _check_duplicates(node, self.nodes)
        self.nodes[node.unique_id] = node

    def add_node(self, source_file: AnySourceFile, node: ManifestNodes, test_from=None):
        self.add_node_nofile(node)
        if isinstance(source_file, SchemaSourceFile):
            assert test_from
            source_file.add_test(node.unique_id, test_from)
        else:
            source_file.nodes.append(node.unique_id)

    def add_exposure(self, source_file: SchemaSourceFile, exposure: ParsedExposure):
        _check_duplicates(exposure, self.exposures)
        self.exposures[exposure.unique_id] = exposure
        source_file.exposures.append(exposure.unique_id)

    def add_disabled_nofile(self, node: CompileResultNode):
        if node.unique_id in self._disabled:
            self._disabled[node.unique_id].append(node)
        else:
            self._disabled[node.unique_id] = [node]

    def add_disabled(self, source_file: AnySourceFile, node: CompileResultNode, test_from=None):
        self.add_disabled_nofile(node)
        if isinstance(source_file, SchemaSourceFile):
            assert test_from
            source_file.add_test(node.unique_id, test_from)
        else:
            source_file.nodes.append(node.unique_id)

    def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
        _check_duplicates(doc, self.docs)
        self.docs[doc.unique_id] = doc
        source_file.docs.append(doc.unique_id)

    # end of methods formerly in ParseResult

    # Provide support for copy.deepcopy() - we just need to avoid the lock!
    # pickle and deepcopy use this. It returns a callable object used to
    # create the initial version of the object and a tuple of arguments
    # for the object, i.e. the Manifest.
    # The order of the arguments must match the order of the attributes
    # in the Manifest class declaration, because they are used as
    # positional arguments to construct a Manifest.
    def __reduce_ex__(self, protocol):
        args = (
            self.nodes,
            self.sources,
            self.macros,
            self.docs,
            self.exposures,
            self.selectors,
            self.disabled,
            self.files,
            self.metadata,
            self.flat_graph,
            self.state_check,
            self.source_patches,
            self._disabled,
            self._doc_lookup,
            self._source_lookup,
            self._ref_lookup,
            self._disabled_lookup,
            self._analysis_lookup
        )
        return self.__class__, args

class MacroManifest(MacroMethods):
    def __init__(self, macros):
        self.macros = macros
        self.metadata = ManifestMetadata()
        # This is returned by the 'graph' context property
        # in the ProviderContext class.
        self.flat_graph = {}


AnyManifest = Union[Manifest, MacroManifest]


@dataclass
@schema_version('manifest', 3)
class WritableManifest(ArtifactMixin):
    nodes: Mapping[UniqueID, ManifestNode] = field(
        metadata=dict(description=(
            'The nodes defined in the dbt project and its dependencies'
        ))
    )
    sources: Mapping[UniqueID, ParsedSourceDefinition] = field(
        metadata=dict(description=(
            'The sources defined in the dbt project and its dependencies'
        ))
    )
    macros: Mapping[UniqueID, ParsedMacro] = field(
        metadata=dict(description=(
            'The macros defined in the dbt project and its dependencies'
        ))
    )
    docs: Mapping[UniqueID, ParsedDocumentation] = field(
        metadata=dict(description=(
            'The docs defined in the dbt project and its dependencies'
        ))
    )
    exposures: Mapping[UniqueID, ParsedExposure] = field(
        metadata=dict(description=(
            'The exposures defined in the dbt project and its dependencies'
        ))
    )
    selectors: Mapping[UniqueID, Any] = field(
        metadata=dict(description=(
            'The selectors defined in selectors.yml'
        ))
    )
    disabled: Optional[List[CompileResultNode]] = field(metadata=dict(
        description='A list of the disabled nodes in the target'
    ))
    parent_map: Optional[NodeEdgeMap] = field(metadata=dict(
        description='A mapping from child nodes to their dependencies',
    ))
    child_map: Optional[NodeEdgeMap] = field(metadata=dict(
        description='A mapping from parent nodes to their dependents',
    ))
    metadata: ManifestMetadata = field(metadata=dict(
        description='Metadata about the manifest',
    ))


def _check_duplicates(
    value: HasUniqueID, src: Mapping[str, HasUniqueID]
):
    if value.unique_id in src:
        raise_duplicate_resource_name(value, src[value.unique_id])


K_T = TypeVar('K_T')
V_T = TypeVar('V_T')


def _expect_value(
    key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
) -> V_T:
    if key not in src:
        raise CompilationException(
            'Expected to find "{}" in cached "result.{}" based '
            'on cached file information: {}!'
            .format(key, name, old_file)
        )
    return src[key]