dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/graph/selector_methods.py

551 lines
19 KiB
Python

import abc
from itertools import chain
from pathlib import Path
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional
from dbt.dataclass_schema import StrEnum
from .graph import UniqueId
from dbt.contracts.graph.compiled import (
CompiledDataTestNode,
CompiledSchemaTestNode,
CompileResultNode,
ManifestNode,
)
from dbt.contracts.graph.manifest import Manifest, WritableManifest
from dbt.contracts.graph.parsed import (
HasTestMetadata,
ParsedDataTestNode,
ParsedExposure,
ParsedSchemaTestNode,
ParsedSourceDefinition,
)
from dbt.contracts.state import PreviousState
from dbt.exceptions import (
InternalException,
RuntimeException,
)
from dbt.node_types import NodeType
SELECTOR_GLOB = '*'
SELECTOR_DELIMITER = ':'
class MethodName(StrEnum):
FQN = 'fqn'
Tag = 'tag'
Source = 'source'
Path = 'path'
Package = 'package'
Config = 'config'
TestName = 'test_name'
TestType = 'test_type'
ResourceType = 'resource_type'
State = 'state'
Exposure = 'exposure'
def is_selected_node(fqn: List[str], node_selector: str):
# If qualified_name exactly matches model name (fqn's leaf), return True
if fqn[-1] == node_selector:
return True
# Flatten node parts. Dots in model names act as namespace separators
flat_fqn = [item for segment in fqn for item in segment.split('.')]
# Selector components cannot be more than fqn's
if len(flat_fqn) < len(node_selector.split('.')):
return False
for i, selector_part in enumerate(node_selector.split('.')):
# if we hit a GLOB, then this node is selected
if selector_part == SELECTOR_GLOB:
return True
elif flat_fqn[i] == selector_part:
continue
else:
return False
# if we get all the way down here, then the node is a match
return True
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure]
class SelectorMethod(metaclass=abc.ABCMeta):
def __init__(
self,
manifest: Manifest,
previous_state: Optional[PreviousState],
arguments: List[str]
):
self.manifest: Manifest = manifest
self.previous_state = previous_state
self.arguments: List[str] = arguments
def parsed_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ManifestNode]]:
for key, node in self.manifest.nodes.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, node
def source_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ParsedSourceDefinition]]:
for key, source in self.manifest.sources.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, source
def exposure_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ParsedExposure]]:
for key, exposure in self.manifest.exposures.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, exposure
def all_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
yield from chain(self.parsed_nodes(included_nodes),
self.source_nodes(included_nodes),
self.exposure_nodes(included_nodes))
def configurable_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, CompileResultNode]]:
yield from chain(self.parsed_nodes(included_nodes),
self.source_nodes(included_nodes))
def non_source_nodes(
self,
included_nodes: Set[UniqueId],
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode]]]:
yield from chain(self.parsed_nodes(included_nodes),
self.exposure_nodes(included_nodes))
@abc.abstractmethod
def search(
self,
included_nodes: Set[UniqueId],
selector: str,
) -> Iterator[UniqueId]:
raise NotImplementedError('subclasses should implement this')
class QualifiedNameSelectorMethod(SelectorMethod):
def node_is_match(self, qualified_name: str, fqn: List[str]) -> bool:
"""Determine if a qualified name matches an fqn for all package
names in the graph.
:param str qualified_name: The qualified name to match the nodes with
:param List[str] fqn: The node's fully qualified name in the graph.
"""
unscoped_fqn = fqn[1:]
if is_selected_node(fqn, qualified_name):
return True
# Match nodes across different packages
elif is_selected_node(unscoped_fqn, qualified_name):
return True
return False
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
"""Yield all nodes in the graph that match the selector.
:param str selector: The selector or node name
"""
parsed_nodes = list(self.parsed_nodes(included_nodes))
for node, real_node in parsed_nodes:
if self.node_is_match(selector, real_node.fqn):
yield node
class TagSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
""" yields nodes from included that have the specified tag """
for node, real_node in self.all_nodes(included_nodes):
if selector in real_node.tags:
yield node
class SourceSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
"""yields nodes from included are the specified source."""
parts = selector.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_source, target_table = parts[0], None
elif len(parts) == 2:
target_source, target_table = parts
elif len(parts) == 3:
target_package, target_source, target_table = parts
else: # len(parts) > 3 or len(parts) == 0
msg = (
'Invalid source selector value "{}". Sources must be of the '
'form `${{source_name}}`, '
'`${{source_name}}.${{target_name}}`, or '
'`${{package_name}}.${{source_name}}.${{target_name}}'
).format(selector)
raise RuntimeException(msg)
for node, real_node in self.source_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_source not in (real_node.source_name, SELECTOR_GLOB):
continue
if target_table not in (None, real_node.name, SELECTOR_GLOB):
continue
yield node
class ExposureSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
parts = selector.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_name = parts[0]
elif len(parts) == 2:
target_package, target_name = parts
else:
msg = (
'Invalid exposure selector value "{}". Exposures must be of '
'the form ${{exposure_name}} or '
'${{exposure_package.exposure_name}}'
).format(selector)
raise RuntimeException(msg)
for node, real_node in self.exposure_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_name not in (real_node.name, SELECTOR_GLOB):
continue
yield node
class PathSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
"""Yields nodes from inclucded that match the given path.
"""
# use '.' and not 'root' for easy comparison
root = Path.cwd()
paths = set(p.relative_to(root) for p in root.glob(selector))
for node, real_node in self.all_nodes(included_nodes):
if Path(real_node.root_path) != root:
continue
ofp = Path(real_node.original_file_path)
if ofp in paths:
yield node
elif any(parent in paths for parent in ofp.parents):
yield node
class PackageSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
"""Yields nodes from included that have the specified package"""
for node, real_node in self.all_nodes(included_nodes):
if real_node.package_name == selector:
yield node
def _getattr_descend(obj: Any, attrs: List[str]) -> Any:
value = obj
for attr in attrs:
try:
value = getattr(value, attr)
except AttributeError:
# if it implements getitem (dict, list, ...), use that. On failure,
# raise an attribute error instead of the KeyError, TypeError, etc.
# that arbitrary getitem calls might raise
try:
value = value[attr]
except Exception as exc:
raise AttributeError(
f"'{type(value)}' object has no attribute '{attr}'"
) from exc
return value
class CaseInsensitive(str):
def __eq__(self, other):
if isinstance(other, str):
return self.upper() == other.upper()
else:
return self.upper() == other
class ConfigSelectorMethod(SelectorMethod):
def search(
self,
included_nodes: Set[UniqueId],
selector: Any,
) -> Iterator[UniqueId]:
parts = self.arguments
# special case: if the user wanted to compare test severity,
# make the comparison case-insensitive
if parts == ['severity']:
selector = CaseInsensitive(selector)
# search sources is kind of useless now source configs only have
# 'enabled', which you can't really filter on anyway, but maybe we'll
# add more someday, so search them anyway.
for node, real_node in self.configurable_nodes(included_nodes):
try:
value = _getattr_descend(real_node.config, parts)
except AttributeError:
continue
else:
if selector == value:
yield node
class ResourceTypeSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
try:
resource_type = NodeType(selector)
except ValueError as exc:
raise RuntimeException(
f'Invalid resource_type selector "{selector}"'
) from exc
for node, real_node in self.parsed_nodes(included_nodes):
if real_node.resource_type == resource_type:
yield node
class TestNameSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
for node, real_node in self.parsed_nodes(included_nodes):
if isinstance(real_node, HasTestMetadata):
if real_node.test_metadata.name == selector:
yield node
class TestTypeSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
search_types: Tuple[Type, ...]
if selector == 'schema':
search_types = (ParsedSchemaTestNode, CompiledSchemaTestNode)
elif selector == 'data':
search_types = (ParsedDataTestNode, CompiledDataTestNode)
else:
raise RuntimeException(
f'Invalid test type selector {selector}: expected "data" or '
'"schema"'
)
for node, real_node in self.parsed_nodes(included_nodes):
if isinstance(real_node, search_types):
yield node
class StateSelectorMethod(SelectorMethod):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.modified_macros: Optional[List[str]] = None
def _macros_modified(self) -> List[str]:
# we checked in the caller!
if self.previous_state is None or self.previous_state.manifest is None:
raise InternalException(
'No comparison manifest in _macros_modified'
)
old_macros = self.previous_state.manifest.macros
new_macros = self.manifest.macros
modified = []
for uid, macro in new_macros.items():
if uid in old_macros:
old_macro = old_macros[uid]
if macro.macro_sql != old_macro.macro_sql:
modified.append(uid)
else:
modified.append(uid)
for uid, macro in old_macros.items():
if uid not in new_macros:
modified.append(uid)
return modified
def recursively_check_macros_modified(self, node, previous_macros):
# loop through all macros that this node depends on
for macro_uid in node.depends_on.macros:
# avoid infinite recursion if we've already seen this macro
if macro_uid in previous_macros:
continue
previous_macros.append(macro_uid)
# is this macro one of the modified macros?
if macro_uid in self.modified_macros:
return True
# if not, and this macro depends on other macros, keep looping
macro_node = self.manifest.macros[macro_uid]
if len(macro_node.depends_on.macros) > 0:
return self.recursively_check_macros_modified(macro_node, previous_macros)
else:
return False
def check_macros_modified(self, node):
# check if there are any changes in macros the first time
if self.modified_macros is None:
self.modified_macros = self._macros_modified()
# no macros have been modified, skip looping entirely
if not self.modified_macros:
return False
# recursively loop through upstream macros to see if any is modified
else:
previous_macros = []
return self.recursively_check_macros_modified(node, previous_macros)
def check_modified(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
different_contents = not new.same_contents(old) # type: ignore
upstream_macro_change = self.check_macros_modified(new)
return different_contents or upstream_macro_change
def check_modified_body(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if hasattr(new, "same_body"):
return not new.same_body(old) # type: ignore
else:
return False
def check_modified_configs(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if hasattr(new, "same_config"):
return not new.same_config(old) # type: ignore
else:
return False
def check_modified_persisted_descriptions(
self, old: Optional[SelectorTarget], new: SelectorTarget
) -> bool:
if hasattr(new, "same_persisted_description"):
return not new.same_persisted_description(old) # type: ignore
else:
return False
def check_modified_relation(
self, old: Optional[SelectorTarget], new: SelectorTarget
) -> bool:
if hasattr(new, "same_database_representation"):
return not new.same_database_representation(old) # type: ignore
else:
return False
def check_modified_macros(self, _, new: SelectorTarget) -> bool:
return self.check_macros_modified(new)
def check_new(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
return old is None
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
if self.previous_state is None or self.previous_state.manifest is None:
raise RuntimeException(
'Got a state selector method, but no comparison manifest'
)
state_checks = {
# it's new if there is no old version
'new': lambda old, _: old is None,
# use methods defined above to compare properties of old + new
'modified': self.check_modified,
'modified.body': self.check_modified_body,
'modified.configs': self.check_modified_configs,
'modified.persisted_descriptions': self.check_modified_persisted_descriptions,
'modified.relation': self.check_modified_relation,
'modified.macros': self.check_modified_macros,
}
if selector in state_checks:
checker = state_checks[selector]
else:
raise RuntimeException(
f'Got an invalid selector "{selector}", expected one of '
f'"{list(state_checks)}"'
)
manifest: WritableManifest = self.previous_state.manifest
for node, real_node in self.all_nodes(included_nodes):
previous_node: Optional[SelectorTarget] = None
if node in manifest.nodes:
previous_node = manifest.nodes[node]
elif node in manifest.sources:
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
if checker(previous_node, real_node):
yield node
class MethodManager:
SELECTOR_METHODS: Dict[MethodName, Type[SelectorMethod]] = {
MethodName.FQN: QualifiedNameSelectorMethod,
MethodName.Tag: TagSelectorMethod,
MethodName.Source: SourceSelectorMethod,
MethodName.Path: PathSelectorMethod,
MethodName.Package: PackageSelectorMethod,
MethodName.Config: ConfigSelectorMethod,
MethodName.TestName: TestNameSelectorMethod,
MethodName.TestType: TestTypeSelectorMethod,
MethodName.State: StateSelectorMethod,
MethodName.Exposure: ExposureSelectorMethod,
}
def __init__(
self,
manifest: Manifest,
previous_state: Optional[PreviousState],
):
self.manifest = manifest
self.previous_state = previous_state
def get_method(
self, method: MethodName, method_arguments: List[str]
) -> SelectorMethod:
if method not in self.SELECTOR_METHODS:
raise InternalException(
f'Method name "{method}" is a valid node selection '
f'method name, but it is not handled'
)
cls: Type[SelectorMethod] = self.SELECTOR_METHODS[method]
return cls(self.manifest, self.previous_state, method_arguments)