# dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/graph/selector_methods.py

import abc
from itertools import chain
from pathlib import Path
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional

from dbt.dataclass_schema import StrEnum

from .graph import UniqueId

from dbt.contracts.graph.compiled import (
    CompiledDataTestNode,
    CompiledSchemaTestNode,
    CompileResultNode,
    ManifestNode,
)
from dbt.contracts.graph.manifest import Manifest, WritableManifest
from dbt.contracts.graph.parsed import (
    HasTestMetadata,
    ParsedDataTestNode,
    ParsedExposure,
    ParsedSchemaTestNode,
    ParsedSourceDefinition,
)
from dbt.contracts.state import PreviousState
from dbt.exceptions import (
    InternalException,
    RuntimeException,
)
from dbt.node_types import NodeType


# Wildcard token: a selector component equal to this value matches anything
# (see is_selected_node and the source/exposure selector methods).
SELECTOR_GLOB = '*'
# NOTE(review): not referenced in the visible part of this module; presumably
# the separator used when parsing selector specs elsewhere -- confirm.
SELECTOR_DELIMITER = ':'


class MethodName(StrEnum):
    """Identifiers for node selection methods.

    Each member's value is the string spelling of the method. MethodManager
    uses these members as keys when looking up the SelectorMethod
    implementation to instantiate.
    """
    FQN = 'fqn'
    Tag = 'tag'
    Source = 'source'
    Path = 'path'
    Package = 'package'
    Config = 'config'
    TestName = 'test_name'
    TestType = 'test_type'
    ResourceType = 'resource_type'
    State = 'state'
    Exposure = 'exposure'


def is_selected_node(fqn: List[str], node_selector: str):
    """Return True when ``node_selector`` selects the node named by ``fqn``.

    A node is selected when either:

    * the selector is exactly equal to the last element of ``fqn``, or
    * the dot-separated selector components match the flattened fqn
      component by component from the left. A glob component
      (``SELECTOR_GLOB``) selects the node as soon as it is reached, provided
      every component before it matched.

    :param List[str] fqn: The node's fully qualified name. An empty list
        never matches.
    :param str node_selector: The selector, optionally dot-separated.
    :return: True if the node is selected, False otherwise.
    """
    # An empty fqn names nothing. Bail out before indexing fqn[-1], which
    # would otherwise raise IndexError (callers may pass fqn[1:] of a
    # single-element fqn).
    if not fqn:
        return False

    # If the selector exactly matches the model name (fqn's leaf), it matches
    if fqn[-1] == node_selector:
        return True

    # Flatten node parts. Dots in model names act as namespace separators
    flat_fqn = [item for segment in fqn for item in segment.split('.')]
    selector_parts = node_selector.split('.')

    # The selector cannot have more components than the fqn
    if len(flat_fqn) < len(selector_parts):
        return False

    # zip() is safe here: flat_fqn is at least as long as selector_parts.
    for fqn_part, selector_part in zip(flat_fqn, selector_parts):
        # if we hit a glob, then this node is selected
        if selector_part == SELECTOR_GLOB:
            return True
        if fqn_part != selector_part:
            return False

    # every selector component matched its fqn counterpart
    return True


# Any object a selector method may yield a match for: a source definition,
# a manifest node, or an exposure.
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure]


class SelectorMethod(metaclass=abc.ABCMeta):
    """Abstract base for node selection methods.

    Holds the manifest being searched, an optional previous state, and the
    method arguments, and offers generator helpers that walk the different
    manifest collections restricted to a set of included unique ids.
    Subclasses implement :meth:`search`.
    """

    def __init__(
        self,
        manifest: Manifest,
        previous_state: Optional[PreviousState],
        arguments: List[str]
    ):
        self.manifest: Manifest = manifest
        self.previous_state = previous_state
        self.arguments: List[str] = arguments

    @staticmethod
    def _included_items(entries, included_nodes):
        """Yield ``(unique_id, entry)`` for entries whose id is included."""
        for raw_key, entry in entries.items():
            uid = UniqueId(raw_key)
            if uid in included_nodes:
                yield uid, entry

    def parsed_nodes(
        self,
        included_nodes: Set[UniqueId]
    ) -> Iterator[Tuple[UniqueId, ManifestNode]]:
        """Yield the included entries of ``manifest.nodes``."""
        yield from self._included_items(self.manifest.nodes, included_nodes)

    def source_nodes(
        self,
        included_nodes: Set[UniqueId]
    ) -> Iterator[Tuple[UniqueId, ParsedSourceDefinition]]:
        """Yield the included entries of ``manifest.sources``."""
        yield from self._included_items(self.manifest.sources, included_nodes)

    def exposure_nodes(
        self,
        included_nodes: Set[UniqueId]
    ) -> Iterator[Tuple[UniqueId, ParsedExposure]]:
        """Yield the included entries of ``manifest.exposures``."""
        yield from self._included_items(
            self.manifest.exposures, included_nodes
        )

    def all_nodes(
        self,
        included_nodes: Set[UniqueId]
    ) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
        """Yield included nodes, then sources, then exposures."""
        yield from self.parsed_nodes(included_nodes)
        yield from self.source_nodes(included_nodes)
        yield from self.exposure_nodes(included_nodes)

    def configurable_nodes(
        self,
        included_nodes: Set[UniqueId]
    ) -> Iterator[Tuple[UniqueId, CompileResultNode]]:
        """Yield included nodes, then sources (exposures are skipped)."""
        yield from self.parsed_nodes(included_nodes)
        yield from self.source_nodes(included_nodes)

    def non_source_nodes(
        self,
        included_nodes: Set[UniqueId],
    ) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode]]]:
        """Yield included nodes, then exposures (sources are skipped)."""
        yield from self.parsed_nodes(included_nodes)
        yield from self.exposure_nodes(included_nodes)

    @abc.abstractmethod
    def search(
        self,
        included_nodes: Set[UniqueId],
        selector: str,
    ) -> Iterator[UniqueId]:
        """Yield the unique ids among ``included_nodes`` matching ``selector``.

        Must be overridden by every concrete selector method.
        """
        raise NotImplementedError('subclasses should implement this')


class QualifiedNameSelectorMethod(SelectorMethod):
    """Select parsed nodes by (possibly partial or globbed) qualified name."""

    def node_is_match(self, qualified_name: str, fqn: List[str]) -> bool:
        """Tell whether a qualified name selects the node with this fqn.

        The name is tried against the full fqn first and then against the
        fqn with its first element dropped, so that a selector can match
        nodes across different packages.

        :param str qualified_name: The qualified name to match the nodes with
        :param List[str] fqn: The node's fully qualified name in the graph.
        """
        return (
            is_selected_node(fqn, qualified_name)
            or is_selected_node(fqn[1:], qualified_name)
        )

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield every included parsed node whose fqn matches the selector.

        :param str selector: The selector or node name
        """
        # Snapshot the candidates before matching, as the original code does.
        candidates = list(self.parsed_nodes(included_nodes))
        yield from (
            unique_id
            for unique_id, candidate in candidates
            if self.node_is_match(selector, candidate.fqn)
        )


class TagSelectorMethod(SelectorMethod):
    """Select nodes, sources and exposures by tag."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included targets whose ``tags`` contain ``selector``."""
        yield from (
            unique_id
            for unique_id, target in self.all_nodes(included_nodes)
            if selector in target.tags
        )


class SourceSelectorMethod(SelectorMethod):
    """Select sources by ``source``, ``source.table`` or
    ``package.source.table``; any component may be the glob token.
    """

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included sources that match the source selector.

        :param str selector: One to three dot-separated components.
        :raises RuntimeException: if the selector does not have one, two or
            three components.
        """
        segments = selector.split('.')
        segment_count = len(segments)

        # Defaults for the components the selector did not spell out: any
        # package, and no table restriction.
        wanted_package = SELECTOR_GLOB
        wanted_table = None

        if segment_count == 3:
            wanted_package, wanted_source, wanted_table = segments
        elif segment_count == 2:
            wanted_source, wanted_table = segments
        elif segment_count == 1:
            (wanted_source,) = segments
        else:
            msg = (
                'Invalid source selector value "{}". Sources must be of the '
                'form `${{source_name}}`, '
                '`${{source_name}}.${{target_name}}`, or '
                '`${{package_name}}.${{source_name}}.${{target_name}}'
            ).format(selector)
            raise RuntimeException(msg)

        for unique_id, source in self.source_nodes(included_nodes):
            # Each wanted component must equal the source's value or be the
            # glob; a table of None means "no restriction".
            if (
                wanted_package in (source.package_name, SELECTOR_GLOB)
                and wanted_source in (source.source_name, SELECTOR_GLOB)
                and wanted_table in (None, source.name, SELECTOR_GLOB)
            ):
                yield unique_id


class ExposureSelectorMethod(SelectorMethod):
    """Select exposures by ``name`` or ``package.name``; either component
    may be the glob token.
    """

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included exposures that match the exposure selector.

        :param str selector: One or two dot-separated components.
        :raises RuntimeException: if the selector does not have one or two
            components.
        """
        segments = selector.split('.')
        segment_count = len(segments)

        # Without an explicit package, exposures from any package qualify.
        wanted_package = SELECTOR_GLOB

        if segment_count == 2:
            wanted_package, wanted_name = segments
        elif segment_count == 1:
            (wanted_name,) = segments
        else:
            msg = (
                'Invalid exposure selector value "{}". Exposures must be of '
                'the form ${{exposure_name}} or '
                '${{exposure_package.exposure_name}}'
            ).format(selector)
            raise RuntimeException(msg)

        for unique_id, exposure in self.exposure_nodes(included_nodes):
            if (
                wanted_package in (exposure.package_name, SELECTOR_GLOB)
                and wanted_name in (exposure.name, SELECTOR_GLOB)
            ):
                yield unique_id


class PathSelectorMethod(SelectorMethod):
    """Select nodes by the file path (or glob pattern) they were defined in."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield nodes from included that match the given path.

        The selector is expanded as a glob pattern under the current working
        directory. A target matches when its ``original_file_path``, or any
        parent directory of it, is among the expanded paths. Targets whose
        ``root_path`` is not the current working directory are ignored.
        """
        cwd = Path.cwd()
        # Store matches relative to cwd so they can be compared directly
        # with each target's original_file_path.
        matched = {hit.relative_to(cwd) for hit in cwd.glob(selector)}

        for unique_id, target in self.all_nodes(included_nodes):
            if Path(target.root_path) != cwd:
                continue
            file_path = Path(target.original_file_path)
            if file_path in matched or any(
                ancestor in matched for ancestor in file_path.parents
            ):
                yield unique_id


class PackageSelectorMethod(SelectorMethod):
    """Select nodes, sources and exposures by package name."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included targets whose package name equals ``selector``."""
        yield from (
            unique_id
            for unique_id, target in self.all_nodes(included_nodes)
            if target.package_name == selector
        )


def _getattr_descend(obj: Any, attrs: List[str]) -> Any:
    """Walk ``attrs`` down from ``obj`` and return the value reached.

    At each step the name is looked up as an attribute first; only if that
    fails is it tried as an item key (``value[name]``).

    NOTE(review): because attribute lookup wins, a mapping key that collides
    with an attribute of the container (e.g. ``'items'`` on a dict) resolves
    to the attribute, not to the stored value.

    :param obj: The object to start from.
    :param attrs: Names to resolve in order; an empty list returns ``obj``.
    :return: The value found at the end of the path.
    :raises AttributeError: if a step can be resolved neither as an
        attribute nor as an item. The underlying item-lookup error is
        chained as ``__cause__``.
    """
    value = obj
    for attr in attrs:
        try:
            value = getattr(value, attr)
        except AttributeError:
            # if it implements getitem (dict, list, ...), use that. On failure,
            # raise an attribute error instead of the KeyError, TypeError, etc.
            # that arbitrary getitem calls might raise
            try:
                value = value[attr]
            except Exception as exc:
                # Use the type's name so the message reads like Python's own
                # "'dict' object has no attribute ..." rather than embedding
                # the "<class 'dict'>" repr inside the quotes.
                raise AttributeError(
                    f"'{type(value).__name__}' object has no attribute "
                    f"'{attr}'"
                ) from exc
    return value


class CaseInsensitive(str):
    """A ``str`` whose ``==`` and ``!=`` comparisons ignore case.

    Comparison against another string upper-cases both sides; comparison
    against a non-string compares the upper-cased text to that object as-is.

    Because ``__eq__`` is overridden without ``__hash__``, instances are not
    hashable (standard Python behaviour), so they cannot be used as dict keys
    or set members.
    """

    def __eq__(self, other):
        if isinstance(other, str):
            return self.upper() == other.upper()
        else:
            return self.upper() == other

    def __ne__(self, other):
        # str provides its own case-sensitive __ne__, which would be
        # inherited and could disagree with __eq__ above (both `==` and `!=`
        # returning True for 'warn' vs 'WARN'). Derive it from __eq__.
        return not self.__eq__(other)


class ConfigSelectorMethod(SelectorMethod):
    """Select nodes and sources by the value of a config entry.

    The method arguments give the path of names to follow inside each
    target's ``config``.
    """

    def search(
        self,
        included_nodes: Set[UniqueId],
        selector: Any,
    ) -> Iterator[UniqueId]:
        """Yield the included targets whose config value equals ``selector``.

        Targets on which the config path cannot be resolved are skipped.
        """
        config_path = self.arguments

        # special case: test severity is compared without regard to case
        if config_path == ['severity']:
            selector = CaseInsensitive(selector)

        # Sources are searched too. Their configs currently carry little
        # worth filtering on, but that may change.
        for unique_id, target in self.configurable_nodes(included_nodes):
            try:
                found = _getattr_descend(target.config, config_path)
            except AttributeError:
                # path not present on this target's config
                continue
            if selector == found:
                yield unique_id


class ResourceTypeSelectorMethod(SelectorMethod):
    """Select parsed nodes by resource type."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included parsed nodes of the given resource type.

        :raises RuntimeException: if ``selector`` is not a valid ``NodeType``
            value.
        """
        try:
            wanted_type = NodeType(selector)
        except ValueError as exc:
            raise RuntimeException(
                f'Invalid resource_type selector "{selector}"'
            ) from exc

        yield from (
            unique_id
            for unique_id, candidate in self.parsed_nodes(included_nodes)
            if candidate.resource_type == wanted_type
        )


class TestNameSelectorMethod(SelectorMethod):
    """Select tests by the name recorded in their test metadata."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield included parsed nodes that carry test metadata whose name
        equals ``selector``.
        """
        for unique_id, candidate in self.parsed_nodes(included_nodes):
            if (
                isinstance(candidate, HasTestMetadata)
                and candidate.test_metadata.name == selector
            ):
                yield unique_id


class TestTypeSelectorMethod(SelectorMethod):
    """Select tests by kind: ``schema`` or ``data``."""

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included parsed nodes that are tests of the given kind.

        :raises RuntimeException: if ``selector`` is neither ``"data"`` nor
            ``"schema"``.
        """
        wanted_classes: Tuple[Type, ...]
        if selector == 'data':
            wanted_classes = (ParsedDataTestNode, CompiledDataTestNode)
        elif selector == 'schema':
            wanted_classes = (ParsedSchemaTestNode, CompiledSchemaTestNode)
        else:
            raise RuntimeException(
                f'Invalid test type selector {selector}: expected "data" or '
                '"schema"'
            )

        yield from (
            unique_id
            for unique_id, candidate in self.parsed_nodes(included_nodes)
            if isinstance(candidate, wanted_classes)
        )


class StateSelectorMethod(SelectorMethod):
    """Select nodes by comparing them with a previous state's manifest.

    Supported selectors are ``new``, ``modified`` and the ``modified.*``
    sub-selectors listed in :meth:`search`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Unique ids of macros that differ between the two manifests.
        # Computed lazily by check_macros_modified; None = not computed yet.
        self.modified_macros: Optional[List[str]] = None

    def _macros_modified(self) -> List[str]:
        """Return the ids of macros added, changed (different ``macro_sql``)
        or removed relative to the previous state's manifest.

        :raises InternalException: if there is no comparison manifest.
        """
        # we checked in the caller!
        if self.previous_state is None or self.previous_state.manifest is None:
            raise InternalException(
                'No comparison manifest in _macros_modified'
            )
        old_macros = self.previous_state.manifest.macros
        new_macros = self.manifest.macros

        # macros that are new, or whose SQL changed
        modified = [
            uid for uid, macro in new_macros.items()
            if uid not in old_macros
            or macro.macro_sql != old_macros[uid].macro_sql
        ]
        # macros that existed before but are gone now
        modified.extend(uid for uid in old_macros if uid not in new_macros)

        return modified

    def recursively_check_macros_modified(self, node, previous_macros):
        """Return True if ``node`` depends, directly or through other macros,
        on a macro listed in ``self.modified_macros``.

        :param node: Any object exposing ``depends_on.macros``.
        :param previous_macros: List of macro ids already visited; it is
            appended to in place and guards against infinite recursion.
        :return: True if a modified macro is reachable, False otherwise.
        """
        # loop through all macros that this node depends on
        for macro_uid in node.depends_on.macros:
            # avoid infinite recursion if we've already seen this macro
            if macro_uid in previous_macros:
                continue
            previous_macros.append(macro_uid)
            # is this macro one of the modified macros?
            if macro_uid in self.modified_macros:
                return True
            # if not, descend into the macros this macro depends on. Only a
            # positive result ends the search: a negative one must not stop
            # us from checking the node's remaining macros.
            macro_node = self.manifest.macros[macro_uid]
            if self.recursively_check_macros_modified(
                macro_node, previous_macros
            ):
                return True
        # nothing reachable from this node was modified
        return False

    def check_macros_modified(self, node):
        """Return True if any macro upstream of ``node`` was modified."""
        # check if there are any changes in macros the first time
        if self.modified_macros is None:
            self.modified_macros = self._macros_modified()
        # no macros have been modified, skip looping entirely
        if not self.modified_macros:
            return False
        # recursively loop through upstream macros to see if any is modified
        return self.recursively_check_macros_modified(node, [])

    def check_modified(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
        """True if ``new`` differs in contents from ``old`` or depends on a
        modified macro.
        """
        different_contents = not new.same_contents(old)  # type: ignore
        upstream_macro_change = self.check_macros_modified(new)
        return different_contents or upstream_macro_change

    def check_modified_body(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
        """True if ``new`` supports ``same_body`` and reports a difference."""
        if hasattr(new, "same_body"):
            return not new.same_body(old)  # type: ignore
        return False

    def check_modified_configs(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
        """True if ``new`` supports ``same_config`` and reports a difference."""
        if hasattr(new, "same_config"):
            return not new.same_config(old)  # type: ignore
        return False

    def check_modified_persisted_descriptions(
        self, old: Optional[SelectorTarget], new: SelectorTarget
    ) -> bool:
        """True if ``new`` supports ``same_persisted_description`` and
        reports a difference.
        """
        if hasattr(new, "same_persisted_description"):
            return not new.same_persisted_description(old)  # type: ignore
        return False

    def check_modified_relation(
        self, old: Optional[SelectorTarget], new: SelectorTarget
    ) -> bool:
        """True if ``new`` supports ``same_database_representation`` and
        reports a difference.
        """
        if hasattr(new, "same_database_representation"):
            return not new.same_database_representation(old)  # type: ignore
        return False

    def check_modified_macros(self, _, new: SelectorTarget) -> bool:
        """True if ``new`` depends on a modified macro; ``old`` is ignored."""
        return self.check_macros_modified(new)

    def check_new(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
        """True if there is no previous version of the node."""
        return old is None

    def search(
        self, included_nodes: Set[UniqueId], selector: str
    ) -> Iterator[UniqueId]:
        """Yield the included targets that satisfy the state selector.

        :param str selector: One of ``new``, ``modified``, ``modified.body``,
            ``modified.configs``, ``modified.persisted_descriptions``,
            ``modified.relation`` or ``modified.macros``.
        :raises RuntimeException: if there is no comparison manifest or the
            selector is not one of the values above.
        """
        if self.previous_state is None or self.previous_state.manifest is None:
            raise RuntimeException(
                'Got a state selector method, but no comparison manifest'
            )

        # Each checker takes (old, new) and returns True when the node
        # should be selected.
        state_checks = {
            # it's new if there is no old version
            'new': self.check_new,
            # use methods defined above to compare properties of old + new
            'modified': self.check_modified,
            'modified.body': self.check_modified_body,
            'modified.configs': self.check_modified_configs,
            'modified.persisted_descriptions': self.check_modified_persisted_descriptions,
            'modified.relation': self.check_modified_relation,
            'modified.macros': self.check_modified_macros,
        }
        if selector in state_checks:
            checker = state_checks[selector]
        else:
            raise RuntimeException(
                f'Got an invalid selector "{selector}", expected one of '
                f'"{list(state_checks)}"'
            )

        manifest: WritableManifest = self.previous_state.manifest

        for node, real_node in self.all_nodes(included_nodes):
            # Find the same unique id in the previous manifest, whichever
            # collection it lives in; None means the node did not exist.
            previous_node: Optional[SelectorTarget] = None
            if node in manifest.nodes:
                previous_node = manifest.nodes[node]
            elif node in manifest.sources:
                previous_node = manifest.sources[node]
            elif node in manifest.exposures:
                previous_node = manifest.exposures[node]

            if checker(previous_node, real_node):
                yield node


class MethodManager:
    """Builds SelectorMethod instances for a manifest and optional previous
    state.
    """

    # Maps each selection method name to the class implementing it.
    SELECTOR_METHODS: Dict[MethodName, Type[SelectorMethod]] = {
        MethodName.FQN: QualifiedNameSelectorMethod,
        MethodName.Tag: TagSelectorMethod,
        MethodName.Source: SourceSelectorMethod,
        MethodName.Path: PathSelectorMethod,
        MethodName.Package: PackageSelectorMethod,
        MethodName.Config: ConfigSelectorMethod,
        MethodName.TestName: TestNameSelectorMethod,
        MethodName.TestType: TestTypeSelectorMethod,
        # Previously missing: the name and the implementation both existed,
        # but without this entry get_method raised InternalException.
        MethodName.ResourceType: ResourceTypeSelectorMethod,
        MethodName.State: StateSelectorMethod,
        MethodName.Exposure: ExposureSelectorMethod,
    }

    def __init__(
        self,
        manifest: Manifest,
        previous_state: Optional[PreviousState],
    ):
        self.manifest = manifest
        self.previous_state = previous_state

    def get_method(
        self, method: MethodName, method_arguments: List[str]
    ) -> SelectorMethod:
        """Instantiate the selector method registered for ``method``.

        :param method: The selection method name.
        :param method_arguments: Arguments passed through to the selector
            method's constructor.
        :return: A selector method bound to this manager's manifest and
            previous state.
        :raises InternalException: if ``method`` has no registered
            implementation.
        """
        cls = self.SELECTOR_METHODS.get(method)
        if cls is None:
            raise InternalException(
                f'Method name "{method}" is a valid node selection '
                f'method name, but it is not handled'
            )
        return cls(self.manifest, self.previous_state, method_arguments)