320 lines
12 KiB
Python
320 lines
12 KiB
Python
|
from typing import Set, List, Optional, Tuple
|
||
|
|
||
|
from .graph import Graph, UniqueId
|
||
|
from .queue import GraphQueue
|
||
|
from .selector_methods import MethodManager
|
||
|
from .selector_spec import SelectionCriteria, SelectionSpec
|
||
|
|
||
|
from dbt.logger import GLOBAL_LOGGER as logger
|
||
|
from dbt.node_types import NodeType
|
||
|
from dbt.exceptions import (
|
||
|
InternalException,
|
||
|
InvalidSelectorException,
|
||
|
warn_or_error,
|
||
|
)
|
||
|
from dbt.contracts.graph.compiled import GraphMemberNode
|
||
|
from dbt.contracts.graph.manifest import Manifest
|
||
|
from dbt.contracts.state import PreviousState
|
||
|
|
||
|
|
||
|
def get_package_names(nodes):
|
||
|
return set([node.split(".")[1] for node in nodes])
|
||
|
|
||
|
|
||
|
def alert_non_existence(raw_spec, nodes):
|
||
|
if len(nodes) == 0:
|
||
|
warn_or_error(
|
||
|
f"The selection criterion '{str(raw_spec)}' does not match"
|
||
|
f" any nodes"
|
||
|
)
|
||
|
|
||
|
|
||
|
def alert_unused_nodes(raw_spec, node_names):
|
||
|
summary_nodes_str = ("\n - ").join(node_names[:3])
|
||
|
debug_nodes_str = ("\n - ").join(node_names)
|
||
|
and_more_str = f"\n - and {len(node_names) - 3} more" if len(node_names) > 4 else ""
|
||
|
summary_msg = (
|
||
|
f"\nSome tests were excluded because at least one parent is not selected. "
|
||
|
f"Use the --greedy flag to include them."
|
||
|
f"\n - {summary_nodes_str}{and_more_str}"
|
||
|
)
|
||
|
logger.info(summary_msg)
|
||
|
if len(node_names) > 4:
|
||
|
debug_msg = (
|
||
|
f"Full list of tests that were excluded:"
|
||
|
f"\n - {debug_nodes_str}"
|
||
|
)
|
||
|
logger.debug(debug_msg)
|
||
|
|
||
|
|
||
|
def can_select_indirectly(node):
|
||
|
"""If a node is not selected itself, but its parent(s) are, it may qualify
|
||
|
for indirect selection.
|
||
|
Today, only Test nodes can be indirectly selected. In the future,
|
||
|
other node types or invocation flags might qualify.
|
||
|
"""
|
||
|
if node.resource_type == NodeType.Test:
|
||
|
return True
|
||
|
else:
|
||
|
return False
|
||
|
|
||
|
|
||
|
class NodeSelector(MethodManager):
|
||
|
"""The node selector is aware of the graph and manifest,
|
||
|
"""
|
||
|
def __init__(
|
||
|
self,
|
||
|
graph: Graph,
|
||
|
manifest: Manifest,
|
||
|
previous_state: Optional[PreviousState] = None,
|
||
|
):
|
||
|
super().__init__(manifest, previous_state)
|
||
|
self.full_graph = graph
|
||
|
|
||
|
# build a subgraph containing only non-empty, enabled nodes and enabled
|
||
|
# sources.
|
||
|
graph_members = {
|
||
|
unique_id for unique_id in self.full_graph.nodes()
|
||
|
if self._is_graph_member(unique_id)
|
||
|
}
|
||
|
self.graph = self.full_graph.subgraph(graph_members)
|
||
|
|
||
|
def select_included(
|
||
|
self, included_nodes: Set[UniqueId], spec: SelectionCriteria,
|
||
|
) -> Set[UniqueId]:
|
||
|
"""Select the explicitly included nodes, using the given spec. Return
|
||
|
the selected set of unique IDs.
|
||
|
"""
|
||
|
method = self.get_method(spec.method, spec.method_arguments)
|
||
|
return set(method.search(included_nodes, spec.value))
|
||
|
|
||
|
def get_nodes_from_criteria(
|
||
|
self,
|
||
|
spec: SelectionCriteria
|
||
|
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||
|
"""Get all nodes specified by the single selection criteria.
|
||
|
|
||
|
- collect the directly included nodes
|
||
|
- find their specified relatives
|
||
|
- perform any selector-specific expansion
|
||
|
"""
|
||
|
|
||
|
nodes = self.graph.nodes()
|
||
|
try:
|
||
|
collected = self.select_included(nodes, spec)
|
||
|
except InvalidSelectorException:
|
||
|
valid_selectors = ", ".join(self.SELECTOR_METHODS)
|
||
|
logger.info(
|
||
|
f"The '{spec.method}' selector specified in {spec.raw} is "
|
||
|
f"invalid. Must be one of [{valid_selectors}]"
|
||
|
)
|
||
|
return set(), set()
|
||
|
|
||
|
neighbors = self.collect_specified_neighbors(spec, collected)
|
||
|
direct_nodes, indirect_nodes = self.expand_selection(
|
||
|
selected=(collected | neighbors),
|
||
|
greedy=spec.greedy
|
||
|
)
|
||
|
return direct_nodes, indirect_nodes
|
||
|
|
||
|
def collect_specified_neighbors(
|
||
|
self, spec: SelectionCriteria, selected: Set[UniqueId]
|
||
|
) -> Set[UniqueId]:
|
||
|
"""Given the set of models selected by the explicit part of the
|
||
|
selector (like "tag:foo"), apply the modifiers on the spec ("+"/"@").
|
||
|
Return the set of additional nodes that should be collected (which may
|
||
|
overlap with the selected set).
|
||
|
"""
|
||
|
additional: Set[UniqueId] = set()
|
||
|
if spec.childrens_parents:
|
||
|
additional.update(self.graph.select_childrens_parents(selected))
|
||
|
|
||
|
if spec.parents:
|
||
|
depth = spec.parents_depth
|
||
|
additional.update(self.graph.select_parents(selected, depth))
|
||
|
|
||
|
if spec.children:
|
||
|
depth = spec.children_depth
|
||
|
additional.update(self.graph.select_children(selected, depth))
|
||
|
return additional
|
||
|
|
||
|
def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||
|
"""If the spec is a composite spec (a union, difference, or intersection),
|
||
|
recurse into its selections and combine them. If the spec is a concrete
|
||
|
selection criteria, resolve that using the given graph.
|
||
|
"""
|
||
|
if isinstance(spec, SelectionCriteria):
|
||
|
direct_nodes, indirect_nodes = self.get_nodes_from_criteria(spec)
|
||
|
else:
|
||
|
bundles = [
|
||
|
self.select_nodes_recursively(component)
|
||
|
for component in spec
|
||
|
]
|
||
|
|
||
|
direct_sets = []
|
||
|
indirect_sets = []
|
||
|
|
||
|
for direct, indirect in bundles:
|
||
|
direct_sets.append(direct)
|
||
|
indirect_sets.append(direct | indirect)
|
||
|
|
||
|
initial_direct = spec.combined(direct_sets)
|
||
|
indirect_nodes = spec.combined(indirect_sets)
|
||
|
|
||
|
direct_nodes = self.incorporate_indirect_nodes(initial_direct, indirect_nodes)
|
||
|
|
||
|
if spec.expect_exists:
|
||
|
alert_non_existence(spec.raw, direct_nodes)
|
||
|
|
||
|
return direct_nodes, indirect_nodes
|
||
|
|
||
|
def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||
|
"""Select the nodes in the graph according to the spec.
|
||
|
|
||
|
This is the main point of entry for turning a spec into a set of nodes:
|
||
|
- Recurse through spec, select by criteria, combine by set operation
|
||
|
- Return final (unfiltered) selection set
|
||
|
"""
|
||
|
direct_nodes, indirect_nodes = self.select_nodes_recursively(spec)
|
||
|
indirect_only = indirect_nodes.difference(direct_nodes)
|
||
|
return direct_nodes, indirect_only
|
||
|
|
||
|
def _is_graph_member(self, unique_id: UniqueId) -> bool:
|
||
|
if unique_id in self.manifest.sources:
|
||
|
source = self.manifest.sources[unique_id]
|
||
|
return source.config.enabled
|
||
|
elif unique_id in self.manifest.exposures:
|
||
|
return True
|
||
|
node = self.manifest.nodes[unique_id]
|
||
|
return not node.empty and node.config.enabled
|
||
|
|
||
|
def node_is_match(self, node: GraphMemberNode) -> bool:
|
||
|
"""Determine if a node is a match for the selector. Non-match nodes
|
||
|
will be excluded from results during filtering.
|
||
|
"""
|
||
|
return True
|
||
|
|
||
|
def _is_match(self, unique_id: UniqueId) -> bool:
|
||
|
node: GraphMemberNode
|
||
|
if unique_id in self.manifest.nodes:
|
||
|
node = self.manifest.nodes[unique_id]
|
||
|
elif unique_id in self.manifest.sources:
|
||
|
node = self.manifest.sources[unique_id]
|
||
|
elif unique_id in self.manifest.exposures:
|
||
|
node = self.manifest.exposures[unique_id]
|
||
|
else:
|
||
|
raise InternalException(
|
||
|
f'Node {unique_id} not found in the manifest!'
|
||
|
)
|
||
|
return self.node_is_match(node)
|
||
|
|
||
|
def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
|
||
|
"""Return the subset of selected nodes that is a match for this
|
||
|
selector.
|
||
|
"""
|
||
|
return {
|
||
|
unique_id for unique_id in selected if self._is_match(unique_id)
|
||
|
}
|
||
|
|
||
|
def expand_selection(
|
||
|
self, selected: Set[UniqueId], greedy: bool = False
|
||
|
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||
|
# Test selection can expand to include an implicitly/indirectly selected test.
|
||
|
# In this way, `dbt test -m model_a` also includes tests that directly depend on `model_a`.
|
||
|
# Expansion has two modes, GREEDY and NOT GREEDY.
|
||
|
#
|
||
|
# GREEDY mode: If ANY parent is selected, select the test. We use this for EXCLUSION.
|
||
|
#
|
||
|
# NOT GREEDY mode:
|
||
|
# - If ALL parents are selected, select the test.
|
||
|
# - If ANY parent is missing, return it separately. We'll keep it around
|
||
|
# for later and see if its other parents show up.
|
||
|
# We use this for INCLUSION.
|
||
|
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
|
||
|
# or by specifying `greedy: true` in a yaml selector
|
||
|
|
||
|
direct_nodes = set(selected)
|
||
|
indirect_nodes = set()
|
||
|
|
||
|
for unique_id in self.graph.select_successors(selected):
|
||
|
if unique_id in self.manifest.nodes:
|
||
|
node = self.manifest.nodes[unique_id]
|
||
|
if can_select_indirectly(node):
|
||
|
# should we add it in directly?
|
||
|
if greedy or set(node.depends_on.nodes) <= set(selected):
|
||
|
direct_nodes.add(unique_id)
|
||
|
# if not:
|
||
|
else:
|
||
|
indirect_nodes.add(unique_id)
|
||
|
|
||
|
return direct_nodes, indirect_nodes
|
||
|
|
||
|
def incorporate_indirect_nodes(
|
||
|
self, direct_nodes: Set[UniqueId], indirect_nodes: Set[UniqueId] = set()
|
||
|
) -> Set[UniqueId]:
|
||
|
# Check tests previously selected indirectly to see if ALL their
|
||
|
# parents are now present.
|
||
|
|
||
|
selected = set(direct_nodes)
|
||
|
|
||
|
for unique_id in indirect_nodes:
|
||
|
if unique_id in self.manifest.nodes:
|
||
|
node = self.manifest.nodes[unique_id]
|
||
|
if set(node.depends_on.nodes) <= set(selected):
|
||
|
selected.add(unique_id)
|
||
|
|
||
|
return selected
|
||
|
|
||
|
def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:
|
||
|
"""get_selected runs through the node selection process:
|
||
|
|
||
|
- node selection. Based on the include/exclude sets, the set
|
||
|
of matched unique IDs is returned
|
||
|
- includes direct + indirect selection (for tests)
|
||
|
- filtering:
|
||
|
- selectors can filter the nodes after all of them have been
|
||
|
selected
|
||
|
"""
|
||
|
selected_nodes, indirect_only = self.select_nodes(spec)
|
||
|
filtered_nodes = self.filter_selection(selected_nodes)
|
||
|
|
||
|
if indirect_only:
|
||
|
filtered_unused_nodes = self.filter_selection(indirect_only)
|
||
|
if filtered_unused_nodes and spec.greedy_warning:
|
||
|
# log anything that didn't make the cut
|
||
|
unused_node_names = []
|
||
|
for unique_id in filtered_unused_nodes:
|
||
|
name = self.manifest.nodes[unique_id].name
|
||
|
unused_node_names.append(name)
|
||
|
alert_unused_nodes(spec, unused_node_names)
|
||
|
|
||
|
return filtered_nodes
|
||
|
|
||
|
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
|
||
|
"""Returns a queue over nodes in the graph that tracks progress of
|
||
|
dependecies.
|
||
|
"""
|
||
|
selected_nodes = self.get_selected(spec)
|
||
|
new_graph = self.full_graph.get_subset_graph(selected_nodes)
|
||
|
# should we give a way here for consumers to mutate the graph?
|
||
|
return GraphQueue(new_graph.graph, self.manifest, selected_nodes)
|
||
|
|
||
|
|
||
|
class ResourceTypeSelector(NodeSelector):
|
||
|
def __init__(
|
||
|
self,
|
||
|
graph: Graph,
|
||
|
manifest: Manifest,
|
||
|
previous_state: Optional[PreviousState],
|
||
|
resource_types: List[NodeType],
|
||
|
):
|
||
|
super().__init__(
|
||
|
graph=graph,
|
||
|
manifest=manifest,
|
||
|
previous_state=previous_state,
|
||
|
)
|
||
|
self.resource_types: Set[NodeType] = set(resource_types)
|
||
|
|
||
|
def node_is_match(self, node):
|
||
|
return node.resource_type in self.resource_types
|