# dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/parser/base.py
#
# 440 lines
# 15 KiB
# Python
# Raw Normal View History
#
# 2022-03-22 15:13:27 +00:00
import abc
import itertools
import os
from typing import (
List, Dict, Any, Generic, Optional, TypeVar
)
from dbt.dataclass_schema import ValidationError
from dbt import utils
from dbt.clients.jinja import MacroGenerator
from dbt.context.providers import (
generate_parser_model,
generate_generate_component_name_macro,
)
from dbt.adapters.factory import get_adapter
from dbt.clients.jinja import get_rendered
from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import (
ContextConfig
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.parsed import HasUniqueID, ManifestNodes
from dbt.contracts.graph.unparsed import UnparsedNode
from dbt.exceptions import (
CompilationException, validator_error_message, InternalException
)
from dbt import hooks
from dbt.node_types import NodeType
from dbt.parser.search import FileBlock
# Type variables used to parameterize the parser classes in this module.
#
# internally, the parser may store a less-restrictive type that will be
# transformed into the final type. But it will have to be derived from
# ParsedNode to be operable.

# Type parameter of BaseParser and Parser; bound to HasUniqueID.
FinalValue = TypeVar('FinalValue', bound=HasUniqueID)
# Same bound as FinalValue. Not referenced by the classes visible in this
# module; presumably kept for importers -- verify before removing.
IntermediateValue = TypeVar('IntermediateValue', bound=HasUniqueID)
# Node type a ConfiguredParser works on before transform(); deliberately
# unconstrained (bound=Any).
IntermediateNode = TypeVar('IntermediateNode', bound=Any)
# Node type a ConfiguredParser returns from transform() and parse_node().
FinalNode = TypeVar('FinalNode', bound=ManifestNodes)
# Block type a ConfiguredParser accepts in parse_node(); bound to FileBlock.
ConfiguredBlockType = TypeVar('ConfiguredBlockType', bound=FileBlock)
class BaseParser(Generic[FinalValue]):
    """Common base for parsers bound to one project and one manifest.

    Subclasses provide ``parse_file`` and ``resource_type``; this class
    contributes the unique-id construction they share.
    """

    def __init__(self, project: Project, manifest: Manifest) -> None:
        """Store the project being parsed and the manifest being built."""
        self.project = project
        self.manifest = manifest

    @abc.abstractmethod
    def parse_file(self, block: FileBlock) -> None:
        """Parse one file block. Must be implemented by subclasses."""
        pass

    # `abc.abstractproperty` is deprecated; stacking `property` on top of
    # `abstractmethod` is the documented replacement.
    @property
    @abc.abstractmethod
    def resource_type(self) -> NodeType:
        """The node type this parser produces. Must be implemented by
        subclasses.
        """
        pass

    def generate_unique_id(
        self,
        resource_name: str,
        hash: Optional[str] = None
    ) -> str:
        """Returns a unique identifier for a resource.

        The id is the dot-joined sequence of resource type, project name,
        resource name and ``hash``. Falsy components (for example a ``hash``
        of ``None``) are left out.

        An optional hash may be passed in to ensure uniqueness for edge
        cases.
        """
        components = [
            self.resource_type,
            self.project.project_name,
            resource_name,
            hash,
        ]
        return '.'.join(part for part in components if part)
class Parser(BaseParser[FinalValue], Generic[FinalValue]):
    """A BaseParser that also keeps a reference to the root project."""

    def __init__(
        self, project: Project, manifest: Manifest, root_project: RuntimeConfig
    ) -> None:
        """Initialise the base parser, then remember ``root_project``."""
        super().__init__(project, manifest)
        self.root_project = root_project
class RelationUpdate:
    """Callable that sets one component attribute on a node by running the
    ``generate_<component>_name`` macro.
    """

    def __init__(
        self, config: RuntimeConfig, manifest: Manifest, component: str
    ) -> None:
        """Resolve the generate macro for ``component`` and wrap it.

        Raises InternalException when the manifest has no matching macro.
        """
        name_macro = manifest.find_generate_macro_by_name(
            component=component,
            root_project_name=config.project_name,
        )
        if name_macro is None:
            raise InternalException(
                f'No macro with name generate_{component}_name found'
            )

        macro_context = generate_generate_component_name_macro(
            name_macro, config, manifest
        )
        self.updater = MacroGenerator(name_macro, macro_context)
        self.component = component

    def __call__(
        self, parsed_node: Any, config_dict: Dict[str, Any]
    ) -> None:
        """Compute the component value for ``parsed_node`` and assign it.

        The configured value for the component (``None`` when absent from
        ``config_dict``) is passed to the macro as the override. String
        results are stripped of surrounding whitespace before being set.
        """
        rendered = self.updater(config_dict.get(self.component), parsed_node)
        value = rendered.strip() if isinstance(rendered, str) else rendered
        setattr(parsed_node, self.component, value)
class ConfiguredParser(
    Parser[FinalNode],
    Generic[ConfiguredBlockType, IntermediateNode, FinalNode],
):
    """Parser for blocks that become configured nodes.

    ``parse_node`` drives the work for one block: build a provisional node,
    render its SQL so config()/ref()/source() calls are captured, apply the
    resulting config (including database/schema/alias), transform the node
    into its final type and register it on the manifest.
    """

    def __init__(
        self,
        project: Project,
        manifest: Manifest,
        root_project: RuntimeConfig,
    ) -> None:
        super().__init__(project, manifest, root_project)
        # One updater per relation component, each resolved against the
        # root project.
        self._update_node_database = RelationUpdate(
            manifest=manifest, config=root_project, component='database'
        )
        self._update_node_schema = RelationUpdate(
            manifest=manifest, config=root_project, component='schema'
        )
        self._update_node_alias = RelationUpdate(
            manifest=manifest, config=root_project, component='alias'
        )

    # `abc.abstractclassmethod` is deprecated; `classmethod` stacked on
    # `abstractmethod` is the documented replacement.
    @classmethod
    @abc.abstractmethod
    def get_compiled_path(cls, block: ConfiguredBlockType) -> str:
        """Return the path to record on the node created for ``block``."""
        pass

    @abc.abstractmethod
    def parse_from_dict(self, dict, validate=True) -> IntermediateNode:
        """Build the intermediate node from its dictionary form."""
        pass

    # `abc.abstractproperty` is deprecated; see get_compiled_path.
    @property
    @abc.abstractmethod
    def resource_type(self) -> NodeType:
        """The node type this parser produces."""
        pass

    @property
    def default_schema(self):
        """Schema from the root project's credentials."""
        return self.root_project.credentials.schema

    @property
    def default_database(self):
        """Database from the root project's credentials."""
        return self.root_project.credentials.database

    def get_fqn_prefix(self, path: str) -> List[str]:
        """Return the FQN prefix for ``path``: the project name followed by
        the path's components, minus the extension and the last component.
        """
        no_ext = os.path.splitext(path)[0]
        fqn = [self.project.project_name]
        fqn.extend(utils.split_path(no_ext)[:-1])
        return fqn

    def get_fqn(self, path: str, name: str) -> List[str]:
        """Get the FQN for the node. This impacts node selection and config
        application.
        """
        fqn = self.get_fqn_prefix(path)
        fqn.append(name)
        return fqn

    def _mangle_hooks(self, config: Dict[str, Any]) -> None:
        """Given a config dict that may have `pre-hook`/`post-hook` keys,
        convert it from the yucky maybe-a-string, maybe-a-dict to a dict.

        The dict is modified in place.
        """
        # Like most of parsing, this is a horrible hack :(
        for key in hooks.ModelHookType:
            if key in config:
                config[key] = [hooks.get_hook_dict(h) for h in config[key]]

    def _create_error_node(
        self, name: str, path: str, original_file_path: str, raw_sql: str,
    ) -> UnparsedNode:
        """If we hit an error before we've actually parsed a node, provide some
        level of useful information by attaching this to the exception.
        """
        # this is a bit silly, but build an UnparsedNode just for error
        # message reasons
        return UnparsedNode(
            name=name,
            resource_type=self.resource_type,
            path=path,
            original_file_path=original_file_path,
            root_path=self.project.project_root,
            package_name=self.project.project_name,
            raw_sql=raw_sql,
        )

    def _create_parsetime_node(
        self,
        block: ConfiguredBlockType,
        path: str,
        config: ContextConfig,
        fqn: List[str],
        name=None,
        **kwargs,
    ) -> IntermediateNode:
        """Create the node that will be passed in to the parser context for
        "rendering". Some information may be partial, as it'll be updated by
        config() and any ref()/source() calls discovered during rendering.

        ``name`` defaults to ``block.name``. Extra keyword arguments override
        or extend the node dictionary before it is parsed.

        Raises CompilationException (chained to the underlying
        ValidationError) when the node dictionary fails validation.
        """
        if name is None:
            name = block.name
        dct = {
            'alias': name,
            'schema': self.default_schema,
            'database': self.default_database,
            'fqn': fqn,
            'name': name,
            'root_path': self.project.project_root,
            'resource_type': self.resource_type,
            'path': path,
            'original_file_path': block.path.original_file_path,
            'package_name': self.project.project_name,
            'raw_sql': block.contents,
            'unique_id': self.generate_unique_id(name),
            'config': self.config_dict(config),
            'checksum': block.file.checksum.to_dict(omit_none=True),
        }
        dct.update(kwargs)
        try:
            return self.parse_from_dict(dct, validate=True)
        except ValidationError as exc:
            msg = validator_error_message(exc)
            # this is a bit silly, but build an UnparsedNode just for error
            # message reasons
            node = self._create_error_node(
                name=block.name,
                path=path,
                original_file_path=block.path.original_file_path,
                raw_sql=block.contents,
            )
            # Chain explicitly, consistent with render_update, so the
            # validation failure is recorded as the direct cause.
            raise CompilationException(msg, node=node) from exc

    def _context_for(
        self, parsed_node: IntermediateNode, config: ContextConfig
    ) -> Dict[str, Any]:
        """Build the parse-time rendering context for ``parsed_node``."""
        return generate_parser_model(
            parsed_node, self.root_project, self.manifest, config
        )

    def render_with_context(
        self, parsed_node: IntermediateNode, config: ContextConfig
    ) -> Dict[str, Any]:
        """Render the node's SQL with macro capture enabled and return the
        context that was used.

        Note: this mutates the config object when config calls are rendered.
        """
        # during parsing, we don't have a connection, but we might need one, so
        # we have to acquire it.
        with get_adapter(self.root_project).connection_for(parsed_node):
            context = self._context_for(parsed_node, config)

            # this goes through the process of rendering, but just throws away
            # the rendered result. The "macro capture" is the point?
            get_rendered(
                parsed_node.raw_sql, context, parsed_node, capture_macros=True
            )
        return context

    # This is taking the original config for the node, converting it to a dict,
    # updating the config with new config passed in, then re-creating the
    # config from the dict in the node.
    def update_parsed_node_config_dict(
        self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
    ) -> None:
        """Overlay ``config_dict`` onto the node's current config and replace
        ``parsed_node.config`` with the result. Keys are stripped of
        surrounding whitespace before being merged.
        """
        # Overwrite node config
        final_config_dict = parsed_node.config.to_dict(omit_none=True)
        final_config_dict.update(
            {k.strip(): v for (k, v) in config_dict.items()}
        )
        # re-mangle hooks, in case we got new ones
        self._mangle_hooks(final_config_dict)
        parsed_node.config = parsed_node.config.from_dict(final_config_dict)

    def update_parsed_node_name(
        self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
    ) -> None:
        """Set the node's database, schema and alias, in that order."""
        self._update_node_database(parsed_node, config_dict)
        self._update_node_schema(parsed_node, config_dict)
        self._update_node_alias(parsed_node, config_dict)

    def update_parsed_node_config(
        self, parsed_node: IntermediateNode, config: ContextConfig,
        context=None, patch_config_dict=None
    ) -> None:
        """Given the ContextConfig used for parsing and the parsed node,
        generate and set the true values to use, overriding the temporary
        parse-time values the node was created with (see
        _create_parsetime_node).

        ``context`` is the rendering context to reuse for hooks; when it is
        falsy and the node has hooks, a new one is built.
        ``patch_config_dict`` is forwarded to ``config.build_config_dict``.
        """
        # build_config_dict takes the config_call_dict in the ContextConfig
        # object and calls calculate_node_config to combine dbt_project
        # configs and config calls from SQL files
        config_dict = config.build_config_dict(
            patch_config_dict=patch_config_dict
        )

        # Set tags on node provided in config blocks. Tags are additive, so
        # even if config has been built before, we don't have to reset tags
        # in the parsed_node.
        for tag in config_dict.get('tags', []):
            if tag not in parsed_node.tags:
                parsed_node.tags.append(tag)

        # If we have meta in the config, copy to node level, for backwards
        # compatibility with earlier node-only config.
        meta = config_dict.get('meta')
        if meta:
            parsed_node.meta = meta

        # unrendered_config is used to compare the original
        # database/schema/alias values and to handle 'same_config' and
        # 'same_contents' calls
        parsed_node.unrendered_config = config.build_config_dict(
            rendered=False
        )

        parsed_node.config_call_dict = config._config_call_dict

        # do this once before we parse the node database/schema/alias, so
        # parsed_node.config is what it would be if they did nothing
        self.update_parsed_node_config_dict(parsed_node, config_dict)
        # This updates the node database/schema/alias
        self.update_parsed_node_name(parsed_node, config_dict)

        # tests don't have hooks
        if parsed_node.resource_type == NodeType.Test:
            return

        # at this point, we've collected our hooks. Use the node context to
        # render each hook and collect refs/sources.
        # Named node_hooks so the imported `hooks` module is not shadowed.
        node_hooks = list(itertools.chain(
            parsed_node.config.pre_hook, parsed_node.config.post_hook
        ))
        # skip context rebuilding if there aren't any hooks
        if not node_hooks:
            return
        if not context:
            context = self._context_for(parsed_node, config)
        for hook in node_hooks:
            get_rendered(hook.sql, context, parsed_node, capture_macros=True)

    def initial_config(self, fqn: List[str]) -> ContextConfig:
        """Create the ContextConfig for a node with the given FQN.

        Raises InternalException unless the lower of the project's and the
        root project's config versions is 2.
        """
        config_version = min(
            self.project.config_version, self.root_project.config_version
        )
        if config_version != 2:
            raise InternalException(
                f'Got an unexpected project version={config_version}, '
                f'expected 2'
            )
        return ContextConfig(
            self.root_project,
            fqn,
            self.resource_type,
            self.project.project_name,
        )

    def config_dict(
        self, config: ContextConfig,
    ) -> Dict[str, Any]:
        """Return the base config dict for ``config`` with hooks normalised."""
        config_dict = config.build_config_dict(base=True)
        self._mangle_hooks(config_dict)
        return config_dict

    def render_update(
        self, node: IntermediateNode, config: ContextConfig
    ) -> None:
        """Render ``node`` and apply the config gathered while rendering.

        Raises CompilationException (chained to the ValidationError) when
        the resulting config is invalid.
        """
        try:
            context = self.render_with_context(node, config)
            self.update_parsed_node_config(node, config, context=context)
        except ValidationError as exc:
            # we got a ValidationError - probably bad types in config()
            msg = validator_error_message(exc)
            raise CompilationException(msg, node=node) from exc

    def add_result_node(self, block: FileBlock, node: ManifestNodes) -> None:
        """Register ``node`` on the manifest, as a regular node when its
        config is enabled and as a disabled node otherwise.
        """
        if node.config.enabled:
            self.manifest.add_node(block.file, node)
        else:
            self.manifest.add_disabled(block.file, node)

    def parse_node(self, block: ConfiguredBlockType) -> FinalNode:
        """Parse ``block`` into its final node, add it to the manifest and
        return it.
        """
        compiled_path: str = self.get_compiled_path(block)
        fqn = self.get_fqn(compiled_path, block.name)

        config: ContextConfig = self.initial_config(fqn)

        node = self._create_parsetime_node(
            block=block,
            path=compiled_path,
            config=config,
            fqn=fqn,
        )
        self.render_update(node, config)
        result = self.transform(node)
        self.add_result_node(block, result)
        return result

    @abc.abstractmethod
    def parse_file(self, file_block: FileBlock) -> None:
        """Parse one file block. Must be implemented by subclasses."""
        pass

    @abc.abstractmethod
    def transform(self, node: IntermediateNode) -> FinalNode:
        """Convert the intermediate node into its final node type."""
        pass
class SimpleParser(
    ConfiguredParser[ConfiguredBlockType, FinalNode, FinalNode],
    Generic[ConfiguredBlockType, FinalNode]
):
    """ConfiguredParser whose intermediate node type is also its final type."""

    def transform(self, node):
        """Return ``node`` unchanged; there is nothing to convert."""
        return node
class SQLParser(
    ConfiguredParser[FileBlock, IntermediateNode, FinalNode],
    Generic[IntermediateNode, FinalNode]
):
    """ConfiguredParser over FileBlocks that parses each block as one node."""

    def parse_file(self, file_block: FileBlock) -> None:
        """Parse ``file_block`` as a node; the returned node is not used here."""
        self.parse_node(file_block)
class SimpleSQLParser(
    SQLParser[FinalNode, FinalNode]
):
    """SQLParser whose intermediate node type is also its final type."""

    def transform(self, node):
        """Return ``node`` unchanged; there is nothing to convert."""
        return node