dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/parser/partial.py

from typing import MutableMapping, Dict, List
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.files import (
    AnySourceFile, ParseFileType, parse_file_type_to_parser,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType

mssat_files = (
    ParseFileType.Model,
    ParseFileType.Seed,
    ParseFileType.Snapshot,
    ParseFileType.Analysis,
    ParseFileType.Test,
)

key_to_prefix = {
    'models': 'model',
    'seeds': 'seed',
    'snapshots': 'snapshot',
    'analyses': 'analysis',
}

parse_file_type_to_key = {
    ParseFileType.Model: 'models',
    ParseFileType.Seed: 'seeds',
    ParseFileType.Snapshot: 'snapshots',
    ParseFileType.Analysis: 'analyses',
}
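
# Rough usage sketch (for orientation only; illustrative, not executed here):
# the manifest loader is expected to construct PartialParsing with the manifest
# deserialized from the saved partial-parse state and the files just read from
# disk, along the lines of
#     pp = PartialParsing(saved_manifest, new_files)
#     if not pp.skip_parsing():
#         project_parser_files = pp.get_parsing_files()
# and then parse only the listed files for each project and parser.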


# Partial parsing. Compare the files in the saved manifest with the files just
# read from disk and produce a project_parser_files dictionary that drives
# parsing of only the necessary changes. Also provides a 'skip_parsing' check
# for when nothing relevant has changed.
class PartialParsing:
    def __init__(self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]):
        self.saved_manifest = saved_manifest
        self.new_files = new_files
        self.project_parser_files: Dict = {}
        self.saved_files = self.saved_manifest.files
        self.deleted_manifest = Manifest()
        self.macro_child_map: Dict[str, List[str]] = {}
        self.build_file_diff()
        self.processing_file = None
        self.disabled_by_file_id = self.saved_manifest.build_disabled_by_file_id()

    def skip_parsing(self):
        return (
            not self.file_diff['deleted'] and
            not self.file_diff['added'] and
            not self.file_diff['changed'] and
            not self.file_diff['changed_schema_files'] and
            not self.file_diff['deleted_schema_files']
        )

    # Compare the previously saved manifest files and the just-loaded manifest
    # files to see if anything changed
    def build_file_diff(self):
        saved_file_ids = set(self.saved_files.keys())
        new_file_ids = set(self.new_files.keys())
        deleted_all_files = saved_file_ids.difference(new_file_ids)
        added = new_file_ids.difference(saved_file_ids)
        common = saved_file_ids.intersection(new_file_ids)
        changed_or_deleted_macro_file = False

        # separate out deleted schema files
        deleted_schema_files = []
        deleted = []
        for file_id in deleted_all_files:
            if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                deleted_schema_files.append(file_id)
            else:
                if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                    changed_or_deleted_macro_file = True
                deleted.append(file_id)

        changed = []
        changed_schema_files = []
        unchanged = []
        for file_id in common:
            if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
                unchanged.append(file_id)
            else:
                # separate out changed schema files
                if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                    sf = self.saved_files[file_id]
                    if type(sf).__name__ != 'SchemaSourceFile':
                        raise Exception(f"Serialization failure for {file_id}")
                    changed_schema_files.append(file_id)
                else:
                    if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                        changed_or_deleted_macro_file = True
                    changed.append(file_id)

        file_diff = {
            "deleted": deleted,
            "deleted_schema_files": deleted_schema_files,
            "added": added,
            "changed": changed,
            "changed_schema_files": changed_schema_files,
            "unchanged": unchanged,
        }
        if changed_or_deleted_macro_file:
            self.macro_child_map = self.saved_manifest.build_macro_child_map()
        logger.info(f"Partial parsing enabled: "
                    f"{len(deleted) + len(deleted_schema_files)} files deleted, "
                    f"{len(added)} files added, "
                    f"{len(changed) + len(changed_schema_files)} files changed.")
        self.file_diff = file_diff
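
    # For illustration only (file ids below are hypothetical): editing a single
    # model SQL file would typically produce a diff along the lines of
    #     {"deleted": [], "deleted_schema_files": [], "added": [],
    #      "changed": ["my_project://models/customers.sql"],
    #      "changed_schema_files": [], "unchanged": [...]}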

    # generate the list of files that need parsing
    # uses self.manifest.files generated by 'read_files'
    def get_parsing_files(self):
        if self.skip_parsing():
            return {}
        # Need to add new files first, because changes in schema files
        # might refer to them
        for file_id in self.file_diff['added']:
            self.processing_file = file_id
            self.add_to_saved(file_id)
        # Need to process schema files next, because the dictionaries
        # need to be in place for handling SQL file changes
        for file_id in self.file_diff['changed_schema_files']:
            self.processing_file = file_id
            self.change_schema_file(file_id)
        for file_id in self.file_diff['deleted_schema_files']:
            self.processing_file = file_id
            self.delete_schema_file(file_id)
        for file_id in self.file_diff['deleted']:
            self.processing_file = file_id
            self.delete_from_saved(file_id)
        for file_id in self.file_diff['changed']:
            self.processing_file = file_id
            self.update_in_saved(file_id)
        return self.project_parser_files
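
    # The returned project_parser_files dict is keyed by project name, then by
    # parser name, with lists of file ids to re-parse. Roughly (names below are
    # illustrative):
    #     {"my_project": {"ModelParser": ["my_project://models/customers.sql"],
    #                     "SchemaParser": ["my_project://models/schema.yml"]}}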

    # Add the file to the project parser dictionaries to schedule parsing
    def add_to_pp_files(self, source_file):
        file_id = source_file.file_id
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        project_name = source_file.project_name
        if not parser_name or not project_name:
            raise Exception(f"Did not find parse_file_type or project_name "
                            f"in SourceFile for {source_file.file_id}")
        if project_name not in self.project_parser_files:
            self.project_parser_files[project_name] = {}
        if parser_name not in self.project_parser_files[project_name]:
            self.project_parser_files[project_name][parser_name] = []
        if (file_id not in self.project_parser_files[project_name][parser_name] and
                file_id not in self.file_diff['deleted']):
            self.project_parser_files[project_name][parser_name].append(file_id)

    def already_scheduled_for_parsing(self, source_file):
        file_id = source_file.file_id
        project_name = source_file.project_name
        if project_name not in self.project_parser_files:
            return False
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        if parser_name not in self.project_parser_files[project_name]:
            return False
        if file_id not in self.project_parser_files[project_name][parser_name]:
            return False
        return True

    # Add new files, including schema files
    def add_to_saved(self, file_id):
        # add file object to saved manifest.files
        source_file = self.new_files[file_id]
        if source_file.parse_file_type == ParseFileType.Schema:
            self.handle_added_schema_file(source_file)
        self.saved_files[file_id] = source_file
        # update pp_files to parse
        self.add_to_pp_files(source_file)
        logger.debug(f"Partial parsing: added file: {file_id}")

    def handle_added_schema_file(self, source_file):
        source_file.pp_dict = source_file.dict_from_yaml.copy()
        if 'sources' in source_file.pp_dict:
            for source in source_file.pp_dict['sources']:
                # We need to remove the original source, so it can
                # be properly patched
                if 'overrides' in source:
                    self.remove_source_override_target(source)

    # Deletes for all non-schema files
    def delete_from_saved(self, file_id):
        # Look at all things touched by file, remove those
        # nodes, and update pp_files to parse unless the
        # file creating those nodes has also been deleted
        saved_source_file = self.saved_files[file_id]

        # SQL file: models, seeds, snapshots, analyses, tests: SQL files, except
        # macros/tests
        if saved_source_file.parse_file_type in mssat_files:
            self.remove_mssat_file(saved_source_file)
            self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

        # macros
        if saved_source_file.parse_file_type == ParseFileType.Macro:
            self.delete_macro_file(saved_source_file, follow_references=True)

        # docs
        if saved_source_file.parse_file_type == ParseFileType.Documentation:
            self.delete_doc_node(saved_source_file)

        logger.debug(f"Partial parsing: deleted file: {file_id}")

    # Updates for non-schema files
    def update_in_saved(self, file_id):
        new_source_file = self.new_files[file_id]
        old_source_file = self.saved_files[file_id]

        if new_source_file.parse_file_type in mssat_files:
            self.update_mssat_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Macro:
            self.update_macro_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Documentation:
            self.update_doc_in_saved(new_source_file, old_source_file)
        else:
            raise Exception(f"Invalid parse_file_type in source_file {file_id}")
        logger.debug(f"Partial parsing: updated file: {file_id}")

    # Models, seeds, snapshots: patches and tests
    # analyses: patches, no tests
    # tests: not touched by schema files (no patches, no tests)
    # Updated schema files should have been processed already.
    def update_mssat_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return

        # These files only have one node.
        unique_id = None
        if old_source_file.nodes:
            unique_id = old_source_file.nodes[0]
        else:
            # It's not clear when this would actually happen.
            # Logging in case there are other associated errors.
            logger.debug(f"Partial parsing: node not found for source_file {old_source_file}")

        # replace source_file in saved and add to parsing list
        file_id = new_source_file.file_id
        self.deleted_manifest.files[file_id] = old_source_file
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)
        if unique_id:
            self.remove_node_in_saved(new_source_file, unique_id)

    def remove_node_in_saved(self, source_file, unique_id):
        if unique_id in self.saved_manifest.nodes:
            # delete node in saved
            node = self.saved_manifest.nodes.pop(unique_id)
            self.deleted_manifest.nodes[unique_id] = node
        elif source_file.file_id in self.disabled_by_file_id:
            for dis_index, dis_node in enumerate(self.saved_manifest.disabled):
                if dis_node.file_id == source_file.file_id:
                    node = dis_node
                    break
            if dis_node:
                del self.saved_manifest.disabled[dis_index]
        else:
            # Has already been deleted by another action
            return

        # look at patch_path in model node to see if we need
        # to reapply a patch from a schema_file.
        if node.patch_path:
            file_id = node.patch_path
            # it might be changed... then what?
            if file_id not in self.file_diff['deleted']:
                # schema_files should already be updated
                schema_file = self.saved_files[file_id]
                dict_key = parse_file_type_to_key[source_file.parse_file_type]
                # look for a matching list dictionary
                elem_patch = None
                if dict_key in schema_file.dict_from_yaml:
                    for elem in schema_file.dict_from_yaml[dict_key]:
                        if elem['name'] == node.name:
                            elem_patch = elem
                            break
                if elem_patch:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
                    self.merge_patch(schema_file, dict_key, elem_patch)
                    if unique_id in schema_file.node_patches:
                        schema_file.node_patches.remove(unique_id)
                if unique_id in self.saved_manifest.disabled:
                    # We have a patch_path in disabled nodes with a patch so
                    # that we can connect the patch to the node
                    for node in self.saved_manifest.disabled[unique_id]:
                        node.patch_path = None

    def update_macro_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.handle_macro_file_links(old_source_file, follow_references=True)
        file_id = new_source_file.file_id
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

    def update_doc_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.delete_doc_node(old_source_file)
        self.saved_files[new_source_file.file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

    def remove_mssat_file(self, source_file):
        # nodes [unique_ids] -- SQL files
        # There should always be a node for a SQL file
        if not source_file.nodes:
            logger.debug(f"No nodes found for source file {source_file.file_id}")
            return
        # There is generally only 1 node for SQL files, except for macros
        for unique_id in source_file.nodes:
            self.remove_node_in_saved(source_file, unique_id)
            self.schedule_referencing_nodes_for_parsing(unique_id)

    # We need to re-parse nodes that reference another removed node
    def schedule_referencing_nodes_for_parsing(self, unique_id):
        # Look at "children", i.e. nodes that reference this node
        if unique_id in self.saved_manifest.child_map:
            self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
        else:
            logger.debug(f"Partial parsing: {unique_id} not found in child_map")

    def schedule_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    # test nodes are handled separately. Must be removed from schema file
                    continue
                file_id = node.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.remove_mssat_file(source_file)
                    # content of non-schema files is only in new files
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                file_id = source.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    sources = []
                    if 'sources' in schema_file.dict_from_yaml:
                        sources = schema_file.dict_from_yaml['sources']
                    source_element = self.get_schema_element(sources, source.source_name)
                    if source_element:
                        self.delete_schema_source(schema_file, source_element)
                        self.remove_tests(schema_file, 'sources', source_element['name'])
                        self.merge_patch(schema_file, 'sources', source_element)
            elif unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                file_id = exposure.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    exposures = []
                    if 'exposures' in schema_file.dict_from_yaml:
                        exposures = schema_file.dict_from_yaml['exposures']
                    exposure_element = self.get_schema_element(exposures, exposure.name)
                    if exposure_element:
                        self.delete_schema_exposure(schema_file, exposure_element)
                        self.merge_patch(schema_file, 'exposures', exposure_element)
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_macro_file(self, source_file, follow_references=False):
        self.handle_macro_file_links(source_file, follow_references)
        file_id = source_file.file_id
        self.deleted_manifest.files[file_id] = self.saved_files.pop(file_id)

    def recursively_gather_macro_references(self, macro_unique_id, referencing_nodes):
        for unique_id in self.macro_child_map[macro_unique_id]:
            if unique_id in referencing_nodes:
                continue
            referencing_nodes.append(unique_id)
            if unique_id.startswith('macro.'):
                self.recursively_gather_macro_references(unique_id, referencing_nodes)

    def handle_macro_file_links(self, source_file, follow_references=False):
        # remove the macros in the 'macros' dictionary
        macros = source_file.macros.copy()
        for unique_id in macros:
            if unique_id not in self.saved_manifest.macros:
                # This happens when a macro has already been removed
                if unique_id in source_file.macros:
                    source_file.macros.remove(unique_id)
                continue

            base_macro = self.saved_manifest.macros.pop(unique_id)
            self.deleted_manifest.macros[unique_id] = base_macro

            # Recursively check children of this macro.
            # The macro_child_map might not exist if a macro is removed by
            # schedule_nodes_for_parsing. We only want to follow
            # references if the macro file itself has been updated or
            # deleted, not if we're just updating referenced nodes.
            if self.macro_child_map and follow_references:
                referencing_nodes = []
                self.recursively_gather_macro_references(unique_id, referencing_nodes)
                self.schedule_macro_nodes_for_parsing(referencing_nodes)

            if base_macro.patch_path:
                file_id = base_macro.patch_path
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                    macro_patches = []
                    if 'macros' in schema_file.dict_from_yaml:
                        macro_patches = schema_file.dict_from_yaml['macros']
                    macro_patch = self.get_schema_element(macro_patches, base_macro.name)
                    self.delete_schema_macro_patch(schema_file, macro_patch)
                    self.merge_patch(schema_file, 'macros', macro_patch)

            # The macro may have already been removed by handling macro children
            if unique_id in source_file.macros:
                source_file.macros.remove(unique_id)

    # similar to schedule_nodes_for_parsing but doesn't do sources and exposures
    # and handles schema tests
    def schedule_macro_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    schema_file_id = node.file_id
                    schema_file = self.saved_manifest.files[schema_file_id]
                    (key, name) = schema_file.get_key_and_name_for_test(node.unique_id)
                    if key and name:
                        patch_list = []
                        if key in schema_file.dict_from_yaml:
                            patch_list = schema_file.dict_from_yaml[key]
                        patch = self.get_schema_element(patch_list, name)
                        if patch:
                            if key in ['models', 'seeds', 'snapshots']:
                                self.delete_schema_mssa_links(schema_file, key, patch)
                                self.merge_patch(schema_file, key, patch)
                                if unique_id in schema_file.node_patches:
                                    schema_file.node_patches.remove(unique_id)
                            elif key == 'sources':
                                # re-schedule source
                                if 'overrides' in patch:
                                    # This is a source patch; need to re-parse orig source
                                    self.remove_source_override_target(patch)
                                self.delete_schema_source(schema_file, patch)
                                self.remove_tests(schema_file, 'sources', patch['name'])
                                self.merge_patch(schema_file, 'sources', patch)
                else:
                    file_id = node.file_id
                    if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                        source_file = self.saved_files[file_id]
                        self.remove_mssat_file(source_file)
                        # content of non-schema files is only in new files
                        self.saved_files[file_id] = self.new_files[file_id]
                        self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_doc_node(self, source_file):
        # remove the nodes in the 'docs' dictionary
        docs = source_file.docs.copy()
        for unique_id in docs:
            self.deleted_manifest.docs[unique_id] = self.saved_manifest.docs.pop(unique_id)
            source_file.docs.remove(unique_id)
        # The unique_id of objects that contain a doc call are stored in the
        # doc source_file.nodes
        self.schedule_nodes_for_parsing(source_file.nodes)
        source_file.nodes = []

    # Schema files -----------------------
    # Changed schema files
    def change_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        new_schema_file = self.new_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = new_schema_file.dict_from_yaml
        if 'version' in new_yaml_dict:
            # despite the fact that this goes in the saved_schema_file, it
            # should represent the new yaml dictionary, and should produce
            # an error if the updated yaml file doesn't have a version
            saved_schema_file.pp_dict = {"version": new_yaml_dict['version']}
        else:
            saved_schema_file.pp_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)

        # copy from new schema_file to saved_schema_file to preserve references
        # that weren't removed
        saved_schema_file.contents = new_schema_file.contents
        saved_schema_file.checksum = new_schema_file.checksum
        saved_schema_file.dfy = new_schema_file.dfy
        # schedule parsing
        self.add_to_pp_files(saved_schema_file)
        # schema_file pp_dict should have been generated already
        logger.debug(f"Partial parsing: update schema file: {file_id}")

    # Delete schema files -- a variation on change_schema_file
    def delete_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
        self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

    # For each key in a schema file dictionary, process the changed, deleted, and added
    # elements for the key lists
    def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
        # loop through comparing previous dict_from_yaml with current dict_from_yaml
        # Need to do the deleted/added/changed thing, just like the files lists

        # models, seeds, snapshots, analyses
        for dict_key in ['models', 'seeds', 'snapshots', 'analyses']:
            key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
            if key_diff['changed']:
                for elem in key_diff['changed']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
                    self.merge_patch(schema_file, dict_key, elem)
            if key_diff['deleted']:
                for elem in key_diff['deleted']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
            if key_diff['added']:
                for elem in key_diff['added']:
                    self.merge_patch(schema_file, dict_key, elem)

        # sources
        source_diff = self.get_diff_for('sources', saved_yaml_dict, new_yaml_dict)
        if source_diff['changed']:
            for source in source_diff['changed']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
                self.merge_patch(schema_file, 'sources', source)
        if source_diff['deleted']:
            for source in source_diff['deleted']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
        if source_diff['added']:
            for source in source_diff['added']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.merge_patch(schema_file, 'sources', source)

        # macros
        macro_diff = self.get_diff_for('macros', saved_yaml_dict, new_yaml_dict)
        if macro_diff['changed']:
            for macro in macro_diff['changed']:
                self.delete_schema_macro_patch(schema_file, macro)
                self.merge_patch(schema_file, 'macros', macro)
        if macro_diff['deleted']:
            for macro in macro_diff['deleted']:
                self.delete_schema_macro_patch(schema_file, macro)
        if macro_diff['added']:
            for macro in macro_diff['added']:
                self.merge_patch(schema_file, 'macros', macro)

        # exposures
        exposure_diff = self.get_diff_for('exposures', saved_yaml_dict, new_yaml_dict)
        if exposure_diff['changed']:
            for exposure in exposure_diff['changed']:
                self.delete_schema_exposure(schema_file, exposure)
                self.merge_patch(schema_file, 'exposures', exposure)
        if exposure_diff['deleted']:
            for exposure in exposure_diff['deleted']:
                self.delete_schema_exposure(schema_file, exposure)
        if exposure_diff['added']:
            for exposure in exposure_diff['added']:
                self.merge_patch(schema_file, 'exposures', exposure)

    # Take a "section" of the schema file yaml dictionary from saved and new schema files
    # and determine which parts have changed
    def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
        if key in saved_yaml_dict or key in new_yaml_dict:
            saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
            new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
        else:
            return {'deleted': [], 'added': [], 'changed': []}
        # for each set of keys, need to create a dictionary of names pointing to entry
        saved_elements_by_name = {}
        new_elements_by_name = {}
        # sources have two part names?
        for element in saved_elements:
            saved_elements_by_name[element['name']] = element
        for element in new_elements:
            new_elements_by_name[element['name']] = element

        # now determine which elements, by name, are added, deleted or changed
        saved_element_names = set(saved_elements_by_name.keys())
        new_element_names = set(new_elements_by_name.keys())
        deleted = saved_element_names.difference(new_element_names)
        added = new_element_names.difference(saved_element_names)
        common = saved_element_names.intersection(new_element_names)
        changed = []
        for element_name in common:
            if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
                changed.append(element_name)

        # make lists of yaml elements to return as diffs
        deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
        added_elements = [new_elements_by_name[name].copy() for name in added]
        changed_elements = [new_elements_by_name[name].copy() for name in changed]
        diff = {
            "deleted": deleted_elements,
            "added": added_elements,
            "changed": changed_elements,
        }
        return diff
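
    # Note that elements are matched purely by 'name', so renaming an entry in
    # a schema file surfaces as one "deleted" plus one "added" element rather
    # than a "changed" one.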

    # Merge a patch file into the pp_dict in a schema file
    def merge_patch(self, schema_file, key, patch):
        if not schema_file.pp_dict:
            schema_file.pp_dict = {"version": schema_file.dict_from_yaml['version']}
        pp_dict = schema_file.pp_dict
        if key not in pp_dict:
            pp_dict[key] = [patch]
        else:
            # check that this patch hasn't already been saved
            found = False
            for elem in pp_dict[key]:
                if elem['name'] == patch['name']:
                    found = True
            if not found:
                pp_dict[key].append(patch)
        self.add_to_pp_files(schema_file)
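
    # For example (illustrative values only), after merging a changed model
    # patch the schema file's pp_dict might look roughly like:
    #     {"version": 2, "models": [{"name": "customers", "description": "..."}]}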

    # For model, seed, snapshot, analysis schema dictionary keys,
    # delete the patches and tests from the patch
    def delete_schema_mssa_links(self, schema_file, dict_key, elem):
        # find elem node unique_id in node_patches
        prefix = key_to_prefix[dict_key]
        elem_unique_id = ''
        for unique_id in schema_file.node_patches:
            if not unique_id.startswith(prefix):
                continue
            parts = unique_id.split('.')
            elem_name = parts[-1]
            if elem_name == elem['name']:
                elem_unique_id = unique_id
                break

        # remove elem node and remove unique_id from node_patches
        if elem_unique_id:
            # might have been already removed
            if elem_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(elem_unique_id)
                self.deleted_manifest.nodes[elem_unique_id] = node
                # need to add the node source_file to pp_files
                file_id = node.file_id
                # need to copy new file to saved files in order to get content
                if file_id in self.new_files:
                    self.saved_files[file_id] = self.new_files[file_id]
                if self.saved_files[file_id]:
                    source_file = self.saved_files[file_id]
                    self.add_to_pp_files(source_file)
            # remove from patches
            schema_file.node_patches.remove(elem_unique_id)

        # for models, seeds, snapshots (not analyses)
        if dict_key in ['models', 'seeds', 'snapshots']:
            # find related tests and remove them
            self.remove_tests(schema_file, dict_key, elem['name'])

    def remove_tests(self, schema_file, dict_key, name):
        tests = schema_file.get_tests(dict_key, name)
        for test_unique_id in tests:
            if test_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(test_unique_id)
                self.deleted_manifest.nodes[test_unique_id] = node
        schema_file.remove_tests(dict_key, name)

    def delete_schema_source(self, schema_file, source_dict):
        # both patches, tests, and source nodes
        source_name = source_dict['name']
        # There may be multiple sources for each source dict, since
        # there will be a separate source node for each table.
        # ParsedSourceDefinition name = table name, dict name is source_name
        sources = schema_file.sources.copy()
        for unique_id in sources:
            if unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                if source.source_name == source_name:
                    source = self.saved_manifest.sources.pop(unique_id)
                    self.deleted_manifest.sources[unique_id] = source
                    schema_file.sources.remove(unique_id)
                    self.schedule_referencing_nodes_for_parsing(unique_id)
                    logger.debug(f"Partial parsing: deleted source {unique_id}")

    def delete_schema_macro_patch(self, schema_file, macro):
        # This is just macro patches that need to be reapplied
        macro_unique_id = None
        if macro['name'] in schema_file.macro_patches:
            macro_unique_id = schema_file.macro_patches[macro['name']]
            del schema_file.macro_patches[macro['name']]
        if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
            macro = self.saved_manifest.macros.pop(macro_unique_id)
            self.deleted_manifest.macros[macro_unique_id] = macro
            macro_file_id = macro.file_id
            if macro_file_id in self.new_files:
                self.saved_files[macro_file_id] = self.new_files[macro_file_id]
                self.add_to_pp_files(self.saved_files[macro_file_id])

    # exposures are created only from schema files, so just delete
    # the exposure.
    def delete_schema_exposure(self, schema_file, exposure_dict):
        exposure_name = exposure_dict['name']
        exposures = schema_file.exposures.copy()
        for unique_id in exposures:
            if unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                if exposure.name == exposure_name:
                    self.deleted_manifest.exposures[unique_id] = \
                        self.saved_manifest.exposures.pop(unique_id)
                    schema_file.exposures.remove(unique_id)
                    logger.debug(f"Partial parsing: deleted exposure {unique_id}")

    def get_schema_element(self, elem_list, elem_name):
        for element in elem_list:
            if 'name' in element and element['name'] == elem_name:
                return element
        return None

    def get_schema_file_for_source(self, package_name, source_name):
        schema_file = None
        for source in self.saved_manifest.sources.values():
            if source.package_name == package_name and source.source_name == source_name:
                file_id = source.file_id
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                break
        return schema_file

    def get_source_override_file_and_dict(self, source):
        package = source['overrides']
        source_name = source['name']
        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
        orig_sources = orig_source_schema_file.dict_from_yaml['sources']
        orig_source = self.get_schema_element(orig_sources, source_name)
        return (orig_source_schema_file, orig_source)

    def remove_source_override_target(self, source_dict):
        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
        if orig_source:
            self.delete_schema_source(orig_file, orig_source)
            self.remove_tests(orig_file, 'sources', orig_source['name'])
            self.merge_patch(orig_file, 'sources', orig_source)
            self.add_to_pp_files(orig_file)