from typing import MutableMapping, Dict, List

from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.files import (
    AnySourceFile, ParseFileType, parse_file_type_to_parser,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType


mssat_files = (
    ParseFileType.Model,
    ParseFileType.Seed,
    ParseFileType.Snapshot,
    ParseFileType.Analysis,
    ParseFileType.Test,
)

key_to_prefix = {
    'models': 'model',
    'seeds': 'seed',
    'snapshots': 'snapshot',
    'analyses': 'analysis',
}

parse_file_type_to_key = {
    ParseFileType.Model: 'models',
    ParseFileType.Seed: 'seeds',
    ParseFileType.Snapshot: 'snapshots',
    ParseFileType.Analysis: 'analyses',
}


# Partial parsing. Create a diff of files from the saved manifest and the
# current files and produce a project_parser_files dictionary to drive
# parsing of only the necessary changes.
# Provides a 'skip_parsing' method and a project_parser_files dictionary.
class PartialParsing:
    def __init__(self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]):
        self.saved_manifest = saved_manifest
        self.new_files = new_files
        self.project_parser_files: Dict = {}
        self.saved_files = self.saved_manifest.files
        self.deleted_manifest = Manifest()
        self.macro_child_map: Dict[str, List[str]] = {}
        self.build_file_diff()
        self.processing_file = None
        self.disabled_by_file_id = self.saved_manifest.build_disabled_by_file_id()

    def skip_parsing(self):
        return (
            not self.file_diff['deleted'] and
            not self.file_diff['added'] and
            not self.file_diff['changed'] and
            not self.file_diff['changed_schema_files'] and
            not self.file_diff['deleted_schema_files']
        )

    # Compare the previously saved manifest files and the just-loaded manifest
    # files to see if anything changed
    def build_file_diff(self):
        saved_file_ids = set(self.saved_files.keys())
        new_file_ids = set(self.new_files.keys())
        deleted_all_files = saved_file_ids.difference(new_file_ids)
        added = new_file_ids.difference(saved_file_ids)
        common = saved_file_ids.intersection(new_file_ids)
        changed_or_deleted_macro_file = False

        # separate out deleted schema files
        deleted_schema_files = []
        deleted = []
        for file_id in deleted_all_files:
            if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                deleted_schema_files.append(file_id)
            else:
                if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                    changed_or_deleted_macro_file = True
                deleted.append(file_id)

        changed = []
        changed_schema_files = []
        unchanged = []
        for file_id in common:
            if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
                unchanged.append(file_id)
            else:
                # separate out changed schema files
                if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                    sf = self.saved_files[file_id]
                    if type(sf).__name__ != 'SchemaSourceFile':
                        raise Exception(f"Serialization failure for {file_id}")
                    changed_schema_files.append(file_id)
                else:
                    if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                        changed_or_deleted_macro_file = True
                    changed.append(file_id)
        file_diff = {
            "deleted": deleted,
            "deleted_schema_files": deleted_schema_files,
            "added": added,
            "changed": changed,
            "changed_schema_files": changed_schema_files,
            "unchanged": unchanged,
        }
        if changed_or_deleted_macro_file:
            self.macro_child_map = self.saved_manifest.build_macro_child_map()
        logger.info(f"Partial parsing enabled: "
                    f"{len(deleted) + len(deleted_schema_files)} files deleted, "
                    f"{len(added)} files added, "
                    f"{len(changed) + len(changed_schema_files)} files changed.")
        self.file_diff = file_diff
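    # Illustrative flow (names and ids below are hypothetical, for
    # orientation only):
    #
    #   pp = PartialParsing(saved_manifest, new_files)
    #   if not pp.skip_parsing():
    #       project_parser_files = pp.get_parsing_files()
    #       # e.g. {'my_project': {'ModelParser': ['my_project://models/stg_orders.sql']}}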
    # Generate the list of files that need parsing.
    # Uses self.manifest.files generated by 'read_files'.
    def get_parsing_files(self):
        if self.skip_parsing():
            return {}
        # Need to add new files first, because changes in schema files
        # might refer to them
        for file_id in self.file_diff['added']:
            self.processing_file = file_id
            self.add_to_saved(file_id)
        # Need to process schema files next, because the dictionaries
        # need to be in place for handling SQL file changes
        for file_id in self.file_diff['changed_schema_files']:
            self.processing_file = file_id
            self.change_schema_file(file_id)
        for file_id in self.file_diff['deleted_schema_files']:
            self.processing_file = file_id
            self.delete_schema_file(file_id)
        for file_id in self.file_diff['deleted']:
            self.processing_file = file_id
            self.delete_from_saved(file_id)
        for file_id in self.file_diff['changed']:
            self.processing_file = file_id
            self.update_in_saved(file_id)
        return self.project_parser_files

    # Add the file to the project parser dictionaries to schedule parsing
    def add_to_pp_files(self, source_file):
        file_id = source_file.file_id
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        project_name = source_file.project_name
        if not parser_name or not project_name:
            raise Exception(f"Did not find parse_file_type or project_name "
                            f"in SourceFile for {source_file.file_id}")
        if project_name not in self.project_parser_files:
            self.project_parser_files[project_name] = {}
        if parser_name not in self.project_parser_files[project_name]:
            self.project_parser_files[project_name][parser_name] = []
        if (file_id not in self.project_parser_files[project_name][parser_name] and
                file_id not in self.file_diff['deleted']):
            self.project_parser_files[project_name][parser_name].append(file_id)

    def already_scheduled_for_parsing(self, source_file):
        file_id = source_file.file_id
        project_name = source_file.project_name
        if project_name not in self.project_parser_files:
            return False
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        if parser_name not in self.project_parser_files[project_name]:
            return False
        if file_id not in self.project_parser_files[project_name][parser_name]:
            return False
        return True

    # Add new files, including schema files
    def add_to_saved(self, file_id):
        # add file object to saved manifest.files
        source_file = self.new_files[file_id]
        if source_file.parse_file_type == ParseFileType.Schema:
            self.handle_added_schema_file(source_file)
        self.saved_files[file_id] = source_file
        # update pp_files to parse
        self.add_to_pp_files(source_file)
        logger.debug(f"Partial parsing: added file: {file_id}")

    def handle_added_schema_file(self, source_file):
        source_file.pp_dict = source_file.dict_from_yaml.copy()
        if 'sources' in source_file.pp_dict:
            for source in source_file.pp_dict['sources']:
                # We need to remove the original source, so it can
                # be properly patched
                if 'overrides' in source:
                    self.remove_source_override_target(source)
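    # For orientation: after handle_added_schema_file, pp_dict is a copy of
    # the parsed yaml, so every element in the new file gets rescheduled for
    # parsing, e.g. (hypothetical contents):
    #   {'version': 2, 'models': [{'name': 'stg_orders', 'columns': [...]}]}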
    # Deletes for all non-schema files
    def delete_from_saved(self, file_id):
        # Look at all things touched by the file, remove those
        # nodes, and update pp_files to parse unless the
        # file creating those nodes has also been deleted
        saved_source_file = self.saved_files[file_id]

        # SQL files: models, seeds, snapshots, analyses, tests
        # (macros and schema files are handled separately)
        if saved_source_file.parse_file_type in mssat_files:
            self.remove_mssat_file(saved_source_file)
            self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

        # macros
        if saved_source_file.parse_file_type == ParseFileType.Macro:
            self.delete_macro_file(saved_source_file, follow_references=True)

        # docs
        if saved_source_file.parse_file_type == ParseFileType.Documentation:
            self.delete_doc_node(saved_source_file)

        logger.debug(f"Partial parsing: deleted file: {file_id}")

    # Updates for non-schema files
    def update_in_saved(self, file_id):
        new_source_file = self.new_files[file_id]
        old_source_file = self.saved_files[file_id]

        if new_source_file.parse_file_type in mssat_files:
            self.update_mssat_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Macro:
            self.update_macro_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Documentation:
            self.update_doc_in_saved(new_source_file, old_source_file)
        else:
            raise Exception(f"Invalid parse_file_type in source_file {file_id}")
        logger.debug(f"Partial parsing: updated file: {file_id}")

    # Models, seeds, snapshots: patches and tests
    # analyses: patches, no tests
    # tests: not touched by schema files (no patches, no tests)
    # Updated schema files should have been processed already.
    def update_mssat_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return

        # These files only have one node.
        unique_id = None
        if old_source_file.nodes:
            unique_id = old_source_file.nodes[0]
        else:
            # It's not clear when this would actually happen.
            # Logging in case there are other associated errors.
            logger.debug(f"Partial parsing: node not found for source_file {old_source_file}")

        # replace source_file in saved and add to parsing list
        file_id = new_source_file.file_id
        self.deleted_manifest.files[file_id] = old_source_file
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)
        if unique_id:
            self.remove_node_in_saved(new_source_file, unique_id)
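    # For reference: node.patch_path (used below) holds the file_id of the
    # schema file that patched the node, e.g. 'my_project://models/schema.yml'
    # (hypothetical id), which is why it can be used to index saved_files.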
    def remove_node_in_saved(self, source_file, unique_id):
        if unique_id in self.saved_manifest.nodes:
            # delete node in saved
            node = self.saved_manifest.nodes.pop(unique_id)
            self.deleted_manifest.nodes[unique_id] = node
        elif source_file.file_id in self.disabled_by_file_id:
            # find the disabled node created by this file
            node = None
            for dis_index, dis_node in enumerate(self.saved_manifest.disabled):
                if dis_node.file_id == source_file.file_id:
                    node = dis_node
                    break
            if node:
                del self.saved_manifest.disabled[dis_index]
            else:
                return
        else:
            # Has already been deleted by another action
            return

        # look at patch_path in model node to see if we need
        # to reapply a patch from a schema_file.
        if node.patch_path:
            file_id = node.patch_path
            # it might be changed... then what?
            if file_id not in self.file_diff['deleted']:
                # schema_files should already be updated
                schema_file = self.saved_files[file_id]
                dict_key = parse_file_type_to_key[source_file.parse_file_type]
                # look for a matching list dictionary
                elem_patch = None
                if dict_key in schema_file.dict_from_yaml:
                    for elem in schema_file.dict_from_yaml[dict_key]:
                        if elem['name'] == node.name:
                            elem_patch = elem
                            break
                if elem_patch:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
                    self.merge_patch(schema_file, dict_key, elem_patch)
                    if unique_id in schema_file.node_patches:
                        schema_file.node_patches.remove(unique_id)
                if unique_id in self.saved_manifest.disabled:
                    # We have a patch_path in disabled nodes with a patch so
                    # that we can connect the patch to the node
                    for node in self.saved_manifest.disabled[unique_id]:
                        node.patch_path = None

    def update_macro_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.handle_macro_file_links(old_source_file, follow_references=True)
        file_id = new_source_file.file_id
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

    def update_doc_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.delete_doc_node(old_source_file)
        self.saved_files[new_source_file.file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

    def remove_mssat_file(self, source_file):
        # nodes [unique_ids] -- SQL files
        # There should always be a node for a SQL file
        if not source_file.nodes:
            logger.debug(f"No nodes found for source file {source_file.file_id}")
            return
        # There is generally only 1 node for SQL files, except for macros
        for unique_id in source_file.nodes:
            self.remove_node_in_saved(source_file, unique_id)
            self.schedule_referencing_nodes_for_parsing(unique_id)

    # We need to re-parse nodes that reference another removed node
    def schedule_referencing_nodes_for_parsing(self, unique_id):
        # Look at "children", i.e. nodes that reference this node
        if unique_id in self.saved_manifest.child_map:
            self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
        else:
            logger.debug(f"Partial parsing: {unique_id} not found in child_map")
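    # child_map sketch (hypothetical ids): referencing nodes are looked up as
    #   {'model.my_project.stg_orders': ['model.my_project.orders',
    #                                    'test.my_project.unique_stg_orders_id']}
    # so removing stg_orders schedules both children for re-parsing.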
    def schedule_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    # test nodes are handled separately. Must be removed
                    # from the schema file.
                    continue
                file_id = node.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.remove_mssat_file(source_file)
                    # content of non-schema files is only in new files
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                file_id = source.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    sources = []
                    if 'sources' in schema_file.dict_from_yaml:
                        sources = schema_file.dict_from_yaml['sources']
                    source_element = self.get_schema_element(sources, source.source_name)
                    if source_element:
                        self.delete_schema_source(schema_file, source_element)
                        self.remove_tests(schema_file, 'sources', source_element['name'])
                        self.merge_patch(schema_file, 'sources', source_element)
            elif unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                file_id = exposure.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    exposures = []
                    if 'exposures' in schema_file.dict_from_yaml:
                        exposures = schema_file.dict_from_yaml['exposures']
                    exposure_element = self.get_schema_element(exposures, exposure.name)
                    if exposure_element:
                        self.delete_schema_exposure(schema_file, exposure_element)
                        self.merge_patch(schema_file, 'exposures', exposure_element)
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_macro_file(self, source_file, follow_references=False):
        self.handle_macro_file_links(source_file, follow_references)
        file_id = source_file.file_id
        self.deleted_manifest.files[file_id] = self.saved_files.pop(file_id)

    def recursively_gather_macro_references(self, macro_unique_id, referencing_nodes):
        for unique_id in self.macro_child_map[macro_unique_id]:
            if unique_id in referencing_nodes:
                continue
            referencing_nodes.append(unique_id)
            if unique_id.startswith('macro.'):
                self.recursively_gather_macro_references(unique_id, referencing_nodes)
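    # macro_child_map sketch (hypothetical ids): children of a macro can
    # themselves be macros, hence the recursion above, e.g.
    #   {'macro.my_project.cents_to_dollars':
    #       ['macro.my_project.money_helpers', 'model.my_project.payments']}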
    def handle_macro_file_links(self, source_file, follow_references=False):
        # remove the macros in the 'macros' dictionary
        macros = source_file.macros.copy()
        for unique_id in macros:
            if unique_id not in self.saved_manifest.macros:
                # This happens when a macro has already been removed
                if unique_id in source_file.macros:
                    source_file.macros.remove(unique_id)
                continue

            base_macro = self.saved_manifest.macros.pop(unique_id)
            self.deleted_manifest.macros[unique_id] = base_macro

            # Recursively check children of this macro.
            # The macro_child_map might not exist if a macro is removed by
            # schedule_nodes_for_parsing. We only want to follow references
            # if the macro file itself has been updated or deleted, not if
            # we're just updating referenced nodes.
            if self.macro_child_map and follow_references:
                referencing_nodes = []
                self.recursively_gather_macro_references(unique_id, referencing_nodes)
                self.schedule_macro_nodes_for_parsing(referencing_nodes)

            if base_macro.patch_path:
                file_id = base_macro.patch_path
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                    macro_patches = []
                    if 'macros' in schema_file.dict_from_yaml:
                        macro_patches = schema_file.dict_from_yaml['macros']
                    macro_patch = self.get_schema_element(macro_patches, base_macro.name)
                    self.delete_schema_macro_patch(schema_file, macro_patch)
                    self.merge_patch(schema_file, 'macros', macro_patch)

            # The macro may have already been removed by handling macro children
            if unique_id in source_file.macros:
                source_file.macros.remove(unique_id)

    # similar to schedule_nodes_for_parsing but doesn't do sources and exposures
    # and handles schema tests
    def schedule_macro_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    schema_file_id = node.file_id
                    schema_file = self.saved_manifest.files[schema_file_id]
                    (key, name) = schema_file.get_key_and_name_for_test(node.unique_id)
                    if key and name:
                        patch_list = []
                        if key in schema_file.dict_from_yaml:
                            patch_list = schema_file.dict_from_yaml[key]
                        patch = self.get_schema_element(patch_list, name)
                        if patch:
                            if key in ['models', 'seeds', 'snapshots']:
                                self.delete_schema_mssa_links(schema_file, key, patch)
                                self.merge_patch(schema_file, key, patch)
                                if unique_id in schema_file.node_patches:
                                    schema_file.node_patches.remove(unique_id)
                            elif key == 'sources':
                                # re-schedule source
                                if 'overrides' in patch:
                                    # This is a source patch; need to re-parse orig source
                                    self.remove_source_override_target(patch)
                                self.delete_schema_source(schema_file, patch)
                                self.remove_tests(schema_file, 'sources', patch['name'])
                                self.merge_patch(schema_file, 'sources', patch)
                else:
                    file_id = node.file_id
                    if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                        source_file = self.saved_files[file_id]
                        self.remove_mssat_file(source_file)
                        # content of non-schema files is only in new files
                        self.saved_files[file_id] = self.new_files[file_id]
                        self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_doc_node(self, source_file):
        # remove the nodes in the 'docs' dictionary
        docs = source_file.docs.copy()
        for unique_id in docs:
            self.deleted_manifest.docs[unique_id] = self.saved_manifest.docs.pop(unique_id)
            source_file.docs.remove(unique_id)
        # The unique_ids of objects that contain a doc call are stored in the
        # doc source_file.nodes
        self.schedule_nodes_for_parsing(source_file.nodes)
        source_file.nodes = []
    # Schema files -----------------------
    # Changed schema files
    def change_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        new_schema_file = self.new_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = new_schema_file.dict_from_yaml
        if 'version' in new_yaml_dict:
            # Despite the fact that this goes in the saved_schema_file, it
            # should represent the new yaml dictionary, and should produce
            # an error if the updated yaml file doesn't have a version
            saved_schema_file.pp_dict = {"version": new_yaml_dict['version']}
        else:
            saved_schema_file.pp_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)

        # copy from new schema_file to saved_schema_file to preserve references
        # that weren't removed
        saved_schema_file.contents = new_schema_file.contents
        saved_schema_file.checksum = new_schema_file.checksum
        saved_schema_file.dfy = new_schema_file.dfy
        # schedule parsing
        self.add_to_pp_files(saved_schema_file)
        # schema_file pp_dict should have been generated already
        logger.debug(f"Partial parsing: update schema file: {file_id}")

    # Delete schema files -- a variation on change_schema_file
    def delete_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
        self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

    # For each key in a schema file dictionary, process the changed, deleted,
    # and added elements in the key lists
    def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
        # loop through comparing previous dict_from_yaml with current dict_from_yaml
        # Need to do the deleted/added/changed thing, just like the files lists

        # models, seeds, snapshots, analyses
        for dict_key in ['models', 'seeds', 'snapshots', 'analyses']:
            key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
            if key_diff['changed']:
                for elem in key_diff['changed']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
                    self.merge_patch(schema_file, dict_key, elem)
            if key_diff['deleted']:
                for elem in key_diff['deleted']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
            if key_diff['added']:
                for elem in key_diff['added']:
                    self.merge_patch(schema_file, dict_key, elem)

        # sources
        source_diff = self.get_diff_for('sources', saved_yaml_dict, new_yaml_dict)
        if source_diff['changed']:
            for source in source_diff['changed']:
                if 'overrides' in source:
                    # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
                self.merge_patch(schema_file, 'sources', source)
        if source_diff['deleted']:
            for source in source_diff['deleted']:
                if 'overrides' in source:
                    # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
        if source_diff['added']:
            for source in source_diff['added']:
                if 'overrides' in source:
                    # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.merge_patch(schema_file, 'sources', source)

        # macros
        macro_diff = self.get_diff_for('macros', saved_yaml_dict, new_yaml_dict)
        if macro_diff['changed']:
            for macro in macro_diff['changed']:
                self.delete_schema_macro_patch(schema_file, macro)
                self.merge_patch(schema_file, 'macros', macro)
        if macro_diff['deleted']:
            for macro in macro_diff['deleted']:
                self.delete_schema_macro_patch(schema_file, macro)
        if macro_diff['added']:
            for macro in macro_diff['added']:
                self.merge_patch(schema_file, 'macros', macro)

        # exposures
        exposure_diff = self.get_diff_for('exposures', saved_yaml_dict, new_yaml_dict)
        if exposure_diff['changed']:
            for exposure in exposure_diff['changed']:
                self.delete_schema_exposure(schema_file, exposure)
                self.merge_patch(schema_file, 'exposures', exposure)
        if exposure_diff['deleted']:
            for exposure in exposure_diff['deleted']:
                self.delete_schema_exposure(schema_file, exposure)
        if exposure_diff['added']:
            for exposure in exposure_diff['added']:
                self.merge_patch(schema_file, 'exposures', exposure)
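    # get_diff_for sketch (hypothetical yaml dicts): comparing
    #   saved: {'models': [{'name': 'a', 'description': 'old'}, {'name': 'b'}]}
    #   new:   {'models': [{'name': 'a', 'description': 'new'}]}
    # for key 'models' yields
    #   {'deleted': [{'name': 'b'}], 'added': [],
    #    'changed': [{'name': 'a', 'description': 'new'}]}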
    # Take a "section" of the schema file yaml dictionary from saved and new
    # schema files and determine which parts have changed
    def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
        if key in saved_yaml_dict or key in new_yaml_dict:
            saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
            new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
        else:
            return {'deleted': [], 'added': [], 'changed': []}
        # for each set of keys, need to create a dictionary of names pointing to entry
        saved_elements_by_name = {}
        new_elements_by_name = {}
        # sources have two-part names?
        for element in saved_elements:
            saved_elements_by_name[element['name']] = element
        for element in new_elements:
            new_elements_by_name[element['name']] = element

        # now determine which elements, by name, are added, deleted or changed
        saved_element_names = set(saved_elements_by_name.keys())
        new_element_names = set(new_elements_by_name.keys())
        deleted = saved_element_names.difference(new_element_names)
        added = new_element_names.difference(saved_element_names)
        common = saved_element_names.intersection(new_element_names)
        changed = []
        for element_name in common:
            if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
                changed.append(element_name)

        # make lists of yaml elements to return as diffs
        deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
        added_elements = [new_elements_by_name[name].copy() for name in added]
        changed_elements = [new_elements_by_name[name].copy() for name in changed]

        diff = {
            "deleted": deleted_elements,
            "added": added_elements,
            "changed": changed_elements,
        }
        return diff

    # Merge a patch file into the pp_dict in a schema file
    def merge_patch(self, schema_file, key, patch):
        if not schema_file.pp_dict:
            schema_file.pp_dict = {"version": schema_file.dict_from_yaml['version']}
        pp_dict = schema_file.pp_dict
        if key not in pp_dict:
            pp_dict[key] = [patch]
        else:
            # check that this patch hasn't already been saved
            found = False
            for elem in pp_dict[key]:
                if elem['name'] == patch['name']:
                    found = True
            if not found:
                pp_dict[key].append(patch)
        self.add_to_pp_files(schema_file)
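    # merge_patch sketch (hypothetical values): with pp_dict == {'version': 2}
    # and patch == {'name': 'stg_orders'}, calling
    # merge_patch(schema_file, 'models', patch) leaves
    #   pp_dict == {'version': 2, 'models': [{'name': 'stg_orders'}]}
    # and schedules the schema file for parsing via add_to_pp_files.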
    # For model, seed, snapshot, analysis schema dictionary keys,
    # delete the patches and tests from the patch
    def delete_schema_mssa_links(self, schema_file, dict_key, elem):
        # find elem node unique_id in node_patches
        prefix = key_to_prefix[dict_key]
        elem_unique_id = ''
        for unique_id in schema_file.node_patches:
            if not unique_id.startswith(prefix):
                continue
            parts = unique_id.split('.')
            elem_name = parts[-1]
            if elem_name == elem['name']:
                elem_unique_id = unique_id
                break

        # remove elem node and remove unique_id from node_patches
        if elem_unique_id:
            # might have been already removed
            if elem_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(elem_unique_id)
                self.deleted_manifest.nodes[elem_unique_id] = node
                # need to add the node source_file to pp_files
                file_id = node.file_id
                # need to copy new file to saved files in order to get content
                if file_id in self.new_files:
                    self.saved_files[file_id] = self.new_files[file_id]
                if self.saved_files[file_id]:
                    source_file = self.saved_files[file_id]
                    self.add_to_pp_files(source_file)
            # remove from patches
            schema_file.node_patches.remove(elem_unique_id)

        # for models, seeds, snapshots (not analyses)
        if dict_key in ['models', 'seeds', 'snapshots']:
            # find related tests and remove them
            self.remove_tests(schema_file, dict_key, elem['name'])

    def remove_tests(self, schema_file, dict_key, name):
        tests = schema_file.get_tests(dict_key, name)
        for test_unique_id in tests:
            if test_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(test_unique_id)
                self.deleted_manifest.nodes[test_unique_id] = node
        schema_file.remove_tests(dict_key, name)

    # Removes patches, tests, and source nodes
    def delete_schema_source(self, schema_file, source_dict):
        source_name = source_dict['name']
        # There may be multiple sources for each source dict, since
        # there will be a separate source node for each table.
        # ParsedSourceDefinition name = table name, dict name is source_name
        sources = schema_file.sources.copy()
        for unique_id in sources:
            if unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                if source.source_name == source_name:
                    source = self.saved_manifest.sources.pop(unique_id)
                    self.deleted_manifest.sources[unique_id] = source
                    schema_file.sources.remove(unique_id)
                    self.schedule_referencing_nodes_for_parsing(unique_id)
                    logger.debug(f"Partial parsing: deleted source {unique_id}")

    def delete_schema_macro_patch(self, schema_file, macro):
        # This is just macro patches that need to be reapplied
        macro_unique_id = None
        if macro['name'] in schema_file.macro_patches:
            macro_unique_id = schema_file.macro_patches[macro['name']]
            del schema_file.macro_patches[macro['name']]
        if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
            macro = self.saved_manifest.macros.pop(macro_unique_id)
            self.deleted_manifest.macros[macro_unique_id] = macro
            macro_file_id = macro.file_id
            if macro_file_id in self.new_files:
                self.saved_files[macro_file_id] = self.new_files[macro_file_id]
                self.add_to_pp_files(self.saved_files[macro_file_id])
    # exposures are created only from schema files, so just delete
    # the exposure.
    def delete_schema_exposure(self, schema_file, exposure_dict):
        exposure_name = exposure_dict['name']
        exposures = schema_file.exposures.copy()
        for unique_id in exposures:
            if unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                if exposure.name == exposure_name:
                    self.deleted_manifest.exposures[unique_id] = \
                        self.saved_manifest.exposures.pop(unique_id)
                    schema_file.exposures.remove(unique_id)
                    logger.debug(f"Partial parsing: deleted exposure {unique_id}")

    def get_schema_element(self, elem_list, elem_name):
        for element in elem_list:
            if 'name' in element and element['name'] == elem_name:
                return element
        return None

    def get_schema_file_for_source(self, package_name, source_name):
        schema_file = None
        for source in self.saved_manifest.sources.values():
            if source.package_name == package_name and source.source_name == source_name:
                file_id = source.file_id
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                break
        return schema_file

    def get_source_override_file_and_dict(self, source):
        package = source['overrides']
        source_name = source['name']
        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
        orig_sources = orig_source_schema_file.dict_from_yaml['sources']
        orig_source = self.get_schema_element(orig_sources, source_name)
        return (orig_source_schema_file, orig_source)

    def remove_source_override_target(self, source_dict):
        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
        if orig_source:
            self.delete_schema_source(orig_file, orig_source)
            self.remove_tests(orig_file, 'sources', orig_source['name'])
            self.merge_patch(orig_file, 'sources', orig_source)
            self.add_to_pp_files(orig_file)
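
# Source override sketch (hypothetical yaml): a source entry can override a
# source defined in another package, e.g.
#   sources:
#     - name: raw
#       overrides: upstream_package
# When such an override is added or changed, remove_source_override_target
# re-schedules the original source in upstream_package so it is re-parsed
# and re-patched.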