from typing import MutableMapping, Dict, List

from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.files import (
    AnySourceFile, ParseFileType, parse_file_type_to_parser,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType


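# "mssat" = models, seeds, snapshots, analyses, tests: the SQL file types
# that are handled as a group throughout this module.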
mssat_files = (
    ParseFileType.Model,
    ParseFileType.Seed,
    ParseFileType.Snapshot,
    ParseFileType.Analysis,
    ParseFileType.Test,
)


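# Map schema file dictionary keys to the prefix used in the unique_ids
# of the nodes they patch (e.g. a 'models' entry patches a 'model.' node).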
key_to_prefix = {
    'models': 'model',
    'seeds': 'seed',
    'snapshots': 'snapshot',
    'analyses': 'analysis',
}


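# The inverse direction: map SQL parse file types back to the schema file
# dictionary key that can patch them.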
parse_file_type_to_key = {
    ParseFileType.Model: 'models',
    ParseFileType.Seed: 'seeds',
    ParseFileType.Snapshot: 'snapshots',
    ParseFileType.Analysis: 'analyses',
}


# Partial parsing. Create a diff of files from the saved manifest and the
# current files and produce a project_parser_files dictionary to drive
# parsing of only the necessary changes.
# Provides a 'skip_parsing' method and the project_parser_files dictionary.
class PartialParsing:
    def __init__(self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]):
        self.saved_manifest = saved_manifest
        self.new_files = new_files
        self.project_parser_files: Dict = {}
        self.saved_files = self.saved_manifest.files
        self.deleted_manifest = Manifest()
        self.macro_child_map: Dict[str, List[str]] = {}
        self.build_file_diff()
        self.processing_file = None
        self.disabled_by_file_id = self.saved_manifest.build_disabled_by_file_id()

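    # True when the file diff found nothing at all to re-parse, so the
    # saved manifest can be reused as-is.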
    def skip_parsing(self):
        return (
            not self.file_diff['deleted'] and
            not self.file_diff['added'] and
            not self.file_diff['changed'] and
            not self.file_diff['changed_schema_files'] and
            not self.file_diff['deleted_schema_files']
        )

    # Compare the previously saved manifest files and the just-loaded manifest
    # files to see if anything changed
    def build_file_diff(self):
        saved_file_ids = set(self.saved_files.keys())
        new_file_ids = set(self.new_files.keys())
        deleted_all_files = saved_file_ids.difference(new_file_ids)
        added = new_file_ids.difference(saved_file_ids)
        common = saved_file_ids.intersection(new_file_ids)
        changed_or_deleted_macro_file = False

        # separate out deleted schema files
        deleted_schema_files = []
        deleted = []
        for file_id in deleted_all_files:
            if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                deleted_schema_files.append(file_id)
            else:
                if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                    changed_or_deleted_macro_file = True
                deleted.append(file_id)

        changed = []
        changed_schema_files = []
        unchanged = []
        for file_id in common:
            if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
                unchanged.append(file_id)
            else:
                # separate out changed schema files
                if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
                    sf = self.saved_files[file_id]
                    if type(sf).__name__ != 'SchemaSourceFile':
                        raise Exception(f"Serialization failure for {file_id}")
                    changed_schema_files.append(file_id)
                else:
                    if self.saved_files[file_id].parse_file_type == ParseFileType.Macro:
                        changed_or_deleted_macro_file = True
                    changed.append(file_id)
        file_diff = {
            "deleted": deleted,
            "deleted_schema_files": deleted_schema_files,
            "added": added,
            "changed": changed,
            "changed_schema_files": changed_schema_files,
            "unchanged": unchanged,
        }
        if changed_or_deleted_macro_file:
            self.macro_child_map = self.saved_manifest.build_macro_child_map()
        logger.info(f"Partial parsing enabled: "
                    f"{len(deleted) + len(deleted_schema_files)} files deleted, "
                    f"{len(added)} files added, "
                    f"{len(changed) + len(changed_schema_files)} files changed.")
        self.file_diff = file_diff

    # generate the list of files that need parsing
    # uses the saved files plus the new files produced by 'read_files'
    def get_parsing_files(self):
        if self.skip_parsing():
            return {}
        # Need to add new files first, because changes in schema files
        # might refer to them
        for file_id in self.file_diff['added']:
            self.processing_file = file_id
            self.add_to_saved(file_id)
        # Need to process schema files next, because the dictionaries
        # need to be in place for handling SQL file changes
        for file_id in self.file_diff['changed_schema_files']:
            self.processing_file = file_id
            self.change_schema_file(file_id)
        for file_id in self.file_diff['deleted_schema_files']:
            self.processing_file = file_id
            self.delete_schema_file(file_id)
        for file_id in self.file_diff['deleted']:
            self.processing_file = file_id
            self.delete_from_saved(file_id)
        for file_id in self.file_diff['changed']:
            self.processing_file = file_id
            self.update_in_saved(file_id)
        return self.project_parser_files

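    # The project_parser_files dictionary built below is shaped like:
    #   {project_name: {parser_name: [file_id, ...]}}
    # so each file list can be handed to the right parser for its project.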
    # Add the file to the project parser dictionaries to schedule parsing
    def add_to_pp_files(self, source_file):
        file_id = source_file.file_id
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        project_name = source_file.project_name
        if not parser_name or not project_name:
            raise Exception(f"Did not find parse_file_type or project_name "
                            f"in SourceFile for {source_file.file_id}")
        if project_name not in self.project_parser_files:
            self.project_parser_files[project_name] = {}
        if parser_name not in self.project_parser_files[project_name]:
            self.project_parser_files[project_name][parser_name] = []
        if (file_id not in self.project_parser_files[project_name][parser_name] and
                file_id not in self.file_diff['deleted']):
            self.project_parser_files[project_name][parser_name].append(file_id)

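    # True if add_to_pp_files has already recorded this file for re-parsing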
    def already_scheduled_for_parsing(self, source_file):
        file_id = source_file.file_id
        project_name = source_file.project_name
        if project_name not in self.project_parser_files:
            return False
        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
        if parser_name not in self.project_parser_files[project_name]:
            return False
        if file_id not in self.project_parser_files[project_name][parser_name]:
            return False
        return True

    # Add new files, including schema files
    def add_to_saved(self, file_id):
        # add file object to saved manifest.files
        source_file = self.new_files[file_id]
        if source_file.parse_file_type == ParseFileType.Schema:
            self.handle_added_schema_file(source_file)
        self.saved_files[file_id] = source_file
        # update pp_files to parse
        self.add_to_pp_files(source_file)
        logger.debug(f"Partial parsing: added file: {file_id}")

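    # A brand-new schema file is parsed in full, so seed its pp_dict with
    # the entire yaml dictionary; source overrides still need the original
    # source removed first so the override can be applied cleanly.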
    def handle_added_schema_file(self, source_file):
        source_file.pp_dict = source_file.dict_from_yaml.copy()
        if 'sources' in source_file.pp_dict:
            for source in source_file.pp_dict['sources']:
                # We need to remove the original source, so it can
                # be properly patched
                if 'overrides' in source:
                    self.remove_source_override_target(source)

    # Deletes for all non-schema files
    def delete_from_saved(self, file_id):
        # Look at all things touched by file, remove those
        # nodes, and update pp_files to parse unless the
        # file creating those nodes has also been deleted
        saved_source_file = self.saved_files[file_id]

        # SQL files: models, seeds, snapshots, analyses, tests
        # (macros and docs are handled separately below)
        if saved_source_file.parse_file_type in mssat_files:
            self.remove_mssat_file(saved_source_file)
            self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

        # macros
        if saved_source_file.parse_file_type == ParseFileType.Macro:
            self.delete_macro_file(saved_source_file, follow_references=True)

        # docs
        if saved_source_file.parse_file_type == ParseFileType.Documentation:
            self.delete_doc_node(saved_source_file)

        logger.debug(f"Partial parsing: deleted file: {file_id}")

    # Updates for non-schema files
    def update_in_saved(self, file_id):
        new_source_file = self.new_files[file_id]
        old_source_file = self.saved_files[file_id]

        if new_source_file.parse_file_type in mssat_files:
            self.update_mssat_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Macro:
            self.update_macro_in_saved(new_source_file, old_source_file)
        elif new_source_file.parse_file_type == ParseFileType.Documentation:
            self.update_doc_in_saved(new_source_file, old_source_file)
        else:
            raise Exception(f"Invalid parse_file_type in source_file {file_id}")
        logger.debug(f"Partial parsing: updated file: {file_id}")

    # Models, seeds, snapshots: patches and tests
    # analyses: patches, no tests
    # tests: not touched by schema files (no patches, no tests)
    # Updated schema files should have been processed already.
    def update_mssat_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return

        # These files only have one node.
        unique_id = None
        if old_source_file.nodes:
            unique_id = old_source_file.nodes[0]
        else:
            # It's not clear when this would actually happen.
            # Logging in case there are other associated errors.
            logger.debug(f"Partial parsing: node not found for source_file {old_source_file}")

        # replace source_file in saved and add to parsing list
        file_id = new_source_file.file_id
        self.deleted_manifest.files[file_id] = old_source_file
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)
        if unique_id:
            self.remove_node_in_saved(new_source_file, unique_id)

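    # Remove a single node (enabled or disabled) from the saved manifest
    # and, if a schema file patched it, schedule that patch and its tests
    # to be reapplied.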
    def remove_node_in_saved(self, source_file, unique_id):
        if unique_id in self.saved_manifest.nodes:
            # delete node in saved
            node = self.saved_manifest.nodes.pop(unique_id)
            self.deleted_manifest.nodes[unique_id] = node
        elif source_file.file_id in self.disabled_by_file_id:
            # This node is disabled; find it in the disabled list and remove it
            node = None
            dis_index = None
            for index, dis_node in enumerate(self.saved_manifest.disabled):
                if dis_node.file_id == source_file.file_id:
                    node = dis_node
                    dis_index = index
                    break
            if node is not None:
                del self.saved_manifest.disabled[dis_index]
            else:
                # Has already been deleted by another action
                return
        else:
            # Has already been deleted by another action
            return

        # look at patch_path in model node to see if we need
        # to reapply a patch from a schema_file.
        if node.patch_path:
            file_id = node.patch_path
            # it might be changed... then what?
            if file_id not in self.file_diff['deleted']:
                # schema_files should already be updated
                schema_file = self.saved_files[file_id]
                dict_key = parse_file_type_to_key[source_file.parse_file_type]
                # look for a matching list dictionary
                elem_patch = None
                if dict_key in schema_file.dict_from_yaml:
                    for elem in schema_file.dict_from_yaml[dict_key]:
                        if elem['name'] == node.name:
                            elem_patch = elem
                            break
                if elem_patch:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
                    self.merge_patch(schema_file, dict_key, elem_patch)
                    if unique_id in schema_file.node_patches:
                        schema_file.node_patches.remove(unique_id)
                    if unique_id in self.saved_manifest.disabled:
                        # We have a patch_path in disabled nodes with a patch so
                        # that we can connect the patch to the node
                        for node in self.saved_manifest.disabled[unique_id]:
                            node.patch_path = None

    def update_macro_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.handle_macro_file_links(old_source_file, follow_references=True)
        file_id = new_source_file.file_id
        self.saved_files[file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

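    # Doc files are simply re-parsed: drop the old doc nodes and schedule
    # the new file.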
    def update_doc_in_saved(self, new_source_file, old_source_file):
        if self.already_scheduled_for_parsing(old_source_file):
            return
        self.delete_doc_node(old_source_file)
        self.saved_files[new_source_file.file_id] = new_source_file
        self.add_to_pp_files(new_source_file)

    def remove_mssat_file(self, source_file):
        # nodes [unique_ids] -- SQL files
        # There should always be a node for a SQL file
        if not source_file.nodes:
            logger.debug(f"No nodes found for source file {source_file.file_id}")
            return
        # There is generally only one node for these SQL files (macro files,
        # handled elsewhere, can contain many), but loop to be safe
        for unique_id in source_file.nodes:
            self.remove_node_in_saved(source_file, unique_id)
            self.schedule_referencing_nodes_for_parsing(unique_id)

    # We need to re-parse nodes that reference another removed node
    def schedule_referencing_nodes_for_parsing(self, unique_id):
        # Look at "children", i.e. nodes that reference this node
        if unique_id in self.saved_manifest.child_map:
            self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
        else:
            logger.debug(f"Partial parsing: {unique_id} not found in child_map")

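    # Re-schedule a set of unique_ids for parsing; they may be nodes,
    # sources, exposures, or macros, each of which is rescheduled differently.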
    def schedule_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    # test nodes are handled separately. Must be removed from schema file
                    continue
                file_id = node.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.remove_mssat_file(source_file)
                    # content of non-schema files is only in new files
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                file_id = source.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    sources = []
                    if 'sources' in schema_file.dict_from_yaml:
                        sources = schema_file.dict_from_yaml['sources']
                    source_element = self.get_schema_element(sources, source.source_name)
                    if source_element:
                        self.delete_schema_source(schema_file, source_element)
                        self.remove_tests(schema_file, 'sources', source_element['name'])
                        self.merge_patch(schema_file, 'sources', source_element)
            elif unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                file_id = exposure.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    schema_file = self.saved_files[file_id]
                    exposures = []
                    if 'exposures' in schema_file.dict_from_yaml:
                        exposures = schema_file.dict_from_yaml['exposures']
                    exposure_element = self.get_schema_element(exposures, exposure.name)
                    if exposure_element:
                        self.delete_schema_exposure(schema_file, exposure_element)
                        self.merge_patch(schema_file, 'exposures', exposure_element)
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_macro_file(self, source_file, follow_references=False):
        self.handle_macro_file_links(source_file, follow_references)
        file_id = source_file.file_id
        self.deleted_manifest.files[file_id] = self.saved_files.pop(file_id)

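    # Walk macro_child_map transitively to collect every node that depends
    # on this macro, directly or through other macros.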
    def recursively_gather_macro_references(self, macro_unique_id, referencing_nodes):
        for unique_id in self.macro_child_map[macro_unique_id]:
            if unique_id in referencing_nodes:
                continue
            referencing_nodes.append(unique_id)
            if unique_id.startswith('macro.'):
                self.recursively_gather_macro_references(unique_id, referencing_nodes)

    def handle_macro_file_links(self, source_file, follow_references=False):
        # remove the macros in the 'macros' dictionary
        macros = source_file.macros.copy()
        for unique_id in macros:
            if unique_id not in self.saved_manifest.macros:
                # This happens when a macro has already been removed
                if unique_id in source_file.macros:
                    source_file.macros.remove(unique_id)
                continue

            base_macro = self.saved_manifest.macros.pop(unique_id)
            self.deleted_manifest.macros[unique_id] = base_macro

            # Recursively check children of this macro.
            # The macro_child_map might not exist if a macro is removed by
            # schedule_nodes_for_parsing. We only want to follow
            # references if the macro file itself has been updated or
            # deleted, not if we're just updating referenced nodes.
            if self.macro_child_map and follow_references:
                referencing_nodes = []
                self.recursively_gather_macro_references(unique_id, referencing_nodes)
                self.schedule_macro_nodes_for_parsing(referencing_nodes)

            if base_macro.patch_path:
                file_id = base_macro.patch_path
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                    macro_patches = []
                    if 'macros' in schema_file.dict_from_yaml:
                        macro_patches = schema_file.dict_from_yaml['macros']
                    macro_patch = self.get_schema_element(macro_patches, base_macro.name)
                    if macro_patch:
                        self.delete_schema_macro_patch(schema_file, macro_patch)
                        self.merge_patch(schema_file, 'macros', macro_patch)
            # The macro may have already been removed by handling macro children
            if unique_id in source_file.macros:
                source_file.macros.remove(unique_id)

    # similar to schedule_nodes_for_parsing but doesn't do sources and exposures
    # and handles schema tests
    def schedule_macro_nodes_for_parsing(self, unique_ids):
        for unique_id in unique_ids:
            if unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    schema_file_id = node.file_id
                    schema_file = self.saved_manifest.files[schema_file_id]
                    (key, name) = schema_file.get_key_and_name_for_test(node.unique_id)
                    if key and name:
                        patch_list = []
                        if key in schema_file.dict_from_yaml:
                            patch_list = schema_file.dict_from_yaml[key]
                        patch = self.get_schema_element(patch_list, name)
                        if patch:
                            if key in ['models', 'seeds', 'snapshots']:
                                self.delete_schema_mssa_links(schema_file, key, patch)
                                self.merge_patch(schema_file, key, patch)
                                if unique_id in schema_file.node_patches:
                                    schema_file.node_patches.remove(unique_id)
                            elif key == 'sources':
                                # re-schedule source
                                if 'overrides' in patch:
                                    # This is a source patch; need to re-parse orig source
                                    self.remove_source_override_target(patch)
                                self.delete_schema_source(schema_file, patch)
                                self.remove_tests(schema_file, 'sources', patch['name'])
                                self.merge_patch(schema_file, 'sources', patch)
                else:
                    file_id = node.file_id
                    if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                        source_file = self.saved_files[file_id]
                        self.remove_mssat_file(source_file)
                        # content of non-schema files is only in new files
                        self.saved_files[file_id] = self.new_files[file_id]
                        self.add_to_pp_files(self.saved_files[file_id])
            elif unique_id in self.saved_manifest.macros:
                macro = self.saved_manifest.macros[unique_id]
                file_id = macro.file_id
                if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
                    source_file = self.saved_files[file_id]
                    self.delete_macro_file(source_file)
                    self.saved_files[file_id] = self.new_files[file_id]
                    self.add_to_pp_files(self.saved_files[file_id])

    def delete_doc_node(self, source_file):
        # remove the nodes in the 'docs' dictionary
        docs = source_file.docs.copy()
        for unique_id in docs:
            self.deleted_manifest.docs[unique_id] = self.saved_manifest.docs.pop(unique_id)
            source_file.docs.remove(unique_id)
        # The unique_ids of objects that contain a doc call are stored in the
        # doc source_file.nodes
        self.schedule_nodes_for_parsing(source_file.nodes)
        source_file.nodes = []

    # Schema files -----------------------
    # Changed schema files
    def change_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        new_schema_file = self.new_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = new_schema_file.dict_from_yaml
        if 'version' in new_yaml_dict:
            # despite the fact that this goes in the saved_schema_file, it
            # should represent the new yaml dictionary, and should produce
            # an error if the updated yaml file doesn't have a version
            saved_schema_file.pp_dict = {"version": new_yaml_dict['version']}
        else:
            saved_schema_file.pp_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)

        # copy from new schema_file to saved_schema_file to preserve references
        # that weren't removed
        saved_schema_file.contents = new_schema_file.contents
        saved_schema_file.checksum = new_schema_file.checksum
        saved_schema_file.dfy = new_schema_file.dfy
        # schedule parsing
        self.add_to_pp_files(saved_schema_file)
        # schema_file pp_dict should have been generated already
        logger.debug(f"Partial parsing: update schema file: {file_id}")

    # Delete schema files -- a variation on change_schema_file
    def delete_schema_file(self, file_id):
        saved_schema_file = self.saved_files[file_id]
        saved_yaml_dict = saved_schema_file.dict_from_yaml
        new_yaml_dict = {}
        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
        self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)

    # For each key in a schema file dictionary, process the changed, deleted, and added
    # elements for the key lists
    def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
        # loop through comparing previous dict_from_yaml with current dict_from_yaml
        # Need to do the deleted/added/changed thing, just like the files lists

        # models, seeds, snapshots, analyses
        for dict_key in ['models', 'seeds', 'snapshots', 'analyses']:
            key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
            if key_diff['changed']:
                for elem in key_diff['changed']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
                    self.merge_patch(schema_file, dict_key, elem)
            if key_diff['deleted']:
                for elem in key_diff['deleted']:
                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
            if key_diff['added']:
                for elem in key_diff['added']:
                    self.merge_patch(schema_file, dict_key, elem)

        # sources
        source_diff = self.get_diff_for('sources', saved_yaml_dict, new_yaml_dict)
        if source_diff['changed']:
            for source in source_diff['changed']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
                self.merge_patch(schema_file, 'sources', source)
        if source_diff['deleted']:
            for source in source_diff['deleted']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.delete_schema_source(schema_file, source)
                self.remove_tests(schema_file, 'sources', source['name'])
        if source_diff['added']:
            for source in source_diff['added']:
                if 'overrides' in source:  # This is a source patch; need to re-parse orig source
                    self.remove_source_override_target(source)
                self.merge_patch(schema_file, 'sources', source)

        # macros
        macro_diff = self.get_diff_for('macros', saved_yaml_dict, new_yaml_dict)
        if macro_diff['changed']:
            for macro in macro_diff['changed']:
                self.delete_schema_macro_patch(schema_file, macro)
                self.merge_patch(schema_file, 'macros', macro)
        if macro_diff['deleted']:
            for macro in macro_diff['deleted']:
                self.delete_schema_macro_patch(schema_file, macro)
        if macro_diff['added']:
            for macro in macro_diff['added']:
                self.merge_patch(schema_file, 'macros', macro)

        # exposures
        exposure_diff = self.get_diff_for('exposures', saved_yaml_dict, new_yaml_dict)
        if exposure_diff['changed']:
            for exposure in exposure_diff['changed']:
                self.delete_schema_exposure(schema_file, exposure)
                self.merge_patch(schema_file, 'exposures', exposure)
        if exposure_diff['deleted']:
            for exposure in exposure_diff['deleted']:
                self.delete_schema_exposure(schema_file, exposure)
        if exposure_diff['added']:
            for exposure in exposure_diff['added']:
                self.merge_patch(schema_file, 'exposures', exposure)

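    # For example, get_diff_for('models', saved_yaml_dict, new_yaml_dict)
    # returns {'deleted': [...], 'added': [...], 'changed': [...]}, where
    # each list holds copies of the yaml elements for that key.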
# Take a "section" of the schema file yaml dictionary from saved and new schema files
|
||
|
# and determine which parts have changed
|
||
|
def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
|
||
|
if key in saved_yaml_dict or key in new_yaml_dict:
|
||
|
saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
|
||
|
new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
|
||
|
else:
|
||
|
return {'deleted': [], 'added': [], 'changed': []}
|
||
|
# for each set of keys, need to create a dictionary of names pointing to entry
|
||
|
saved_elements_by_name = {}
|
||
|
new_elements_by_name = {}
|
||
|
# sources have two part names?
|
||
|
for element in saved_elements:
|
||
|
saved_elements_by_name[element['name']] = element
|
||
|
for element in new_elements:
|
||
|
new_elements_by_name[element['name']] = element
|
||
|
|
||
|
# now determine which elements, by name, are added, deleted or changed
|
||
|
saved_element_names = set(saved_elements_by_name.keys())
|
||
|
new_element_names = set(new_elements_by_name.keys())
|
||
|
deleted = saved_element_names.difference(new_element_names)
|
||
|
added = new_element_names.difference(saved_element_names)
|
||
|
common = saved_element_names.intersection(new_element_names)
|
||
|
changed = []
|
||
|
for element_name in common:
|
||
|
if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
|
||
|
changed.append(element_name)
|
||
|
|
||
|
# make lists of yaml elements to return as diffs
|
||
|
deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
|
||
|
added_elements = [new_elements_by_name[name].copy() for name in added]
|
||
|
changed_elements = [new_elements_by_name[name].copy() for name in changed]
|
||
|
|
||
|
diff = {
|
||
|
"deleted": deleted_elements,
|
||
|
"added": added_elements,
|
||
|
"changed": changed_elements,
|
||
|
}
|
||
|
return diff
|
||
|
|
||
|
    # Merge a patch file into the pp_dict in a schema file
    def merge_patch(self, schema_file, key, patch):
        if not schema_file.pp_dict:
            schema_file.pp_dict = {"version": schema_file.dict_from_yaml['version']}
        pp_dict = schema_file.pp_dict
        if key not in pp_dict:
            pp_dict[key] = [patch]
        else:
            # check that this patch hasn't already been saved
            found = False
            for elem in pp_dict[key]:
                if elem['name'] == patch['name']:
                    found = True
            if not found:
                pp_dict[key].append(patch)
        self.add_to_pp_files(schema_file)

    # For model, seed, snapshot, analysis schema dictionary keys,
    # remove the patched node and its tests so the patch can be reapplied
    def delete_schema_mssa_links(self, schema_file, dict_key, elem):
        # find elem node unique_id in node_patches
        prefix = key_to_prefix[dict_key]
        elem_unique_id = ''
        for unique_id in schema_file.node_patches:
            if not unique_id.startswith(prefix):
                continue
            parts = unique_id.split('.')
            elem_name = parts[-1]
            if elem_name == elem['name']:
                elem_unique_id = unique_id
                break

        # remove elem node and remove unique_id from node_patches
        if elem_unique_id:
            # might have been already removed
            if elem_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(elem_unique_id)
                self.deleted_manifest.nodes[elem_unique_id] = node
                # need to add the node source_file to pp_files
                file_id = node.file_id
                # need to copy new file to saved files in order to get content
                if file_id in self.new_files:
                    self.saved_files[file_id] = self.new_files[file_id]
                if self.saved_files[file_id]:
                    source_file = self.saved_files[file_id]
                    self.add_to_pp_files(source_file)
            # remove from patches
            schema_file.node_patches.remove(elem_unique_id)

        # for models, seeds, snapshots (not analyses)
        if dict_key in ['models', 'seeds', 'snapshots']:
            # find related tests and remove them
            self.remove_tests(schema_file, dict_key, elem['name'])

    def remove_tests(self, schema_file, dict_key, name):
        tests = schema_file.get_tests(dict_key, name)
        for test_unique_id in tests:
            if test_unique_id in self.saved_manifest.nodes:
                node = self.saved_manifest.nodes.pop(test_unique_id)
                self.deleted_manifest.nodes[test_unique_id] = node
        schema_file.remove_tests(dict_key, name)

    def delete_schema_source(self, schema_file, source_dict):
        # removes patches, tests, and source nodes
        source_name = source_dict['name']
        # There may be multiple sources for each source dict, since
        # there will be a separate source node for each table.
        # ParsedSourceDefinition name = table name, dict name is source_name
        sources = schema_file.sources.copy()
        for unique_id in sources:
            if unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                if source.source_name == source_name:
                    source = self.saved_manifest.sources.pop(unique_id)
                    self.deleted_manifest.sources[unique_id] = source
                    schema_file.sources.remove(unique_id)
                    self.schedule_referencing_nodes_for_parsing(unique_id)
                    logger.debug(f"Partial parsing: deleted source {unique_id}")

    def delete_schema_macro_patch(self, schema_file, macro):
        # This is just macro patches that need to be reapplied
        macro_unique_id = None
        if macro['name'] in schema_file.macro_patches:
            macro_unique_id = schema_file.macro_patches[macro['name']]
            del schema_file.macro_patches[macro['name']]
        if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
            macro = self.saved_manifest.macros.pop(macro_unique_id)
            self.deleted_manifest.macros[macro_unique_id] = macro
            macro_file_id = macro.file_id
            if macro_file_id in self.new_files:
                self.saved_files[macro_file_id] = self.new_files[macro_file_id]
                self.add_to_pp_files(self.saved_files[macro_file_id])

    # exposures are created only from schema files, so just delete
    # the exposure.
    def delete_schema_exposure(self, schema_file, exposure_dict):
        exposure_name = exposure_dict['name']
        exposures = schema_file.exposures.copy()
        for unique_id in exposures:
            if unique_id in self.saved_manifest.exposures:
                exposure = self.saved_manifest.exposures[unique_id]
                if exposure.name == exposure_name:
                    self.deleted_manifest.exposures[unique_id] = \
                        self.saved_manifest.exposures.pop(unique_id)
                    schema_file.exposures.remove(unique_id)
                    logger.debug(f"Partial parsing: deleted exposure {unique_id}")

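    # Find a yaml element in a list by its 'name' field; returns None if
    # no element matches.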
    def get_schema_element(self, elem_list, elem_name):
        for element in elem_list:
            if 'name' in element and element['name'] == elem_name:
                return element
        return None

    def get_schema_file_for_source(self, package_name, source_name):
        schema_file = None
        for source in self.saved_manifest.sources.values():
            if source.package_name == package_name and source.source_name == source_name:
                file_id = source.file_id
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                break
        return schema_file

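    # For a source patch (a source dict containing 'overrides'), locate the
    # schema file and yaml element of the original source being overridden.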
    def get_source_override_file_and_dict(self, source):
        package = source['overrides']
        source_name = source['name']
        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
        orig_sources = orig_source_schema_file.dict_from_yaml['sources']
        orig_source = self.get_schema_element(orig_sources, source_name)
        return (orig_source_schema_file, orig_source)

    def remove_source_override_target(self, source_dict):
        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
        if orig_source:
            self.delete_schema_source(orig_file, orig_source)
            self.remove_tests(orig_file, 'sources', orig_source['name'])
            self.merge_patch(orig_file, 'sources', orig_source)
            self.add_to_pp_files(orig_file)
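

# A minimal sketch of how a caller drives this class (illustrative only;
# the real call site is the surrounding manifest-loading code, and the
# saved_manifest / new_files values come from the cached manifest and the
# 'read_files' step):
#
#     partial_parsing = PartialParsing(saved_manifest, new_files)
#     if partial_parsing.skip_parsing():
#         manifest = saved_manifest  # nothing changed; reuse as-is
#     else:
#         project_parser_files = partial_parsing.get_parsing_files()
#         for project_name, parsers in project_parser_files.items():
#             for parser_name, file_ids in parsers.items():
#                 ...  # re-parse only these files with the named parser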