from dbt.clients.system import load_file_contents
from dbt.contracts.files import (
    FilePath, ParseFileType, SourceFile, FileHash, AnySourceFile, SchemaSourceFile
)

from dbt.parser.schemas import yaml_from_file, schema_file_keys, check_format_version
from dbt.exceptions import CompilationException
from dbt.parser.search import FilesystemSearcher
from typing import Optional


# This loads the file's contents and creates the SourceFile object
def load_source_file(
        path: FilePath, parse_file_type: ParseFileType,
        project_name: str, saved_files) -> Optional[AnySourceFile]:

    sf_cls = SchemaSourceFile if parse_file_type == ParseFileType.Schema else SourceFile
    source_file = sf_cls(path=path, checksum=FileHash.empty(),
                         parse_file_type=parse_file_type, project_name=project_name)

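    # Partial-parsing optimization: if this schema file was seen on a previous
    # parse and its modification time on disk is unchanged, reuse the saved
    # checksum and parsed yaml dict instead of re-reading and re-parsing it.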
    skip_loading_schema_file = False
    if (parse_file_type == ParseFileType.Schema and
            saved_files and source_file.file_id in saved_files):
        old_source_file = saved_files[source_file.file_id]
        if (source_file.path.modification_time != 0.0 and
                old_source_file.path.modification_time == source_file.path.modification_time):
            source_file.checksum = old_source_file.checksum
            source_file.dfy = old_source_file.dfy
            skip_loading_schema_file = True

    if not skip_loading_schema_file:
        file_contents = load_file_contents(path.absolute_path, strip=False)
        source_file.checksum = FileHash.from_contents(file_contents)
        source_file.contents = file_contents.strip()

    if parse_file_type == ParseFileType.Schema and source_file.contents:
        dfy = yaml_from_file(source_file)
        if dfy:
            validate_yaml(source_file.path.original_file_path, dfy)
            source_file.dfy = dfy
        else:
            source_file = None
    return source_file
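
# Illustrative usage (a sketch, not executed here; 'model_path' is a
# hypothetical FilePath pointing at models/my_model.sql):
#   source_file = load_source_file(
#       model_path, ParseFileType.Model, 'my_project', saved_files=None,
#   )
#   # source_file.checksum now hashes the raw file contents; for schema files
#   # the parsed yaml dict is stored on source_file.dfy, and None is returned
#   # if the yaml file is empty.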


# Do some minimal validation of the yaml in a schema file.
# Check version, that key values are lists and that each element in
# the lists has a 'name' key
def validate_yaml(file_path, dct):
    check_format_version(file_path, dct)
    for key in schema_file_keys:
        if key in dct:
            if not isinstance(dct[key], list):
                msg = (f"The schema file at {file_path} is "
                       f"invalid because the value of '{key}' is not a list")
                raise CompilationException(msg)
            for element in dct[key]:
                if not isinstance(element, dict):
                    msg = (f"The schema file at {file_path} is "
                           f"invalid because a list element for '{key}' is not a dictionary")
                    raise CompilationException(msg)
                if 'name' not in element:
                    msg = (f"The schema file at {file_path} is "
                           f"invalid because a list element for '{key}' does not have a "
                           "name attribute.")
                    raise CompilationException(msg)
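
# Illustrative (not executed): assuming schema_file_keys contains 'models',
# a dict like {'version': 2, 'models': [{'name': 'my_model'}]} passes, while
# {'version': 2, 'models': {'name': 'my_model'}} raises a CompilationException
# because the value of 'models' is not a list.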


# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    if match.seed_too_large():
        # We don't want to calculate a hash of this file. Use the path.
        source_file = SourceFile.big_seed(match)
    else:
        file_contents = load_file_contents(match.absolute_path, strip=False)
        checksum = FileHash.from_contents(file_contents)
        source_file = SourceFile(path=match, checksum=checksum)
        source_file.contents = ''
    source_file.parse_file_type = ParseFileType.Seed
    source_file.project_name = project_name
    return source_file
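
# Note: seed csv contents are never stored on the SourceFile. Seeds under the
# size limit still get a checksum of the real file contents; oversized seeds
# get a path-based checksum via SourceFile.big_seed() so the whole file never
# has to be read.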


# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
# them into a bunch of SourceFile objects
def get_source_files(project, paths, extension, parse_file_type, saved_files):
    # file path list
    fp_list = list(FilesystemSearcher(
        project, paths, extension
    ))
    # file block list
    fb_list = []
    for fp in fp_list:
        if parse_file_type == ParseFileType.Seed:
            fb_list.append(load_seed_source_file(fp, project.project_name))
        else:
            file = load_source_file(fp, parse_file_type, project.project_name, saved_files)
            # only append the file if it has contents; added to fix #3568
            if file:
                fb_list.append(file)
    return fb_list
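
# Illustrative call (a sketch; 'project' would be a project config object):
#   model_files = get_source_files(
#       project, project.source_paths, '.sql', ParseFileType.Model, None,
#   )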


def read_files_for_parser(project, files, dirs, extension, parse_ft, saved_files):
    parser_files = []
    source_files = get_source_files(
        project, dirs, extension, parse_ft, saved_files
    )
    for sf in source_files:
        files[sf.file_id] = sf
        parser_files.append(sf.file_id)
    return parser_files
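
# Illustrative (not executed): read_files_for_parser mutates the shared
# 'files' dict in place, keyed by file_id, and returns just the file_ids so
# each parser knows which entries in 'files' belong to it, e.g.
#   files == {'my_project://models/my_model.sql': <SourceFile>, ...}
#   parser_files == ['my_project://models/my_model.sql', ...]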


# This needs to read files for multiple projects, so the 'files'
# dictionary needs to be passed in. What determines the order of
# the various projects? Is the root project always last? Do the
# non-root projects need to be done separately in order?
def read_files(project, files, parser_files, saved_files):

    project_files = {}

    project_files['MacroParser'] = read_files_for_parser(
        project, files, project.macro_paths, '.sql', ParseFileType.Macro, saved_files
    )

    project_files['ModelParser'] = read_files_for_parser(
        project, files, project.source_paths, '.sql', ParseFileType.Model, saved_files
    )

    project_files['SnapshotParser'] = read_files_for_parser(
        project, files, project.snapshot_paths, '.sql', ParseFileType.Snapshot, saved_files
    )

    project_files['AnalysisParser'] = read_files_for_parser(
        project, files, project.analysis_paths, '.sql', ParseFileType.Analysis, saved_files
    )

    project_files['DataTestParser'] = read_files_for_parser(
        project, files, project.test_paths, '.sql', ParseFileType.Test, saved_files
    )

    project_files['SeedParser'] = read_files_for_parser(
        project, files, project.data_paths, '.csv', ParseFileType.Seed, saved_files
    )

    project_files['DocumentationParser'] = read_files_for_parser(
        project, files, project.docs_paths, '.md', ParseFileType.Documentation, saved_files
    )

    project_files['SchemaParser'] = read_files_for_parser(
        project, files, project.all_source_paths, '.yml', ParseFileType.Schema, saved_files
    )

    # Also read .yaml files for schema files. It might be better to change
    # 'read_files_for_parser' to accept a list of extensions in the future.
    yaml_files = read_files_for_parser(
        project, files, project.all_source_paths, '.yaml', ParseFileType.Schema, saved_files
    )
    project_files['SchemaParser'].extend(yaml_files)

    # Store the parser files for this particular project
    parser_files[project.project_name] = project_files
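

# Illustrative (not executed): after read_files() runs for a project named
# 'my_project', the passed-in parser_files dict looks roughly like:
#   parser_files == {
#       'my_project': {
#           'MacroParser': [...file_ids...],
#           'ModelParser': ['my_project://models/my_model.sql', ...],
#           ...
#           'SchemaParser': [...file_ids for both .yml and .yaml files...],
#       },
#   }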