from typing import Optional

from dbt.clients.system import load_file_contents
from dbt.contracts.files import (
    FilePath, ParseFileType, SourceFile, FileHash, AnySourceFile, SchemaSourceFile
)
from dbt.parser.schemas import yaml_from_file, schema_file_keys, check_format_version
from dbt.exceptions import CompilationException
from dbt.parser.search import FilesystemSearcher


# This loads the file contents and creates the SourceFile object
def load_source_file(
        path: FilePath, parse_file_type: ParseFileType,
        project_name: str, saved_files,
) -> Optional[AnySourceFile]:
    sf_cls = SchemaSourceFile if parse_file_type == ParseFileType.Schema else SourceFile
    source_file = sf_cls(path=path, checksum=FileHash.empty(),
                         parse_file_type=parse_file_type, project_name=project_name)

    # If a saved schema file has the same modification time, reuse its
    # checksum and parsed yaml (dfy) instead of re-reading the file.
    skip_loading_schema_file = False
    if (parse_file_type == ParseFileType.Schema and
            saved_files and source_file.file_id in saved_files):
        old_source_file = saved_files[source_file.file_id]
        if (source_file.path.modification_time != 0.0 and
                old_source_file.path.modification_time == source_file.path.modification_time):
            source_file.checksum = old_source_file.checksum
            source_file.dfy = old_source_file.dfy
            skip_loading_schema_file = True

    if not skip_loading_schema_file:
        file_contents = load_file_contents(path.absolute_path, strip=False)
        source_file.checksum = FileHash.from_contents(file_contents)
        source_file.contents = file_contents.strip()

    if parse_file_type == ParseFileType.Schema and source_file.contents:
        dfy = yaml_from_file(source_file)
        if dfy:
            validate_yaml(source_file.path.original_file_path, dfy)
            source_file.dfy = dfy
        else:
            # The yaml parsed to nothing (empty file), so drop this file
            source_file = None
    return source_file


# Do some minimal validation of the yaml in a schema file: check the
# version, that key values are lists, and that each element in the
# lists has a 'name' key.
def validate_yaml(file_path, dct):
    check_format_version(file_path, dct)
    for key in schema_file_keys:
        if key in dct:
            if not isinstance(dct[key], list):
                msg = (f"The schema file at {file_path} is "
                       f"invalid because the value of '{key}' is not a list")
                raise CompilationException(msg)
            for element in dct[key]:
                if not isinstance(element, dict):
                    msg = (f"The schema file at {file_path} is "
                           f"invalid because a list element for '{key}' is not a dictionary")
                    raise CompilationException(msg)
                if 'name' not in element:
                    msg = (f"The schema file at {file_path} is "
                           f"invalid because a list element for '{key}' does not have a "
                           "name attribute.")
                    raise CompilationException(msg)
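# Illustrative sketch only (not part of the original module): the dicts
# below show the shape that validate_yaml accepts. 'version: 2' and the
# 'models' key are assumptions based on typical dbt schema files; the
# exact keys checked come from schema_file_keys, and the version check
# is delegated to check_format_version.
#
#     good = {'version': 2, 'models': [{'name': 'my_model'}]}
#     validate_yaml('models/schema.yml', good)  # passes
#
#     bad = {'version': 2, 'models': {'name': 'my_model'}}
#     validate_yaml('models/schema.yml', bad)
#     # raises CompilationException: the value of 'models' is not a list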
# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    if match.seed_too_large():
        # We don't want to calculate a hash of this file. Use the path.
        source_file = SourceFile.big_seed(match)
    else:
        file_contents = load_file_contents(match.absolute_path, strip=False)
        checksum = FileHash.from_contents(file_contents)
        source_file = SourceFile(path=match, checksum=checksum)
        source_file.contents = ''
    source_file.parse_file_type = ParseFileType.Seed
    source_file.project_name = project_name
    return source_file


# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
# them into a bunch of SourceFile objects
def get_source_files(project, paths, extension, parse_file_type, saved_files):
    # file path list
    fp_list = list(FilesystemSearcher(
        project, paths, extension
    ))
    # file block list
    fb_list = []
    for fp in fp_list:
        if parse_file_type == ParseFileType.Seed:
            fb_list.append(load_seed_source_file(fp, project.project_name))
        else:
            file = load_source_file(fp, parse_file_type, project.project_name, saved_files)
            # only append to the list if the file has contents. added to fix #3568
            if file:
                fb_list.append(file)
    return fb_list


def read_files_for_parser(project, files, dirs, extension, parse_ft, saved_files):
    parser_files = []
    source_files = get_source_files(
        project, dirs, extension, parse_ft, saved_files
    )
    for sf in source_files:
        files[sf.file_id] = sf
        parser_files.append(sf.file_id)
    return parser_files


# This needs to read files for multiple projects, so the 'files'
# dictionary needs to be passed in. What determines the order of
# the various projects? Is the root project always last? Do the
# non-root projects need to be done separately in order?
def read_files(project, files, parser_files, saved_files):
    project_files = {}

    project_files['MacroParser'] = read_files_for_parser(
        project, files, project.macro_paths, '.sql',
        ParseFileType.Macro, saved_files
    )

    project_files['ModelParser'] = read_files_for_parser(
        project, files, project.source_paths, '.sql',
        ParseFileType.Model, saved_files
    )

    project_files['SnapshotParser'] = read_files_for_parser(
        project, files, project.snapshot_paths, '.sql',
        ParseFileType.Snapshot, saved_files
    )

    project_files['AnalysisParser'] = read_files_for_parser(
        project, files, project.analysis_paths, '.sql',
        ParseFileType.Analysis, saved_files
    )

    project_files['DataTestParser'] = read_files_for_parser(
        project, files, project.test_paths, '.sql',
        ParseFileType.Test, saved_files
    )

    project_files['SeedParser'] = read_files_for_parser(
        project, files, project.data_paths, '.csv',
        ParseFileType.Seed, saved_files
    )

    project_files['DocumentationParser'] = read_files_for_parser(
        project, files, project.docs_paths, '.md',
        ParseFileType.Documentation, saved_files
    )

    project_files['SchemaParser'] = read_files_for_parser(
        project, files, project.all_source_paths, '.yml',
        ParseFileType.Schema, saved_files
    )

    # Also read .yaml files for schema files. Might be better to change
    # 'read_files_for_parser' to accept a list of extensions in the future.
    yaml_files = read_files_for_parser(
        project, files, project.all_source_paths, '.yaml',
        ParseFileType.Schema, saved_files
    )
    project_files['SchemaParser'].extend(yaml_files)

    # Store the parser files for this particular project
    parser_files[project.project_name] = project_files
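# Hedged usage sketch (illustrative only; 'project' stands in for a loaded
# dbt Project object, which this module does not construct):
#
#     files = {}         # file_id -> AnySourceFile, shared across projects
#     parser_files = {}  # project_name -> {parser name: [file_id, ...]}
#     read_files(project, files, parser_files, saved_files=None)
#     # parser_files[project.project_name]['ModelParser'] now holds the
#     # file_ids of every '.sql' file found under project.source_paths.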