# dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/task/parse.py

# 95 lines
# 3.5 KiB
# Python

# This task is intended to be used for diagnosis, development and
# performance analysis.
# It separates out the parsing flows for easier logging and
# debugging.
# To store cProfile performance data, execute with the '-r'
# flag and an output file: dbt -r dbt.cprof parse.
# Use a visualizer such as snakeviz to look at the output:
# snakeviz dbt.cprof
from dbt.task.base import ConfiguredTask
from dbt.adapters.factory import get_adapter
from dbt.parser.manifest import (
Manifest, ManifestLoader, _check_manifest
)
from dbt.logger import DbtProcessState, print_timestamped_line
from dbt.clients.system import write_file
from dbt.graph import Graph
import time
from typing import Optional
import os
import json
import dbt.utils
# File name, relative to the configured target path, that
# ParseTask.write_manifest() writes the manifest to.
MANIFEST_FILE_NAME = 'manifest.json'
# File name, relative to the configured target path, that
# ParseTask.write_perf_info() writes the JSON performance data to.
PERF_INFO_FILE_NAME = 'perf_info.json'
# Process-state object used as a context manager around the parsing
# steps in ParseTask.get_full_manifest().
PARSING_STATE = DbtProcessState('parsing')
class ParseTask(ConfiguredTask):
    """Task that parses the project step by step, with timestamped logging.

    The parsing flow is spelled out in this class (rather than delegated
    to a single ManifestLoader call) so that each stage can be logged and
    timed individually. After parsing, the collected performance info is
    written to ``perf_info.json`` in the target path.
    """

    def __init__(self, args, config):
        super().__init__(args, config)
        # Set by get_full_manifest().
        self.manifest: Optional[Manifest] = None
        # Set by compile_manifest().
        self.graph: Optional[Graph] = None
        # Set by get_full_manifest(); its _perf_info is read by
        # write_perf_info().
        self.loader: Optional[ManifestLoader] = None

    def write_manifest(self):
        """Write the manifest to ``<target_path>/manifest.json``.

        get_full_manifest() must have been called first so that
        ``self.manifest`` is populated.
        """
        path = os.path.join(self.config.target_path, MANIFEST_FILE_NAME)
        self.manifest.write(path)

    def write_perf_info(self):
        """Dump the loader's performance info as JSON and log its path.

        The data is written to ``<target_path>/perf_info.json``.
        get_full_manifest() must have been called first so that
        ``self.loader`` is populated.
        """
        path = os.path.join(self.config.target_path, PERF_INFO_FILE_NAME)
        write_file(path, json.dumps(self.loader._perf_info,
                                    cls=dbt.utils.JSONEncoder, indent=4))
        print_timestamped_line(f"Performance info: {path}")

    # This method takes code that normally exists in other files
    # and pulls it in here, to simplify logging and make the
    # parsing flow-of-control easier to understand and manage,
    # with the downside that if changes happen in those other methods,
    # similar changes might need to be made here.
    # ManifestLoader.get_full_manifest
    # ManifestLoader.load
    # ManifestLoader.load_all
    def get_full_manifest(self):
        """Load, check and flatten the manifest, logging after each stage.

        Stores the resulting loader and manifest on ``self.loader`` and
        ``self.manifest`` and records the total elapsed time in
        ``loader._perf_info.load_all_elapsed``.
        """
        adapter = get_adapter(self.config)  # type: ignore
        root_config = self.config
        macro_hook = adapter.connections.set_query_header

        with PARSING_STATE:
            start_load_all = time.perf_counter()
            projects = root_config.load_dependencies()
            print_timestamped_line("Dependencies loaded")
            loader = ManifestLoader(root_config, projects, macro_hook)
            print_timestamped_line("ManifestLoader created")
            manifest = loader.load()
            print_timestamped_line("Manifest loaded")
            _check_manifest(manifest, root_config)
            print_timestamped_line("Manifest checked")
            manifest.build_flat_graph()
            print_timestamped_line("Flat graph built")
            # Elapsed wall time for everything since start_load_all.
            loader._perf_info.load_all_elapsed = (
                time.perf_counter() - start_load_all
            )

        # NOTE(review): block nesting was reconstructed after the file's
        # indentation was lost; these statements are assumed to run after
        # leaving the PARSING_STATE context - confirm.
        self.loader = loader
        self.manifest = manifest
        # NOTE(review): same text as the message emitted right after
        # loader.load() above, so it appears twice in the output.
        print_timestamped_line("Manifest loaded")

    def compile_manifest(self):
        """Compile ``self.manifest`` with the adapter's compiler.

        The compiler's result is stored on ``self.graph``.
        """
        adapter = get_adapter(self.config)
        compiler = adapter.get_compiler()
        self.graph = compiler.compile(self.manifest)

    def run(self):
        """Parse the project, optionally compile and write the manifest.

        Compilation runs only when ``args.compile`` is truthy and the
        manifest is written only when ``args.write_manifest`` is truthy.
        """
        print_timestamped_line('Start parsing.')
        self.get_full_manifest()
        if self.args.compile:
            print_timestamped_line('Compiling.')
            self.compile_manifest()
        if self.args.write_manifest:
            print_timestamped_line('Writing manifest.')
            self.write_manifest()
        # NOTE(review): nesting reconstructed; perf info is assumed to be
        # written regardless of args.write_manifest - confirm.
        self.write_perf_info()
        print_timestamped_line('Done.')