# This task is intended to be used for diagnosis, development and
# performance analysis.
# It separates out the parsing flows for easier logging and
# debugging.
# To store cProfile performance data, execute with the '-r'
# flag and an output file: dbt -r dbt.cprof parse.
# Use a visualizer such as snakeviz to look at the output:
# snakeviz dbt.cprof
from dbt.task.base import ConfiguredTask
|
|
from dbt.adapters.factory import get_adapter
|
|
from dbt.parser.manifest import (
|
|
Manifest, ManifestLoader, _check_manifest
|
|
)
|
|
from dbt.logger import DbtProcessState, print_timestamped_line
|
|
from dbt.clients.system import write_file
|
|
from dbt.graph import Graph
|
|
import time
|
|
from typing import Optional
|
|
import os
|
|
import json
|
|
import dbt.utils
|
|
|
|
# File names that ParseTask joins onto the configured target path when
# it writes its outputs.
MANIFEST_FILE_NAME = 'manifest.json'
PERF_INFO_FILE_NAME = 'perf_info.json'

# Process state object; ParseTask enters it as a context manager around
# the manifest-loading steps.
PARSING_STATE = DbtProcessState('parsing')
|
|
|
|
class ParseTask(ConfiguredTask):
    """Task that runs project parsing as explicit, individually logged steps.

    ``run`` always loads the manifest and writes the loader's performance
    info; compiling and writing the manifest are opt-in through
    ``args.compile`` and ``args.write_manifest``.
    """

    def __init__(self, args, config):
        super().__init__(args, config)
        # Filled in later: ``manifest`` and ``loader`` by
        # get_full_manifest(), ``graph`` by compile_manifest().
        self.manifest: Optional[Manifest] = None
        self.graph: Optional[Graph] = None
        self.loader: Optional[ManifestLoader] = None

    def write_manifest(self):
        """Write ``self.manifest`` to MANIFEST_FILE_NAME in the target path."""
        target_dir = self.config.target_path
        self.manifest.write(os.path.join(target_dir, MANIFEST_FILE_NAME))

    def write_perf_info(self):
        """Serialize the loader's perf info to JSON and log the file path."""
        out_path = os.path.join(self.config.target_path, PERF_INFO_FILE_NAME)
        payload = json.dumps(
            self.loader._perf_info,
            cls=dbt.utils.JSONEncoder,
            indent=4,
        )
        write_file(out_path, payload)
        print_timestamped_line(f"Performance info: {out_path}")

    # The method below gathers code that normally lives in other files,
    # which simplifies logging and makes the parsing flow of control
    # easier to follow and manage. The cost is that changes to those
    # other methods may need to be mirrored here:
    #   ManifestLoader.get_full_manifest
    #   ManifestLoader.load
    #   ManifestLoader.load_all

    def get_full_manifest(self):
        """Load, check and flatten the manifest, logging after each step.

        Stores the loader and the manifest on ``self`` and records the
        elapsed wall-clock time of the whole sequence on the loader's
        perf info as ``load_all_elapsed``.
        """
        config = self.config
        adapter = get_adapter(config)  # type: ignore
        query_header_hook = adapter.connections.set_query_header

        with PARSING_STATE:
            started_at = time.perf_counter()

            dependencies = config.load_dependencies()
            print_timestamped_line("Dependencies loaded")

            manifest_loader = ManifestLoader(
                config, dependencies, query_header_hook
            )
            print_timestamped_line("ManifestLoader created")

            parsed_manifest = manifest_loader.load()
            print_timestamped_line("Manifest loaded")

            _check_manifest(parsed_manifest, config)
            print_timestamped_line("Manifest checked")

            parsed_manifest.build_flat_graph()
            print_timestamped_line("Flat graph built")

            # Covers everything from dependency loading to the flat graph.
            elapsed = time.perf_counter() - started_at
            manifest_loader._perf_info.load_all_elapsed = elapsed

        self.loader = manifest_loader
        self.manifest = parsed_manifest
        print_timestamped_line("Manifest loaded")

    def compile_manifest(self):
        """Compile ``self.manifest`` and keep the result in ``self.graph``."""
        compiler = get_adapter(self.config).get_compiler()
        self.graph = compiler.compile(self.manifest)

    def run(self):
        """Parse the project, optionally compile/write, then dump perf info."""
        print_timestamped_line('Start parsing.')
        self.get_full_manifest()

        # Optional step, enabled by args.compile.
        if self.args.compile:
            print_timestamped_line('Compiling.')
            self.compile_manifest()

        # Optional step, enabled by args.write_manifest.
        if self.args.write_manifest:
            print_timestamped_line('Writing manifest.')
            self.write_manifest()

        # Performance info is written unconditionally.
        self.write_perf_info()
        print_timestamped_line('Done.')