562 lines
18 KiB
Python
562 lines
18 KiB
Python
import errno
|
|
import functools
|
|
import fnmatch
|
|
import json
|
|
import os
|
|
import os.path
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tarfile
|
|
import requests
|
|
import stat
|
|
from typing import (
|
|
Type, NoReturn, List, Optional, Dict, Any, Tuple, Callable, Union
|
|
)
|
|
|
|
import dbt.exceptions
|
|
from dbt.logger import GLOBAL_LOGGER as logger
|
|
from dbt.utils import _connection_exception_retry as connection_exception_retry
|
|
|
|
# The ctypes names are only imported when running on Windows; on every other
# platform both names are bound to None so they still exist at module level.
if sys.platform == 'win32':
    from ctypes import WinDLL, c_bool
else:
    WinDLL = None
    c_bool = None
|
|
|
|
|
|
def find_matching(
    root_path: str,
    relative_paths_to_search: List[str],
    file_pattern: str,
) -> List[Dict[str, Any]]:
    """
    Given an absolute `root_path`, a list of relative paths to that
    absolute root path (`relative_paths_to_search`), and a `file_pattern`
    like '*.sql', returns information about the files. For example:

    > find_matching('/root/path', ['models'], '*.sql')

      [ { 'searched_path': 'models',
          'absolute_path': '/root/path/models/model_one.sql',
          'relative_path': 'model_one.sql',
          'modification_time': 1612345678.0 },
        { 'searched_path': 'models',
          'absolute_path': '/root/path/models/subdirectory/model_two.sql',
          'relative_path': 'subdirectory/model_two.sql',
          'modification_time': 1612345999.0 } ]

    The pattern is matched case-insensitively against the bare file name.
    `modification_time` is the value of `os.path.getmtime` for the file, or
    0.0 when it cannot be read (the failure is logged, not raised).
    Search paths that do not exist simply contribute no entries.
    """
    matching = []
    root_path = os.path.normpath(root_path)
    reobj = re.compile(fnmatch.translate(file_pattern), re.IGNORECASE)

    for relative_path_to_search in relative_paths_to_search:
        absolute_path_to_search = os.path.join(
            root_path, relative_path_to_search
        )

        walk_results = os.walk(absolute_path_to_search)
        for current_path, _subdirectories, local_files in walk_results:
            for local_file in local_files:
                # Filter first: there is no reason to stat (or to log stat
                # failures for) files that will never be returned.
                if not reobj.match(local_file):
                    continue

                absolute_path = os.path.join(current_path, local_file)
                relative_path = os.path.relpath(
                    absolute_path, absolute_path_to_search
                )

                modification_time = 0.0
                try:
                    modification_time = os.path.getmtime(absolute_path)
                except OSError:
                    logger.exception(
                        f"Error retrieving modification time for file {absolute_path}"
                    )

                matching.append({
                    'searched_path': relative_path_to_search,
                    'absolute_path': absolute_path,
                    'relative_path': relative_path,
                    'modification_time': modification_time,
                })

    return matching
|
|
|
|
|
|
def load_file_contents(path: str, strip: bool = True) -> str:
    """Read the file at `path` as bytes and return it decoded as UTF-8.

    The path is passed through `convert_path` before opening. When `strip`
    is true (the default), leading and trailing whitespace is removed from
    the decoded text.
    """
    with open(convert_path(path), 'rb') as handle:
        raw_bytes = handle.read()

    text = raw_bytes.decode('utf-8')
    return text.strip() if strip else text
|
|
|
|
|
|
def make_directory(path: str) -> None:
    """
    Create the directory at `path`, along with any missing parents. Nothing
    happens if the path already exists, including when another thread
    creates it between the existence check and the `makedirs` call.
    """
    path = convert_path(path)
    if os.path.exists(path):
        return

    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST means somebody else won the race to create it; anything
        # else is a genuine failure and is propagated.
        if e.errno != errno.EEXIST:
            raise e
|
|
|
|
|
|
def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool:
    """
    Make a file at `path` assuming that the directory it resides in already
    exists. The file is saved with contents `contents`, encoded as UTF-8 so
    that it round-trips through `load_file_contents`.

    An existing file is left untouched unless `overwrite` is true.

    Returns True if the file was written, False if it was skipped.
    """
    # Convert before the existence check so the check and the write look at
    # exactly the same (possibly long-path-prefixed) location.
    path = convert_path(path)
    if overwrite or not os.path.exists(path):
        with open(path, 'w', encoding='utf-8') as fh:
            fh.write(contents)
        return True

    return False
|
|
|
|
|
|
def make_symlink(source: str, link_path: str) -> None:
    """
    Create a symbolic link at `link_path` that points to `source`.

    When `supports_symlinks()` is false, `dbt.exceptions.system_error` is
    invoked first (presumably it raises; confirm against dbt.exceptions).
    """
    symlinks_available = supports_symlinks()
    if not symlinks_available:
        dbt.exceptions.system_error('create a symbolic link')

    os.symlink(source, link_path)
|
|
|
|
|
|
def supports_symlinks() -> bool:
    """Return True when the `os` module exposes a non-None `symlink`."""
    symlink_fn = getattr(os, "symlink", None)
    return symlink_fn is not None
|
|
|
|
|
|
def write_file(path: str, contents: str = '') -> bool:
    """Write `str(contents)` to `path` as UTF-8, creating parent directories.

    On Windows (`os.name == 'nt'`) any exception raised while creating the
    directory or writing the file is logged at debug level and suppressed;
    on every other platform it propagates. Always returns True when it
    returns at all.
    """
    path = convert_path(path)
    try:
        make_directory(os.path.dirname(path))
        with open(path, 'w', encoding='utf-8') as f:
            f.write(str(contents))
    except Exception as exc:
        # Catching only FileNotFoundError is not enough: Windows sometimes
        # raises other errors, and can fail even for paths shorter than the
        # length limit. So write errors are swallowed on Windows only.
        if os.name != 'nt':
            raise

        # winerror 3 means the path was definitely too long; any other
        # failure is only assumed to be a path-length problem.
        if getattr(exc, 'winerror', 0) == 3:
            reason = 'Path was too long'
        else:
            reason = 'Path was possibly too long'

        # Log and carry on rather than failing the caller.
        logger.debug(
            f'Could not write to path {path}({len(path)} characters): '
            f'{reason}\nexception: {exc}'
        )
    return True
|
|
|
|
|
|
def read_json(path: str) -> Dict[str, Any]:
    """Load the file at `path` via `load_file_contents` and parse it as JSON."""
    raw_text = load_file_contents(path)
    return json.loads(raw_text)
|
|
|
|
|
|
def write_json(path: str, data: Dict[str, Any]) -> bool:
    """Serialize `data` to JSON with `dbt.utils.JSONEncoder` and write it to
    `path` using `write_file`, returning `write_file`'s result.
    """
    serialized = json.dumps(data, cls=dbt.utils.JSONEncoder)
    return write_file(path, serialized)
|
|
|
|
|
|
def _windows_rmdir_readonly(
    func: Callable[[str], Any], path: str, exc: Tuple[Any, OSError, Any]
):
    """Error handler for `shutil.rmtree` that retries permission failures.

    :param func: the function that failed; it is called again with `path`
        after the permissions are changed.
    :param path: the path `func` failed on.
    :param exc: an exception-info tuple; only the exception instance at
        index 1 is inspected.
    :raises OSError: the exception from `exc` when its errno is not EACCES,
        or whatever `os.chmod` / the retried `func` raises.
    """
    exception_val = exc[1]
    if exception_val.errno == errno.EACCES:
        # Make the entry writable for its owner, then retry the operation.
        os.chmod(path, stat.S_IWUSR)
        func(path)
    else:
        # Raise the exception we were handed rather than using a bare
        # `raise`, which only works while an exception is being handled.
        raise exception_val
|
|
|
|
|
|
def resolve_path_from_base(path_to_resolve: str, base_path: str) -> str:
    """
    If path_to_resolve is a relative path, create an absolute path
    with base_path as the base.

    If path_to_resolve is an absolute path or a user path (~), just
    resolve it to an absolute path and return.
    """
    expanded = os.path.expanduser(path_to_resolve)
    # os.path.join discards base_path when `expanded` is already absolute.
    combined = os.path.join(base_path, expanded)
    return os.path.abspath(combined)
|
|
|
|
|
|
def rmdir(path: str) -> None:
    """
    Recursively delete the directory at `path`.

    On Windows an error handler is installed that retries after changing
    permissions; without it, removing directories (e.g. ones cloned via git)
    can make rmtree throw a PermissionError. Elsewhere no handler is used.
    """
    if sys.platform == 'win32':
        error_handler = _windows_rmdir_readonly
    else:
        error_handler = None

    shutil.rmtree(convert_path(path), onerror=error_handler)
|
|
|
|
|
|
def _win_prepare_path(path: str) -> str:
    """Normalize a windows path and make it absolute so that it can be used
    with the long-path prefix.
    """
    prepared = os.path.normpath(path)

    # A path with a single leading '\' has no drive according to
    # splitdrive(), but the prefix needs one, so borrow the drive of the
    # current directory. A leading '\\' is a UNC mount point, which
    # splitdrive() already handles, so leave those alone.
    rooted_without_drive = (
        prepared.startswith('\\') and not prepared.startswith('\\\\')
    )
    if rooted_without_drive:
        cwd_drive, _ = os.path.splitdrive(os.getcwd())
        prepared = cwd_drive + prepared

    # Anything still lacking a drive is relative to the current directory.
    # Join it onto getcwd() by hand; `ntpath.abspath` allegedly doesn't
    # always play nice with long paths.
    drive, _ = os.path.splitdrive(prepared)
    if not drive:
        prepared = os.path.join(os.getcwd(), prepared)

    return prepared
|
|
|
|
|
|
def _supports_long_paths() -> bool:
    """Return True off Windows; on Windows, ask ntdll whether long paths are
    enabled, returning False when that cannot be determined.
    """
    if sys.platform != 'win32':
        return True

    # `WinDLL('ntdll')` is used instead of `windll.ntdll` on the advice of
    # Eryk Sun (pointer caching concerns), see
    # https://stackoverflow.com/a/35097999/11262881
    try:
        ntdll = WinDLL('ntdll')
    except OSError:
        # Not expected in practice, since python itself needs ntdll.
        return False

    # Some windows versions do not export this function at all.
    if not hasattr(ntdll, 'RtlAreLongPathsEnabled'):
        return False

    # Have ctypes interpret the result as a single unsigned byte (a bool).
    ntdll.RtlAreLongPathsEnabled.restype = c_bool
    return ntdll.RtlAreLongPathsEnabled()
|
|
|
|
|
|
def convert_path(path: str) -> str:
    """Turn a path that may exceed 260 characters into one that Windows can
    read and write.

    Paths shorter than 250 characters, and any path on a system where
    `_supports_long_paths()` is true (which includes every non-Windows
    platform), are returned unchanged.
    """
    # 250 rather than 260 leaves headroom: some parts of python seem to
    # append '\*.*' to strings.
    if len(path) < 250 or _supports_long_paths():
        return path

    prefix = '\\\\?\\'
    if path.startswith(prefix):
        # Already prefixed, nothing to do.
        return path

    path = _win_prepare_path(path)

    # The prepared path should never carry the prefix already, but check
    # anyway in case os.getcwd() did something unexpected.
    return path if path.startswith(prefix) else prefix + path
|
|
|
|
|
|
def remove_file(path: str) -> None:
    """Delete the file at `path` (after `convert_path`) with `os.remove`."""
    os.remove(convert_path(path))
|
|
|
|
|
|
def path_exists(path: str) -> bool:
    """Return `os.path.lexists` for `path` after `convert_path`."""
    return os.path.lexists(convert_path(path))
|
|
|
|
|
|
def path_is_symlink(path: str) -> bool:
    """Return `os.path.islink` for `path` after `convert_path`."""
    return os.path.islink(convert_path(path))
|
|
|
|
|
|
def open_dir_cmd() -> str:
    """Return the command name chosen for the current `sys.platform`:
    'start' on win32, 'open' on darwin, and 'xdg-open' everywhere else.
    """
    # https://docs.python.org/3/library/sys.html#sys.platform
    commands_by_platform = {
        'win32': 'start',
        'darwin': 'open',
    }
    return commands_by_platform.get(sys.platform, 'xdg-open')
|
|
|
|
|
|
def _handle_posix_cwd_error(
    exc: OSError, cwd: str, cmd: List[str]
) -> NoReturn:
    """Raise a WorkingDirectoryError whose message is chosen from
    `exc.errno`.
    """
    known_messages = {
        errno.ENOENT: 'Directory does not exist',
        errno.EACCES: (
            'Current user cannot access directory, check permissions'
        ),
        errno.ENOTDIR: 'Not a directory',
    }
    message = known_messages.get(exc.errno)
    if message is None:
        message = 'Unknown OSError: {} - cwd'.format(str(exc))
    raise dbt.exceptions.WorkingDirectoryError(cwd, cmd, message)
|
|
|
|
|
|
def _handle_posix_cmd_error(
    exc: OSError, cwd: str, cmd: List[str]
) -> NoReturn:
    """Raise an ExecutableError whose message is chosen from `exc.errno`."""
    known_messages = {
        errno.ENOENT: "Could not find command, ensure it is in the user's PATH",
        errno.EACCES: 'User does not have permissions for this command',
    }
    message = known_messages.get(exc.errno)
    if message is None:
        message = 'Unknown OSError: {} - cmd'.format(str(exc))
    raise dbt.exceptions.ExecutableError(cwd, cmd, message)
|
|
|
|
|
|
def _handle_posix_error(exc: OSError, cwd: str, cmd: List[str]) -> NoReturn:
    """OSError handling for posix systems.

    The error is attributed to the working directory when `exc.filename`
    equals `cwd`, and to the command otherwise. Things that could trigger
    an OSError:

    - cwd does not exist
        - exc.errno == ENOENT
        - exc.filename == cwd
    - cwd has permissions that prevent the current user moving to it
        - exc.errno == EACCES
        - exc.filename == cwd
    - cwd exists but is not a directory
        - exc.errno == ENOTDIR
        - exc.filename == cwd
    - cmd[0] does not exist
        - exc.errno == ENOENT
        - exc.filename == None(?)
    - cmd[0] exists but the current user may not execute it (executable
      bit not set for the user)
        - exc.errno == EACCES
        - exc.filename == None(?)
    """
    failed_path = getattr(exc, 'filename', None)
    if failed_path == cwd:
        handler = _handle_posix_cwd_error
    else:
        handler = _handle_posix_cmd_error
    handler(exc, cwd, cmd)
|
|
|
|
|
|
def _handle_windows_error(exc: OSError, cwd: str, cmd: List[str]) -> NoReturn:
    """Raise the dbt exception matching `exc.errno` on Windows.

    ENOENT and ENOEXEC become ExecutableError, ENOTDIR becomes
    WorkingDirectoryError, and anything else becomes a CommandError that
    carries the errno and its symbolic name.
    """
    cls: Type[dbt.exceptions.Exception] = dbt.exceptions.CommandError
    code = exc.errno

    if code == errno.ENOENT:
        cls = dbt.exceptions.ExecutableError
        message = ("Could not find command, ensure it is in the user's PATH "
                   "and that the user has permissions to run it")
    elif code == errno.ENOEXEC:
        cls = dbt.exceptions.ExecutableError
        message = 'Command was not executable, ensure it is valid'
    elif code == errno.ENOTDIR:
        cls = dbt.exceptions.WorkingDirectoryError
        message = ('Unable to cd: path does not exist, user does not have'
                   ' permissions, or not a directory')
    else:
        errno_name = errno.errorcode.get(code, '<Unknown!>')
        message = 'Unknown error: {} (errno={}: "{}")'.format(
            str(exc), code, errno_name
        )

    raise cls(cwd, cmd, message)
|
|
|
|
|
|
def _interpret_oserror(exc: OSError, cwd: str, cmd: List[str]) -> NoReturn:
    """Interpret an OSError exc and raise the appropriate dbt exception.

    An empty `cmd` raises CommandError straight away; otherwise the error is
    handed to the Windows or posix handler depending on `os.name`.
    """
    if not cmd:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # both handlers raise unconditionally
    handler = _handle_windows_error if os.name == 'nt' else _handle_posix_error
    handler(exc, cwd, cmd)

    # should be unreachable, but make sure _something_ is raised
    raise dbt.exceptions.InternalException(
        'Unhandled exception in _interpret_oserror: {}'.format(exc)
    )
|
|
|
|
|
|
def run_cmd(
    cwd: str, cmd: List[str], env: Optional[Dict[str, Any]] = None
) -> Tuple[bytes, bytes]:
    """Run `cmd` in directory `cwd` and return its (stdout, stderr).

    :param cwd: working directory for the child process.
    :param cmd: argument list; `cmd[0]` is replaced by the absolute path
        found by `shutil.which` when there is one.
    :param env: extra environment variables layered on top of a copy of
        `os.environ`; when None, no `env` is passed to the child.
    :raises dbt.exceptions.CommandError: if `cmd` is empty.
    :raises dbt.exceptions.CommandResultError: if the exit code is non-zero.

    An OSError while launching or communicating with the process is passed
    to `_interpret_oserror`, which raises a dbt exception.
    """
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # Passing `env` to Popen replaces the environment entirely, which has
    # exciting consequences on Windows, so overlay it on the current one.
    if env is None:
        full_env = None
    else:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        resolved_exe = shutil.which(cmd[0])
        if resolved_exe:
            cmd = [os.path.abspath(resolved_exe), *cmd[1:]]
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env,
        )
        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{!s}"'.format(out))
    logger.debug('STDERR: "{!s}"'.format(err))

    exit_code = proc.returncode
    if exit_code != 0:
        logger.debug('command return code={}'.format(exit_code))
        raise dbt.exceptions.CommandResultError(
            cwd, cmd, exit_code, out, err
        )

    return out, err
|
|
|
|
|
|
def download_with_retries(
    url: str, path: str, timeout: Optional[Union[float, tuple]] = None
) -> None:
    """Call `download(url, path, timeout)` through
    `connection_exception_retry`, passing 5 as its second argument.
    """
    attempt = functools.partial(download, url, path, timeout)
    connection_exception_retry(attempt, 5)
|
|
|
|
|
|
def download(
    url: str, path: str, timeout: Optional[Union[float, tuple]] = None
) -> None:
    """Fetch `url` with `requests.get` and write the body to `path`.

    :param url: the address to fetch.
    :param path: destination file; opened for binary writing after
        `convert_path`.
    :param timeout: timeout handed to `requests.get`. When falsy, the
        DBT_HTTP_TIMEOUT environment variable is used, defaulting to 10.

    NOTE(review): the response status is not checked here, so whatever
    body comes back is written to disk.
    """
    path = convert_path(path)

    if timeout:
        connection_timeout = timeout
    else:
        connection_timeout = float(os.getenv('DBT_HTTP_TIMEOUT', 10))

    response = requests.get(url, timeout=connection_timeout)

    chunk_size = 1024 * 64
    with open(path, 'wb') as handle:
        for block in response.iter_content(chunk_size):
            handle.write(block)
|
|
|
|
|
|
def rename(from_path: str, to_path: str, force: bool = False) -> None:
    """Move `from_path` to `to_path` with `shutil.move`.

    :param from_path: the existing file or directory to move.
    :param to_path: where it should end up.
    :param force: when true, anything already at `to_path` is removed
        first: symlinks (including dangling ones) and regular files are
        unlinked, directories are removed recursively. When false, the
        destination is left alone and `shutil.move` decides what happens.
    """
    from_path = convert_path(from_path)
    to_path = convert_path(to_path)

    # lexists() rather than exists(): exists() follows symlinks and so
    # reports False for a dangling link that still occupies the name.
    if force and os.path.lexists(to_path):
        if path_is_symlink(to_path) or not os.path.isdir(to_path):
            # rmtree cannot remove a symlink or a plain file; unlink it.
            remove_file(to_path)
        else:
            rmdir(to_path)

    shutil.move(from_path, to_path)
|
|
|
|
|
|
def untar_package(
    tar_path: str, dest_dir: str, rename_to: Optional[str] = None
) -> None:
    """Extract the tarball at `tar_path` into `dest_dir`.

    :param tar_path: path of the archive to open (after `convert_path`).
    :param dest_dir: directory the members are extracted into.
    :param rename_to: when given, the common leading path of the archive
        members (normally its single top-level directory) is renamed to
        this name inside `dest_dir`, replacing anything already there.
    """
    tar_path = convert_path(tar_path)
    with tarfile.open(tar_path, 'r') as tarball:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive. If tarballs can come from untrusted sources, members
        # should be validated (or an extraction filter used) first.
        tarball.extractall(dest_dir)
        member_names = tarball.getnames()

    if not rename_to:
        return

    # commonprefix() compares character by character and can return a
    # partial path component (e.g. 'pkg/a' for 'pkg/a.sql' and 'pkg/ab.sql').
    # commonpath() works on whole components instead.
    try:
        tar_dir_name = os.path.commonpath(member_names) if member_names else ''
    except ValueError:
        # commonpath() rejects mixed absolute/relative names; fall back to
        # the character-wise prefix in that case.
        tar_dir_name = os.path.commonprefix(member_names)

    downloaded_path = os.path.join(dest_dir, tar_dir_name)
    desired_path = os.path.join(dest_dir, rename_to)
    dbt.clients.system.rename(downloaded_path, desired_path, force=True)
|
|
|
|
|
|
def chmod_and_retry(func, path, exc_info):
    """Define an error handler to pass to shutil.rmtree.

    On Windows, when a file is marked read-only as git likes to do, rmtree
    will fail. To handle that, on errors try to make the file writable and
    run the failed operation again.

    We want to retry most operations here, but directory listing
    (`os.listdir`, or `os.scandir` as reported by newer versions of
    shutil.rmtree) is one that we know will be useless to retry.

    :param func: the function that failed.
    :param path: the path `func` was called with.
    :param exc_info: the exception-info tuple for the failure; index 1 is
        the exception instance.
    :raises: the exception from `exc_info` when no retry is attempted, or
        whatever `os.chmod` / the retried `func` raises.
    """
    is_listing = func is os.listdir or func is os.scandir
    if is_listing or os.name != 'nt':
        # Raise the exception we were handed rather than using a bare
        # `raise`, which only works while an exception is being handled.
        raise exc_info[1]
    os.chmod(path, stat.S_IREAD | stat.S_IWRITE)
    # on error, this will raise.
    func(path)
|
|
|
|
|
|
def _absnorm(path):
    """Return `path` made absolute and then case-normalized, so that two
    paths can be compared for equality.
    """
    absolute = os.path.abspath(path)
    return os.path.normcase(absolute)
|
|
|
|
|
|
def move(src, dst):
    """A re-implementation of shutil.move that properly removes the source
    directory on windows when it has read-only files in it and the move is
    between two drives.

    It closely follows the real shutil.move, except that it cleans up with
    this module's rmtree, and it delegates to shutil.move on non-windows
    OSes since the stock implementation works fine there.
    """
    src = convert_path(src)
    dst = convert_path(dst)
    if os.name != 'nt':
        return shutil.move(src, dst)

    if os.path.isdir(dst):
        if _absnorm(src) == _absnorm(dst):
            # Same directory once normalized: a plain rename is enough.
            os.rename(src, dst)
            return

        # Moving into an existing directory: keep the source's base name.
        source_name = os.path.basename(src.rstrip('/\\'))
        dst = os.path.join(dst, source_name)
        if os.path.exists(dst):
            raise EnvironmentError("Path '{}' already exists".format(dst))

    try:
        os.rename(src, dst)
    except OSError:
        # probably different drives; fall back to copy + delete
        if not os.path.isdir(src):
            shutil.copy2(src, dst)
            os.unlink(src)
        else:
            dst_is_inside_src = _absnorm(dst + '\\').startswith(
                _absnorm(src + '\\')
            )
            if dst_is_inside_src:
                raise EnvironmentError(
                    "Cannot move a directory '{}' into itself '{}'"
                    .format(src, dst)
                )
            shutil.copytree(src, dst, symlinks=True)
            rmtree(src)
|
|
|
|
|
|
def rmtree(path):
    """Recursively remove `path`, using `chmod_and_retry` as the error
    handler so that read-only entries on windows get a second attempt.
    """
    converted = convert_path(path)
    return shutil.rmtree(converted, onerror=chmod_and_retry)
|