# dbt-selly/dbt-env/lib/python3.8/site-packages/dbt/rpc/gc.py
#
# 128 lines
# 4.1 KiB
# Python

import operator
from datetime import datetime, timedelta
from typing import Optional, List, Iterable, Tuple
import dbt.exceptions
from dbt.contracts.rpc import (
GCSettings,
GCResultState,
GCResult,
TaskID,
)
from dbt.rpc.task_handler_protocol import TaskHandlerMap
# import this to make sure our timedelta encoder is registered
from dbt import helper_types # noqa
class GarbageCollector:
    """Remove finished tasks from a task-handler map.

    The collector never removes a task whose ``state.finished`` is false.
    Automatic collection (``collect_as_required``) is driven by
    ``self.settings``:

    * above ``maxsize`` tracked tasks, the longest-ago-ended tasks in excess
      of ``maxsize`` are removed;
    * otherwise, above ``reapsize`` tracked tasks, every task that ended
      more than ``auto_reap_age`` ago is removed.

    Methods that mutate ``active_tasks`` expect the caller to hold the lock
    guarding that mapping.
    """

    def __init__(
        self,
        active_tasks: TaskHandlerMap,
        settings: Optional[GCSettings] = None,
    ) -> None:
        """Bind the collector to ``active_tasks``.

        :param active_tasks: mapping of task ID to task handler; it is
            mutated in place when tasks are collected.
        :param settings: collection thresholds. When omitted, defaults of
            ``maxsize=1000``, ``reapsize=500`` and a 30 day
            ``auto_reap_age`` are used.
        """
        self.active_tasks: TaskHandlerMap = active_tasks
        self.settings: GCSettings
        if settings is None:
            self.settings = GCSettings(
                maxsize=1000, reapsize=500, auto_reap_age=timedelta(days=30)
            )
        else:
            self.settings = settings

    def _remove_task_if_finished(self, task_id: TaskID) -> GCResultState:
        """Remove the task if it was finished. Raises a KeyError if the entry
        is removed during operation (so hold the lock).

        :return: ``Missing`` if the ID is not tracked, ``Running`` if the
            task has not finished (it is left in place), ``Deleted`` if the
            entry was removed.
        """
        if task_id not in self.active_tasks:
            return GCResultState.Missing

        task = self.active_tasks[task_id]
        if not task.state.finished:
            return GCResultState.Running

        del self.active_tasks[task_id]
        return GCResultState.Deleted

    def _ended_tasks(self) -> Iterable[Tuple[datetime, TaskID]]:
        """Yield ``(ended, task_id)`` for each finished task that has an end
        time, in the iteration order of ``active_tasks``.
        """
        for task in self.active_tasks.values():
            if task.state.finished and task.ended is not None:
                yield task.ended, task.task_id

    def _get_before_list(self, when: datetime) -> List[TaskID]:
        """Return the IDs of finished tasks that ended strictly before
        ``when``.
        """
        return [
            task_id for ended, task_id in self._ended_tasks() if ended < when
        ]

    def _get_oldest_ended_list(self, num: int) -> List[TaskID]:
        """Return the IDs of up to ``num`` finished tasks, oldest end time
        first.

        A non-positive ``num`` selects nothing. Without this guard a negative
        count would be used as a slice bound and select every ended task
        except the newest ``abs(num)``.
        """
        if num <= 0:
            return []
        # sorted() is stable, so tasks with equal end times keep the
        # iteration order of active_tasks.
        candidates = sorted(self._ended_tasks(), key=operator.itemgetter(0))
        return [task_id for _, task_id in candidates[:num]]

    def collect_task_id(
        self, result: GCResult, task_id: TaskID
    ) -> None:
        """To collect a task ID, we just delete it from the tasks dict.

        You must hold the lock, as this mutates `tasks`.

        :param result: accumulator that records the outcome for ``task_id``.
        :param task_id: the task to remove if it has finished.
        :raises dbt.exceptions.InternalException: if the entry vanished from
            the tasks dict while it was being removed.
        """
        try:
            state = self._remove_task_if_finished(task_id)
        except KeyError as exc:
            # someone was mutating tasks while we had the lock, that's
            # not right!
            raise dbt.exceptions.InternalException(
                'Got a KeyError for task uuid={} during gc'
                .format(task_id)
            ) from exc

        result.add_result(task_id=task_id, state=state)

    def collect_multiple_task_ids(
        self, task_ids: Iterable[TaskID]
    ) -> GCResult:
        """Try to collect every ID in ``task_ids`` and return the combined
        result. You must hold the lock, as this mutates the tasks dict.
        """
        result = GCResult()
        for task_id in task_ids:
            self.collect_task_id(result, task_id)
        return result

    def _tasks_to_reap(self) -> List[TaskID]:
        """Pick the task IDs that automatic collection should remove now.

        See the class docstring for the ``maxsize`` / ``reapsize`` rules.
        """
        num_tasks = len(self.active_tasks)
        if num_tasks > self.settings.maxsize:
            # Only the excess over maxsize is evicted, oldest first.
            excess = num_tasks - self.settings.maxsize
            return self._get_oldest_ended_list(excess)
        if num_tasks > self.settings.reapsize:
            cutoff = datetime.utcnow() - self.settings.auto_reap_age
            return self._get_before_list(cutoff)
        return []

    def collect_as_required(self) -> None:
        """Run automatic collection according to ``self.settings``.

        Does nothing when no task qualifies for removal. You must hold the
        lock, as this may mutate the tasks dict.
        """
        to_remove = self._tasks_to_reap()
        if to_remove:
            self.collect_multiple_task_ids(to_remove)

    def collect_selected(
        self,
        task_ids: Optional[List[TaskID]] = None,
        before: Optional[datetime] = None,
        settings: Optional[GCSettings] = None,
    ) -> GCResult:
        """Collect explicitly requested tasks.

        :param task_ids: IDs to collect, if any.
        :param before: when given, also collect every finished task that
            ended strictly before this time.
        :param settings: when given (and truthy), replaces ``self.settings``
            for subsequent automatic collection.
        :return: the per-task outcome of the collection.
        """
        to_gc = set()

        if task_ids is not None:
            to_gc.update(task_ids)

        if settings:
            self.settings = settings

        # we need the lock for this!
        if before is not None:
            to_gc.update(self._get_before_list(before))

        return self.collect_multiple_task_ids(to_gc)