dbt-selly/dbt-env/lib/python3.8/site-packages/agate/table/homogenize.py

77 lines
2.8 KiB
Python

#!/usr/bin/env python
# pylint: disable=W0212
from agate.rows import Row
from agate import utils
def homogenize(self, key, compare_values, default_row=None):
    """
    Fill in missing rows in a series.

    This can be used, for instance, to add rows for missing years in a time
    series.

    Missing rows are found by comparing the values in the :code:`key` columns
    with those provided as :code:`compare_values`.

    Values not found in the table will be used to generate new rows with
    the given :code:`default_row`. New rows are appended after the existing
    rows, in the order their values first appear in :code:`compare_values`.

    :code:`default_row` should be an array of values or an array-generating
    function. If not specified, the new rows will have :code:`None` in all
    columns not specified in :code:`key`.

    If :code:`default_row` is an array of values, its length should be row
    length minus the number of column names provided in the :code:`key`.
    The array is copied for every generated row; it is never modified.

    If it is an array-generating function, the function should take an array
    of missing values for each new row and output a full row including those
    values.

    :param key:
        Either a column name or a sequence of such names.
    :param compare_values:
        Either an array of column values if key is a single column name or a
        sequence of arrays of values if key is a sequence of names. It can
        also be a generator that yields either of the two. A row is created for
        each value or list of values not found in the rows of the table.
    :param default_row:
        An array of values or a function to generate new rows. The length of
        the input array should be equal to row length minus the number of
        key columns. The length of array generated by the function should be
        the row length.
    :returns:
        A new :class:`.Table`.
    """
    rows = list(self._rows)

    if not utils.issequence(key):
        key = [key]

    # ``compare_values`` may be a one-shot generator and is traversed more
    # than once below, so materialize it before inspecting it.
    compare_values = list(compare_values)

    if len(key) == 1:
        # Allow bare scalars for a single key column by wrapping each one.
        if any(not utils.issequence(compare_value) for compare_value in compare_values):
            compare_values = [[compare_value] for compare_value in compare_values]

    column_values = [self._columns.get(name) for name in key]
    column_indexes = [self._column_names.index(name) for name in key]

    # Key tuples already present in the table; also used to drop duplicates
    # inside ``compare_values`` while keeping first-appearance order.
    seen = set(zip(*column_values))
    differences = []

    for candidate in map(tuple, compare_values):
        if candidate not in seen:
            seen.add(candidate)
            differences.append(candidate)

    use_factory = callable(default_row)
    filler_width = len(self._column_names) - len(key)

    for difference in differences:
        if use_factory:
            new_row = default_row(difference)
        else:
            # Work on a fresh list each time: inserting into the caller's
            # list would corrupt both their data and every subsequent row.
            if default_row is not None:
                new_row = list(default_row)
            else:
                new_row = [None] * filler_width

            # Insert in ascending column order so that an earlier insert
            # never shifts a later key value away from its column.
            placements = sorted(zip(column_indexes, difference), key=lambda pair: pair[0])

            for index, value in placements:
                new_row.insert(index, value)

        rows.append(Row(new_row, self._column_names))

    return self._fork(rows)