dbt-selly/dbt-env/lib/python3.8/site-packages/agate/table/homogenize.py

#!/usr/bin/env python
# pylint: disable=W0212

from agate.rows import Row
from agate import utils


def homogenize(self, key, compare_values, default_row=None):
    """
    Fill in missing rows in a series.

    This can be used, for instance, to add rows for missing years in a time
    series.

    Missing rows are found by comparing the values in the :code:`key` columns
    with those provided as :code:`compare_values`.

    Values not found in the table will be used to generate new rows with
    the given :code:`default_row`.

    :code:`default_row` should be an array of values or an array-generating
    function. If not specified, the new rows will have :code:`None` in all
    columns not specified in :code:`key`.

    If :code:`default_row` is an array of values, its length should be row
    length minus the number of column names provided in the :code:`key`. The
    array is copied for every generated row and is never modified.

    If it is an array-generating function, the function should take an array
    of missing values for each new row and output a full row including those
    values.

    :param key:
        Either a column name or a sequence of such names.
    :param compare_values:
        Either an array of column values if key is a single column name or a
        sequence of arrays of values if key is a sequence of names. It can
        also be a generator that yields either of the two. A row is created for
        each value or list of values not found in the rows of the table.
    :param default_row:
        An array of values or a function to generate new rows. The length of
        the input array should be equal to row length minus the number of
        key columns. The length of array generated by the function should be
        the row length.
    :returns:
        A new :class:`.Table`.
    """
    rows = list(self._rows)

    if not utils.issequence(key):
        key = [key]

    # compare_values may be a one-shot generator. It is scanned twice below
    # when key is a single column, so materialize it before the first pass
    # or the values consumed by any() would be lost.
    compare_values = list(compare_values)

    if len(key) == 1:
        if any(not utils.issequence(compare_value) for compare_value in compare_values):
            compare_values = [[compare_value] for compare_value in compare_values]

    column_values = [self._columns.get(name) for name in key]
    column_indexes = [self._column_names.index(name) for name in key]

    # One tuple of key values per existing row.
    existing_keys = set(zip(*column_values))
    differences = list(set(map(tuple, compare_values)) - existing_keys)

    filler_width = len(self._column_names) - len(key)

    for difference in differences:
        if callable(default_row):
            new_row = default_row(difference)
        else:
            # Build a fresh list per row: inserting into the caller's
            # default_row would corrupt it and every later generated row.
            if default_row is not None:
                new_row = list(default_row)
            else:
                new_row = [None] * filler_width

            # Insert in ascending column order so each key value lands at
            # its final index even when key names are not listed in column
            # order.
            placements = sorted(zip(column_indexes, difference), key=lambda pair: pair[0])

            for index, value in placements:
                new_row.insert(index, value)

        rows.append(Row(new_row, self._column_names))

    return self._fork(rows)