77 lines
2.8 KiB
Python
77 lines
2.8 KiB
Python
#!/usr/bin/env python
|
|
# pylint: disable=W0212
|
|
|
|
from agate.rows import Row
|
|
from agate import utils
|
|
|
|
|
|
def homogenize(self, key, compare_values, default_row=None):
    """
    Fill in missing rows in a series.

    This can be used, for instance, to add rows for missing years in a time
    series.

    Missing rows are found by comparing the values in the :code:`key` columns
    with those provided as :code:`compare_values`.

    Values not found in the table will be used to generate new rows with
    the given :code:`default_row`.

    :code:`default_row` should be an array of values or an array-generating
    function. If not specified, the new rows will have :code:`None` in all
    columns not specified in :code:`key`.

    If :code:`default_row` is an array of values, its length should be row
    length minus the number of column names provided in the :code:`key`.
    Its values fill the non-key columns in table column order. The array is
    copied for every new row and is never modified.

    If it is an array-generating function, the function should take a tuple
    of the missing key values for each new row and output a full row
    including those values.

    New rows are appended after the existing rows. Because the missing
    values are computed with a set difference, the relative order of the
    appended rows is not guaranteed.

    :param key:
        Either a column name or a sequence of such names.
    :param compare_values:
        Either an array of column values if key is a single column name or a
        sequence of arrays of values if key is a sequence of names. It can
        also be a generator that yields either of the two. A row is created for
        each value or list of values not found in the rows of the table.
        Values must be hashable.
    :param default_row:
        An array of values or a function to generate new rows. The length of
        the input array should be equal to row length minus column_names
        count. The length of array generated by the function should be the
        row length.
    :returns:
        A new :class:`.Table`.
    """
    rows = list(self._rows)

    if not utils.issequence(key):
        key = [key]

    # Materialize once: compare_values may be a generator, and the single-key
    # path below walks it twice. Without this, the items consumed by any()
    # would be missing from the wrapped list.
    compare_values = list(compare_values)

    if len(key) == 1:
        # With a single key column, callers may pass bare values instead of
        # one-element sequences; wrap them so every entry is a sequence.
        if any(not utils.issequence(compare_value) for compare_value in compare_values):
            compare_values = [[compare_value] for compare_value in compare_values]

    column_values = [self._columns.get(name) for name in key]
    column_indexes = [self._column_names.index(name) for name in key]

    # Tuples of key values already present in the table, one per row.
    column_values = zip(*column_values)
    differences = list(set(map(tuple, compare_values)) - set(column_values))

    for difference in differences:
        if callable(default_row):
            new_row = default_row(difference)
        else:
            if default_row is not None:
                # Copy: inserting into the caller's list would corrupt it
                # and make every subsequent new row longer than the last.
                new_row = list(default_row)
            else:
                new_row = [None] * (len(self._column_names) - len(key))

            # Insert in ascending column position so that each insert lands
            # at its final index even when `key` is not in table order.
            placements = sorted(zip(column_indexes, difference), key=lambda pair: pair[0])

            for i, d in placements:
                new_row.insert(i, d)

        rows.append(Row(new_row, self._column_names))

    return self._fork(rows)
|