#!/usr/bin/env python
# pylint: disable=W0212

from agate.rows import Row
from agate import utils


def homogenize(self, key, compare_values, default_row=None):
    """
    Fill in missing rows in a series.

    This can be used, for instance, to add rows for missing years in a time
    series.

    Missing rows are found by comparing the values in the :code:`key` columns
    with those provided as :code:`compare_values`.

    Values not found in the table will be used to generate new rows with
    the given :code:`default_row`.

    :code:`default_row` should be an array of values or an array-generating
    function. If not specified, the new rows will have :code:`None` in all
    columns not specified in :code:`key`.

    If :code:`default_row` is an array of values, its length should be row
    length minus the number of column names provided in the :code:`key`.
    The array is copied for every generated row and is never modified.

    If it is an array-generating function, the function should take an array
    of missing values for each new row and output a full row including those
    values.

    New rows are appended after the existing rows, in the order in which
    their values first appear in :code:`compare_values`. Duplicate entries in
    :code:`compare_values` produce a single new row.

    :param key:
        Either a column name or a sequence of such names.
    :param compare_values:
        Either an array of column values if key is a single column name or a
        sequence of arrays of values if key is a sequence of names. It can
        also be a generator that yields either of the two. A row is created
        for each value or list of values not found in the rows of the table.
    :param default_row:
        An array of values or a function to generate new rows. The length of
        the input array should be equal to row length minus the number of
        key columns. The length of array generated by the function should be
        the row length.
    :returns:
        A new :class:`.Table`.
    """
    rows = list(self._rows)

    if not utils.issequence(key):
        key = [key]

    # compare_values may be a one-shot generator and is traversed more than
    # once below, so materialize it before doing anything else.
    compare_values = list(compare_values)

    # For a single key column, bare values are accepted; wrap them so that
    # every entry is a sequence with one item per key column.
    if len(key) == 1:
        if any(not utils.issequence(compare_value) for compare_value in compare_values):
            compare_values = [[compare_value] for compare_value in compare_values]

    column_values = [self._columns.get(name) for name in key]
    column_indexes = [self._column_names.index(name) for name in key]

    # Tuples of key values already present in the table, one per row.
    seen = set(zip(*column_values))

    # Collect missing key tuples in first-appearance order. Adding each one
    # to `seen` also de-duplicates repeated entries in compare_values.
    differences = []

    for candidate in map(tuple, compare_values):
        if candidate not in seen:
            seen.add(candidate)
            differences.append(candidate)

    use_factory = callable(default_row)

    for difference in differences:
        if use_factory:
            rows.append(Row(default_row(difference), self._column_names))
            continue

        if default_row is not None:
            # Copy: inserting into the caller's list would corrupt it and
            # make every subsequent generated row longer than the last.
            new_row = list(default_row)
        else:
            new_row = [None] * (len(self._column_names) - len(key))

        # Insert key values in ascending column order. Inserting at a lower
        # index after a higher one would shift the earlier value out of its
        # column when key is not listed in table column order.
        placements = sorted(zip(column_indexes, difference), key=lambda pair: pair[0])

        for index, value in placements:
            new_row.insert(index, value)

        rows.append(Row(new_row, self._column_names))

    return self._fork(rows)