dbt-selly/dbt-env/lib/python3.8/site-packages/agate/table/denormalize.py

141 lines
4.4 KiB
Python
Raw Normal View History

2022-03-22 15:13:27 +00:00
#!/usr/bin/env python
# pylint: disable=W0212
from collections import OrderedDict
try:
from cdecimal import Decimal
except ImportError: # pragma: no cover
from decimal import Decimal
import six
from agate.data_types import Number
from agate.type_tester import TypeTester
from agate.rows import Row
from agate import utils
def denormalize(self, key=None, property_column='property', value_column='value', default_value=utils.default, column_types=None):
"""
Create a new table with row values converted into columns.
For example:
+---------+-----------+---------+
| name | property | value |
+=========+===========+=========+
| Jane | gender | female |
+---------+-----------+---------+
| Jane | race | black |
+---------+-----------+---------+
| Jane | age | 24 |
+---------+-----------+---------+
| ... | ... | ... |
+---------+-----------+---------+
Can be denormalized so that each unique value in `field` becomes a
column with `value` used for its values.
+---------+----------+--------+-------+
| name | gender | race | age |
+=========+==========+========+=======+
| Jane | female | black | 24 |
+---------+----------+--------+-------+
| Jack | male | white | 35 |
+---------+----------+--------+-------+
| Joe | male | black | 28 |
+---------+----------+--------+-------+
If one or more keys are specified then the resulting table will
automatically have :code:`row_names` set to those keys.
This is the opposite of :meth:`.Table.normalize`.
:param key:
A column name or a sequence of column names that should be
maintained as they are in the normalized table. Typically these
are the tables unique identifiers and any metadata about them. Or,
:code:`None` if there are no key columns.
:param field_column:
The column whose values should become column names in the new table.
:param property_column:
The column whose values should become the values of the property
columns in the new table.
:param default_value:
Value to be used for missing values in the pivot table. If not
specified :code:`Decimal(0)` will be used for aggregations that
return :class:`.Number` data and :code:`None` will be used for
all others.
:param column_types:
A sequence of column types with length equal to number of unique
values in field_column or an instance of :class:`.TypeTester`.
Defaults to a generic :class:`.TypeTester`.
:returns:
A new :class:`.Table`.
"""
from agate.table import Table
if key is None:
key = []
elif not utils.issequence(key):
key = [key]
field_names = []
row_data = OrderedDict()
for row in self.rows:
row_key = tuple(row[k] for k in key)
if row_key not in row_data:
row_data[row_key] = OrderedDict()
f = six.text_type(row[property_column])
v = row[value_column]
if f not in field_names:
field_names.append(f)
row_data[row_key][f] = v
if default_value == utils.default:
if isinstance(self.columns[value_column].data_type, Number):
default_value = Decimal(0)
else:
default_value = None
new_column_names = key + field_names
new_rows = []
row_names = []
for k, v in row_data.items():
row = list(k)
if len(k) == 1:
row_names.append(k[0])
else:
row_names.append(k)
for f in field_names:
if f in v:
row.append(v[f])
else:
row.append(default_value)
new_rows.append(Row(row, new_column_names))
key_column_types = [self.column_types[self.column_names.index(name)] for name in key]
if column_types is None or isinstance(column_types, TypeTester):
tester = TypeTester() if column_types is None else column_types
force_update = dict(zip(key, key_column_types))
force_update.update(tester._force)
tester._force = force_update
new_column_types = tester.run(new_rows, new_column_names)
else:
new_column_types = key_column_types + list(column_types)
return Table(new_rows, new_column_names, new_column_types, row_names=row_names)