dbt-selly/dbt-env/lib/python3.8/site-packages/agate/table/normalize.py

98 lines
3.2 KiB
Python
Raw Normal View History

2022-03-22 15:13:27 +00:00
#!/usr/bin/env python
# pylint: disable=W0212
from agate.type_tester import TypeTester
from agate.rows import Row
from agate import utils
def normalize(self, key, properties, property_column='property', value_column='value', column_types=None):
    """
    Create a new table with columns converted into row values.

    For example:

    +---------+----------+--------+-------+
    |  name   |  gender  |  race  |  age  |
    +=========+==========+========+=======+
    |  Jane   |  female  |  black |  24   |
    +---------+----------+--------+-------+
    |  Jack   |  male    |  white |  35   |
    +---------+----------+--------+-------+
    |  Joe    |  male    |  black |  28   |
    +---------+----------+--------+-------+

    can be normalized on columns 'gender', 'race' and 'age':

    +---------+-----------+---------+
    |  name   |  property |  value  |
    +=========+===========+=========+
    |  Jane   |  gender   |  female |
    +---------+-----------+---------+
    |  Jane   |  race     |  black  |
    +---------+-----------+---------+
    |  Jane   |  age      |  24     |
    +---------+-----------+---------+
    |  ...    |  ...      |  ...    |
    +---------+-----------+---------+

    This is the opposite of :meth:`.Table.denormalize`.

    :param key:
        A column name or a sequence of column names that should be
        maintained as they are in the normalized table. Typically these
        are the table's unique identifiers and any metadata about them.
        Any sequence type (list, tuple, ...) is accepted.
    :param properties:
        A column name or a sequence of column names that should be
        converted to properties in the new table.
    :param property_column:
        The name to use for the column containing the property names.
    :param value_column:
        The name to use for the column containing the property values.
    :param column_types:
        A sequence of two column types for the property and value column in
        that order or an instance of :class:`.TypeTester`. Defaults to a
        generic :class:`.TypeTester`. A supplied :class:`.TypeTester` is
        left unmodified once this method returns.
    :returns:
        A new :class:`.Table`.
    """
    # Imported at call time rather than module level -- presumably to avoid a
    # circular import with agate.table; confirm before hoisting.
    from agate.table import Table

    # A bare column name is shorthand for a one-element sequence.
    if not utils.issequence(key):
        key = [key]

    if not utils.issequence(properties):
        properties = [properties]

    # Copy into a real list: a tuple (or other non-list sequence) cannot be
    # concatenated with the list of new column names below.
    key = list(key)

    new_column_names = key + [property_column, value_column]

    new_rows = []
    row_names = []

    for row in self._rows:
        k = tuple(row[n] for n in key)
        left_row = list(k)

        # A single key column names the row by its bare value; several key
        # columns name it by the tuple of their values.
        # NOTE(review): row_names receives one entry per source row while
        # new_rows receives one per (row, property) pair, so the two lists
        # differ in length whenever more than one property is given. How
        # Table reconciles that is not visible here -- confirm before relying
        # on row-name lookups in the result.
        row_names.append(k[0] if len(k) == 1 else k)

        # Emit one output row per requested property: key values, then the
        # property's column name, then its value in this source row.
        for f in properties:
            new_rows.append(Row(left_row + [f, row[f]], new_column_names))

    # Key columns keep the types they have in the source table.
    key_column_types = [self._column_types[self._column_names.index(name)] for name in key]

    if column_types is None or isinstance(column_types, TypeTester):
        tester = TypeTester() if column_types is None else column_types

        # Force the key columns to their existing types; anything the tester
        # already forces takes precedence over these defaults.
        force_update = dict(zip(key, key_column_types))
        force_update.update(tester._force)

        # Install the merged mapping only for the duration of the run so a
        # caller-supplied tester is not left permanently altered (otherwise
        # stale key types would leak into its later uses).
        original_force = tester._force
        tester._force = force_update

        try:
            new_column_types = tester.run(new_rows, new_column_names)
        finally:
            tester._force = original_force
    else:
        # Explicit types were given for the property and value columns.
        new_column_types = key_column_types + list(column_types)

    return Table(new_rows, new_column_names, new_column_types, row_names=row_names)