205 lines
6.9 KiB
Python
205 lines
6.9 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
The :class:`.TableSet` class collects a set of related tables in a single data
|
|
structure. The most common way of creating a :class:`.TableSet` is using the
|
|
:meth:`.Table.group_by` method, which is similar to SQL's ``GROUP BY`` keyword.
|
|
The resulting set of tables will all have an identical column structure.
|
|
|
|
:class:`.TableSet` functions as a dictionary. Individual tables in the set can
|
|
be accessed by using their name as a key. If the table set was created using
|
|
:meth:`.Table.group_by` then the names of the tables will be the grouping
|
|
factors found in the original data.
|
|
|
|
:class:`.TableSet` replicates the majority of the features of :class:`.Table`.
|
|
When methods such as :meth:`.TableSet.select`, :meth:`.TableSet.where` or
|
|
:meth:`.TableSet.order_by` are used, the operation is applied to *each* table
|
|
in the set and the result is a new :class:`TableSet` instance made up of
|
|
entirely new :class:`.Table` instances.
|
|
|
|
:class:`.TableSet` instances can also contain other TableSets. This means you
|
|
can chain calls to :meth:`.Table.group_by` and :meth:`.TableSet.group_by`
|
|
and end up with data grouped across multiple dimensions.
|
|
:meth:`.TableSet.aggregate` on nested TableSets will then group across multiple
|
|
dimensions.
|
|
"""
|
|
|
|
import six
|
|
from six.moves import zip_longest
|
|
|
|
from agate.data_types import Text
|
|
from agate.mapped_sequence import MappedSequence
|
|
from agate.table import Table
|
|
|
|
|
|
class TableSet(MappedSequence):
    """
    A group of named tables with identical column definitions. Supports
    (almost) all the same operations as :class:`.Table`. When executed on a
    :class:`TableSet`, any operation that would have returned a new
    :class:`.Table` instead returns a new :class:`TableSet`. Any operation
    that would have returned a single value instead returns a dictionary of
    values.

    TableSet is implemented as a subclass of :class:`.MappedSequence`.

    :param tables:
        A sequence of :class:`Table` instances.
    :param keys:
        A sequence of keys corresponding to the tables. These may be any type
        except :class:`int`.
    :param key_name:
        A name that describes the grouping properties. Used as the column
        header when the groups are aggregated. Defaults to the column name that
        was grouped on.
    :param key_type:
        An instance of some subclass of :class:`.DataType`. If not provided it
        will default to a :class:`.Text`.
    :param _is_fork:
        Used internally to skip certain validation steps when data
        is propagated from an existing tableset.
    """
    def __init__(self, tables, keys, key_name='group', key_type=None, _is_fork=False):
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type if key_type else Text()

        # The first member may itself be a TableSet (nested grouping), so
        # descend through first members until something that is not a
        # TableSet is reached; its columns describe the whole set.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        # Forked sets are built from an existing TableSet, so the column
        # consistency checks are skipped for them.
        if not _is_fork:
            for member in tables:
                # zip_longest pads the shorter side, so a differing number
                # of columns also fails the isinstance comparison.
                types_agree = all(
                    isinstance(actual, type(expected))
                    for actual, expected in zip_longest(member.column_types, self._column_types)
                )

                if not types_agree:
                    raise ValueError('Not all tables have the same column types!')

                if member.column_names != self._column_names:
                    raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)

    def __str__(self):
        """
        Return the text written by :meth:`TableSet.print_structure`.
        """
        buffer = six.StringIO()
        self.print_structure(output=buffer)

        return buffer.getvalue()

    @property
    def key_name(self):
        """
        Get the name of the key this TableSet is grouped by. (If created using
        :meth:`.Table.group_by` then this is the original column name.)
        """
        return self._key_name

    @property
    def key_type(self):
        """
        Get the :class:`.DataType` this TableSet is grouped by. (If created
        using :meth:`.Table.group_by` then this is the original column type.)
        """
        return self._key_type

    @property
    def column_types(self):
        """
        Get the column types shared by the tables in this :class:`.TableSet`,
        as read from its sample table.

        :returns:
            A :class:`tuple` of :class:`.DataType` instances.
        """
        return self._column_types

    @property
    def column_names(self):
        """
        Get the column names shared by the tables in this :class:`TableSet`,
        as read from its sample table.

        :returns:
            A :class:`tuple` of strings.
        """
        return self._column_names

    def _fork(self, tables, keys, key_name=None, key_type=None):
        """
        Create a new :class:`.TableSet` using the metadata from this one.

        ``key_name`` and ``key_type`` fall back to this set's own values when
        they are :code:`None`. The new set is constructed with
        ``_is_fork=True``, so column validation is not repeated.

        This method is used internally by functions like
        :meth:`.TableSet.having`.
        """
        return TableSet(
            tables,
            keys,
            self._key_name if key_name is None else key_name,
            self._key_type if key_type is None else key_type,
            _is_fork=True
        )

    def _proxy(self, method_name, *args, **kwargs):
        """
        Calls a method on each table in this :class:`.TableSet`.

        The method named ``method_name`` is looked up on every member and
        invoked with ``*args`` and ``**kwargs``; the results are collected,
        in order, into a forked :class:`.TableSet` that keeps this set's keys.
        """
        results = [
            getattr(member, method_name)(*args, **kwargs)
            for _, member in self.items()
        ]

        return self._fork(results, self.keys())
|
|
|
|
|
|
from agate.tableset.aggregate import aggregate
|
|
from agate.tableset.bar_chart import bar_chart
|
|
from agate.tableset.column_chart import column_chart
|
|
from agate.tableset.from_csv import from_csv
|
|
from agate.tableset.from_json import from_json
|
|
from agate.tableset.having import having
|
|
from agate.tableset.line_chart import line_chart
|
|
from agate.tableset.merge import merge
|
|
from agate.tableset.print_structure import print_structure
|
|
from agate.tableset.proxy_methods import bins, compute, denormalize, distinct, \
|
|
exclude, find, group_by, homogenize, join, limit, normalize, order_by, \
|
|
pivot, select, where
|
|
from agate.tableset.scatterplot import scatterplot
|
|
from agate.tableset.to_csv import to_csv
|
|
from agate.tableset.to_json import to_json
|
|
|
|
# The functions imported above are attached to TableSet as attributes here,
# grouped by the module each one was imported from.

# One function per agate.tableset.<name> module.
TableSet.aggregate = aggregate
TableSet.having = having
TableSet.merge = merge
TableSet.print_structure = print_structure

# agate.tableset.from_* / agate.tableset.to_* modules.
TableSet.from_csv = from_csv
TableSet.from_json = from_json
TableSet.to_csv = to_csv
TableSet.to_json = to_json

# agate.tableset.*_chart and agate.tableset.scatterplot modules.
TableSet.bar_chart = bar_chart
TableSet.column_chart = column_chart
TableSet.line_chart = line_chart
TableSet.scatterplot = scatterplot

# Everything imported from agate.tableset.proxy_methods.
TableSet.bins = bins
TableSet.compute = compute
TableSet.denormalize = denormalize
TableSet.distinct = distinct
TableSet.exclude = exclude
TableSet.find = find
TableSet.group_by = group_by
TableSet.homogenize = homogenize
TableSet.join = join
TableSet.limit = limit
TableSet.normalize = normalize
TableSet.order_by = order_by
TableSet.pivot = pivot
TableSet.select = select
TableSet.where = where
|