dbt-selly/dbt-env/lib/python3.8/site-packages/agate/columns.py

139 lines
3.6 KiB
Python

#!/usr/bin/env python
"""
This module contains the :class:`Column` class, which defines a "vertical"
array of tabular data. Whereas :class:`.Row` instances are independent of their
parent :class:`.Table`, columns depend on knowledge of both their position in
the parent (column name, data type) as well as the rows that contain their data.
"""
import six
from agate.mapped_sequence import MappedSequence
from agate.utils import NullOrder, memoize
if six.PY3: # pragma: no cover
# pylint: disable=W0622
xrange = range
def null_handler(k):
"""
Key method for sorting nulls correctly.
"""
if k is None:
return NullOrder()
return k
class Column(MappedSequence):
"""
Proxy access to column data. Instances of :class:`Column` should
not be constructed directly. They are created by :class:`.Table`
instances and are unique to them.
Columns are implemented as subclass of :class:`.MappedSequence`. They
deviate from the underlying implementation in that loading of their data
is deferred until it is needed.
:param name:
The name of this column.
:param data_type:
An instance of :class:`.DataType`.
:param rows:
A :class:`.MappedSequence` that contains the :class:`.Row` instances
containing the data for this column.
:param row_names:
An optional list of row names (keys) for this column.
"""
__slots__ = ['_index', '_name', '_data_type', '_rows', '_row_names']
def __init__(self, index, name, data_type, rows, row_names=None):
self._index = index
self._name = name
self._data_type = data_type
self._rows = rows
self._keys = row_names
def __getstate__(self):
"""
Return state values to be pickled.
This is necessary on Python2.7 when using :code:`__slots__`.
"""
return {
'_index': self._index,
'_name': self._name,
'_data_type': self._data_type,
'_rows': self._rows,
'_keys': self._keys
}
def __setstate__(self, data):
"""
Restore pickled state.
This is necessary on Python2.7 when using :code:`__slots__`.
"""
self._index = data['_index']
self._name = data['_name']
self._data_type = data['_data_type']
self._rows = data['_rows']
self._keys = data['_keys']
@property
def index(self):
"""
This column's index.
"""
return self._index
@property
def name(self):
"""
This column's name.
"""
return self._name
@property
def data_type(self):
"""
This column's data type.
"""
return self._data_type
@memoize
def values(self):
"""
Get the values in this column, as a tuple.
"""
return tuple(row[self._index] for row in self._rows)
@memoize
def values_distinct(self):
"""
Get the distinct values in this column, as a tuple.
"""
return tuple(set(self.values()))
@memoize
def values_without_nulls(self):
"""
Get the values in this column with any null values removed.
"""
return tuple(d for d in self.values() if d is not None)
@memoize
def values_sorted(self):
"""
Get the values in this column sorted.
"""
return sorted(self.values(), key=null_handler)
@memoize
def values_without_nulls_sorted(self):
"""
Get the values in this column with any null values removed and sorted.
"""
return sorted(self.values_without_nulls(), key=null_handler)