#!/usr/bin/env python """ This module contains the :class:`Column` class, which defines a "vertical" array of tabular data. Whereas :class:`.Row` instances are independent of their parent :class:`.Table`, columns depend on knowledge of both their position in the parent (column name, data type) as well as the rows that contain their data. """ import six from agate.mapped_sequence import MappedSequence from agate.utils import NullOrder, memoize if six.PY3: # pragma: no cover # pylint: disable=W0622 xrange = range def null_handler(k): """ Key method for sorting nulls correctly. """ if k is None: return NullOrder() return k class Column(MappedSequence): """ Proxy access to column data. Instances of :class:`Column` should not be constructed directly. They are created by :class:`.Table` instances and are unique to them. Columns are implemented as subclass of :class:`.MappedSequence`. They deviate from the underlying implementation in that loading of their data is deferred until it is needed. :param name: The name of this column. :param data_type: An instance of :class:`.DataType`. :param rows: A :class:`.MappedSequence` that contains the :class:`.Row` instances containing the data for this column. :param row_names: An optional list of row names (keys) for this column. """ __slots__ = ['_index', '_name', '_data_type', '_rows', '_row_names'] def __init__(self, index, name, data_type, rows, row_names=None): self._index = index self._name = name self._data_type = data_type self._rows = rows self._keys = row_names def __getstate__(self): """ Return state values to be pickled. This is necessary on Python2.7 when using :code:`__slots__`. """ return { '_index': self._index, '_name': self._name, '_data_type': self._data_type, '_rows': self._rows, '_keys': self._keys } def __setstate__(self, data): """ Restore pickled state. This is necessary on Python2.7 when using :code:`__slots__`. """ self._index = data['_index'] self._name = data['_name'] self._data_type = data['_data_type'] self._rows = data['_rows'] self._keys = data['_keys'] @property def index(self): """ This column's index. """ return self._index @property def name(self): """ This column's name. """ return self._name @property def data_type(self): """ This column's data type. """ return self._data_type @memoize def values(self): """ Get the values in this column, as a tuple. """ return tuple(row[self._index] for row in self._rows) @memoize def values_distinct(self): """ Get the distinct values in this column, as a tuple. """ return tuple(set(self.values())) @memoize def values_without_nulls(self): """ Get the values in this column with any null values removed. """ return tuple(d for d in self.values() if d is not None) @memoize def values_sorted(self): """ Get the values in this column sorted. """ return sorted(self.values(), key=null_handler) @memoize def values_without_nulls_sorted(self): """ Get the values in this column with any null values removed and sorted. """ return sorted(self.values_without_nulls(), key=null_handler)