Files
catalyst/zipline/data/dataset.py
T
Scott Sanderson ef4f642e62 ENH: Compute engine architecture for FFC API.
This patch lays the groundwork for a compute engine designed to
facilitate construction of factor-based universe screening and portfolio
allocation.  It contains:

A new module, `zipline.modelling`, containing entities that can be used
to express computations as dependency graphs.  Each node in such a graph
is an instance of the base `Term` class, defined in
`zipline.modelling.term`.  Dependency graphs are executed by instances
of `FFCEngine`, defined in `zipline.modelling.engine`.

A new module, `zipline.data.ffc`, containing loaders and dataset
definitions for inputs to the modelling API.

New `TradingAlgorithm` API methods: `add_factor` and `add_filter`.
These methods can only be called from `initialize`, and are used to
inform the algorithm that each day it should compute the given terms.
Computed factor results are made available through a new attribute of
the `data` object in `before_trading_start` and `handle_data`.  Computed
filter results control which assets are available in the factor matrix
on each day.
2015-07-29 12:30:46 -04:00

104 lines
2.4 KiB
Python

"""
dataset.py
"""
from six import (
iteritems,
with_metaclass,
)
from zipline.modelling.term import Term
class Column(object):
    """
    Description of a data column that is not yet attached to any dataset.

    Only the column's dtype is stored here; call `bind` to obtain the
    dataset-specific `BoundColumn`.
    """

    def __init__(self, dtype):
        # Forwarded unchanged to the BoundColumn created by `bind`.
        self.dtype = dtype

    def bind(self, dataset, name):
        """
        Build the BoundColumn for this column on `dataset` under `name`.
        """
        bound = BoundColumn(dtype=self.dtype, dataset=dataset, name=name)
        return bound
class BoundColumn(Term):
    """
    A column that belongs to one specific dataset.

    Instances are produced by `Column.bind`, which supplies the dtype, the
    owning dataset, and the attribute name of the column on that dataset.
    """

    def __new__(cls, dtype, dataset, name):
        # A bound column is constructed with no inputs and a window_length
        # of 0; its domain is read from the owning dataset.
        term_kwargs = dict(
            inputs=(),
            window_length=0,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )
        return super(BoundColumn, cls).__new__(cls, **term_kwargs)

    def _init(self, dataset, name, *args, **kwargs):
        # Keep the binding information, then hand every remaining argument
        # to the parent class's `_init`.
        self._name = name
        self._dataset = dataset
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        # Extend the parent's identity with the dataset and column name.
        parent_identity = super(BoundColumn, cls).static_identity(
            *args, **kwargs
        )
        return (parent_identity, dataset, name)

    @property
    def dataset(self):
        """
        The dataset this column was bound to.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name this column was bound under.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column.

        The dataset's `__name__` and the column name, joined with a dot.
        """
        parts = [self.dataset.__name__, self.name]
        return '.'.join(parts)

    def __repr__(self):
        # Rendered as "<qualname>::<dtype name>".
        template = "{qualname}::{dtype}"
        return template.format(
            qualname=self.qualname,
            dtype=self.dtype.__name__,
        )
class DataSetMeta(type):
    """
    Metaclass for DataSets.

    On class creation, every `Column` found in the class body is replaced
    by the result of binding it to the new class under its attribute name.
    The bound columns are collected and exposed through `columns`.
    """

    def __new__(mcls, name, bases, dict_):
        dataset = type.__new__(mcls, name, bases, dict_)

        bound = []
        for attr, value in iteritems(dict_):
            # Only Column instances from the class body are bound; every
            # other attribute is left as defined.
            if not isinstance(value, Column):
                continue
            column = value.bind(dataset, attr)
            # Replace the abstract Column on the class with its bound form.
            setattr(dataset, attr, column)
            bound.append(column)

        dataset._columns = bound
        return dataset

    @property
    def columns(self):
        """
        The list of columns bound when this class was created.
        """
        return self._columns
class DataSet(with_metaclass(DataSetMeta)):
    """
    Base class for datasets; classes deriving from it are created through
    DataSetMeta.
    """

    # Default domain. BoundColumn reads `dataset.domain` when a column is
    # bound to a dataset.
    domain = None