Files
catalyst/tests/modelling/test_factor.py
T
Scott Sanderson 8e59d12daf ENH: Pipeline API
- Adds `zipline.pipeline.Pipeline`, a new user-facing class for managing
  pipelines of Modeling API expressions.

- Adds `attach_pipeline` and `drain_pipeline` as API methods

- Removes `add_factor` and `add_filter` as API methods.  These have been
  replaced two new methods on `Pipeline`: `add`, and `apply_screen`.

- Adding a `Filter` as a column no longer implicitly truncates rows from
  the Modelling API output.  It simply causes a new column, of dtype
  `bool` to show up in the output. Removal of rows is now handled by the
  new `apply_screen` method of `Pipeline`.

- Refactors the existing Modeling API tests to reflect the new APIs.
2015-10-01 18:03:53 -04:00

195 lines
7.5 KiB
Python

"""
Tests for Factor terms.
"""
from numpy import array, eye, nan, ones
from zipline.errors import UnknownRankMethod
from zipline.modelling.factor import Factor
from zipline.modelling.filter import Filter
from zipline.modelling.graph import TermGraph
from zipline.utils.test_utils import check_arrays
from .base import BaseFFCTestCase
class F(Factor):
inputs = ()
window_length = 0
class Mask(Filter):
inputs = ()
window_length = 0
class FactorTestCase(BaseFFCTestCase):
def setUp(self):
super(FactorTestCase, self).setUp()
self.f = F()
def test_bad_input(self):
with self.assertRaises(UnknownRankMethod):
self.f.rank("not a real rank method")
def test_rank_ascending(self):
# Generated with:
# data = arange(25).reshape(5, 5).transpose() % 4
data = array([[0, 1, 2, 3, 0],
[1, 2, 3, 0, 1],
[2, 3, 0, 1, 2],
[3, 0, 1, 2, 3],
[0, 1, 2, 3, 0]], dtype=float)
expected_ranks = {
'ordinal': array([[1., 3., 4., 5., 2.],
[2., 4., 5., 1., 3.],
[3., 5., 1., 2., 4.],
[4., 1., 2., 3., 5.],
[1., 3., 4., 5., 2.]]),
'average': array([[1.5, 3., 4., 5., 1.5],
[2.5, 4., 5., 1., 2.5],
[3.5, 5., 1., 2., 3.5],
[4.5, 1., 2., 3., 4.5],
[1.5, 3., 4., 5., 1.5]]),
'min': array([[1., 3., 4., 5., 1.],
[2., 4., 5., 1., 2.],
[3., 5., 1., 2., 3.],
[4., 1., 2., 3., 4.],
[1., 3., 4., 5., 1.]]),
'max': array([[2., 3., 4., 5., 2.],
[3., 4., 5., 1., 3.],
[4., 5., 1., 2., 4.],
[5., 1., 2., 3., 5.],
[2., 3., 4., 5., 2.]]),
'dense': array([[1., 2., 3., 4., 1.],
[2., 3., 4., 1., 2.],
[3., 4., 1., 2., 3.],
[4., 1., 2., 3., 4.],
[1., 2., 3., 4., 1.]]),
}
def check(terms):
graph = TermGraph(terms)
results = self.run_graph(
graph,
initial_workspace={self.f: data},
mask=self.build_mask(ones((5, 5))),
)
for method in terms:
check_arrays(results[method], expected_ranks[method])
check({meth: self.f.rank(method=meth) for meth in expected_ranks})
check({
meth: self.f.rank(method=meth, ascending=True)
for meth in expected_ranks
})
# Not passing a method should default to ordinal.
check({'ordinal': self.f.rank()})
check({'ordinal': self.f.rank(ascending=True)})
def test_rank_descending(self):
# Generated with:
# data = arange(25).reshape(5, 5).transpose() % 4
data = array([[0, 1, 2, 3, 0],
[1, 2, 3, 0, 1],
[2, 3, 0, 1, 2],
[3, 0, 1, 2, 3],
[0, 1, 2, 3, 0]], dtype=float)
expected_ranks = {
'ordinal': array([[4., 3., 2., 1., 5.],
[3., 2., 1., 5., 4.],
[2., 1., 5., 4., 3.],
[1., 5., 4., 3., 2.],
[4., 3., 2., 1., 5.]]),
'average': array([[4.5, 3., 2., 1., 4.5],
[3.5, 2., 1., 5., 3.5],
[2.5, 1., 5., 4., 2.5],
[1.5, 5., 4., 3., 1.5],
[4.5, 3., 2., 1., 4.5]]),
'min': array([[4., 3., 2., 1., 4.],
[3., 2., 1., 5., 3.],
[2., 1., 5., 4., 2.],
[1., 5., 4., 3., 1.],
[4., 3., 2., 1., 4.]]),
'max': array([[5., 3., 2., 1., 5.],
[4., 2., 1., 5., 4.],
[3., 1., 5., 4., 3.],
[2., 5., 4., 3., 2.],
[5., 3., 2., 1., 5.]]),
'dense': array([[4., 3., 2., 1., 4.],
[3., 2., 1., 4., 3.],
[2., 1., 4., 3., 2.],
[1., 4., 3., 2., 1.],
[4., 3., 2., 1., 4.]]),
}
def check(terms):
graph = TermGraph(terms)
results = self.run_graph(
graph,
initial_workspace={self.f: data},
mask=self.build_mask(ones((5, 5))),
)
for method in terms:
check_arrays(results[method], expected_ranks[method])
check({
meth: self.f.rank(method=meth, ascending=False)
for meth in expected_ranks
})
# Not passing a method should default to ordinal.
check({'ordinal': self.f.rank(ascending=False)})
def test_rank_after_mask(self):
# data = arange(25).reshape(5, 5).transpose() % 4
data = array([[0, 1, 2, 3, 0],
[1, 2, 3, 0, 1],
[2, 3, 0, 1, 2],
[3, 0, 1, 2, 3],
[0, 1, 2, 3, 0]], dtype=float)
mask_data = ~eye(5, dtype=bool)
initial_workspace = {self.f: data, Mask(): mask_data}
graph = TermGraph(
{
"ascending_nomask": self.f.rank(ascending=True),
"ascending_mask": self.f.rank(ascending=True, mask=Mask()),
"descending_nomask": self.f.rank(ascending=False),
"descending_mask": self.f.rank(ascending=False, mask=Mask()),
}
)
expected = {
"ascending_nomask": array([[1., 3., 4., 5., 2.],
[2., 4., 5., 1., 3.],
[3., 5., 1., 2., 4.],
[4., 1., 2., 3., 5.],
[1., 3., 4., 5., 2.]]),
"descending_nomask": array([[4., 3., 2., 1., 5.],
[3., 2., 1., 5., 4.],
[2., 1., 5., 4., 3.],
[1., 5., 4., 3., 2.],
[4., 3., 2., 1., 5.]]),
# Diagonal should be all nans, and anything whose rank was less
# than the diagonal in the unmasked calc should go down by 1.
"ascending_mask": array([[nan, 2., 3., 4., 1.],
[2., nan, 4., 1., 3.],
[2., 4., nan, 1., 3.],
[3., 1., 2., nan, 4.],
[1., 2., 3., 4., nan]]),
"descending_mask": array([[nan, 3., 2., 1., 4.],
[2., nan, 1., 4., 3.],
[2., 1., nan, 4., 3.],
[1., 4., 3., nan, 2.],
[4., 3., 2., 1., nan]]),
}
results = self.run_graph(
graph,
initial_workspace,
mask=self.build_mask(ones((5, 5))),
)
for method in results:
check_arrays(expected[method], results[method])