TST: add test for missing num_quarters and clean up tests

This commit is contained in:
Maya Tydykov
2016-09-13 10:53:00 -04:00
parent 5e1a8224cd
commit 7d75615fd2
+130 -79
View File
@@ -4,7 +4,7 @@ from nose.tools import assert_true
from nose_parameterized import parameterized
import numpy as np
import pandas as pd
from pandas.util.testing import assert_frame_equal
from pandas.util.testing import assert_frame_equal, assert_series_equal
from toolz import merge
from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
@@ -22,6 +22,7 @@ from zipline.pipeline.loaders.blaze.estimates import (
BlazePreviousEstimatesLoader
)
from zipline.pipeline.loaders.quarter_estimates import (
INVALID_NUM_QTRS_MESSAGE,
NextQuartersEstimatesLoader,
normalize_quarters,
PreviousQuartersEstimatesLoader,
@@ -32,7 +33,7 @@ from zipline.testing.fixtures import (
WithTradingSessions,
ZiplineTestCase,
)
from zipline.testing.predicates import assert_equal
from zipline.testing.predicates import assert_equal, assert_raises_regex
from zipline.utils.numpy_utils import datetime64ns_dtype
from zipline.utils.numpy_utils import float64_dtype
@@ -51,6 +52,12 @@ def QuartersEstimates(num_qtr):
return QtrEstimates
def QuartersEstimatesNoNumQuartersAttr(num_qtr):
class QtrEstimates(Estimates):
name = Estimates
return QtrEstimates
class WithEstimates(WithTradingSessions, WithAssetFinder):
"""
ZiplineTestCase mixin providing cls.loader and cls.events as class
@@ -77,8 +84,13 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
def make_loader(cls, events, columns):
raise NotImplementedError('make_loader')
@classmethod
def make_events(cls):
raise NotImplementedError('make_events')
@classmethod
def init_class_fixtures(cls):
cls.events = cls.make_events()
cls.sids = cls.events[SID_FIELD_NAME].unique()
cls.columns = {
Estimates.event_date: 'event_date',
@@ -98,7 +110,7 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
super(WithEstimates, cls).init_class_fixtures()
class WithWrongNumQuarters(WithEstimates):
class WithWrongLoaderDefinition(WithEstimates):
"""
ZiplineTestCase mixin providing cls.events as a class level fixture and
defining a test for all inheritors to use.
@@ -113,18 +125,48 @@ class WithWrongNumQuarters(WithEstimates):
------
test_wrong_num_quarters_passed()
Tests that loading with an incorrect quarter number raises an error.
test_no_num_quarters_attr()
Tests that the loader throws an AssertionError if the dataset being
loaded has no `num_quarters` attribute.
"""
events = pd.DataFrame({SID_FIELD_NAME: 0},
columns=[SID_FIELD_NAME,
TS_FIELD_NAME,
EVENT_DATE_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
'estimate'],
index=[0])
@classmethod
def make_events(cls):
return pd.DataFrame({SID_FIELD_NAME: 0},
columns=[SID_FIELD_NAME,
TS_FIELD_NAME,
EVENT_DATE_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
'estimate'],
index=[0])
def test_wrong_num_quarters_passed(self):
dataset = QuartersEstimates(-1)
bad_dataset1 = QuartersEstimates(-1)
bad_dataset2 = QuartersEstimates(-2)
good_dataset = QuartersEstimates(1)
engine = SimplePipelineEngine(
lambda x: self.loader,
self.trading_days,
self.asset_finder,
)
columns = {c.name + str(dataset.num_quarters): c.latest
for dataset in (bad_dataset1,
bad_dataset2,
good_dataset)
for c in dataset.columns}
p = Pipeline(columns)
with self.assertRaises(ValueError) as e:
engine.run_pipeline(
p,
start_date=self.trading_days[0],
end_date=self.trading_days[-1],
)
assert_raises_regex(e, INVALID_NUM_QTRS_MESSAGE % "-1,-2")
def test_no_num_quarters_attr(self):
dataset = QuartersEstimatesNoNumQuartersAttr(1)
engine = SimplePipelineEngine(
lambda x: self.loader,
self.trading_days,
@@ -132,7 +174,7 @@ class WithWrongNumQuarters(WithEstimates):
)
p = Pipeline({c.name: c.latest for c in dataset.columns})
with self.assertRaises(ValueError):
with self.assertRaises(AttributeError):
engine.run_pipeline(
p,
start_date=self.trading_days[0],
@@ -140,7 +182,7 @@ class WithWrongNumQuarters(WithEstimates):
)
class PreviousWithWrongNumQuarters(WithWrongNumQuarters,
class PreviousWithWrongNumQuarters(WithWrongLoaderDefinition,
ZiplineTestCase):
"""
Tests that previous quarter loader correctly breaks if an incorrect
@@ -151,7 +193,7 @@ class PreviousWithWrongNumQuarters(WithWrongNumQuarters,
return PreviousQuartersEstimatesLoader(events, columns)
class NextWithWrongNumQuarters(WithWrongNumQuarters,
class NextWithWrongNumQuarters(WithWrongLoaderDefinition,
ZiplineTestCase):
"""
Tests that next quarter loader correctly breaks if an incorrect
@@ -162,7 +204,7 @@ class NextWithWrongNumQuarters(WithWrongNumQuarters,
return NextQuartersEstimatesLoader(events, columns)
class WithEstimatesT0(WithEstimates):
class WithEstimatesTimeZero(WithEstimates):
"""
ZiplineTestCase mixin providing cls.events as a class level fixture and
defining a test for all inheritors to use.
@@ -173,9 +215,9 @@ class WithEstimatesT0(WithEstimates):
Generated dynamically in order to test inter-leavings of estimates and
event dates for multiple quarters to make sure that we select the
right immediate 'next' or 'previous' quarter relative to each date -
i.e., the right 't0' on the timeline. We care about selecting the
right 't0' because we use that to calculate which quarter's data needs
to be returned for each day.
i.e., the right 'time zero' on the timeline. We care about selecting
the right 'time zero' because we use that to calculate which quarter's
data needs to be returned for each day.
Methods
-------
@@ -189,8 +231,8 @@ class WithEstimatesT0(WithEstimates):
Tests
------
test_estimates()
Tests that we get the right 't0' value on each day for each sid and
for each column.
Tests that we get the right 'time zero' value on each day for each
sid and for each column.
"""
q1_knowledge_dates = [pd.Timestamp('2015-01-01'),
pd.Timestamp('2015-01-04'),
@@ -212,7 +254,7 @@ class WithEstimatesT0(WithEstimates):
pd.Timestamp('2015-01-31')]
@classmethod
def gen_estimates(cls):
def make_events(cls):
"""
In order to determine which estimate we care about for a particular
sid, we need to look at all estimates that we have for that sid and
@@ -296,8 +338,8 @@ class WithEstimatesT0(WithEstimates):
@classmethod
def init_class_fixtures(cls):
# Must be generated before call to super since super uses `events`.
cls.events = cls.gen_estimates()
super(WithEstimatesT0, cls).init_class_fixtures()
cls.events = cls.make_events()
super(WithEstimatesTimeZero, cls).init_class_fixtures()
def get_expected_estimate(self,
q1_knowledge,
@@ -344,14 +386,17 @@ class WithEstimatesT0(WithEstimates):
q2_knowledge,
comparable_date,
)
if not expected_estimate.empty:
for colname in sid_estimates.columns:
expected_value = expected_estimate[colname]
computed_value = sid_estimates.iloc[i][colname]
assert_equal(expected_value, computed_value)
# Have to explicitly check for None because
# `expected_estimate` might be a DataFrame.
if expected_estimate is not None:
assert_series_equal(
sid_estimates.iloc[i],
expected_estimate[sid_estimates.columns],
check_names=False
)
else:
# There are no eligible 'next' estimates on this day;
# everything should be null.
# There are no eligible 'next'/'previous' estimates on
# this day; everything should be null.
assert_true(sid_estimates.iloc[i].isnull().all())
else:
# We don't know about any estimates on this day;
@@ -359,7 +404,7 @@ class WithEstimatesT0(WithEstimates):
assert_true(sid_estimates.iloc[i].isnull().all())
class NextEstimate(WithEstimatesT0, ZiplineTestCase):
class NextEstimate(WithEstimatesTimeZero, ZiplineTestCase):
@classmethod
def make_loader(cls, events, columns):
return NextQuartersEstimatesLoader(events, columns)
@@ -372,17 +417,17 @@ class NextEstimate(WithEstimatesT0, ZiplineTestCase):
# happening on this simulation date or later, then that's
# the estimate we want to use.
if (not q1_knowledge.empty and
q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
comparable_date):
return q1_knowledge.iloc[-1]
# If q1 has already happened or we don't know about it
# yet and our latest knowledge indicates that q2 hasn't
# happened yet, then that's the estimate we want to use.
elif (not q2_knowledge.empty and
q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
comparable_date):
return q2_knowledge.iloc[-1]
return pd.DataFrame()
return None
class BlazeNextEstimateLoaderTestCase(NextEstimate):
@@ -398,7 +443,7 @@ class BlazeNextEstimateLoaderTestCase(NextEstimate):
)
class PreviousEstimate(WithEstimatesT0, ZiplineTestCase):
class PreviousEstimate(WithEstimatesTimeZero, ZiplineTestCase):
@classmethod
def make_loader(cls, events, columns):
return PreviousQuartersEstimatesLoader(events, columns)
@@ -413,14 +458,14 @@ class PreviousEstimate(WithEstimatesT0, ZiplineTestCase):
# Otherwise, it'll be for q1, as long as the release date
# for q1 has already happened.
if (not q2_knowledge.empty and
q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
comparable_date):
return q2_knowledge.iloc[-1]
elif (not q1_knowledge.empty and
q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
comparable_date):
return q1_knowledge.iloc[-1]
return pd.DataFrame()
return None
class BlazePreviousEstimateLoaderTestCase(PreviousEstimate):
@@ -461,16 +506,19 @@ class WithEstimateMultipleQuarters(WithEstimates):
out and checks that the returned columns contain data for the correct
number of quarters out.
"""
events = pd.DataFrame({
SID_FIELD_NAME: [0] * 2,
TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
pd.Timestamp('2015-01-06')],
EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-20')],
'estimate': [1., 2.],
FISCAL_QUARTER_FIELD_NAME: [1, 2],
FISCAL_YEAR_FIELD_NAME: [2015, 2015]
})
@classmethod
def make_events(cls):
return pd.DataFrame({
SID_FIELD_NAME: [0] * 2,
TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
pd.Timestamp('2015-01-06')],
EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-20')],
'estimate': [1., 2.],
FISCAL_QUARTER_FIELD_NAME: [1, 2],
FISCAL_YEAR_FIELD_NAME: [2015, 2015]
})
@classmethod
def init_class_fixtures(cls):
@@ -635,35 +683,6 @@ class WithEstimateWindows(WithEstimates):
Tests that we overwrite values with the correct quarter's estimate at
the correct dates when we have a factor that asks for a window of data.
"""
sid_0_timeline = pd.DataFrame({
TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
pd.Timestamp('2015-01-07'),
pd.Timestamp('2015-01-05'),
pd.Timestamp('2015-01-17')],
EVENT_DATE_FIELD_NAME:
[pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-20'),
pd.Timestamp('2015-01-20')],
'estimate': [10., 11.] + [20., 21.],
FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
FISCAL_YEAR_FIELD_NAME: 2015,
SID_FIELD_NAME: 0,
})
sid_1_timeline = pd.DataFrame({
TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
pd.Timestamp('2015-01-12'),
pd.Timestamp('2015-01-09'),
pd.Timestamp('2015-01-15')],
EVENT_DATE_FIELD_NAME:
[pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
'estimate': [10., 11.] + [30., 31.],
FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
FISCAL_YEAR_FIELD_NAME: 2015,
SID_FIELD_NAME: 1
})
window_test_start_date = pd.Timestamp('2015-01-05')
critical_dates = [pd.Timestamp('2015-01-09', tz='utc'),
@@ -673,7 +692,39 @@ class WithEstimateWindows(WithEstimates):
# window length, starting date, num quarters out, timeline. Parameterizes
# over number of quarters out.
window_test_cases = list(itertools.product(critical_dates, (1, 2)))
events = pd.concat([sid_0_timeline, sid_1_timeline])
@classmethod
def make_events(cls):
sid_0_timeline = pd.DataFrame({
TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
pd.Timestamp('2015-01-07'),
pd.Timestamp('2015-01-05'),
pd.Timestamp('2015-01-17')],
EVENT_DATE_FIELD_NAME:
[pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-10'),
pd.Timestamp('2015-01-20'),
pd.Timestamp('2015-01-20')],
'estimate': [10., 11.] + [20., 21.],
FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
FISCAL_YEAR_FIELD_NAME: 2015,
SID_FIELD_NAME: 0,
})
sid_1_timeline = pd.DataFrame({
TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
pd.Timestamp('2015-01-12'),
pd.Timestamp('2015-01-09'),
pd.Timestamp('2015-01-15')],
EVENT_DATE_FIELD_NAME:
[pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
'estimate': [10., 11.] + [30., 31.],
FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
FISCAL_YEAR_FIELD_NAME: 2015,
SID_FIELD_NAME: 1
})
return pd.concat([sid_0_timeline, sid_1_timeline])
@classmethod
def make_expected_timelines(cls):