Merge pull request #1041 from quantopian/fix-buyback-auth-tests-in-pipeline

Fix buyback auth tests in pipeline
This commit is contained in:
Maya Tydykov
2016-03-14 11:20:49 -04:00
5 changed files with 278 additions and 297 deletions
+13 -125
View File
@@ -1,7 +1,8 @@
"""
Base class for Pipeline API unittests.
"""
from functools import wraps, partial
import abc
from functools import wraps
from unittest import TestCase
from nose_parameterized import parameterized
@@ -13,7 +14,6 @@ from pandas.util.testing import assert_series_equal
from six import iteritems
from zipline.pipeline import Pipeline
from zipline.pipeline.common import TS_FIELD_NAME
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.term import AssetExists
from zipline.testing import (
@@ -148,43 +148,18 @@ class BasePipelineTestCase(TestCase):
return arange(prod(shape), dtype=dtype).reshape(shape)
DATE_FIELD_NAME = "event_date"
class EventLoaderCommonMixin(object):
sids = A, B, C, D, E = range(5)
equity_info = make_simple_equity_info(
sids,
start_date=pd.Timestamp('2013-01-01', tz='UTC'),
end_date=pd.Timestamp('2015-01-01', tz='UTC'),
)
@classmethod
def get_sids(cls):
    """
    Return the sids that the concrete test case exercises.

    Concrete test cases must override this as a classmethod. It is invoked
    as ``cls.get_sids()`` / ``self.get_sids()``, so the base implementation
    is a classmethod too: a property here would make that call fail with a
    ``TypeError`` instead of the intended ``NotImplementedError``.

    Raises
    ------
    NotImplementedError
        Always, unless overridden.
    """
    raise NotImplementedError('get_sids')
event_dates_cases = [
# K1--K2--E1--E2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
DATE_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
}),
# K1--K2--E2--E1.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
DATE_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
}),
# K1--E1--K2--E2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
DATE_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
}),
# K1 == K2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
DATE_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
}),
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime([]),
DATE_FIELD_NAME: pd.to_datetime([])
})
]
@classmethod
def get_equity_info(cls):
    """
    Build the equity metadata for every sid returned by ``cls.get_sids()``.

    All sids share one lifetime: 2013-01-01 through 2015-01-01 (UTC).
    """
    sids = cls.get_sids()
    lifetime_start = pd.Timestamp('2013-01-01', tz='UTC')
    lifetime_end = pd.Timestamp('2015-01-01', tz='UTC')
    return make_simple_equity_info(
        sids,
        start_date=lifetime_start,
        end_date=lifetime_end,
    )
def zip_with_floats(self, dates, flts):
    """
    Pair ``flts`` with ``dates`` as a float Series.

    Parameters
    ----------
    dates : index-like
        Used as the index of the result.
    flts : sequence
        Values to place against ``dates``; cast to float after construction.

    Returns
    -------
    pd.Series
        ``flts`` indexed by ``dates`` with a float dtype.
    """
    paired = pd.Series(flts, index=dates)
    return paired.astype('float')
@@ -217,93 +192,6 @@ class EventLoaderCommonMixin(object):
loader = self.loader_type(*self.loader_args(dates))
return SimplePipelineEngine(lambda _: loader, dates, self.finder)
def get_expected_next_event_dates(self, dates):
    """
    Build the expected "next event date" frame for sids 0 through 4.

    Each column is a run-length encoding: a label ('NaT' or an event date
    string) repeated ``self.num_days_between(dates, first, last)`` times,
    with the runs concatenated and handed to ``self.zip_with_dates``.
    A ``None`` bound is passed through to ``num_days_between`` unchanged
    (presumably meaning "open-ended" -- confirm against that helper).

    Parameters
    ----------
    dates : index-like
        The dates the expected frame is indexed by.

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates`` with one column per sid (0-4); sid 4 has no
        events, so its column is 'NaT' for every date.
    """
    days_in = partial(self.num_days_between, dates)
    as_date_series = partial(self.zip_with_dates, dates)

    # (sid, runs) pairs; every run is (label, first day, last day).
    schedule = (
        (0, (('NaT', None, '2014-01-04'),
             ('2014-01-15', '2014-01-05', '2014-01-15'),
             ('2014-01-20', '2014-01-16', '2014-01-20'),
             ('NaT', '2014-01-21', None))),
        (1, (('NaT', None, '2014-01-04'),
             ('2014-01-20', '2014-01-05', '2014-01-09'),
             ('2014-01-15', '2014-01-10', '2014-01-15'),
             ('2014-01-20', '2014-01-16', '2014-01-20'),
             ('NaT', '2014-01-21', None))),
        (2, (('NaT', None, '2014-01-04'),
             ('2014-01-10', '2014-01-05', '2014-01-10'),
             ('NaT', '2014-01-11', '2014-01-14'),
             ('2014-01-20', '2014-01-15', '2014-01-20'),
             ('NaT', '2014-01-21', None))),
        (3, (('NaT', None, '2014-01-04'),
             ('2014-01-10', '2014-01-05', '2014-01-10'),
             ('2014-01-15', '2014-01-11', '2014-01-15'),
             ('NaT', '2014-01-16', None))),
    )

    expected = {}
    for sid, runs in schedule:
        labels = []
        for label, first_day, last_day in runs:
            labels.extend([label] * days_in(first_day, last_day))
        expected[sid] = as_date_series(labels)
    # The last sid never has an event.
    expected[4] = as_date_series(['NaT'] * len(dates))
    return pd.DataFrame(expected, index=dates)
def get_expected_previous_event_dates(self, dates):
    """
    Build the expected "previous event date" frame for sids 0 through 4.

    Each column is a run-length encoding: a label ('NaT' or an event date
    string) repeated ``self.num_days_between(dates, first, last)`` times,
    with the runs concatenated and handed to ``self.zip_with_dates``.
    A ``None`` bound is passed through to ``num_days_between`` unchanged
    (presumably meaning "open-ended" -- confirm against that helper).

    Parameters
    ----------
    dates : index-like
        The dates the expected frame is indexed by.

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates`` with one column per sid (0-4); sid 4 has no
        events, so its column is 'NaT' for every date.
    """
    days_in = partial(self.num_days_between, dates)
    as_date_series = partial(self.zip_with_dates, dates)

    # (sid, runs) pairs; every run is (label, first day, last day).
    schedule = (
        (0, (('NaT', None, '2014-01-14'),
             ('2014-01-15', '2014-01-15', '2014-01-19'),
             ('2014-01-20', '2014-01-20', None))),
        (1, (('NaT', None, '2014-01-14'),
             ('2014-01-15', '2014-01-15', '2014-01-19'),
             ('2014-01-20', '2014-01-20', None))),
        (2, (('NaT', None, '2014-01-09'),
             ('2014-01-10', '2014-01-10', '2014-01-19'),
             ('2014-01-20', '2014-01-20', None))),
        (3, (('NaT', None, '2014-01-09'),
             ('2014-01-10', '2014-01-10', '2014-01-14'),
             ('2014-01-15', '2014-01-15', None))),
    )

    expected = {}
    for sid, runs in schedule:
        labels = []
        for label, first_day, last_day in runs:
            labels.extend([label] * days_in(first_day, last_day))
        expected[sid] = as_date_series(labels)
    # The last sid never has an event.
    expected[4] = as_date_series(['NaT'] * len(dates))
    return pd.DataFrame(expected, index=dates)
@staticmethod
def _compute_busday_offsets(announcement_dates):
"""
@@ -364,7 +252,7 @@ class EventLoaderCommonMixin(object):
end_date=dates[-1],
)
for sid in self.sids:
for sid in self.get_sids():
for col_name in self.cols.keys():
assert_series_equal(result[col_name].xs(sid, level=1),
self.cols[col_name][sid],
+91 -90
View File
@@ -7,8 +7,10 @@ from unittest import TestCase
import blaze as bz
from blaze.compute.core import swap_resources_into_scope
from contextlib2 import ExitStack
import itertools
import pandas as pd
from six import iteritems
from tests.pipeline.base import EventLoaderCommonMixin
from zipline.pipeline.common import(
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
@@ -38,51 +40,67 @@ from zipline.pipeline.loaders.blaze import (
BlazeShareBuybackAuthorizationsLoader,
)
from zipline.testing import tmp_asset_finder
from .base import EventLoaderCommonMixin, DATE_FIELD_NAME
date_intervals = [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
['2014-01-10', None]]
buyback_authorizations = [
# K1--K2--A1--A2.
buyback_authorizations_cases = [
pd.DataFrame({
SHARE_COUNT_FIELD_NAME: [1, 15],
CASH_FIELD_NAME: [10, 20]
}),
# K1--K2--A2--A1.
pd.DataFrame({
SHARE_COUNT_FIELD_NAME: [7, 13],
CASH_FIELD_NAME: [10, 22]
}),
# K1--A1--K2--A2.
pd.DataFrame({
SHARE_COUNT_FIELD_NAME: [3, 1],
CASH_FIELD_NAME: [4, 7]
}),
# K1 == K2.
pd.DataFrame({
SHARE_COUNT_FIELD_NAME: [6, 23],
CASH_FIELD_NAME: [1, 2]
CASH_FIELD_NAME: [10, 20],
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
BUYBACK_ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-04',
'2014-01-09'])
}),
pd.DataFrame(
columns=[SHARE_COUNT_FIELD_NAME,
CASH_FIELD_NAME],
CASH_FIELD_NAME,
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
TS_FIELD_NAME],
dtype='datetime64[ns]'
),
]
def create_buyback_auth_tst_frame(cases, field_to_drop):
    """
    Assemble the per-sid buyback authorization fixture.

    For the case at position ``sid``, the DATE_FIELD_NAME column is renamed
    to BUYBACK_ANNOUNCEMENT_FIELD_NAME, the columns of
    ``buyback_authorizations[sid]`` are appended, and ``field_to_drop`` is
    removed from the combined columns.

    Parameters
    ----------
    cases : iterable of pd.DataFrame
        Event-date frames, one per sid, in sid order.
    field_to_drop : str
        Column to remove from every combined frame.

    Returns
    -------
    dict[int -> pd.DataFrame]
        Mapping from sid (position in ``cases``) to its combined frame.
    """
    frames_by_sid = {}
    for sid, case in enumerate(cases):
        renamed = case.rename(
            columns={DATE_FIELD_NAME: BUYBACK_ANNOUNCEMENT_FIELD_NAME}
        )
        combined = pd.concat([renamed, buyback_authorizations[sid]], axis=1)
        frames_by_sid[sid] = combined.drop(field_to_drop, 1)
    return frames_by_sid
def get_values_for_date_ranges(zip_with_floats_dates,
                               num_days_between_dates,
                               vals_for_date_intervals):
    """
    Repeat each value across its date interval and zip the result.

    The i-th value is repeated once per day that
    ``num_days_between_dates`` reports for the i-th entry of the
    module-level ``date_intervals``.

    Parameters
    ----------
    zip_with_floats_dates : callable
        Receives the flat list of repeated values; its result is returned.
    num_days_between_dates : callable
        Called with the unpacked bounds of each interval; returns a count.
    vals_for_date_intervals : sequence
        One value per entry of ``date_intervals``, in the same order.
    """
    repeated = []
    for position, value in enumerate(vals_for_date_intervals):
        day_count = num_days_between_dates(*date_intervals[position])
        repeated.extend([value] * day_count)
    return zip_with_floats_dates(repeated)
def get_expected_previous_values(zip_with_floats_dates,
                                 num_days_between_dates,
                                 dates,
                                 vals_for_date_intervals):
    """
    Build the expected "previous value" frame for sids 0 and 1.

    Column 0 is filled from ``vals_for_date_intervals`` via
    ``get_values_for_date_ranges``; column 1 is 'NaN' for every date.

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates`` with columns 0 and 1.
    """
    with_events = get_values_for_date_ranges(
        zip_with_floats_dates,
        num_days_between_dates,
        vals_for_date_intervals,
    )
    without_events = zip_with_floats_dates(['NaN'] * len(dates))
    return pd.DataFrame({0: with_events, 1: without_events}, index=dates)
def get_expected_previous_dates(zip_with_dates_for_dates,
                                num_days_between_for_dates,
                                dates):
    """
    Build the expected "previous announcement date" frame for sids 0 and 1.

    Column 0 is 'NaT' up to 2014-01-04, then '2014-01-04' for 2014-01-05
    through 2014-01-09, then '2014-01-09' from 2014-01-10 onward. Column 1
    is 'NaT' for every date.

    Parameters
    ----------
    zip_with_dates_for_dates : callable
        Turns a list of date labels into a column of the result.
    num_days_between_for_dates : callable
        Called as ``f(first, last)``; returns how many times to repeat a
        label for that range.
    dates : sized
        Only its length is used, to size the all-'NaT' column.
    """
    unknown_days = num_days_between_for_dates(None, '2014-01-04')
    first_event_days = num_days_between_for_dates('2014-01-05', '2014-01-09')
    second_event_days = num_days_between_for_dates('2014-01-10', None)

    labels = ['NaT'] * unknown_days
    labels = labels + ['2014-01-04'] * first_event_days
    labels = labels + ['2014-01-09'] * second_event_days

    with_events = zip_with_dates_for_dates(labels)
    without_events = zip_with_dates_for_dates(['NaT'] * len(dates))
    return pd.DataFrame({0: with_events, 1: without_events})
class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
@@ -98,15 +116,21 @@ class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
BusinessDaysSinceCashBuybackAuth(),
}
@classmethod
def get_sids(cls):
    """Return the sids this test case covers: 0 and 1."""
    sid_count = 2
    return range(sid_count)
@classmethod
def setUpClass(cls):
cls._cleanup_stack = stack = ExitStack()
cls.finder = stack.enter_context(
tmp_asset_finder(equities=cls.equity_info),
tmp_asset_finder(equities=cls.get_equity_info()),
)
cls.cols = {}
cls.dataset = create_buyback_auth_tst_frame(cls.event_dates_cases,
SHARE_COUNT_FIELD_NAME)
cls.dataset = {sid:
frame.drop(SHARE_COUNT_FIELD_NAME, axis=1)
for sid, frame
in enumerate(buyback_authorizations_cases)}
cls.loader_type = CashBuybackAuthorizationsLoader
@classmethod
@@ -116,31 +140,17 @@ class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
def setup(self, dates):
zip_with_floats_dates = partial(self.zip_with_floats, dates)
num_days_between_dates = partial(self.num_days_between, dates)
_expected_previous_cash = pd.DataFrame({
0: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-14') +
[10] * num_days_between_dates('2014-01-15', '2014-01-19') +
[20] * num_days_between_dates('2014-01-20', None)
),
1: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-14') +
[22] * num_days_between_dates('2014-01-15', '2014-01-19') +
[10] * num_days_between_dates('2014-01-20', None)
),
2: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-09') +
[4] * num_days_between_dates('2014-01-10', '2014-01-19') +
[7] * num_days_between_dates('2014-01-20', None)
),
3: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-09') +
[1] * num_days_between_dates('2014-01-10', '2014-01-14') +
[2] * num_days_between_dates('2014-01-15', None)
),
4: zip_with_floats_dates(['NaN'] * len(dates)),
}, index=dates)
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] = \
self.get_expected_previous_event_dates(dates)
num_days_between_for_dates = partial(self.num_days_between, dates)
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
_expected_previous_cash = get_expected_previous_values(
zip_with_floats_dates, num_days_between_dates, dates,
['NaN', 10, 20]
)
self.cols[
PREVIOUS_BUYBACK_ANNOUNCEMENT
] = get_expected_previous_dates(zip_with_dates_for_dates,
num_days_between_for_dates,
dates)
self.cols[PREVIOUS_BUYBACK_CASH] = _expected_previous_cash
self.cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
@@ -160,15 +170,21 @@ class ShareBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
BusinessDaysSinceShareBuybackAuth(),
}
@classmethod
def get_sids(cls):
    """Return the sids this test case covers: 0 and 1."""
    first_sid, stop = 0, 2
    return range(first_sid, stop)
@classmethod
def setUpClass(cls):
cls._cleanup_stack = stack = ExitStack()
cls.finder = stack.enter_context(
tmp_asset_finder(equities=cls.equity_info),
tmp_asset_finder(equities=cls.get_equity_info()),
)
cls.cols = {}
cls.dataset = create_buyback_auth_tst_frame(cls.event_dates_cases,
CASH_FIELD_NAME)
cls.dataset = {sid:
frame.drop(CASH_FIELD_NAME, axis=1)
for sid, frame
in enumerate(buyback_authorizations_cases)}
cls.loader_type = ShareBuybackAuthorizationsLoader
@classmethod
@@ -178,34 +194,19 @@ class ShareBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
def setup(self, dates):
zip_with_floats_dates = partial(self.zip_with_floats, dates)
num_days_between_dates = partial(self.num_days_between, dates)
_expected_previous_buyback_share_count = pd.DataFrame({
0: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-14') +
[1] * num_days_between_dates('2014-01-15', '2014-01-19') +
[15] * num_days_between_dates('2014-01-20', None)
),
1: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-14') +
[13] * num_days_between_dates('2014-01-15', '2014-01-19') +
[7] * num_days_between_dates('2014-01-20', None)
),
2: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-09') +
[3] * num_days_between_dates('2014-01-10', '2014-01-19') +
[1] * num_days_between_dates('2014-01-20', None)
),
3: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-09') +
[6] * num_days_between_dates('2014-01-10', '2014-01-14') +
[23] * num_days_between_dates('2014-01-15', None)
),
4: zip_with_floats_dates(['NaN'] * len(dates)),
}, index=dates)
num_days_between_for_dates = partial(self.num_days_between, dates)
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
self.cols[
PREVIOUS_BUYBACK_SHARE_COUNT
] = _expected_previous_buyback_share_count
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] = \
self.get_expected_previous_event_dates(dates)
] = get_expected_previous_values(zip_with_floats_dates,
num_days_between_dates, dates,
['NaN', 1, 15])
self.cols[
PREVIOUS_BUYBACK_ANNOUNCEMENT
] = get_expected_previous_dates(zip_with_dates_for_dates,
num_days_between_for_dates,
dates)
self.cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
)
+125 -15
View File
@@ -1,6 +1,7 @@
"""
Tests for the reference loader for EarningsCalendar.
"""
from functools import partial
from unittest import TestCase
import blaze as bz
@@ -8,6 +9,7 @@ from blaze.compute.core import swap_resources_into_scope
from contextlib2 import ExitStack
import pandas as pd
from six import iteritems
from tests.pipeline.base import EventLoaderCommonMixin
from zipline.pipeline.common import (
ANNOUNCEMENT_FIELD_NAME,
@@ -27,11 +29,36 @@ from zipline.pipeline.loaders.earnings import EarningsCalendarLoader
from zipline.pipeline.loaders.blaze import (
BlazeEarningsCalendarLoader,
)
from zipline.testing import (
make_simple_equity_info,
tmp_asset_finder,
)
from .base import EventLoaderCommonMixin, DATE_FIELD_NAME
from zipline.testing import tmp_asset_finder
# Event fixtures for the earnings loader tests, one frame per sid: the test
# case's setUpClass enumerates this list, so a frame's position is its sid.
# In the case labels below, Kn is the n-th TS_FIELD_NAME timestamp and An is
# the n-th ANNOUNCEMENT_FIELD_NAME date, listed in chronological order.
earnings_cases = [
# K1--K2--A1--A2.
# Both timestamps precede both announcements; announcements are in order.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
}),
# K1--K2--A2--A1.
# Both timestamps precede both announcements; the later row announces the
# earlier date.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
}),
# K1--A1--K2--A2.
# The first announcement falls before the second timestamp.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
}),
# K1 == K2.
# Two announcements share the same timestamp.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
}),
# No events: an empty frame that still carries both columns.
pd.DataFrame(
columns=[ANNOUNCEMENT_FIELD_NAME,
TS_FIELD_NAME],
dtype='datetime64[ns]'
),
]
class EarningsCalendarLoaderTestCase(TestCase, EventLoaderCommonMixin):
@@ -45,25 +72,108 @@ class EarningsCalendarLoaderTestCase(TestCase, EventLoaderCommonMixin):
DAYS_TO_NEXT: BusinessDaysUntilNextEarnings(),
}
@classmethod
def get_sids(cls):
    """Return the sids this test case covers: 0 through 4."""
    sid_count = 5
    return range(sid_count)
@classmethod
def setUpClass(cls):
cls._cleanup_stack = stack = ExitStack()
equity_info = make_simple_equity_info(
cls.sids,
start_date=pd.Timestamp('2013-01-01', tz='UTC'),
end_date=pd.Timestamp('2015-01-01', tz='UTC'),
)
cls.cols = {}
cls.dataset = {sid: df for sid, df in enumerate(
case.rename(
columns={DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME}
) for case in cls.event_dates_cases)}
cls.dataset = {sid: df for sid, df in enumerate(earnings_cases)}
cls.finder = stack.enter_context(
tmp_asset_finder(equities=equity_info),
tmp_asset_finder(equities=cls.get_equity_info()),
)
cls.loader_type = EarningsCalendarLoader
def get_expected_next_event_dates(self, dates):
    """
    Build the expected "next announcement date" frame for sids 0 through 4.

    Every column is described as a sequence of runs: a label ('NaT' or a
    date string) repeated ``self.num_days_between(dates, first, last)``
    times. The concatenated labels are converted by
    ``self.zip_with_dates``. ``None`` bounds are forwarded as-is to
    ``num_days_between`` (presumably open-ended -- confirm in that helper).

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates``, one column per sid; sid 4 is all 'NaT'.
    """
    run_length = partial(self.num_days_between, dates)
    to_date_column = partial(self.zip_with_dates, dates)

    def column(*runs):
        # Expand (label, first day, last day) runs into one flat label list.
        labels = []
        for label, first_day, last_day in runs:
            labels += [label] * run_length(first_day, last_day)
        return to_date_column(labels)

    return pd.DataFrame({
        0: column(
            ('NaT', None, '2014-01-04'),
            ('2014-01-15', '2014-01-05', '2014-01-15'),
            ('2014-01-20', '2014-01-16', '2014-01-20'),
            ('NaT', '2014-01-21', None),
        ),
        1: column(
            ('NaT', None, '2014-01-04'),
            ('2014-01-20', '2014-01-05', '2014-01-09'),
            ('2014-01-15', '2014-01-10', '2014-01-15'),
            ('2014-01-20', '2014-01-16', '2014-01-20'),
            ('NaT', '2014-01-21', None),
        ),
        2: column(
            ('NaT', None, '2014-01-04'),
            ('2014-01-10', '2014-01-05', '2014-01-10'),
            ('NaT', '2014-01-11', '2014-01-14'),
            ('2014-01-20', '2014-01-15', '2014-01-20'),
            ('NaT', '2014-01-21', None),
        ),
        3: column(
            ('NaT', None, '2014-01-04'),
            ('2014-01-10', '2014-01-05', '2014-01-10'),
            ('2014-01-15', '2014-01-11', '2014-01-15'),
            ('NaT', '2014-01-16', None),
        ),
        # No events for the last sid.
        4: to_date_column(['NaT'] * len(dates)),
    }, index=dates)
def get_expected_previous_event_dates(self, dates):
    """
    Build the expected "previous announcement date" frame for sids 0-4.

    Every column is described as a sequence of runs: a label ('NaT' or a
    date string) repeated ``self.num_days_between(dates, first, last)``
    times. The concatenated labels are converted by
    ``self.zip_with_dates``. ``None`` bounds are forwarded as-is to
    ``num_days_between`` (presumably open-ended -- confirm in that helper).

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates``, one column per sid; sid 4 is all 'NaT'.
    """
    run_length = partial(self.num_days_between, dates)
    to_date_column = partial(self.zip_with_dates, dates)

    def column(*runs):
        # Expand (label, first day, last day) runs into one flat label list.
        labels = []
        for label, first_day, last_day in runs:
            labels += [label] * run_length(first_day, last_day)
        return to_date_column(labels)

    return pd.DataFrame({
        0: column(
            ('NaT', None, '2014-01-14'),
            ('2014-01-15', '2014-01-15', '2014-01-19'),
            ('2014-01-20', '2014-01-20', None),
        ),
        1: column(
            ('NaT', None, '2014-01-14'),
            ('2014-01-15', '2014-01-15', '2014-01-19'),
            ('2014-01-20', '2014-01-20', None),
        ),
        2: column(
            ('NaT', None, '2014-01-09'),
            ('2014-01-10', '2014-01-10', '2014-01-19'),
            ('2014-01-20', '2014-01-20', None),
        ),
        3: column(
            ('NaT', None, '2014-01-09'),
            ('2014-01-10', '2014-01-10', '2014-01-14'),
            ('2014-01-15', '2014-01-15', None),
        ),
        # No events for the last sid.
        4: to_date_column(['NaT'] * len(dates)),
    }, index=dates)
@classmethod
def tearDownClass(cls):
    """Close the class-level cleanup stack stored on ``cls._cleanup_stack``."""
    cleanup_stack = cls._cleanup_stack
    cleanup_stack.close()
+12 -8
View File
@@ -5,8 +5,9 @@ from toolz import merge
from .base import PipelineLoader
from .frame import DataFrameLoader
from .utils import next_date_frame, previous_date_frame, previous_value
from .utils import previous_event_frame, next_date_frame
from zipline.pipeline.common import TS_FIELD_NAME
from zipline.utils.numpy_utils import NaTD
WRONG_COLS_ERROR = "Expected columns {expected_columns} for sid {sid} but " \
"got columns {resulting_columns}."
@@ -179,10 +180,13 @@ class EventsLoader(PipelineLoader):
event_date_field_name):
return DataFrameLoader(
prev_date_field,
previous_date_frame(
self.all_dates,
previous_event_frame(
self.events_by_sid,
self.all_dates,
NaTD,
'datetime64[ns]',
event_date_field_name,
event_date_field_name
),
adjustments=None,
)
@@ -193,13 +197,13 @@ class EventsLoader(PipelineLoader):
value_field_name):
return DataFrameLoader(
previous_value_field,
previous_value(
self.all_dates,
previous_event_frame(
self.events_by_sid,
event_date_field_name,
value_field_name,
self.all_dates,
previous_value_field.missing_value,
previous_value_field.dtype,
previous_value_field.missing_value
event_date_field_name,
value_field_name
),
adjustments=None,
)
+37 -59
View File
@@ -5,7 +5,7 @@ import pandas as pd
from six import iteritems
from six.moves import zip
from zipline.utils.numpy_utils import NaTns, NaTD
from zipline.utils.numpy_utils import NaTns
def next_date_frame(dates, events_by_sid, event_date_field_name):
@@ -60,70 +60,42 @@ def next_date_frame(dates, events_by_sid, event_date_field_name):
return pd.DataFrame(index=dates, data=cols)
def previous_date_frame(date_index, events_by_sid, event_date_field_name):
def previous_event_frame(events_by_sid,
date_index,
missing_value,
field_dtype,
event_date_field,
previous_return_field):
"""
Make a DataFrame representing simulated next earnings date_index.
Make a DataFrame representing simulated previous dates or values for an
event.
Parameters
----------
date_index : DatetimeIndex.
The index of the returned DataFrame.
events_by_sid : dict[int -> pd.DataFrame]
Dict mapping sids to a DataFrame. The index of the DataFrame
represents the date we learned of the event mapping to the event
data.
event_date_field_name : str
The name of the date field that marks when the event occurred.
Returns
-------
previous_events: pd.DataFrame
A DataFrame where each column is a security from `events_by_sid` where
the values are the dates of the previous event that occurred on the
date of the index. Entries falling before the first date will have
`NaT` as the result in the output.
See Also
--------
next_date_frame
"""
sids = list(events_by_sid)
out = np.full((len(date_index), len(sids)), NaTD, dtype='datetime64[ns]')
d_n = date_index[-1].asm8
for col_idx, sid in enumerate(sids):
# events_by_sid[sid] is Series mapping knowledge_date to actual
# event_date. We don't care about the knowledge date for
# computing previous earnings.
values = events_by_sid[sid][event_date_field_name].values
values = values[values <= d_n]
out[date_index.searchsorted(values), col_idx] = values
frame = pd.DataFrame(out, index=date_index, columns=sids)
frame.ffill(inplace=True)
return frame
def previous_value(date_index, events_by_sid, event_date_field, value_field,
value_field_dtype, missing_value):
"""
Make a DataFrame representing simulated next earnings date_index.
Parameters
----------
date_index : DatetimeIndex.
The index of the returned DataFrame.
events_by_sid : dict[int -> DatetimeIndex]
Dict mapping sids to a series of dates. Each k:v pair of the series
represents the date we learned of the event mapping to the date the
event will occur.
date_index : DatetimeIndex.
The index of the returned DataFrame.
missing_value : any
Data which missing values should be filled with.
field_dtype: any
The dtype of the field for which the previous values are being
retrieved.
event_date_field: str
The name of the date field that marks when the event occurred.
previous_return_field: str
The name of the field for which the previous values are being
retrieved.
Returns
-------
previous_events: pd.DataFrame
A DataFrame where each column is a security from `events_by_sid` where
the values are the dates of the previous event that occurred on the date
of the index. Entries falling before the first date will have `NaT` as
the result in the output.
A DataFrame where each column is a security from `events_by_sid` and
the values are the values for the previous event that occurred on the
date of the index. Entries falling before the first date will have
`missing_value` filled in as the result in the output.
See Also
--------
@@ -133,18 +105,24 @@ def previous_value(date_index, events_by_sid, event_date_field, value_field,
out = np.full(
(len(date_index), len(sids)),
missing_value,
dtype=value_field_dtype
dtype=field_dtype
)
d_n = date_index[-1].asm8
for col_idx, sid in enumerate(sids):
# events_by_sid[sid] is DataFrame mapping knowledge_date to event
# date and value. We don't care about the knowledge date for computing
# previous values.
# events_by_sid[sid] is a DataFrame mapping knowledge_date to event
# date and values.
df = events_by_sid[sid]
df = df[df[event_date_field] <= d_n]
event_date_vals = df[event_date_field].values
# Get knowledge dates corresponding to the values in which we are
# interested
kd_vals = df[df[event_date_field] <= d_n].index.values
# The date at which a previous event is first known is the max of the
# kd and the event date.
index_dates = np.maximum(kd_vals, event_date_vals)
out[
date_index.searchsorted(df[event_date_field].values), col_idx
] = df[value_field]
date_index.searchsorted(index_dates), col_idx
] = df[previous_return_field]
frame = pd.DataFrame(out, index=date_index, columns=sids)
frame.ffill(inplace=True)