mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-04 06:42:04 +08:00
Merge pull request #1041 from quantopian/fix-buyback-auth-tests-in-pipeline
Fix buyback auth tests in pipeline
This commit is contained in:
+13
-125
@@ -1,7 +1,8 @@
|
||||
"""
|
||||
Base class for Pipeline API unittests.
|
||||
"""
|
||||
from functools import wraps, partial
|
||||
import abc
|
||||
from functools import wraps
|
||||
from unittest import TestCase
|
||||
|
||||
from nose_parameterized import parameterized
|
||||
@@ -13,7 +14,6 @@ from pandas.util.testing import assert_series_equal
|
||||
from six import iteritems
|
||||
|
||||
from zipline.pipeline import Pipeline
|
||||
from zipline.pipeline.common import TS_FIELD_NAME
|
||||
from zipline.pipeline.engine import SimplePipelineEngine
|
||||
from zipline.pipeline.term import AssetExists
|
||||
from zipline.testing import (
|
||||
@@ -148,43 +148,18 @@ class BasePipelineTestCase(TestCase):
|
||||
return arange(prod(shape), dtype=dtype).reshape(shape)
|
||||
|
||||
|
||||
DATE_FIELD_NAME = "event_date"
|
||||
|
||||
|
||||
class EventLoaderCommonMixin(object):
|
||||
sids = A, B, C, D, E = range(5)
|
||||
equity_info = make_simple_equity_info(
|
||||
sids,
|
||||
start_date=pd.Timestamp('2013-01-01', tz='UTC'),
|
||||
end_date=pd.Timestamp('2015-01-01', tz='UTC'),
|
||||
)
|
||||
@abc.abstractproperty
|
||||
def get_sids(cls):
|
||||
raise NotImplementedError('get_sids')
|
||||
|
||||
event_dates_cases = [
|
||||
# K1--K2--E1--E2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
DATE_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
|
||||
}),
|
||||
# K1--K2--E2--E1.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
DATE_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
|
||||
}),
|
||||
# K1--E1--K2--E2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
|
||||
DATE_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
|
||||
}),
|
||||
# K1 == K2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
|
||||
DATE_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
|
||||
}),
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime([]),
|
||||
DATE_FIELD_NAME: pd.to_datetime([])
|
||||
})
|
||||
]
|
||||
@classmethod
|
||||
def get_equity_info(cls):
|
||||
return make_simple_equity_info(
|
||||
cls.get_sids(),
|
||||
start_date=pd.Timestamp('2013-01-01', tz='UTC'),
|
||||
end_date=pd.Timestamp('2015-01-01', tz='UTC'),
|
||||
)
|
||||
|
||||
def zip_with_floats(self, dates, flts):
|
||||
return pd.Series(flts, index=dates).astype('float')
|
||||
@@ -217,93 +192,6 @@ class EventLoaderCommonMixin(object):
|
||||
loader = self.loader_type(*self.loader_args(dates))
|
||||
return SimplePipelineEngine(lambda _: loader, dates, self.finder)
|
||||
|
||||
def get_expected_next_event_dates(self, dates):
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
return pd.DataFrame({
|
||||
0: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-15') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-16', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
1: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-09') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-10', '2014-01-15') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-16', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
2: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-10'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-10') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-11', '2014-01-14') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-15', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
3: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-10'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-10') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-11', '2014-01-15') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-16', None)
|
||||
),
|
||||
4: zip_with_dates_for_dates(['NaT'] *
|
||||
len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
def get_expected_previous_event_dates(self, dates):
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
return pd.DataFrame({
|
||||
0: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
1: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
2: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-09') +
|
||||
['2014-01-10'] * num_days_between_for_dates('2014-01-10',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
3: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-09') +
|
||||
['2014-01-10'] * num_days_between_for_dates('2014-01-10',
|
||||
'2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
None),
|
||||
),
|
||||
4: zip_with_dates_for_dates(['NaT'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
@staticmethod
|
||||
def _compute_busday_offsets(announcement_dates):
|
||||
"""
|
||||
@@ -364,7 +252,7 @@ class EventLoaderCommonMixin(object):
|
||||
end_date=dates[-1],
|
||||
)
|
||||
|
||||
for sid in self.sids:
|
||||
for sid in self.get_sids():
|
||||
for col_name in self.cols.keys():
|
||||
assert_series_equal(result[col_name].xs(sid, level=1),
|
||||
self.cols[col_name][sid],
|
||||
|
||||
@@ -7,8 +7,10 @@ from unittest import TestCase
|
||||
import blaze as bz
|
||||
from blaze.compute.core import swap_resources_into_scope
|
||||
from contextlib2 import ExitStack
|
||||
import itertools
|
||||
import pandas as pd
|
||||
from six import iteritems
|
||||
from tests.pipeline.base import EventLoaderCommonMixin
|
||||
|
||||
from zipline.pipeline.common import(
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
@@ -38,51 +40,67 @@ from zipline.pipeline.loaders.blaze import (
|
||||
BlazeShareBuybackAuthorizationsLoader,
|
||||
)
|
||||
from zipline.testing import tmp_asset_finder
|
||||
from .base import EventLoaderCommonMixin, DATE_FIELD_NAME
|
||||
|
||||
date_intervals = [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', None]]
|
||||
|
||||
buyback_authorizations = [
|
||||
# K1--K2--A1--A2.
|
||||
buyback_authorizations_cases = [
|
||||
pd.DataFrame({
|
||||
SHARE_COUNT_FIELD_NAME: [1, 15],
|
||||
CASH_FIELD_NAME: [10, 20]
|
||||
}),
|
||||
# K1--K2--A2--A1.
|
||||
pd.DataFrame({
|
||||
SHARE_COUNT_FIELD_NAME: [7, 13],
|
||||
CASH_FIELD_NAME: [10, 22]
|
||||
}),
|
||||
# K1--A1--K2--A2.
|
||||
pd.DataFrame({
|
||||
SHARE_COUNT_FIELD_NAME: [3, 1],
|
||||
CASH_FIELD_NAME: [4, 7]
|
||||
}),
|
||||
# K1 == K2.
|
||||
pd.DataFrame({
|
||||
SHARE_COUNT_FIELD_NAME: [6, 23],
|
||||
CASH_FIELD_NAME: [1, 2]
|
||||
CASH_FIELD_NAME: [10, 20],
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-04',
|
||||
'2014-01-09'])
|
||||
}),
|
||||
pd.DataFrame(
|
||||
columns=[SHARE_COUNT_FIELD_NAME,
|
||||
CASH_FIELD_NAME],
|
||||
CASH_FIELD_NAME,
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
TS_FIELD_NAME],
|
||||
dtype='datetime64[ns]'
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def create_buyback_auth_tst_frame(cases, field_to_drop):
|
||||
buyback_auth_df = {
|
||||
sid:
|
||||
pd.concat([df, buyback_authorizations[sid]], axis=1).drop(
|
||||
field_to_drop, 1)
|
||||
for sid, df
|
||||
in enumerate(case.rename(columns={DATE_FIELD_NAME:
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME}
|
||||
)
|
||||
for case in cases
|
||||
)
|
||||
}
|
||||
return buyback_auth_df
|
||||
def get_values_for_date_ranges(zip_with_floats_dates,
|
||||
num_days_between_dates,
|
||||
vals_for_date_intervals):
|
||||
# Fill in given values for given date ranges.
|
||||
return zip_with_floats_dates(
|
||||
list(
|
||||
itertools.chain(*[
|
||||
[val] * num_days_between_dates(*date_intervals[i])
|
||||
for i, val in enumerate(vals_for_date_intervals)
|
||||
])
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def get_expected_previous_values(zip_with_floats_dates,
|
||||
num_days_between_dates,
|
||||
dates,
|
||||
vals_for_date_intervals):
|
||||
return pd.DataFrame({
|
||||
0: get_values_for_date_ranges(zip_with_floats_dates,
|
||||
num_days_between_dates,
|
||||
vals_for_date_intervals),
|
||||
1: zip_with_floats_dates(['NaN'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
|
||||
def get_expected_previous_dates(zip_with_dates_for_dates,
|
||||
num_days_between_for_dates,
|
||||
dates):
|
||||
return pd.DataFrame({
|
||||
0: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-04'] * num_days_between_for_dates('2014-01-05',
|
||||
'2014-01-09') +
|
||||
['2014-01-09'] * num_days_between_for_dates('2014-01-10',
|
||||
None),
|
||||
),
|
||||
1: zip_with_dates_for_dates(['NaT'] * len(dates))
|
||||
})
|
||||
|
||||
|
||||
class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
@@ -98,15 +116,21 @@ class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
BusinessDaysSinceCashBuybackAuth(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_sids(cls):
|
||||
return range(2)
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls._cleanup_stack = stack = ExitStack()
|
||||
cls.finder = stack.enter_context(
|
||||
tmp_asset_finder(equities=cls.equity_info),
|
||||
tmp_asset_finder(equities=cls.get_equity_info()),
|
||||
)
|
||||
cls.cols = {}
|
||||
cls.dataset = create_buyback_auth_tst_frame(cls.event_dates_cases,
|
||||
SHARE_COUNT_FIELD_NAME)
|
||||
cls.dataset = {sid:
|
||||
frame.drop(SHARE_COUNT_FIELD_NAME, axis=1)
|
||||
for sid, frame
|
||||
in enumerate(buyback_authorizations_cases)}
|
||||
cls.loader_type = CashBuybackAuthorizationsLoader
|
||||
|
||||
@classmethod
|
||||
@@ -116,31 +140,17 @@ class CashBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
def setup(self, dates):
|
||||
zip_with_floats_dates = partial(self.zip_with_floats, dates)
|
||||
num_days_between_dates = partial(self.num_days_between, dates)
|
||||
_expected_previous_cash = pd.DataFrame({
|
||||
0: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-14') +
|
||||
[10] * num_days_between_dates('2014-01-15', '2014-01-19') +
|
||||
[20] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
1: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-14') +
|
||||
[22] * num_days_between_dates('2014-01-15', '2014-01-19') +
|
||||
[10] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
2: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-09') +
|
||||
[4] * num_days_between_dates('2014-01-10', '2014-01-19') +
|
||||
[7] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
3: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-09') +
|
||||
[1] * num_days_between_dates('2014-01-10', '2014-01-14') +
|
||||
[2] * num_days_between_dates('2014-01-15', None)
|
||||
),
|
||||
4: zip_with_floats_dates(['NaN'] * len(dates)),
|
||||
}, index=dates)
|
||||
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] = \
|
||||
self.get_expected_previous_event_dates(dates)
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
_expected_previous_cash = get_expected_previous_values(
|
||||
zip_with_floats_dates, num_days_between_dates, dates,
|
||||
['NaN', 10, 20]
|
||||
)
|
||||
self.cols[
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT
|
||||
] = get_expected_previous_dates(zip_with_dates_for_dates,
|
||||
num_days_between_for_dates,
|
||||
dates)
|
||||
self.cols[PREVIOUS_BUYBACK_CASH] = _expected_previous_cash
|
||||
self.cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
|
||||
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
|
||||
@@ -160,15 +170,21 @@ class ShareBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
BusinessDaysSinceShareBuybackAuth(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_sids(cls):
|
||||
return range(2)
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls._cleanup_stack = stack = ExitStack()
|
||||
cls.finder = stack.enter_context(
|
||||
tmp_asset_finder(equities=cls.equity_info),
|
||||
tmp_asset_finder(equities=cls.get_equity_info()),
|
||||
)
|
||||
cls.cols = {}
|
||||
cls.dataset = create_buyback_auth_tst_frame(cls.event_dates_cases,
|
||||
CASH_FIELD_NAME)
|
||||
cls.dataset = {sid:
|
||||
frame.drop(CASH_FIELD_NAME, axis=1)
|
||||
for sid, frame
|
||||
in enumerate(buyback_authorizations_cases)}
|
||||
cls.loader_type = ShareBuybackAuthorizationsLoader
|
||||
|
||||
@classmethod
|
||||
@@ -178,34 +194,19 @@ class ShareBuybackAuthLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
def setup(self, dates):
|
||||
zip_with_floats_dates = partial(self.zip_with_floats, dates)
|
||||
num_days_between_dates = partial(self.num_days_between, dates)
|
||||
_expected_previous_buyback_share_count = pd.DataFrame({
|
||||
0: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-14') +
|
||||
[1] * num_days_between_dates('2014-01-15', '2014-01-19') +
|
||||
[15] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
1: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-14') +
|
||||
[13] * num_days_between_dates('2014-01-15', '2014-01-19') +
|
||||
[7] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
2: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-09') +
|
||||
[3] * num_days_between_dates('2014-01-10', '2014-01-19') +
|
||||
[1] * num_days_between_dates('2014-01-20', None)
|
||||
),
|
||||
3: zip_with_floats_dates(
|
||||
['NaN'] * num_days_between_dates(None, '2014-01-09') +
|
||||
[6] * num_days_between_dates('2014-01-10', '2014-01-14') +
|
||||
[23] * num_days_between_dates('2014-01-15', None)
|
||||
),
|
||||
4: zip_with_floats_dates(['NaN'] * len(dates)),
|
||||
}, index=dates)
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
|
||||
self.cols[
|
||||
PREVIOUS_BUYBACK_SHARE_COUNT
|
||||
] = _expected_previous_buyback_share_count
|
||||
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] = \
|
||||
self.get_expected_previous_event_dates(dates)
|
||||
] = get_expected_previous_values(zip_with_floats_dates,
|
||||
num_days_between_dates, dates,
|
||||
['NaN', 1, 15])
|
||||
self.cols[
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT
|
||||
] = get_expected_previous_dates(zip_with_dates_for_dates,
|
||||
num_days_between_for_dates,
|
||||
dates)
|
||||
self.cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
|
||||
self.cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
|
||||
)
|
||||
|
||||
+125
-15
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Tests for the reference loader for EarningsCalendar.
|
||||
"""
|
||||
from functools import partial
|
||||
from unittest import TestCase
|
||||
|
||||
import blaze as bz
|
||||
@@ -8,6 +9,7 @@ from blaze.compute.core import swap_resources_into_scope
|
||||
from contextlib2 import ExitStack
|
||||
import pandas as pd
|
||||
from six import iteritems
|
||||
from tests.pipeline.base import EventLoaderCommonMixin
|
||||
|
||||
from zipline.pipeline.common import (
|
||||
ANNOUNCEMENT_FIELD_NAME,
|
||||
@@ -27,11 +29,36 @@ from zipline.pipeline.loaders.earnings import EarningsCalendarLoader
|
||||
from zipline.pipeline.loaders.blaze import (
|
||||
BlazeEarningsCalendarLoader,
|
||||
)
|
||||
from zipline.testing import (
|
||||
make_simple_equity_info,
|
||||
tmp_asset_finder,
|
||||
)
|
||||
from .base import EventLoaderCommonMixin, DATE_FIELD_NAME
|
||||
|
||||
from zipline.testing import tmp_asset_finder
|
||||
|
||||
earnings_cases = [
|
||||
# K1--K2--A1--A2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
|
||||
}),
|
||||
# K1--K2--A2--A1.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
|
||||
}),
|
||||
# K1--A1--K2--A2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
|
||||
}),
|
||||
# K1 == K2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
|
||||
}),
|
||||
pd.DataFrame(
|
||||
columns=[ANNOUNCEMENT_FIELD_NAME,
|
||||
TS_FIELD_NAME],
|
||||
dtype='datetime64[ns]'
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class EarningsCalendarLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
@@ -45,25 +72,108 @@ class EarningsCalendarLoaderTestCase(TestCase, EventLoaderCommonMixin):
|
||||
DAYS_TO_NEXT: BusinessDaysUntilNextEarnings(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_sids(cls):
|
||||
return range(5)
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls._cleanup_stack = stack = ExitStack()
|
||||
equity_info = make_simple_equity_info(
|
||||
cls.sids,
|
||||
start_date=pd.Timestamp('2013-01-01', tz='UTC'),
|
||||
end_date=pd.Timestamp('2015-01-01', tz='UTC'),
|
||||
)
|
||||
cls.cols = {}
|
||||
cls.dataset = {sid: df for sid, df in enumerate(
|
||||
case.rename(
|
||||
columns={DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME}
|
||||
) for case in cls.event_dates_cases)}
|
||||
cls.dataset = {sid: df for sid, df in enumerate(earnings_cases)}
|
||||
cls.finder = stack.enter_context(
|
||||
tmp_asset_finder(equities=equity_info),
|
||||
tmp_asset_finder(equities=cls.get_equity_info()),
|
||||
)
|
||||
|
||||
cls.loader_type = EarningsCalendarLoader
|
||||
|
||||
def get_expected_next_event_dates(self, dates):
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
return pd.DataFrame({
|
||||
0: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-15') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-16', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
1: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-09') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-10', '2014-01-15') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-16', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
2: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-10'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-10') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-11', '2014-01-14') +
|
||||
['2014-01-20'] *
|
||||
num_days_between_for_dates('2014-01-15', '2014-01-20') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-21', None)
|
||||
),
|
||||
3: zip_with_dates_for_dates(
|
||||
['NaT'] *
|
||||
num_days_between_for_dates(None, '2014-01-04') +
|
||||
['2014-01-10'] *
|
||||
num_days_between_for_dates('2014-01-05', '2014-01-10') +
|
||||
['2014-01-15'] *
|
||||
num_days_between_for_dates('2014-01-11', '2014-01-15') +
|
||||
['NaT'] *
|
||||
num_days_between_for_dates('2014-01-16', None)
|
||||
),
|
||||
4: zip_with_dates_for_dates(['NaT'] *
|
||||
len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
def get_expected_previous_event_dates(self, dates):
|
||||
num_days_between_for_dates = partial(self.num_days_between, dates)
|
||||
zip_with_dates_for_dates = partial(self.zip_with_dates, dates)
|
||||
return pd.DataFrame({
|
||||
0: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
1: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
2: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-09') +
|
||||
['2014-01-10'] * num_days_between_for_dates('2014-01-10',
|
||||
'2014-01-19') +
|
||||
['2014-01-20'] * num_days_between_for_dates('2014-01-20',
|
||||
None),
|
||||
),
|
||||
3: zip_with_dates_for_dates(
|
||||
['NaT'] * num_days_between_for_dates(None, '2014-01-09') +
|
||||
['2014-01-10'] * num_days_between_for_dates('2014-01-10',
|
||||
'2014-01-14') +
|
||||
['2014-01-15'] * num_days_between_for_dates('2014-01-15',
|
||||
None),
|
||||
),
|
||||
4: zip_with_dates_for_dates(['NaT'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
cls._cleanup_stack.close()
|
||||
|
||||
@@ -5,8 +5,9 @@ from toolz import merge
|
||||
|
||||
from .base import PipelineLoader
|
||||
from .frame import DataFrameLoader
|
||||
from .utils import next_date_frame, previous_date_frame, previous_value
|
||||
from .utils import previous_event_frame, next_date_frame
|
||||
from zipline.pipeline.common import TS_FIELD_NAME
|
||||
from zipline.utils.numpy_utils import NaTD
|
||||
|
||||
WRONG_COLS_ERROR = "Expected columns {expected_columns} for sid {sid} but " \
|
||||
"got columns {resulting_columns}."
|
||||
@@ -179,10 +180,13 @@ class EventsLoader(PipelineLoader):
|
||||
event_date_field_name):
|
||||
return DataFrameLoader(
|
||||
prev_date_field,
|
||||
previous_date_frame(
|
||||
self.all_dates,
|
||||
previous_event_frame(
|
||||
self.events_by_sid,
|
||||
self.all_dates,
|
||||
NaTD,
|
||||
'datetime64[ns]',
|
||||
event_date_field_name,
|
||||
event_date_field_name
|
||||
),
|
||||
adjustments=None,
|
||||
)
|
||||
@@ -193,13 +197,13 @@ class EventsLoader(PipelineLoader):
|
||||
value_field_name):
|
||||
return DataFrameLoader(
|
||||
previous_value_field,
|
||||
previous_value(
|
||||
self.all_dates,
|
||||
previous_event_frame(
|
||||
self.events_by_sid,
|
||||
event_date_field_name,
|
||||
value_field_name,
|
||||
self.all_dates,
|
||||
previous_value_field.missing_value,
|
||||
previous_value_field.dtype,
|
||||
previous_value_field.missing_value
|
||||
event_date_field_name,
|
||||
value_field_name
|
||||
),
|
||||
adjustments=None,
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ import pandas as pd
|
||||
from six import iteritems
|
||||
from six.moves import zip
|
||||
|
||||
from zipline.utils.numpy_utils import NaTns, NaTD
|
||||
from zipline.utils.numpy_utils import NaTns
|
||||
|
||||
|
||||
def next_date_frame(dates, events_by_sid, event_date_field_name):
|
||||
@@ -60,70 +60,42 @@ def next_date_frame(dates, events_by_sid, event_date_field_name):
|
||||
return pd.DataFrame(index=dates, data=cols)
|
||||
|
||||
|
||||
def previous_date_frame(date_index, events_by_sid, event_date_field_name):
|
||||
def previous_event_frame(events_by_sid,
|
||||
date_index,
|
||||
missing_value,
|
||||
field_dtype,
|
||||
event_date_field,
|
||||
previous_return_field):
|
||||
"""
|
||||
Make a DataFrame representing simulated next earnings date_index.
|
||||
Make a DataFrame representing simulated previous dates or values for an
|
||||
event.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date_index : DatetimeIndex.
|
||||
The index of the returned DataFrame.
|
||||
events_by_sid : dict[int -> pd.DataFrame]
|
||||
Dict mapping sids to a DataFrame. The index of the DataFrame
|
||||
represents the date we learned of the event mapping to the event
|
||||
data.
|
||||
event_date_field_name : str
|
||||
The name of the date field that marks when the event occurred.
|
||||
|
||||
Returns
|
||||
-------
|
||||
previous_events: pd.DataFrame
|
||||
A DataFrame where each column is a security from `events_by_sid` where
|
||||
the values are the dates of the previous event that occurred on the
|
||||
date of the index. Entries falling before the first date will have
|
||||
`NaT` as the result in the output.
|
||||
|
||||
See Also
|
||||
--------
|
||||
next_date_frame
|
||||
"""
|
||||
sids = list(events_by_sid)
|
||||
out = np.full((len(date_index), len(sids)), NaTD, dtype='datetime64[ns]')
|
||||
d_n = date_index[-1].asm8
|
||||
for col_idx, sid in enumerate(sids):
|
||||
# events_by_sid[sid] is Series mapping knowledge_date to actual
|
||||
# event_date. We don't care about the knowledge date for
|
||||
# computing previous earnings.
|
||||
values = events_by_sid[sid][event_date_field_name].values
|
||||
values = values[values <= d_n]
|
||||
out[date_index.searchsorted(values), col_idx] = values
|
||||
|
||||
frame = pd.DataFrame(out, index=date_index, columns=sids)
|
||||
frame.ffill(inplace=True)
|
||||
return frame
|
||||
|
||||
|
||||
def previous_value(date_index, events_by_sid, event_date_field, value_field,
|
||||
value_field_dtype, missing_value):
|
||||
"""
|
||||
Make a DataFrame representing simulated next earnings date_index.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date_index : DatetimeIndex.
|
||||
The index of the returned DataFrame.
|
||||
events_by_sid : dict[int -> DatetimeIndex]
|
||||
Dict mapping sids to a series of dates. Each k:v pair of the series
|
||||
represents the date we learned of the event mapping to the date the
|
||||
event will occur.
|
||||
date_index : DatetimeIndex.
|
||||
The index of the returned DataFrame.
|
||||
missing_value : any
|
||||
Data which missing values should be filled with.
|
||||
field_dtype: any
|
||||
The dtype of the field for which the previous values are being
|
||||
retrieved.
|
||||
event_date_field: str
|
||||
The name of the date field that marks when the event occurred.
|
||||
return_field: str
|
||||
The name of the field for which the previous values are being
|
||||
retrieved.
|
||||
|
||||
Returns
|
||||
-------
|
||||
previous_events: pd.DataFrame
|
||||
A DataFrame where each column is a security from `events_by_sid` where
|
||||
the values are the dates of the previous event that occured on the date
|
||||
of the index. Entries falling before the first date will have `NaT` as
|
||||
the result in the output.
|
||||
A DataFrame where each column is a security from `events_by_sid` and
|
||||
the values are the values for the previous event that occurred on the
|
||||
date of the index. Entries falling before the first date will have
|
||||
`missing_value` filled in as the result in the output.
|
||||
|
||||
See Also
|
||||
--------
|
||||
@@ -133,18 +105,24 @@ def previous_value(date_index, events_by_sid, event_date_field, value_field,
|
||||
out = np.full(
|
||||
(len(date_index), len(sids)),
|
||||
missing_value,
|
||||
dtype=value_field_dtype
|
||||
dtype=field_dtype
|
||||
)
|
||||
d_n = date_index[-1].asm8
|
||||
for col_idx, sid in enumerate(sids):
|
||||
# events_by_sid[sid] is DataFrame mapping knowledge_date to event
|
||||
# date and value. We don't care about the knowledge date for computing
|
||||
# previous values.
|
||||
# events_by_sid[sid] is a DataFrame mapping knowledge_date to event
|
||||
# date and values.
|
||||
df = events_by_sid[sid]
|
||||
df = df[df[event_date_field] <= d_n]
|
||||
event_date_vals = df[event_date_field].values
|
||||
# Get knowledge dates corresponding to the values in which we are
|
||||
# interested
|
||||
kd_vals = df[df[event_date_field] <= d_n].index.values
|
||||
# The date at which a previous event is first known is the max of the
|
||||
# kd and the event date.
|
||||
index_dates = np.maximum(kd_vals, event_date_vals)
|
||||
out[
|
||||
date_index.searchsorted(df[event_date_field].values), col_idx
|
||||
] = df[value_field]
|
||||
date_index.searchsorted(index_dates), col_idx
|
||||
] = df[previous_return_field]
|
||||
|
||||
frame = pd.DataFrame(out, index=date_index, columns=sids)
|
||||
frame.ffill(inplace=True)
|
||||
|
||||
Reference in New Issue
Block a user