From 1531568899989b930b70629329fade17112558bb Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Thu, 14 Apr 2016 18:10:47 -0400 Subject: [PATCH] ENH: add custom dataset for estimize MAINT: alphabetize constants MAINT: remove obsolete column TST: refactor tests to use common code MAINT: remove unneeded fields from dataset MAINT: remove obsolete earnings estimates columns and refactor --- tests/pipeline/test_buyback_auth.py | 61 ++-- tests/pipeline/test_consensus_estimates.py | 327 ++++++++++++++++++ tests/pipeline/test_dividends.py | 109 +++--- tests/pipeline/test_earnings.py | 161 +-------- zipline/pipeline/common.py | 24 ++ zipline/pipeline/data/__init__.py | 2 + zipline/pipeline/data/consensus_estimates.py | 28 ++ zipline/pipeline/loaders/__init__.py | 2 + zipline/pipeline/loaders/blaze/__init__.py | 2 + .../loaders/blaze/consensus_estimates.py | 91 +++++ zipline/pipeline/loaders/blaze/dividends.py | 2 +- .../pipeline/loaders/consensus_estimates.py | 162 +++++++++ zipline/testing/fixtures.py | 180 +++++++++- 13 files changed, 891 insertions(+), 260 deletions(-) create mode 100644 tests/pipeline/test_consensus_estimates.py create mode 100644 zipline/pipeline/data/consensus_estimates.py create mode 100644 zipline/pipeline/loaders/blaze/consensus_estimates.py create mode 100644 zipline/pipeline/loaders/consensus_estimates.py diff --git a/tests/pipeline/test_buyback_auth.py b/tests/pipeline/test_buyback_auth.py index e86abda5..9bf8e19c 100644 --- a/tests/pipeline/test_buyback_auth.py +++ b/tests/pipeline/test_buyback_auth.py @@ -34,7 +34,6 @@ from zipline.pipeline.loaders.blaze import ( BlazeShareBuybackAuthorizationsLoader, ) from zipline.pipeline.loaders.utils import ( - get_values_for_date_ranges, zip_with_floats, zip_with_dates ) @@ -42,8 +41,9 @@ from zipline.testing.fixtures import ( WithPipelineEventDataLoader, ZiplineTestCase ) -date_intervals = [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'], - ['2014-01-10', None]] +date_intervals = [ + [[None, 
'2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', None]] +] buyback_authorizations_cases = [ pd.DataFrame({ @@ -63,18 +63,6 @@ buyback_authorizations_cases = [ ] -def get_expected_previous_values(zip_date_index_with_vals, - dates, - vals_for_date_intervals): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_date_index_with_vals, - vals_for_date_intervals, - date_intervals, - dates), - 1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)), - }, index=dates) - - class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, ZiplineTestCase): """ @@ -103,16 +91,17 @@ class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, loader_type = CashBuybackAuthorizationsLoader def setup(self, dates): - cols = {} - _expected_previous_cash = get_expected_previous_values( - zip_with_floats, dates, - ['NaN', 10, 20] - ) - cols[ - PREVIOUS_BUYBACK_ANNOUNCEMENT - ] = get_expected_previous_values(zip_with_dates, dates, - ['NaT', '2014-01-04', '2014-01-09']) - cols[PREVIOUS_BUYBACK_CASH] = _expected_previous_cash + cols = { + PREVIOUS_BUYBACK_CASH: self.get_sids_to_frames(zip_with_floats, + [['NaN', 10, 20]], + date_intervals, + dates), + PREVIOUS_BUYBACK_ANNOUNCEMENT: self.get_sids_to_frames( + zip_with_dates, + [['NaT', '2014-01-04', '2014-01-09']], + date_intervals, + dates), + } cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] ) @@ -147,16 +136,18 @@ class ShareBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, loader_type = ShareBuybackAuthorizationsLoader def setup(self, dates): - cols = {} - cols[ - PREVIOUS_BUYBACK_SHARE_COUNT - ] = get_expected_previous_values(zip_with_floats, - dates, - ['NaN', 1, 15]) - cols[ - PREVIOUS_BUYBACK_ANNOUNCEMENT - ] = get_expected_previous_values(zip_with_dates, dates, - ['NaT', '2014-01-04', '2014-01-09']) + cols = {PREVIOUS_BUYBACK_SHARE_COUNT: + self.get_sids_to_frames(zip_with_floats, + [['NaN', 1, 15]], + date_intervals, + dates,), + PREVIOUS_BUYBACK_ANNOUNCEMENT: 
+ self.get_sids_to_frames(zip_with_dates, + [['NaT', '2014-01-04', '2014-01-09']], + date_intervals, + dates,), + } + cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] ) diff --git a/tests/pipeline/test_consensus_estimates.py b/tests/pipeline/test_consensus_estimates.py new file mode 100644 index 00000000..e49f1b1e --- /dev/null +++ b/tests/pipeline/test_consensus_estimates.py @@ -0,0 +1,327 @@ +""" +Tests for the reference loader for ConsensusEstimates. +""" +import blaze as bz +from blaze.compute.core import swap_resources_into_scope +import pandas as pd +from six import iteritems + +from zipline.pipeline.common import ( + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + HIGH_FIELD_NAME, + LOW_FIELD_NAME, + MEAN_FIELD_NAME, + NEXT_COUNT, + NEXT_FISCAL_QUARTER, + NEXT_FISCAL_YEAR, + NEXT_HIGH, + NEXT_LOW, + NEXT_RELEASE_DATE, + NEXT_STANDARD_DEVIATION, + PREVIOUS_COUNT, + PREVIOUS_FISCAL_QUARTER, + PREVIOUS_FISCAL_YEAR, + PREVIOUS_HIGH, + PREVIOUS_LOW, + PREVIOUS_MEAN, NEXT_MEAN, + PREVIOUS_RELEASE_DATE, + PREVIOUS_STANDARD_DEVIATION, + RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, + SID_FIELD_NAME) +from zipline.pipeline.data import ConsensusEstimates +from zipline.pipeline.loaders.consensus_estimates import ( + ConsensusEstimatesLoader +) +from zipline.pipeline.loaders.blaze import BlazeConsensusEstimatesLoader +from zipline.pipeline.loaders.utils import ( + zip_with_floats +) +from zipline.testing.fixtures import ( + ZiplineTestCase, + WithNextAndPreviousEventDataLoader +) + +consensus_estimates_cases = [ + # K1--K2--A1--A2. + pd.DataFrame({ + STANDARD_DEVIATION_FIELD_NAME: (.5, .6), + COUNT_FIELD_NAME: (1, 2), + FISCAL_QUARTER_FIELD_NAME: (1, 1), + HIGH_FIELD_NAME: (.6, .7), + MEAN_FIELD_NAME: (.1, .2), + FISCAL_YEAR_FIELD_NAME: (2014, 2014), + LOW_FIELD_NAME: (.05, .06), + }), + # K1--K2--A2--A1. 
+ pd.DataFrame({ + STANDARD_DEVIATION_FIELD_NAME: (.6, .7), + COUNT_FIELD_NAME: (2, 3), + FISCAL_QUARTER_FIELD_NAME: (1, 1), + HIGH_FIELD_NAME: (.7, .8), + MEAN_FIELD_NAME: (.2, .3), + FISCAL_YEAR_FIELD_NAME: (2014, 2014), + LOW_FIELD_NAME: (.06, .07), + }), + # K1--A1--K2--A2. + pd.DataFrame({ + STANDARD_DEVIATION_FIELD_NAME: (.7, .8), + COUNT_FIELD_NAME: (3, 4), + FISCAL_QUARTER_FIELD_NAME: (1, 1), + HIGH_FIELD_NAME: (.8, .9), + MEAN_FIELD_NAME: (.3, .4), + FISCAL_YEAR_FIELD_NAME: (2014, 2014), + LOW_FIELD_NAME: (.07, .08), + }), + # K1 == K2. + pd.DataFrame({ + STANDARD_DEVIATION_FIELD_NAME: (.8, .9), + COUNT_FIELD_NAME: (4, 5), + FISCAL_QUARTER_FIELD_NAME: (1, 1), + HIGH_FIELD_NAME: (.9, 1.0), + MEAN_FIELD_NAME: (.4, .5), + FISCAL_YEAR_FIELD_NAME: (2014, 2014), + LOW_FIELD_NAME: (.08, .09), + }), + pd.DataFrame( + columns=[STANDARD_DEVIATION_FIELD_NAME, + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + HIGH_FIELD_NAME, + MEAN_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + LOW_FIELD_NAME], + dtype='datetime64[ns]' + ), +] + +next_standard_deviation = [ + ['NaN', .5, .6, 'NaN'], + ['NaN', .6, .7, .6, 'NaN'], + ['NaN', .7, 'NaN', .8, 'NaN'], + ['NaN', .8, .9, 'NaN'], + ['NaN'] +] + +prev_standard_deviation = [ + ['NaN', .5, .6], + ['NaN', .7, .6], + ['NaN', .7, .8], + ['NaN', .8, .9], + ['NaN'] +] + +next_count = [ + ['NaN', 1, 2, 'NaN'], + ['NaN', 2, 3, 2, 'NaN'], + ['NaN', 3, 'NaN', 4, 'NaN'], + ['NaN', 4, 5, 'NaN'], + ['NaN'] +] + +prev_count = [ + ['NaN', 1, 2], + ['NaN', 3, 2], + ['NaN', 3, 4], + ['NaN', 4, 5], + ['NaN'] +] + +next_fiscal_quarter = [ + ['NaN', 1, 1, 'NaN'], + ['NaN', 1, 1, 1, 'NaN'], + ['NaN', 1, 'NaN', 1, 'NaN'], + ['NaN', 1, 1, 'NaN'], + ['NaN'] +] + +prev_fiscal_quarter = [ + ['NaN', 1, 1], + ['NaN', 1, 1], + ['NaN', 1, 1], + ['NaN', 1, 1], + ['NaN'] +] + +next_high = [ + ['NaN', .6, .7, 'NaN'], + ['NaN', .7, .8, .7, 'NaN'], + ['NaN', .8, 'NaN', .9, 'NaN'], + ['NaN', .9, 1.0, 'NaN'], + ['NaN'] +] + +prev_high = [ + ['NaN', .6, .7], + ['NaN', .8, 
.7], + ['NaN', .8, .9], + ['NaN', .9, 1.0], + ['NaN'] +] + +next_mean = [ + ['NaN', .1, .2, 'NaN'], + ['NaN', .2, .3, .2, 'NaN'], + ['NaN', .3, 'NaN', .4, 'NaN'], + ['NaN', .4, .5, 'NaN'], + ['NaN'] +] + +prev_mean = [ + ['NaN', .1, .2], + ['NaN', .3, .2], + ['NaN', .3, .4], + ['NaN', .4, .5], + ['NaN'] +] + +next_fiscal_year = [ + ['NaN', 2014, 2014, 'NaN'], + ['NaN', 2014, 2014, 2014, 'NaN'], + ['NaN', 2014, 'NaN', 2014, 'NaN'], + ['NaN', 2014, 2014, 'NaN'], + ['NaN'] +] + +prev_fiscal_year = [ + ['NaN', 2014, 2014], + ['NaN', 2014, 2014], + ['NaN', 2014, 2014], + ['NaN', 2014, 2014], + ['NaN'] +] + +next_low = [ + ['NaN', .05, .06, 'NaN'], + ['NaN', .06, .07, .06, 'NaN'], + ['NaN', .07, 'NaN', .08, 'NaN'], + ['NaN', .08, .09, 'NaN'], + ['NaN'] +] + +prev_low = [ + ['NaN', .05, .06], + ['NaN', .07, .06], + ['NaN', .07, .08], + ['NaN', .08, .09], + ['NaN'] +] + +field_name_to_expected_col = { + PREVIOUS_STANDARD_DEVIATION: prev_standard_deviation, + NEXT_STANDARD_DEVIATION: next_standard_deviation, + PREVIOUS_COUNT: prev_count, + NEXT_COUNT: next_count, + PREVIOUS_FISCAL_QUARTER: prev_fiscal_quarter, + NEXT_FISCAL_QUARTER: next_fiscal_quarter, + PREVIOUS_HIGH: prev_high, + NEXT_HIGH: next_high, + PREVIOUS_MEAN: prev_mean, + NEXT_MEAN: next_mean, + PREVIOUS_FISCAL_YEAR: prev_fiscal_year, + NEXT_FISCAL_YEAR: next_fiscal_year, + PREVIOUS_LOW: prev_low, + NEXT_LOW: next_low +} + + +class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader, + ZiplineTestCase): + """ + Tests for loading the consensus estimates data. 
+ """ + pipeline_columns = { + NEXT_RELEASE_DATE: + ConsensusEstimates.next_release_date.latest, + PREVIOUS_RELEASE_DATE: + ConsensusEstimates.previous_release_date.latest, + PREVIOUS_STANDARD_DEVIATION: + ConsensusEstimates.previous_standard_deviation.latest, + NEXT_STANDARD_DEVIATION: + ConsensusEstimates.next_standard_deviation.latest, + PREVIOUS_COUNT: + ConsensusEstimates.previous_count.latest, + NEXT_COUNT: + ConsensusEstimates.next_count.latest, + PREVIOUS_FISCAL_QUARTER: + ConsensusEstimates.previous_fiscal_quarter.latest, + NEXT_FISCAL_QUARTER: + ConsensusEstimates.next_fiscal_quarter.latest, + PREVIOUS_HIGH: + ConsensusEstimates.previous_high.latest, + NEXT_HIGH: + ConsensusEstimates.next_high.latest, + PREVIOUS_MEAN: + ConsensusEstimates.previous_mean.latest, + NEXT_MEAN: + ConsensusEstimates.next_mean.latest, + PREVIOUS_FISCAL_YEAR: + ConsensusEstimates.previous_fiscal_year.latest, + NEXT_FISCAL_YEAR: + ConsensusEstimates.next_fiscal_year.latest, + PREVIOUS_LOW: + ConsensusEstimates.previous_low.latest, + NEXT_LOW: + ConsensusEstimates.next_low.latest + } + + @classmethod + def get_dataset(cls): + return {sid: + pd.concat([ + cls.base_cases[sid].rename(columns={ + 'other_date': RELEASE_DATE_FIELD_NAME + }), + df + ], axis=1) + for sid, df in enumerate(consensus_estimates_cases)} + + loader_type = ConsensusEstimatesLoader + + def setup(self, dates): + cols = { + PREVIOUS_RELEASE_DATE: + self.get_expected_previous_event_dates(dates), + NEXT_RELEASE_DATE: self.get_expected_next_event_dates(dates) + } + for field_name in field_name_to_expected_col: + cols[field_name] = self.get_sids_to_frames( + zip_with_floats, field_name_to_expected_col[field_name], + self.prev_date_intervals + if field_name.startswith("previous") + else self.next_date_intervals, + dates + ) + return cols + + +class BlazeConsensusEstimatesLoaderTestCase(ConsensusEstimatesLoaderTestCase): + loader_type = BlazeConsensusEstimatesLoader + + def pipeline_event_loader_args(self, dates): + _, 
mapping = super( + BlazeConsensusEstimatesLoaderTestCase, + self, + ).pipeline_event_loader_args(dates) + frames = [] + for sid, df in iteritems(mapping): + frame = df.copy() + frame[SID_FIELD_NAME] = sid + frames.append(frame) + return bz.data(pd.concat(frames).reset_index(drop=True)), + + +class BlazeConsensusEstimatesLoaderNotInteractiveTestCase( + BlazeConsensusEstimatesLoaderTestCase +): + """Test case for passing a non-interactive symbol and a dict of resources. + """ + + def pipeline_event_loader_args(self, dates): + (bound_expr,) = super( + BlazeConsensusEstimatesLoaderNotInteractiveTestCase, + self, + ).pipeline_event_loader_args(dates) + return swap_resources_into_scope(bound_expr, {}) diff --git a/tests/pipeline/test_dividends.py b/tests/pipeline/test_dividends.py index 431078b3..5c10b7bc 100644 --- a/tests/pipeline/test_dividends.py +++ b/tests/pipeline/test_dividends.py @@ -45,7 +45,6 @@ from zipline.pipeline.loaders.dividends import ( DividendsByPayDateLoader ) from zipline.pipeline.loaders.utils import ( - get_values_for_date_ranges, zip_with_dates, zip_with_floats ) @@ -159,33 +158,6 @@ next_amounts = [['NaN', 1, 15, 'NaN'], ['NaN', 6, 23, 'NaN']] -def get_vals_for_dates(zip_date_index_with_vals, - vals, - date_invervals, - dates): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_date_index_with_vals, - vals[0], - date_invervals[0], - dates), - 1: get_values_for_date_ranges(zip_date_index_with_vals, - vals[1], - date_invervals[1], - dates), - 2: get_values_for_date_ranges(zip_date_index_with_vals, - vals[2], - date_invervals[2], - dates), - # Assume the latest of 2 cash values is used if we find out about 2 - # announcements that happened on the same day for the same sid. 
- 3: get_values_for_date_ranges(zip_date_index_with_vals, - vals[3], - date_invervals[3], - dates), - 4: zip_date_index_with_vals(dates, ['NaN'] * len(dates)), - }, index=dates) - - class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase): """ @@ -232,14 +204,14 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader, ['NaT', '2014-01-04', '2014-01-14'], ['NaT', '2014-01-04']] amounts = [['NaN', 1, 15], ['NaN', 7, 13], ['NaN', 3, 1], ['NaN', 23]] - cols = {} - cols[PREVIOUS_ANNOUNCEMENT] = get_vals_for_dates( - zip_with_dates, announcement_dates, date_intervals, dates - ) - - cols[PREVIOUS_AMOUNT] = get_vals_for_dates( - zip_with_floats, amounts, date_intervals, dates - ) + cols = { + PREVIOUS_ANNOUNCEMENT: self.get_sids_to_frames( + zip_with_dates, announcement_dates, date_intervals, dates + ), + PREVIOUS_AMOUNT: self.get_sids_to_frames( + zip_with_floats, amounts, date_intervals, dates + ), + } cols[ DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT @@ -305,27 +277,26 @@ class DividendsByExDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase): loader_type = DividendsByExDateLoader def setup(self, dates): - cols = {} - cols[NEXT_EX_DATE] = get_vals_for_dates( - zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates, - ) - - cols[PREVIOUS_EX_DATE] = get_vals_for_dates( - zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates - ) - - cols[NEXT_AMOUNT] = get_vals_for_dates( - zip_with_floats, next_amounts, next_date_intervals, dates - ) - - cols[PREVIOUS_AMOUNT] = get_vals_for_dates( - zip_with_floats, prev_amounts, prev_date_intervals, dates - ) + cols = { + NEXT_EX_DATE: self.get_sids_to_frames( + zip_with_dates, next_ex_and_pay_dates, next_date_intervals, + dates, + ), + PREVIOUS_EX_DATE: self.get_sids_to_frames( + zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, + dates + ), + NEXT_AMOUNT: self.get_sids_to_frames( + zip_with_floats, next_amounts, next_date_intervals, dates + ), + 
PREVIOUS_AMOUNT: self.get_sids_to_frames( + zip_with_floats, prev_amounts, prev_date_intervals, dates + ) + } cols[DAYS_TO_NEXT_EX_DATE] = self._compute_busday_offsets( cols[NEXT_EX_DATE] ) - cols[DAYS_SINCE_PREV_EX_DATE] = self._compute_busday_offsets( cols[PREVIOUS_EX_DATE] ) @@ -386,22 +357,22 @@ class DividendsByPayDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase): loader_type = DividendsByPayDateLoader def setup(self, dates): - cols = {} - cols[NEXT_PAY_DATE] = get_vals_for_dates( - zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates - ) - cols[PREVIOUS_PAY_DATE] = get_vals_for_dates( - zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates - ) - - cols[NEXT_AMOUNT] = get_vals_for_dates( - zip_with_floats, next_amounts, next_date_intervals, dates - ) - - cols[PREVIOUS_AMOUNT] = get_vals_for_dates( - zip_with_floats, prev_amounts, prev_date_intervals, dates - ) - return cols + return { + NEXT_PAY_DATE: self.get_sids_to_frames( + zip_with_dates, next_ex_and_pay_dates, next_date_intervals, + dates + ), + PREVIOUS_PAY_DATE: self.get_sids_to_frames( + zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, + dates + ), + NEXT_AMOUNT: self.get_sids_to_frames( + zip_with_floats, next_amounts, next_date_intervals, dates + ), + PREVIOUS_AMOUNT: self.get_sids_to_frames( + zip_with_floats, prev_amounts, prev_date_intervals, dates + ) + } class BlazeDividendsByPayDateLoaderTestCase(DividendsByPayDateTestCase): diff --git a/tests/pipeline/test_earnings.py b/tests/pipeline/test_earnings.py index 23e92ae1..28727832 100644 --- a/tests/pipeline/test_earnings.py +++ b/tests/pipeline/test_earnings.py @@ -22,97 +22,13 @@ from zipline.pipeline.factors.events import ( ) from zipline.pipeline.loaders.earnings import EarningsCalendarLoader from zipline.pipeline.loaders.blaze import BlazeEarningsCalendarLoader -from zipline.pipeline.loaders.utils import ( - get_values_for_date_ranges, - zip_with_dates -) from zipline.testing.fixtures import ( - 
WithPipelineEventDataLoader, - ZiplineTestCase + ZiplineTestCase, + WithNextAndPreviousEventDataLoader ) -earnings_cases = [ - # K1--K2--A1--A2. - pd.DataFrame({ - TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), - ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20']) - }), - # K1--K2--A2--A1. - pd.DataFrame({ - TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), - ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15']) - }), - # K1--A1--K2--A2. - pd.DataFrame({ - TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']), - ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20']) - }), - # K1 == K2. - pd.DataFrame({ - TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2), - ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15']) - }), - pd.DataFrame( - columns=[ANNOUNCEMENT_FIELD_NAME, - TS_FIELD_NAME], - dtype='datetime64[ns]' - ), -] -next_date_intervals = [ - [[None, '2014-01-04'], - ['2014-01-05', '2014-01-15'], - ['2014-01-16', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], - ['2014-01-05', '2014-01-09'], - ['2014-01-10', '2014-01-15'], - ['2014-01-16', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], - ['2014-01-05', '2014-01-10'], - ['2014-01-11', '2014-01-14'], - ['2014-01-15', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], - ['2014-01-05', '2014-01-10'], - ['2014-01-11', '2014-01-15'], - ['2014-01-16', None]] -] - -next_dates = [ - ['NaT', '2014-01-15', '2014-01-20', 'NaT'], - ['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'], - ['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'], - ['NaT', '2014-01-10', '2014-01-15', 'NaT'], - ['NaT'] -] - -prev_date_intervals = [ - [[None, '2014-01-14'], - ['2014-01-15', '2014-01-19'], - ['2014-01-20', None]], - [[None, '2014-01-14'], - ['2014-01-15', '2014-01-19'], - ['2014-01-20', None]], - [[None, '2014-01-09'], - ['2014-01-10', '2014-01-19'], - ['2014-01-20', None]], - [[None, 
'2014-01-09'], - ['2014-01-10', '2014-01-14'], - ['2014-01-15', None]] -] - -prev_dates = [ - ['NaT', '2014-01-15', '2014-01-20'], - ['NaT', '2014-01-15', '2014-01-20'], - ['NaT', '2014-01-10', '2014-01-20'], - ['NaT', '2014-01-10', '2014-01-15'], - ['NaT'] -] - - -class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader, +class EarningsCalendarLoaderTestCase(WithNextAndPreviousEventDataLoader, ZiplineTestCase): """ Tests for loading the earnings announcement data. @@ -126,70 +42,25 @@ class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader, @classmethod def get_dataset(cls): - return {sid: df for sid, df in enumerate(earnings_cases)} + return {sid: df.rename( + columns={'other_date': ANNOUNCEMENT_FIELD_NAME} + ) for sid, df in enumerate(cls.base_cases)} loader_type = EarningsCalendarLoader - def get_expected_next_event_dates(self, dates): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_with_dates, - next_dates[0], - next_date_intervals[0], - dates), - 1: get_values_for_date_ranges(zip_with_dates, - next_dates[1], - next_date_intervals[1], - dates), - 2: get_values_for_date_ranges(zip_with_dates, - next_dates[2], - next_date_intervals[2], - dates), - 3: get_values_for_date_ranges(zip_with_dates, - next_dates[3], - next_date_intervals[3], - dates), - 4: zip_with_dates(dates, ['NaT'] * len(dates)), - }, index=dates) - - def get_expected_previous_event_dates(self, dates): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_with_dates, - prev_dates[0], - prev_date_intervals[0], - dates), - 1: get_values_for_date_ranges(zip_with_dates, - prev_dates[1], - prev_date_intervals[1], - dates), - 2: get_values_for_date_ranges(zip_with_dates, - prev_dates[2], - prev_date_intervals[2], - dates), - 3: get_values_for_date_ranges(zip_with_dates, - prev_dates[3], - prev_date_intervals[3], - dates), - 4: zip_with_dates(dates, ['NaT'] * len(dates)), - }, index=dates) - def setup(self, dates): - _expected_next_announce = 
self.get_expected_next_event_dates(dates) - - _expected_previous_announce = self.get_expected_previous_event_dates( - dates + cols = { + PREVIOUS_ANNOUNCEMENT: self.get_expected_previous_event_dates( + dates + ), + NEXT_ANNOUNCEMENT: self.get_expected_next_event_dates(dates), + } + cols[DAYS_TO_NEXT] = self._compute_busday_offsets( + cols[NEXT_ANNOUNCEMENT] ) - - _expected_next_busday_offsets = self._compute_busday_offsets( - _expected_next_announce + cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( + cols[PREVIOUS_ANNOUNCEMENT] ) - _expected_previous_busday_offsets = self._compute_busday_offsets( - _expected_previous_announce - ) - cols = {} - cols[PREVIOUS_ANNOUNCEMENT] = _expected_previous_announce - cols[NEXT_ANNOUNCEMENT] = _expected_next_announce - cols[DAYS_TO_NEXT] = _expected_next_busday_offsets - cols[DAYS_SINCE_PREV] = _expected_previous_busday_offsets return cols diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py index aa71d3a9..8b86d1e3 100644 --- a/zipline/pipeline/common.py +++ b/zipline/pipeline/common.py @@ -5,6 +5,7 @@ AD_FIELD_NAME = 'asof_date' ANNOUNCEMENT_FIELD_NAME = 'announcement_date' CASH_FIELD_NAME = 'cash' CASH_AMOUNT_FIELD_NAME = 'cash_amount' +COUNT_FIELD_NAME = 'count' BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date' DAYS_SINCE_PREV = 'days_since_prev' DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement' @@ -12,18 +13,41 @@ DAYS_SINCE_PREV_EX_DATE = 'days_since_prev_ex_date' DAYS_TO_NEXT = 'days_to_next' DAYS_TO_NEXT_EX_DATE = 'days_to_next_ex_date' EX_DATE_FIELD_NAME = 'ex_date' +FISCAL_QUARTER_FIELD_NAME = 'fiscal_quarter' +FISCAL_YEAR_FIELD_NAME = 'fiscal_year' +HIGH_FIELD_NAME = 'high' +LOW_FIELD_NAME = 'low' +MEAN_FIELD_NAME = 'mean' NEXT_AMOUNT = 'next_amount' NEXT_ANNOUNCEMENT = 'next_announcement' +NEXT_COUNT = 'next_count' +NEXT_FISCAL_QUARTER = 'next_fiscal_quarter' +NEXT_FISCAL_YEAR = 'next_fiscal_year' NEXT_EX_DATE = 'next_ex_date' +NEXT_HIGH = 'next_high' +NEXT_LOW = 
'next_low' +NEXT_MEAN = 'next_mean' NEXT_PAY_DATE = 'next_pay_date' +NEXT_RELEASE_DATE = 'next_release_date' +NEXT_STANDARD_DEVIATION = 'next_standard_deviation' PAY_DATE_FIELD_NAME = 'pay_date' PREVIOUS_AMOUNT = 'previous_amount' PREVIOUS_ANNOUNCEMENT = 'previous_announcement' PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement' PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash' PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count' +PREVIOUS_COUNT = 'previous_count' PREVIOUS_EX_DATE = 'previous_ex_date' +PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter' +PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year' +PREVIOUS_HIGH = 'previous_high' +PREVIOUS_LOW = 'previous_low' +PREVIOUS_MEAN = 'previous_mean' PREVIOUS_PAY_DATE = 'previous_pay_date' +PREVIOUS_RELEASE_DATE = 'previous_release_date' +PREVIOUS_STANDARD_DEVIATION = 'previous_standard_deviation' +RELEASE_DATE_FIELD_NAME = 'release_date' SHARE_COUNT_FIELD_NAME = 'share_count' SID_FIELD_NAME = 'sid' +STANDARD_DEVIATION_FIELD_NAME = 'standard_deviation' TS_FIELD_NAME = 'timestamp' diff --git a/zipline/pipeline/data/__init__.py b/zipline/pipeline/data/__init__.py index 798d3c81..545088dd 100644 --- a/zipline/pipeline/data/__init__.py +++ b/zipline/pipeline/data/__init__.py @@ -5,6 +5,7 @@ from .dividends import ( DividendsByPayDate, ) from .earnings import EarningsCalendar +from .consensus_estimates import ConsensusEstimates from .equity_pricing import USEquityPricing from .dataset import DataSet, Column, BoundColumn @@ -17,6 +18,7 @@ __all__ = [ 'DividendsByExDate', 'DividendsByPayDate', 'EarningsCalendar', + 'ConsensusEstimates', 'ShareBuybackAuthorizations', 'USEquityPricing', ] diff --git a/zipline/pipeline/data/consensus_estimates.py b/zipline/pipeline/data/consensus_estimates.py new file mode 100644 index 00000000..614e6597 --- /dev/null +++ b/zipline/pipeline/data/consensus_estimates.py @@ -0,0 +1,28 @@ +""" +Datasets representing consensus estimates data. 
+""" +from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype + +from .dataset import Column, DataSet + + +class ConsensusEstimates(DataSet): + """ + Dataset representing consensus estimates data. + """ + previous_release_date = Column(datetime64ns_dtype) + next_release_date = Column(datetime64ns_dtype) + previous_standard_deviation = Column(float64_dtype) + next_standard_deviation = Column(float64_dtype) + previous_count = Column(float64_dtype) + next_count = Column(float64_dtype) + previous_fiscal_quarter = Column(float64_dtype) + next_fiscal_quarter = Column(float64_dtype) + previous_high = Column(float64_dtype) + next_high = Column(float64_dtype) + previous_mean = Column(float64_dtype) + next_mean = Column(float64_dtype) + previous_fiscal_year = Column(float64_dtype) + next_fiscal_year = Column(float64_dtype) + previous_low = Column(float64_dtype) + next_low = Column(float64_dtype) diff --git a/zipline/pipeline/loaders/__init__.py b/zipline/pipeline/loaders/__init__.py index 8e497f9b..6c717b0e 100644 --- a/zipline/pipeline/loaders/__init__.py +++ b/zipline/pipeline/loaders/__init__.py @@ -1,4 +1,5 @@ from .earnings import EarningsCalendarLoader +from .consensus_estimates import ConsensusEstimatesLoader from .buyback_auth import ( CashBuybackAuthorizationsLoader, ShareBuybackAuthorizationsLoader @@ -16,6 +17,7 @@ __all__ = [ 'DividendsByExDateLoader', 'DividendsByPayDateLoader', 'EarningsCalendarLoader', + 'ConsensusEstimatesLoader', 'ShareBuybackAuthorizationsLoader', 'USEquityPricingLoader', ] diff --git a/zipline/pipeline/loaders/blaze/__init__.py b/zipline/pipeline/loaders/blaze/__init__.py index 1a624359..9fec0579 100644 --- a/zipline/pipeline/loaders/blaze/__init__.py +++ b/zipline/pipeline/loaders/blaze/__init__.py @@ -17,10 +17,12 @@ from .dividends import ( from .earnings import ( BlazeEarningsCalendarLoader, ) +from .consensus_estimates import BlazeConsensusEstimatesLoader __all__ = ( 'BlazeCashBuybackAuthorizationsLoader', 
'BlazeDividendsByAnnouncementDateLoader', + 'BlazeConsensusEstimatesLoader', 'BlazeDividendsByExDateLoader', 'BlazeDividendsByPayDateLoader', 'BlazeEarningsCalendarLoader', diff --git a/zipline/pipeline/loaders/blaze/consensus_estimates.py b/zipline/pipeline/loaders/blaze/consensus_estimates.py new file mode 100644 index 00000000..3176d9d4 --- /dev/null +++ b/zipline/pipeline/loaders/blaze/consensus_estimates.py @@ -0,0 +1,91 @@ +from zipline.pipeline.common import ( + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + HIGH_FIELD_NAME, + LOW_FIELD_NAME, + MEAN_FIELD_NAME, + RELEASE_DATE_FIELD_NAME, + SID_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, + TS_FIELD_NAME, +) +from zipline.pipeline.data import ConsensusEstimates +from zipline.pipeline.loaders import ConsensusEstimatesLoader +from .events import BlazeEventsLoader + + +class BlazeConsensusEstimatesLoader(BlazeEventsLoader): + """A pipeline loader for the ``ConsensusEstimates`` dataset that + loads + data from a blaze expression. + + Parameters + ---------- + expr : Expr + The expression representing the data to load. + resources : dict, optional + Mapping from the loadable terms of ``expr`` to actual data resources. + odo_kwargs : dict, optional + Extra keyword arguments to pass to odo when executing the expression. + data_query_time : time, optional + The time to use for the data query cutoff. + data_query_tz : tzinfo or str + The timezone to use for the data query cutoff. + dataset: DataSet + The DataSet object for which this loader loads data. 
+ + Notes + ----- + The expression should have a tabular dshape of:: + + Dim * {{ + {SID_FIELD_NAME}: int64, + {TS_FIELD_NAME}: datetime, + {RELEASE_DATE_FIELD_NAME}: ?datetime, + {STANDARD_DEVIATION_FIELD_NAME}: ?float64, + {COUNT_FIELD_NAME}: ?float64, + {FISCAL_QUARTER_FIELD_NAME}: ?float64, + {HIGH_FIELD_NAME}: ?float64, + {MEAN_FIELD_NAME}: ?float64, + {FISCAL_YEAR_FIELD_NAME}: ?float64, + {LOW_FIELD_NAME}: ?float64, + }} + + Where each row of the table is a record including the sid to identify the + company, the timestamp where we learned about the announcement, + the release date for the corresponding estimate, and other estimate + information. + + If the '{TS_FIELD_NAME}' field is not included it is assumed that we + start the backtest with knowledge of all announcements. + """ + + __doc__ = __doc__.format( + TS_FIELD_NAME=TS_FIELD_NAME, + SID_FIELD_NAME=SID_FIELD_NAME, + RELEASE_DATE_FIELD_NAME=RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME=STANDARD_DEVIATION_FIELD_NAME, + COUNT_FIELD_NAME=COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME=FISCAL_QUARTER_FIELD_NAME, + HIGH_FIELD_NAME=HIGH_FIELD_NAME, + MEAN_FIELD_NAME=MEAN_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME=FISCAL_YEAR_FIELD_NAME, + LOW_FIELD_NAME=LOW_FIELD_NAME, + ) + + _expected_fields = frozenset({ + TS_FIELD_NAME, + SID_FIELD_NAME, + RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + HIGH_FIELD_NAME, + MEAN_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + LOW_FIELD_NAME, + }) + + concrete_loader = ConsensusEstimatesLoader + default_dataset = ConsensusEstimates diff --git a/zipline/pipeline/loaders/blaze/dividends.py b/zipline/pipeline/loaders/blaze/dividends.py index eef172b0..6fd86be1 100644 --- a/zipline/pipeline/loaders/blaze/dividends.py +++ b/zipline/pipeline/loaders/blaze/dividends.py @@ -45,7 +45,7 @@ class BlazeDividendsByAnnouncementDateLoader(BlazeEventsLoader): Dim * {{ {SID_FIELD_NAME}: int64, {TS_FIELD_NAME}: datetime, - 
{CASH_AMOUNT_FIELD_NAME}: ?datetime, + {CASH_AMOUNT_FIELD_NAME}: ?float64, {ANNOUNCEMENT_FIELD_NAME}: ?datetime, }} diff --git a/zipline/pipeline/loaders/consensus_estimates.py b/zipline/pipeline/loaders/consensus_estimates.py new file mode 100644 index 00000000..e2d02f2a --- /dev/null +++ b/zipline/pipeline/loaders/consensus_estimates.py @@ -0,0 +1,162 @@ +""" +Reference implementation for ConsensusEstimates loaders. +""" + +from ..data import ConsensusEstimates +from .events import EventsLoader +from zipline.pipeline.common import ( + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + HIGH_FIELD_NAME, + LOW_FIELD_NAME, + MEAN_FIELD_NAME, + RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, +) +from zipline.utils.memoize import lazyval + + +class ConsensusEstimatesLoader(EventsLoader): + + expected_cols = frozenset([RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, + COUNT_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + HIGH_FIELD_NAME, + MEAN_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + LOW_FIELD_NAME]) + + def __init__(self, all_dates, events_by_sid, + infer_timestamps=False, + dataset=ConsensusEstimates): + super(ConsensusEstimatesLoader, self).__init__( + all_dates, events_by_sid, infer_timestamps, dataset=dataset, + ) + + @lazyval + def next_release_date_loader(self): + return self._next_event_date_loader( + self.dataset.next_release_date, + RELEASE_DATE_FIELD_NAME + ) + + @lazyval + def previous_release_date_loader(self): + return self._previous_event_date_loader( + self.dataset.previous_release_date, + RELEASE_DATE_FIELD_NAME, + ) + + @lazyval + def next_standard_deviation_loader(self): + return self._next_event_value_loader( + self.dataset.next_standard_deviation, + RELEASE_DATE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, + ) + + @lazyval + def previous_standard_deviation_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_standard_deviation, + RELEASE_DATE_FIELD_NAME, + 
STANDARD_DEVIATION_FIELD_NAME, + ) + + @lazyval + def next_count_loader(self): + return self._next_event_value_loader( + self.dataset.next_count, + RELEASE_DATE_FIELD_NAME, + COUNT_FIELD_NAME, + ) + + @lazyval + def previous_count_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_count, + RELEASE_DATE_FIELD_NAME, + COUNT_FIELD_NAME, + ) + + @lazyval + def next_fiscal_quarter_loader(self): + return self._next_event_value_loader( + self.dataset.next_fiscal_quarter, + RELEASE_DATE_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + ) + + @lazyval + def previous_fiscal_quarter_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_fiscal_quarter, + RELEASE_DATE_FIELD_NAME, + FISCAL_QUARTER_FIELD_NAME, + ) + + @lazyval + def next_high_loader(self): + return self._next_event_value_loader( + self.dataset.next_high, + RELEASE_DATE_FIELD_NAME, + HIGH_FIELD_NAME, + ) + + @lazyval + def previous_high_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_high, + RELEASE_DATE_FIELD_NAME, + HIGH_FIELD_NAME, + ) + + @lazyval + def next_mean_loader(self): + return self._next_event_value_loader( + self.dataset.next_mean, + RELEASE_DATE_FIELD_NAME, + MEAN_FIELD_NAME, + ) + + @lazyval + def previous_mean_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_mean, + RELEASE_DATE_FIELD_NAME, + MEAN_FIELD_NAME, + ) + + @lazyval + def next_fiscal_year_loader(self): + return self._next_event_value_loader( + self.dataset.next_fiscal_year, + RELEASE_DATE_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + ) + + @lazyval + def previous_fiscal_year_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_fiscal_year, + RELEASE_DATE_FIELD_NAME, + FISCAL_YEAR_FIELD_NAME, + ) + + @lazyval + def next_low_loader(self): + return self._next_event_value_loader( + self.dataset.next_low, + RELEASE_DATE_FIELD_NAME, + LOW_FIELD_NAME, + ) + + @lazyval + def previous_low_loader(self): + 
return self._previous_event_value_loader( + self.dataset.previous_low, + RELEASE_DATE_FIELD_NAME, + LOW_FIELD_NAME, + ) diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py index 1689dc8e..9748f0bb 100644 --- a/zipline/testing/fixtures.py +++ b/zipline/testing/fixtures.py @@ -6,19 +6,17 @@ from unittest import TestCase from contextlib2 import ExitStack from logbook import NullHandler, Logger from nose_parameterized import parameterized -import numpy as np -import pandas as pd from pandas.util.testing import assert_series_equal -import responses from six import with_metaclass, iteritems from toolz import flip +import numpy as np +import pandas as pd +import responses + -from ..assets.synthetic import make_simple_equity_info from .core import ( create_daily_bar_data, create_minute_bar_data, - gen_calendars, - tmp_asset_finder, tmp_dir, ) from ..data.data_portal import DataPortal @@ -26,7 +24,6 @@ from ..data.us_equity_pricing import ( SQLiteAdjustmentReader, SQLiteAdjustmentWriter, ) -from ..finance.trading import TradingEnvironment from ..data.us_equity_pricing import ( BcolzDailyBarReader, BcolzDailyBarWriter, @@ -36,13 +33,21 @@ from ..data.minute_bars import ( BcolzMinuteBarWriter, US_EQUITIES_MINUTES_PER_DAY ) + +from ..finance.trading import TradingEnvironment from ..utils import tradingcalendar, factory from ..utils.classproperty import classproperty from ..utils.final import FinalMeta, final from ..utils.metautils import compose_types -from ..pipeline import Pipeline, SimplePipelineEngine -from ..utils.numpy_utils import make_datetime64D -from ..utils.numpy_utils import NaTD +from .core import tmp_asset_finder, make_simple_equity_info, gen_calendars +from zipline.pipeline import Pipeline, SimplePipelineEngine +from zipline.utils.numpy_utils import make_datetime64D +from zipline.utils.numpy_utils import NaTD +from zipline.pipeline.common import TS_FIELD_NAME +from zipline.pipeline.loaders.utils import ( + get_values_for_date_ranges, + 
zip_with_dates +) def _take_out_the_trash(): @@ -890,6 +895,42 @@ class WithPipelineEventDataLoader(with_metaclass( loader = self.loader_type(*self.pipeline_event_loader_args(dates)) return SimplePipelineEngine(lambda _: loader, dates, self.asset_finder) + def get_sids_to_frames(self, + zip_date_index_with_vals, + vals, + date_intervals, + dates): + """ + Construct a DataFrame that maps sid to the expected values for the + given dates. + + Parameters + ---------- + zip_date_index_with_vals: callable + A function that returns a series of `vals` repeated based on the + number of days in the date interval for each val, indexed by the + dates in `dates`. + vals: iterable + An iterable with values that correspond to each interval in + `date_intervals`. + date_intervals: list + A list of date intervals for each sid that correspond to values in + `vals`. + dates: DatetimeIndex + The dates which will serve as the index for each Series for each + sid in the DataFrame. + """ + frame = pd.DataFrame({sid: get_values_for_date_ranges( + zip_date_index_with_vals, + vals[sid], + date_intervals[sid], + dates + ) for sid in self.get_sids()[:-1]}) + frame[self.get_sids()[-1]] = zip_date_index_with_vals( + dates, ['NaN'] * len(dates) + ) + return frame + @staticmethod def _compute_busday_offsets(announcement_dates): """ @@ -1026,3 +1067,122 @@ class WithResponses(object): self.responses = self.enter_instance_context( responses.RequestsMock(), ) + + +class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader): + """ + ZiplineTestCase mixin extending common functionality for event data + loader tests that have both next and previous events. + + `base_cases` should be used as the template to test cases that combine + knowledge date (timestamp) and some 'other_date' in various ways. + `next_date_intervals` gives the date intervals for the next event based + on the dates given in `base_cases`. 
+ `next_dates` gives the next date from `other_date` which is known about at + each interval. + `prev_date_intervals` gives the date intervals for each sid for the + previous event based on the dates given in `base_cases`. + `prev_dates` gives the previous date from `other_date` which is known + about at each interval. + `get_expected_previous_event_dates` is a convenience function that fills + a DataFrame with the previously known dates for each sid for the given + dates. + `get_expected_next_event_dates` is a convenience function that fills + a DataFrame with the next known dates for each sid for the given + dates. + """ + base_cases = [ + # K1--K2--A1--A2. + pd.DataFrame({ + TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), + 'other_date': pd.to_datetime(['2014-01-15', '2014-01-20']), + }), + # K1--K2--A2--A1. + pd.DataFrame({ + TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), + 'other_date': pd.to_datetime(['2014-01-20', '2014-01-15']), + }), + # K1--A1--K2--A2. + pd.DataFrame({ + TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']), + 'other_date': pd.to_datetime(['2014-01-10', '2014-01-20']), + }), + # K1 == K2. 
+ pd.DataFrame({ + TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2), + 'other_date': pd.to_datetime(['2014-01-10', '2014-01-15']), + }), + pd.DataFrame( + columns=['other_date', + TS_FIELD_NAME], + dtype='datetime64[ns]' + ), + ] + + next_date_intervals = [ + [[None, '2014-01-04'], + ['2014-01-05', '2014-01-15'], + ['2014-01-16', '2014-01-20'], + ['2014-01-21', None]], + [[None, '2014-01-04'], + ['2014-01-05', '2014-01-09'], + ['2014-01-10', '2014-01-15'], + ['2014-01-16', '2014-01-20'], + ['2014-01-21', None]], + [[None, '2014-01-04'], + ['2014-01-05', '2014-01-10'], + ['2014-01-11', '2014-01-14'], + ['2014-01-15', '2014-01-20'], + ['2014-01-21', None]], + [[None, '2014-01-04'], + ['2014-01-05', '2014-01-10'], + ['2014-01-11', '2014-01-15'], + ['2014-01-16', None]] + ] + + next_dates = [ + ['NaT', '2014-01-15', '2014-01-20', 'NaT'], + ['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'], + ['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'], + ['NaT', '2014-01-10', '2014-01-15', 'NaT'], + ['NaT'] + ] + + prev_date_intervals = [ + [[None, '2014-01-14'], + ['2014-01-15', '2014-01-19'], + ['2014-01-20', None]], + [[None, '2014-01-14'], + ['2014-01-15', '2014-01-19'], + ['2014-01-20', None]], + [[None, '2014-01-09'], + ['2014-01-10', '2014-01-19'], + ['2014-01-20', None]], + [[None, '2014-01-09'], + ['2014-01-10', '2014-01-14'], + ['2014-01-15', None]] + ] + + prev_dates = [ + ['NaT', '2014-01-15', '2014-01-20'], + ['NaT', '2014-01-15', '2014-01-20'], + ['NaT', '2014-01-10', '2014-01-20'], + ['NaT', '2014-01-10', '2014-01-15'], + ['NaT'] + ] + + def get_expected_previous_event_dates(self, dates): + return self.get_sids_to_frames( + zip_with_dates, + self.prev_dates, + self.prev_date_intervals, + dates + ) + + def get_expected_next_event_dates(self, dates): + return self.get_sids_to_frames( + zip_with_dates, + self.next_dates, + self.next_date_intervals, + dates + )