From bd58140b976f7db2f2a1d9de8a75cfcfe3983e30 Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Tue, 19 Apr 2016 17:06:57 -0400 Subject: [PATCH] ENH: add actual value column to estimates dataset. --- tests/pipeline/test_consensus_estimates.py | 20 ++++++++++++++++++- zipline/pipeline/common.py | 2 ++ zipline/pipeline/data/consensus_estimates.py | 1 + .../loaders/blaze/consensus_estimates.py | 4 ++++ .../pipeline/loaders/consensus_estimates.py | 13 ++++++++++-- 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tests/pipeline/test_consensus_estimates.py b/tests/pipeline/test_consensus_estimates.py index e49f1b1e..3e0faae4 100644 --- a/tests/pipeline/test_consensus_estimates.py +++ b/tests/pipeline/test_consensus_estimates.py @@ -7,6 +7,7 @@ import pandas as pd from six import iteritems from zipline.pipeline.common import ( + ACTUAL_VALUE_FIELD_NAME, COUNT_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, FISCAL_YEAR_FIELD_NAME, @@ -20,6 +21,7 @@ from zipline.pipeline.common import ( NEXT_LOW, NEXT_RELEASE_DATE, NEXT_STANDARD_DEVIATION, + PREVIOUS_ACTUAL_VALUE, PREVIOUS_COUNT, PREVIOUS_FISCAL_QUARTER, PREVIOUS_FISCAL_YEAR, @@ -47,6 +49,7 @@ from zipline.testing.fixtures import ( consensus_estimates_cases = [ # K1--K2--A1--A2. pd.DataFrame({ + ACTUAL_VALUE_FIELD_NAME: (100, 200), STANDARD_DEVIATION_FIELD_NAME: (.5, .6), COUNT_FIELD_NAME: (1, 2), FISCAL_QUARTER_FIELD_NAME: (1, 1), @@ -57,6 +60,7 @@ consensus_estimates_cases = [ }), # K1--K2--A2--A1. pd.DataFrame({ + ACTUAL_VALUE_FIELD_NAME: (200, 300), STANDARD_DEVIATION_FIELD_NAME: (.6, .7), COUNT_FIELD_NAME: (2, 3), FISCAL_QUARTER_FIELD_NAME: (1, 1), @@ -67,6 +71,7 @@ consensus_estimates_cases = [ }), # K1--A1--K2--A2. pd.DataFrame({ + ACTUAL_VALUE_FIELD_NAME: (300, 400), STANDARD_DEVIATION_FIELD_NAME: (.7, .8), COUNT_FIELD_NAME: (3, 4), FISCAL_QUARTER_FIELD_NAME: (1, 1), @@ -77,6 +82,7 @@ consensus_estimates_cases = [ }), # K1 == K2. pd.DataFrame({ + ACTUAL_VALUE_FIELD_NAME: (400, 500), STANDARD_DEVIATION_FIELD_NAME: (.8, .9), COUNT_FIELD_NAME: (4, 5), FISCAL_QUARTER_FIELD_NAME: (1, 1), @@ -86,7 +92,8 @@ consensus_estimates_cases = [ LOW_FIELD_NAME: (.08, .09), }), pd.DataFrame( - columns=[STANDARD_DEVIATION_FIELD_NAME, + columns=[ACTUAL_VALUE_FIELD_NAME, + STANDARD_DEVIATION_FIELD_NAME, COUNT_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, HIGH_FIELD_NAME, @@ -97,6 +104,14 @@ consensus_estimates_cases = [ ), ] +prev_actual_value = [ + ['NaN', 100, 200], + ['NaN', 300, 200], + ['NaN', 300, 400], + ['NaN', 400, 500], + ['NaN'] +] + next_standard_deviation = [ ['NaN', .5, .6, 'NaN'], ['NaN', .6, .7, .6, 'NaN'], @@ -210,6 +225,7 @@ prev_low = [ ] field_name_to_expected_col = { + PREVIOUS_ACTUAL_VALUE: prev_actual_value, PREVIOUS_STANDARD_DEVIATION: prev_standard_deviation, NEXT_STANDARD_DEVIATION: next_standard_deviation, PREVIOUS_COUNT: prev_count, @@ -233,6 +249,8 @@ class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader, Tests for loading the consensus estimates data. """ pipeline_columns = { + PREVIOUS_ACTUAL_VALUE: + ConsensusEstimates.previous_actual_value.latest, NEXT_RELEASE_DATE: ConsensusEstimates.next_release_date.latest, PREVIOUS_RELEASE_DATE: diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py index 8b86d1e3..f6dd4c8f 100644 --- a/zipline/pipeline/common.py +++ b/zipline/pipeline/common.py @@ -1,6 +1,7 @@ """ Common constants for Pipeline. """ +ACTUAL_VALUE_FIELD_NAME = 'actual_value' AD_FIELD_NAME = 'asof_date' ANNOUNCEMENT_FIELD_NAME = 'announcement_date' CASH_FIELD_NAME = 'cash' @@ -31,6 +32,7 @@ NEXT_PAY_DATE = 'next_pay_date' NEXT_RELEASE_DATE = 'next_release_date' NEXT_STANDARD_DEVIATION = 'next_standard_deviation' PAY_DATE_FIELD_NAME = 'pay_date' +PREVIOUS_ACTUAL_VALUE = 'previous_actual_value' PREVIOUS_AMOUNT = 'previous_amount' PREVIOUS_ANNOUNCEMENT = 'previous_announcement' PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement' diff --git a/zipline/pipeline/data/consensus_estimates.py b/zipline/pipeline/data/consensus_estimates.py index 614e6597..ee784290 100644 --- a/zipline/pipeline/data/consensus_estimates.py +++ b/zipline/pipeline/data/consensus_estimates.py @@ -26,3 +26,4 @@ class ConsensusEstimates(DataSet): next_fiscal_year = Column(float64_dtype) previous_low = Column(float64_dtype) next_low = Column(float64_dtype) + previous_actual_value = Column(float64_dtype) diff --git a/zipline/pipeline/loaders/blaze/consensus_estimates.py b/zipline/pipeline/loaders/blaze/consensus_estimates.py index 3176d9d4..6e4ea31b 100644 --- a/zipline/pipeline/loaders/blaze/consensus_estimates.py +++ b/zipline/pipeline/loaders/blaze/consensus_estimates.py @@ -9,6 +9,7 @@ from zipline.pipeline.common import ( SID_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME, TS_FIELD_NAME, + ACTUAL_VALUE_FIELD_NAME ) from zipline.pipeline.data import ConsensusEstimates from zipline.pipeline.loaders import ConsensusEstimatesLoader @@ -50,6 +51,7 @@ class BlazeConsensusEstimatesLoader(BlazeEventsLoader): {MEAN_FIELD_NAME}: ?float64, {FISCAL_YEAR_FIELD_NAME}: ?float64, {LOW_FIELD_NAME}: ?float64, + {ACTUAL_VALUE_FIELD_NAME}: ?float64 }} Where each row of the table is a record including the sid to identify the @@ -72,6 +74,7 @@ class BlazeConsensusEstimatesLoader(BlazeEventsLoader): MEAN_FIELD_NAME=MEAN_FIELD_NAME, FISCAL_YEAR_FIELD_NAME=FISCAL_YEAR_FIELD_NAME, LOW_FIELD_NAME=LOW_FIELD_NAME, + ACTUAL_VALUE_FIELD_NAME=ACTUAL_VALUE_FIELD_NAME ) _expected_fields = frozenset({ @@ -85,6 +88,7 @@ class BlazeConsensusEstimatesLoader(BlazeEventsLoader): MEAN_FIELD_NAME, FISCAL_YEAR_FIELD_NAME, LOW_FIELD_NAME, + ACTUAL_VALUE_FIELD_NAME }) concrete_loader = ConsensusEstimatesLoader diff --git a/zipline/pipeline/loaders/consensus_estimates.py b/zipline/pipeline/loaders/consensus_estimates.py index e2d02f2a..666931a1 100644 --- a/zipline/pipeline/loaders/consensus_estimates.py +++ b/zipline/pipeline/loaders/consensus_estimates.py @@ -13,7 +13,7 @@ from zipline.pipeline.common import ( MEAN_FIELD_NAME, RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME, -) + ACTUAL_VALUE_FIELD_NAME) from zipline.utils.memoize import lazyval @@ -26,7 +26,8 @@ class ConsensusEstimatesLoader(EventsLoader): HIGH_FIELD_NAME, MEAN_FIELD_NAME, FISCAL_YEAR_FIELD_NAME, - LOW_FIELD_NAME]) + LOW_FIELD_NAME, + ACTUAL_VALUE_FIELD_NAME]) def __init__(self, all_dates, events_by_sid, infer_timestamps=False, @@ -160,3 +161,11 @@ class ConsensusEstimatesLoader(EventsLoader): RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME, ) + + @lazyval + def previous_actual_value_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_actual_value, + RELEASE_DATE_FIELD_NAME, + ACTUAL_VALUE_FIELD_NAME, + )