From e304786fa035624ac5c3b6294379b6b88ba51051 Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Wed, 5 Oct 2016 12:40:21 -0400 Subject: [PATCH 1/2] BUG: append overwrites to list rather than replacing with new list --- zipline/pipeline/loaders/earnings_estimates.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py index 5a794683..9c5a3833 100644 --- a/zipline/pipeline/loaders/earnings_estimates.py +++ b/zipline/pipeline/loaders/earnings_estimates.py @@ -280,6 +280,10 @@ class EarningsEstimatesLoader(PipelineLoader): sid_to_idx = dict(zip(assets, range(len(assets)))) + for column in columns: + column_name = self.name_map[column.name] + col_to_overwrites[column_name] = defaultdict(list) + def collect_adjustments(group): next_qtr_start_indices = dates.searchsorted( group[EVENT_DATE_FIELD_NAME].values, @@ -358,7 +362,7 @@ class EarningsEstimatesLoader(PipelineLoader): # overwrite all values going up to the starting index of # that quarter with estimates for that quarter. if requested_quarter in quarters_with_estimates_for_sid: - col_to_overwrites[column_name][next_qtr_start_idx] = [ + col_to_overwrites[column_name][next_qtr_start_idx].extend([ self.create_overwrite_for_estimate( col, column_name, @@ -368,19 +372,19 @@ class EarningsEstimatesLoader(PipelineLoader): sid, sid_idx ), - ] + ]) # There are no estimates for the quarter. Overwrite all # values going up to the starting index of that quarter # with the missing value for this column. else: - col_to_overwrites[column_name][next_qtr_start_idx] = [ + col_to_overwrites[column_name][next_qtr_start_idx].extend([ self.overwrite_with_null( col, last_per_qtr.index, next_qtr_start_idx, sid_idx ), - ] + ]) def overwrite_with_null(self, column, From ea5b2b030c5c592e5e7acbe744234add9f3452e2 Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Wed, 5 Oct 2016 12:40:34 -0400 Subject: [PATCH 2/2] TST: add test condition to check for bug --- tests/pipeline/test_quarters_estimates.py | 118 ++++++++++++------ .../pipeline/loaders/earnings_estimates.py | 8 +- 2 files changed, 82 insertions(+), 44 deletions(-) diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py index 8f0cf777..40933fca 100644 --- a/tests/pipeline/test_quarters_estimates.py +++ b/tests/pipeline/test_quarters_estimates.py @@ -819,7 +819,7 @@ class WithEstimateWindows(WithEstimates): pd.Timestamp('2015-01-10'), pd.Timestamp('2015-01-20'), pd.Timestamp('2015-01-20')], - 'estimate': [10., 11.] + [20., 21.], + 'estimate': [100., 101.] + [200., 201.], FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2, FISCAL_YEAR_FIELD_NAME: 2015, SID_FIELD_NAME: 0, @@ -833,12 +833,30 @@ class WithEstimateWindows(WithEstimates): EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')], - 'estimate': [10., 11.] + [30., 31.], + 'estimate': [110., 111.] + [310., 311.], FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2, FISCAL_YEAR_FIELD_NAME: 2015, SID_FIELD_NAME: 1 }) - return pd.concat([sid_0_timeline, sid_1_timeline]) + + # Extra sid to make sure we have correct overwrites when sid quarter + # boundaries collide. + sid_3_timeline = pd.DataFrame({ + TS_FIELD_NAME: [pd.Timestamp('2015-01-05'), + pd.Timestamp('2015-01-07'), + pd.Timestamp('2015-01-05'), + pd.Timestamp('2015-01-17')], + EVENT_DATE_FIELD_NAME: + [pd.Timestamp('2015-01-10'), + pd.Timestamp('2015-01-10'), + pd.Timestamp('2015-01-20'), + pd.Timestamp('2015-01-20')], + 'estimate': [120., 121.] + [220., 221.], + FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2, + FISCAL_YEAR_FIELD_NAME: 2015, + SID_FIELD_NAME: 2 + }) + return pd.concat([sid_0_timeline, sid_1_timeline, sid_3_timeline]) @classmethod def make_expected_timelines(cls): @@ -925,37 +943,44 @@ class PreviousEstimateWindows(WithEstimateWindows, ZiplineTestCase): oneq_previous = pd.concat([ cls.create_expected_df( [(0, np.NaN, cls.window_test_start_date), - (1, np.NaN, cls.window_test_start_date)], + (1, np.NaN, cls.window_test_start_date), + (2, np.NaN, cls.window_test_start_date)], pd.Timestamp('2015-01-09') ), cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-10')), - (1, 11, pd.Timestamp('2015-01-12'))], + [(0, 101, pd.Timestamp('2015-01-10')), + (1, 111, pd.Timestamp('2015-01-12')), + (2, 121, pd.Timestamp('2015-01-10'))], pd.Timestamp('2015-01-12') ), cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-10')), - (1, 11, pd.Timestamp('2015-01-12'))], + [(0, 101, pd.Timestamp('2015-01-10')), + (1, 111, pd.Timestamp('2015-01-12')), + (2, 121, pd.Timestamp('2015-01-10'))], pd.Timestamp('2015-01-13') ), cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-10')), - (1, 11, pd.Timestamp('2015-01-12'))], + [(0, 101, pd.Timestamp('2015-01-10')), + (1, 111, pd.Timestamp('2015-01-12')), + (2, 121, pd.Timestamp('2015-01-10'))], pd.Timestamp('2015-01-14') ), cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-10')), - (1, 31, pd.Timestamp('2015-01-15'))], + [(0, 101, pd.Timestamp('2015-01-10')), + (1, 311, pd.Timestamp('2015-01-15')), + (2, 121, pd.Timestamp('2015-01-10'))], pd.Timestamp('2015-01-15') ), cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-10')), - (1, 31, pd.Timestamp('2015-01-15'))], + [(0, 101, pd.Timestamp('2015-01-10')), + (1, 311, pd.Timestamp('2015-01-15')), + (2, 121, pd.Timestamp('2015-01-10'))], pd.Timestamp('2015-01-16') ), cls.create_expected_df( - [(0, 21, pd.Timestamp('2015-01-17')), - (1, 31, pd.Timestamp('2015-01-15'))], + [(0, 201, pd.Timestamp('2015-01-17')), + (1, 311, pd.Timestamp('2015-01-15')), + (2, 221, pd.Timestamp('2015-01-17'))], pd.Timestamp('2015-01-20') ), ]) @@ -963,12 +988,14 @@ class PreviousEstimateWindows(WithEstimateWindows, ZiplineTestCase): twoq_previous = pd.concat( [cls.create_expected_df( [(0, np.NaN, cls.window_test_start_date), - (1, np.NaN, cls.window_test_start_date)], + (1, np.NaN, cls.window_test_start_date), + (2, np.NaN, cls.window_test_start_date)], end_date ) for end_date in pd.date_range('2015-01-09', '2015-01-19')] + [cls.create_expected_df( - [(0, 11, pd.Timestamp('2015-01-20')), - (1, np.NaN, cls.window_test_start_date)], + [(0, 101, pd.Timestamp('2015-01-20')), + (1, np.NaN, cls.window_test_start_date), + (2, 121, pd.Timestamp('2015-01-20'))], pd.Timestamp('2015-01-20') )] ) @@ -987,55 +1014,66 @@ class NextEstimateWindows(WithEstimateWindows, ZiplineTestCase): def make_expected_timelines(cls): oneq_next = pd.concat([ cls.create_expected_df( - [(0, 10, cls.window_test_start_date), - (0, 11, pd.Timestamp('2015-01-07')), - (1, 10, pd.Timestamp('2015-01-09'))], + [(0, 100, cls.window_test_start_date), + (0, 101, pd.Timestamp('2015-01-07')), + (1, 110, pd.Timestamp('2015-01-09')), + (2, 120, cls.window_test_start_date), + (2, 121, pd.Timestamp('2015-01-07'))], pd.Timestamp('2015-01-09') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (1, 10, pd.Timestamp('2015-01-09')), - (1, 11, pd.Timestamp('2015-01-12'))], + [(0, 200, cls.window_test_start_date), + (1, 110, pd.Timestamp('2015-01-09')), + (1, 111, pd.Timestamp('2015-01-12')), + (2, 220, cls.window_test_start_date)], pd.Timestamp('2015-01-12') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (1, 30, pd.Timestamp('2015-01-09'))], + [(0, 200, cls.window_test_start_date), + (1, 310, pd.Timestamp('2015-01-09')), + (2, 220, cls.window_test_start_date)], pd.Timestamp('2015-01-13') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (1, 30, pd.Timestamp('2015-01-09'))], + [(0, 200, cls.window_test_start_date), + (1, 310, pd.Timestamp('2015-01-09')), + (2, 220, cls.window_test_start_date)], pd.Timestamp('2015-01-14') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (1, 30, pd.Timestamp('2015-01-09')), - (1, 31, pd.Timestamp('2015-01-15'))], + [(0, 200, cls.window_test_start_date), + (1, 310, pd.Timestamp('2015-01-09')), + (1, 311, pd.Timestamp('2015-01-15')), + (2, 220, cls.window_test_start_date)], pd.Timestamp('2015-01-15') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (1, np.NaN, cls.window_test_start_date)], + [(0, 200, cls.window_test_start_date), + (1, np.NaN, cls.window_test_start_date), + (2, 220, cls.window_test_start_date)], pd.Timestamp('2015-01-16') ), cls.create_expected_df( - [(0, 20, cls.window_test_start_date), - (0, 21, pd.Timestamp('2015-01-17')), - (1, np.NaN, cls.window_test_start_date)], + [(0, 200, cls.window_test_start_date), + (0, 201, pd.Timestamp('2015-01-17')), + (1, np.NaN, cls.window_test_start_date), + (2, 220, cls.window_test_start_date), + (2, 221, pd.Timestamp('2015-01-17'))], pd.Timestamp('2015-01-20') ), ]) twoq_next = pd.concat( [cls.create_expected_df( - [(0, 20, pd.Timestamp(cls.window_test_start_date)), - (1, np.NaN, pd.Timestamp(cls.window_test_start_date))], + [(0, 200, pd.Timestamp(cls.window_test_start_date)), + (1, np.NaN, pd.Timestamp(cls.window_test_start_date)), + (2, 220, pd.Timestamp(cls.window_test_start_date))], pd.Timestamp('2015-01-09') )] + [cls.create_expected_df( [(0, np.NaN, pd.Timestamp(cls.window_test_start_date)), - (1, np.NaN, pd.Timestamp(cls.window_test_start_date))], + (1, np.NaN, pd.Timestamp(cls.window_test_start_date)), + (2, np.NaN, pd.Timestamp(cls.window_test_start_date))], end_date ) for end_date in pd.date_range('2015-01-12', '2015-01-20')] ) diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py index 9c5a3833..b28991d4 100644 --- a/zipline/pipeline/loaders/earnings_estimates.py +++ b/zipline/pipeline/loaders/earnings_estimates.py @@ -362,7 +362,7 @@ class EarningsEstimatesLoader(PipelineLoader): # overwrite all values going up to the starting index of # that quarter with estimates for that quarter. if requested_quarter in quarters_with_estimates_for_sid: - col_to_overwrites[column_name][next_qtr_start_idx].extend([ + col_to_overwrites[column_name][next_qtr_start_idx].append( self.create_overwrite_for_estimate( col, column_name, @@ -372,19 +372,19 @@ class EarningsEstimatesLoader(PipelineLoader): sid, sid_idx ), - ]) + ) # There are no estimates for the quarter. Overwrite all # values going up to the starting index of that quarter # with the missing value for this column. else: - col_to_overwrites[column_name][next_qtr_start_idx].extend([ + col_to_overwrites[column_name][next_qtr_start_idx].append( self.overwrite_with_null( col, last_per_qtr.index, next_qtr_start_idx, sid_idx ), - ]) + ) def overwrite_with_null(self, column,