From 80cc2bd6f63d13bf9c9355eecb071e7cd547df7f Mon Sep 17 00:00:00 2001 From: llllllllll Date: Mon, 2 Nov 2015 13:30:02 -0500 Subject: [PATCH] BUG: Corrects an index error in blaze loader. Fixes the case where a delta has an asof_date of the last requested day and an index error would occur. This guards against this specifically to make the delta be effective through the end of the requested window. Adds a test case for this behavior. --- tests/pipeline/test_blaze.py | 7 +++++-- zipline/pipeline/loaders/blaze.py | 25 ++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/test_blaze.py b/tests/pipeline/test_blaze.py index e1ee109e..c42f4e14 100644 --- a/tests/pipeline/test_blaze.py +++ b/tests/pipeline/test_blaze.py @@ -399,7 +399,7 @@ class BlazeToPipelineTestCase(TestCase): @with_extra_sid def test_deltas(self, asset_info): expr = bz.Data(self.df, name='expr', dshape=self.dshape) - deltas = bz.Data(self.df.iloc[:-3], name='deltas', dshape=self.dshape) + deltas = bz.Data(self.df, name='deltas', dshape=self.dshape) deltas = bz.transform( deltas, value=deltas.value + 10, @@ -411,6 +411,8 @@ class BlazeToPipelineTestCase(TestCase): [1.0, 2.0, 3.0]]), '2014-01-03': np.array([[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]), + '2014-01-04': np.array([[12.0, 13.0, 14.0], + [12.0, 13.0, 14.0]]), }) nassets = len(asset_info) @@ -422,7 +424,7 @@ class BlazeToPipelineTestCase(TestCase): with tmp_asset_finder(asset_info) as finder: expected_output = pd.DataFrame( - list(concatv([12] * nassets, [13] * nassets)), + list(concatv([12] * nassets, [13] * nassets, [14] * nassets)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), @@ -430,6 +432,7 @@ class BlazeToPipelineTestCase(TestCase): columns=('value',), ) dates = self.dates + dates = dates.insert(len(dates), dates[-1] + timedelta(days=1)) self._run_pipeline( expr, deltas, diff --git a/zipline/pipeline/loaders/blaze.py 
b/zipline/pipeline/loaders/blaze.py index a90650d4..d3d9d4d9 100644 --- a/zipline/pipeline/loaders/blaze.py +++ b/zipline/pipeline/loaders/blaze.py @@ -648,11 +648,30 @@ def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value): ------- overwrite : Float64Overwrite The overwrite that will apply the new value to the data. + + Notes + ----- + This is forward-filling all dense dates that are between the asof_date + and the next sparse date after the asof_date. + + For example: + let ``asof = pd.Timestamp('2014-01-02')``, + ``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``, + ``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])`` + + Then the overwrite will apply to indexes: 1, 2, 3, 4 """ first_row = dense_dates.searchsorted(asof) - last_row = dense_dates.searchsorted( - sparse_dates[sparse_dates.searchsorted(asof, 'right')], - ) - 1 + next_idx = sparse_dates.searchsorted(asof, 'right') + if next_idx == len(sparse_dates): + # There is no next date in the sparse dates; this overwrite should apply + # through the end of the dense dates. + last_row = len(dense_dates) - 1 + else: + # There is a next date in sparse dates. This means that the overwrite + # should only apply until the index of this date in the dense dates. + last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1 + if first_row > last_row: return