diff --git a/tests/pipeline/test_blaze.py b/tests/pipeline/test_blaze.py index e1ee109e..c42f4e14 100644 --- a/tests/pipeline/test_blaze.py +++ b/tests/pipeline/test_blaze.py @@ -399,7 +399,7 @@ class BlazeToPipelineTestCase(TestCase): @with_extra_sid def test_deltas(self, asset_info): expr = bz.Data(self.df, name='expr', dshape=self.dshape) - deltas = bz.Data(self.df.iloc[:-3], name='deltas', dshape=self.dshape) + deltas = bz.Data(self.df, name='deltas', dshape=self.dshape) deltas = bz.transform( deltas, value=deltas.value + 10, @@ -411,6 +411,8 @@ class BlazeToPipelineTestCase(TestCase): [1.0, 2.0, 3.0]]), '2014-01-03': np.array([[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]), + '2014-01-04': np.array([[12.0, 13.0, 14.0], + [12.0, 13.0, 14.0]]), }) nassets = len(asset_info) @@ -422,7 +424,7 @@ class BlazeToPipelineTestCase(TestCase): with tmp_asset_finder(asset_info) as finder: expected_output = pd.DataFrame( - list(concatv([12] * nassets, [13] * nassets)), + list(concatv([12] * nassets, [13] * nassets, [14] * nassets)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), @@ -430,6 +432,7 @@ class BlazeToPipelineTestCase(TestCase): columns=('value',), ) dates = self.dates + dates = dates.insert(len(dates), dates[-1] + timedelta(days=1)) self._run_pipeline( expr, deltas, diff --git a/zipline/pipeline/loaders/blaze.py b/zipline/pipeline/loaders/blaze.py index a90650d4..d3d9d4d9 100644 --- a/zipline/pipeline/loaders/blaze.py +++ b/zipline/pipeline/loaders/blaze.py @@ -648,11 +648,30 @@ def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value): ------- overwrite : Float64Overwrite The overwrite that will apply the new value to the data. + + Notes + ----- + This forward-fills all dense dates from ``asof`` up to, but not + including, the next sparse date after ``asof``. 
+ + For example: + let ``asof = pd.Timestamp('2014-01-02')``, + ``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``, and + ``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])`` + + Then the overwrite will apply to indexes: 1, 2, 3, 4 """ first_row = dense_dates.searchsorted(asof) - last_row = dense_dates.searchsorted( - sparse_dates[sparse_dates.searchsorted(asof, 'right')], - ) - 1 + next_idx = sparse_dates.searchsorted(asof, 'right') + if next_idx == len(sparse_dates): + # There is no next date in the sparse dates, so this overwrite should + # apply through the end of the dense dates. + last_row = len(dense_dates) - 1 + else: + # There is a next date in sparse dates. This means that the overwrite + # should only apply until the index of this date in the dense dates. + last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1 + if first_row > last_row: return