BUG: Corrects an index error in blaze loader.

Fixes the case where a delta has an asof_date equal to the last requested
day, which previously caused an index error. The loader now guards against
this case explicitly, so that the delta remains in effect through the end
of the requested window.

Adds a test case for this behavior.
This commit is contained in:
llllllllll
2015-11-02 13:30:02 -05:00
parent 420df53d78
commit 80cc2bd6f6
2 changed files with 27 additions and 5 deletions
+5 -2
View File
@@ -399,7 +399,7 @@ class BlazeToPipelineTestCase(TestCase):
@with_extra_sid
def test_deltas(self, asset_info):
expr = bz.Data(self.df, name='expr', dshape=self.dshape)
deltas = bz.Data(self.df.iloc[:-3], name='deltas', dshape=self.dshape)
deltas = bz.Data(self.df, name='deltas', dshape=self.dshape)
deltas = bz.transform(
deltas,
value=deltas.value + 10,
@@ -411,6 +411,8 @@ class BlazeToPipelineTestCase(TestCase):
[1.0, 2.0, 3.0]]),
'2014-01-03': np.array([[11.0, 12.0, 13.0],
[2.0, 3.0, 4.0]]),
'2014-01-04': np.array([[12.0, 13.0, 14.0],
[12.0, 13.0, 14.0]]),
})
nassets = len(asset_info)
@@ -422,7 +424,7 @@ class BlazeToPipelineTestCase(TestCase):
with tmp_asset_finder(asset_info) as finder:
expected_output = pd.DataFrame(
list(concatv([12] * nassets, [13] * nassets)),
list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
index=pd.MultiIndex.from_product((
sorted(expected_views.keys()),
finder.retrieve_all(asset_info.index),
@@ -430,6 +432,7 @@ class BlazeToPipelineTestCase(TestCase):
columns=('value',),
)
dates = self.dates
dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
self._run_pipeline(
expr,
deltas,
+22 -3
View File
@@ -648,11 +648,30 @@ def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
-------
overwrite : Float64Overwrite
The overwrite that will apply the new value to the data.
Notes
-----
This forward-fills all dense dates that fall between the asof_date and the
next sparse date after the asof_date.
For example:
let ``asof = pd.Timestamp('2014-01-02')``,
``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``
``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])``
Then the overwrite will apply to indexes: 1, 2, 3, 4
"""
first_row = dense_dates.searchsorted(asof)
last_row = dense_dates.searchsorted(
sparse_dates[sparse_dates.searchsorted(asof, 'right')],
) - 1
next_idx = sparse_dates.searchsorted(asof, 'right')
if next_idx == len(sparse_dates):
# There is no next date in the sparse, this overwrite should apply
# through the end of the dense dates.
last_row = len(dense_dates) - 1
else:
# There is a next date in sparse dates. This means that the overwrite
# should only apply until the index of this date in the dense dates.
last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1
if first_row > last_row:
return