mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 07:56:38 +08:00
BUG: Corrects an index error in blaze loader.
Fixes the case where a delta has an asof_date equal to the last requested day, which caused an index error. The loader now guards against this case explicitly so that the delta remains in effect through the end of the requested window. Adds a test case for this behavior.
This commit is contained in:
@@ -399,7 +399,7 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
@with_extra_sid
|
||||
def test_deltas(self, asset_info):
|
||||
expr = bz.Data(self.df, name='expr', dshape=self.dshape)
|
||||
deltas = bz.Data(self.df.iloc[:-3], name='deltas', dshape=self.dshape)
|
||||
deltas = bz.Data(self.df, name='deltas', dshape=self.dshape)
|
||||
deltas = bz.transform(
|
||||
deltas,
|
||||
value=deltas.value + 10,
|
||||
@@ -411,6 +411,8 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
[1.0, 2.0, 3.0]]),
|
||||
'2014-01-03': np.array([[11.0, 12.0, 13.0],
|
||||
[2.0, 3.0, 4.0]]),
|
||||
'2014-01-04': np.array([[12.0, 13.0, 14.0],
|
||||
[12.0, 13.0, 14.0]]),
|
||||
})
|
||||
|
||||
nassets = len(asset_info)
|
||||
@@ -422,7 +424,7 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
|
||||
with tmp_asset_finder(asset_info) as finder:
|
||||
expected_output = pd.DataFrame(
|
||||
list(concatv([12] * nassets, [13] * nassets)),
|
||||
list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
|
||||
index=pd.MultiIndex.from_product((
|
||||
sorted(expected_views.keys()),
|
||||
finder.retrieve_all(asset_info.index),
|
||||
@@ -430,6 +432,7 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
columns=('value',),
|
||||
)
|
||||
dates = self.dates
|
||||
dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
|
||||
self._run_pipeline(
|
||||
expr,
|
||||
deltas,
|
||||
|
||||
@@ -648,11 +648,30 @@ def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
|
||||
-------
|
||||
overwrite : Float64Overwrite
|
||||
The overwrite that will apply the new value to the data.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This forward-fills all dense dates that are between the asof_date
|
||||
and the next sparse date after the asof_date.
|
||||
|
||||
For example:
|
||||
let ``asof = pd.Timestamp('2014-01-02')``,
|
||||
``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``
|
||||
``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])``
|
||||
|
||||
Then the overwrite will apply to indexes: 1, 2, 3, 4
|
||||
"""
|
||||
first_row = dense_dates.searchsorted(asof)
|
||||
last_row = dense_dates.searchsorted(
|
||||
sparse_dates[sparse_dates.searchsorted(asof, 'right')],
|
||||
) - 1
|
||||
next_idx = sparse_dates.searchsorted(asof, 'right')
|
||||
if next_idx == len(sparse_dates):
|
||||
# There is no next date in the sparse, this overwrite should apply
|
||||
# through the end of the dense dates.
|
||||
last_row = len(dense_dates) - 1
|
||||
else:
|
||||
# There is a next date in sparse dates. This means that the overwrite
|
||||
# should only apply until the index of this date in the dense dates.
|
||||
last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1
|
||||
|
||||
if first_row > last_row:
|
||||
return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user