BUG: Corrects an index error in blaze loader.

Fixes the case where a delta has an asof_date of the last requested day, which caused an index error. The loader now guards against this case explicitly so that the delta is effective through the end of the requested window. Adds a test case for this behavior.
2026-06-29 07:56:38 +08:00 · 2015-11-02 13:30:02 -05:00
parent 420df53d78
commit 80cc2bd6f6
2 changed files with 27 additions and 5 deletions
@@ -399,7 +399,7 @@ class BlazeToPipelineTestCase(TestCase):
    @with_extra_sid
    def test_deltas(self, asset_info):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
-        deltas = bz.Data(self.df.iloc[:-3], name='deltas', dshape=self.dshape)
+        deltas = bz.Data(self.df, name='deltas', dshape=self.dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
@@ -411,6 +411,8 @@ class BlazeToPipelineTestCase(TestCase):
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                    [2.0, 3.0, 4.0]]),
+            '2014-01-04': np.array([[12.0, 13.0, 14.0],
+                                    [12.0, 13.0, 14.0]]),
        })

        nassets = len(asset_info)
@@ -422,7 +424,7 @@ class BlazeToPipelineTestCase(TestCase):

        with tmp_asset_finder(asset_info) as finder:
            expected_output = pd.DataFrame(
-                list(concatv([12] * nassets, [13] * nassets)),
+                list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
@@ -430,6 +432,7 @@ class BlazeToPipelineTestCase(TestCase):
                columns=('value',),
            )
            dates = self.dates
+            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
@@ -648,11 +648,30 @@ def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
    -------
    overwrite : Float64Overwrite
        The overwrite that will apply the new value to the data.
+
+    Notes
+    -----
+    This is forward-filling all dense dates that are between the asof_date date
+    and the next sparse date after the asof_date.
+
+    For example:
+    let ``asof = pd.Timestamp('2014-01-02')``,
+        ``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``
+        ``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])``
+
+    Then the overwrite will apply to indexes: 1, 2, 3, 4
    """
    first_row = dense_dates.searchsorted(asof)
-    last_row = dense_dates.searchsorted(
-        sparse_dates[sparse_dates.searchsorted(asof, 'right')],
-    ) - 1
+    next_idx = sparse_dates.searchsorted(asof, 'right')
+    if next_idx == len(sparse_dates):
+        # There is no next date in the sparse, this overwrite should apply
+        # through the end of the dense dates.
+        last_row = len(dense_dates) - 1
+    else:
+        # There is a next date in sparse dates. This means that the overwrite
+        # should only apply until the index of this date in the dense dates.
+        last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1
+
    if first_row > last_row:
        return