BUG: Fix out of order emission of performance with minutely data.

With the benchmark returns marked at midnight, the performance packet for a day was emitted *before* any events for that day were processed. Fix by expecting benchmarks to be marked at the market close for backtests that use minute data but emit performance results daily, so that the benchmark event is handled at the end of the day. TST: Also add a test that exercises the event loop with minutely data (with benchmarks marked at the end of the day), since that combination was previously uncovered.
2026-06-28 20:05:52 +08:00 · 2013-05-08 15:17:45 -04:00
parent 74e743b8c6
commit e9d80cc044
2 changed files with 189 additions and 1 deletions
@@ -20,6 +20,7 @@ import pytz
 import numpy as np

 from zipline.finance.trading import SimulationParameters
+from zipline.finance import trading
 from zipline.algorithm import TradingAlgorithm
 from zipline.protocol import (
    Event,
@@ -161,3 +162,173 @@ class TestEventsThroughRisk(unittest.TestCase):
                expected_sharpe[current_dt],
                crm.sharpe[-1],
                decimal=6)
+
+    @trading.use_environment(trading.TradingEnvironment())
+    def test_minute_buy_and_hold(self):
+
+        start_date = datetime.datetime(
+            year=2006,
+            month=1,
+            day=3,
+            hour=0,
+            minute=0,
+            tzinfo=pytz.utc)
+        end_date = datetime.datetime(
+            year=2006,
+            month=1,
+            day=5,
+            hour=0,
+            minute=0,
+            tzinfo=pytz.utc)
+
+        sim_params = SimulationParameters(
+            period_start=start_date,
+            period_end=end_date,
+            emission_rate='daily',
+            data_frequency='minute')
+
+        algo = BuyAndHoldAlgorithm(
+            sim_params=sim_params,
+            data_frequency='minute')
+
+        first_date = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc)
+        first_open, first_close = \
+            trading.environment.get_open_and_close(first_date)
+
+        second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc)
+        second_open, second_close = \
+            trading.environment.get_open_and_close(second_date)
+
+        third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc)
+        third_open, third_close = \
+            trading.environment.get_open_and_close(third_date)
+
+        benchmark_data = [
+            Event({
+                'returns': 0.1,
+                'dt': first_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+            Event({
+                'returns': 0.2,
+                'dt': second_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+            Event({
+                'returns': 0.4,
+                'dt': third_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+        ]
+
+        trade_bar_data = [
+            Event({
+                'open_price': 10,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': first_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 10,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': first_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 15,
+                'close_price': 20,
+                'price': 20,
+                'volume': 2000,
+                'sid': 1,
+                'dt': second_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 15,
+                'close_price': 20,
+                'price': 20,
+                'volume': 2000,
+                'sid': 1,
+                'dt': second_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 20,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': third_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 20,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': third_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+        ]
+
+        algo.benchmark_return_source = benchmark_data
+        algo.sources = list([trade_bar_data])
+        gen = algo._create_generator(sim_params)
+
+        crm = algo.perf_tracker.cumulative_risk_metrics
+
+        first_msg = gen.next()
+
+        self.assertIsNotNone(first_msg, "There should be a message emitted.")
+
+        # Protects against bug where the positions appeared to be a day late,
+        # because benchmarks were triggering calculations before the events
+        # for the day were processed.
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "There should be one position after the first day.")
+
+        self.assertTrue(
+            np.isnan(crm.algorithm_volatility[-1]),
+            "On the first day algorithm volatility does not exist.")
+
+        second_msg = gen.next()
+
+        self.assertIsNotNone(second_msg, "There should be a message emitted.")
+
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "Number of positions should stay the same.")
+
+        # TODO: Hand derive. Current value is just a canary to detect changes.
+        np.testing.assert_almost_equal(
+            0.050022510129558301,
+            crm.algorithm_returns[-1],
+            decimal=6)
+
+        third_msg = gen.next()
+
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "Number of positions should stay the same.")
+
+        self.assertIsNotNone(third_msg, "There should be a message emitted.")
+
+        # TODO: Hand derive. Current value is just a canary to detect changes.
+        np.testing.assert_almost_equal(
+            -0.047639464532418657,
+            crm.algorithm_returns[-1],
+            decimal=6)
@@ -311,7 +311,24 @@ class PerformanceTracker(object):
        elif event.type == zp.DATASOURCE_TYPE.CUSTOM:
            pass
        elif event.type == zp.DATASOURCE_TYPE.BENCHMARK:
-            self.all_benchmark_returns[event.dt] = event.returns
+            if (
+                self.sim_params.data_frequency == 'minute'
+                and
+                self.sim_params.emission_rate == 'daily'
+            ):
+                # Minute data benchmarks should have a timestamp of market
+                # close, so that calculations are triggered at the right time.
+                # However, the risk module uses midnight as the 'day'
+                # marker for returns, so adjust back to midnight.
+                midnight = event.dt.replace(
+                    hour=0,
+                    minute=0,
+                    second=0,
+                    microsecond=0)
+            else:
+                midnight = event.dt
+
+            self.all_benchmark_returns[midnight] = event.returns

        #calculate performance as of last trade
        for perf_period in self.perf_periods: