From e9d80cc044f648569eb67e83886ee939fc2e57cf Mon Sep 17 00:00:00 2001
From: Eddie Hebert <ehebert@quantopian.com>
Date: Wed, 8 May 2013 15:17:45 -0400
Subject: [PATCH] BUG: Fix out of order emission of performance with minutely
 data.

With the benchmark returns marked at midnight, the performance packet
for a day was emitted *before* any events for that day were processed.

Fix by expecting benchmarks to be marked at the market close for backtests
that use minute data but emit performance results daily, so that the
benchmark is handled at the end of the day.

TST: Also add a test that exercises the event loop with minutely data
(with benchmarks that are marked at end of day), since that combination
was previously not covered by tests.
---
 tests/test_events_through_risk.py | 171 ++++++++++++++++++++++++++++++
 zipline/finance/performance.py    |  19 +++-
 2 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/tests/test_events_through_risk.py b/tests/test_events_through_risk.py
index ed3ddf80..6ebef976 100644
--- a/tests/test_events_through_risk.py
+++ b/tests/test_events_through_risk.py
@@ -20,6 +20,7 @@ import pytz
 import numpy as np
 
 from zipline.finance.trading import SimulationParameters
+from zipline.finance import trading
 from zipline.algorithm import TradingAlgorithm
 from zipline.protocol import (
     Event,
@@ -161,3 +162,173 @@ class TestEventsThroughRisk(unittest.TestCase):
                 expected_sharpe[current_dt],
                 crm.sharpe[-1],
                 decimal=6)
+
+    @trading.use_environment(trading.TradingEnvironment())
+    def test_minute_buy_and_hold(self):
+
+        start_date = datetime.datetime(
+            year=2006,
+            month=1,
+            day=3,
+            hour=0,
+            minute=0,
+            tzinfo=pytz.utc)
+        end_date = datetime.datetime(
+            year=2006,
+            month=1,
+            day=5,
+            hour=0,
+            minute=0,
+            tzinfo=pytz.utc)
+
+        sim_params = SimulationParameters(
+            period_start=start_date,
+            period_end=end_date,
+            emission_rate='daily',
+            data_frequency='minute')
+
+        algo = BuyAndHoldAlgorithm(
+            sim_params=sim_params,
+            data_frequency='minute')
+
+        first_date = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc)
+        first_open, first_close = \
+            trading.environment.get_open_and_close(first_date)
+
+        second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc)
+        second_open, second_close = \
+            trading.environment.get_open_and_close(second_date)
+
+        third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc)
+        third_open, third_close = \
+            trading.environment.get_open_and_close(third_date)
+
+        benchmark_data = [
+            Event({
+                'returns': 0.1,
+                'dt': first_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+            Event({
+                'returns': 0.2,
+                'dt': second_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+            Event({
+                'returns': 0.4,
+                'dt': third_close,
+                'source_id': 'test-benchmark-source',
+                'type': DATASOURCE_TYPE.BENCHMARK
+            }),
+        ]
+
+        trade_bar_data = [
+            Event({
+                'open_price': 10,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': first_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 10,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': first_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 15,
+                'close_price': 20,
+                'price': 20,
+                'volume': 2000,
+                'sid': 1,
+                'dt': second_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 15,
+                'close_price': 20,
+                'price': 20,
+                'volume': 2000,
+                'sid': 1,
+                'dt': second_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 20,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': third_open,
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+            Event({
+                'open_price': 20,
+                'close_price': 15,
+                'price': 15,
+                'volume': 1000,
+                'sid': 1,
+                'dt': third_open + datetime.timedelta(minutes=10),
+                'source_id': 'test-trade-source',
+                'type': DATASOURCE_TYPE.TRADE
+            }),
+        ]
+
+        algo.benchmark_return_source = benchmark_data
+        algo.sources = list([trade_bar_data])
+        gen = algo._create_generator(sim_params)
+
+        crm = algo.perf_tracker.cumulative_risk_metrics
+
+        first_msg = gen.next()
+
+        self.assertIsNotNone(first_msg, "There should be a message emitted.")
+
+        # Protects against bug where the positions appeared to be a day late,
+        # because benchmarks were triggering calculations before the events
+        # for the day were processed.
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "There should be one position after the first day.")
+
+        self.assertTrue(
+            np.isnan(crm.algorithm_volatility[-1]),
+            "On the first day algorithm volatility does not exist.")
+
+        second_msg = gen.next()
+
+        self.assertIsNotNone(second_msg, "There should be a message emitted.")
+
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "Number of positions should stay the same.")
+
+        # TODO: Hand derive. Current value is just a canary to detect changes.
+        np.testing.assert_almost_equal(
+            0.050022510129558301,
+            crm.algorithm_returns[-1],
+            decimal=6)
+
+        third_msg = gen.next()
+
+        self.assertEqual(1, len(algo.portfolio.positions),
+                         "Number of positions should stay the same.")
+
+        self.assertIsNotNone(third_msg, "There should be a message emitted.")
+
+        # TODO: Hand derive. Current value is just a canary to detect changes.
+        np.testing.assert_almost_equal(
+            -0.047639464532418657,
+            crm.algorithm_returns[-1],
+            decimal=6)
diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py
index bb3c2d5e..19e1422a 100644
--- a/zipline/finance/performance.py
+++ b/zipline/finance/performance.py
@@ -311,7 +311,24 @@ class PerformanceTracker(object):
         elif event.type == zp.DATASOURCE_TYPE.CUSTOM:
             pass
         elif event.type == zp.DATASOURCE_TYPE.BENCHMARK:
-            self.all_benchmark_returns[event.dt] = event.returns
+            if (
+                self.sim_params.data_frequency == 'minute'
+                and
+                self.sim_params.emission_rate == 'daily'
+            ):
+                # Minute data benchmarks should have a timestamp of market
+                # close, so that calculations are triggered at the right time.
+                # However, the risk module uses midnight as the 'day'
+                # marker for returns, so adjust back to midnight.
+                midnight = event.dt.replace(
+                    hour=0,
+                    minute=0,
+                    second=0,
+                    microsecond=0)
+            else:
+                midnight = event.dt
+
+            self.all_benchmark_returns[midnight] = event.returns
 
         #calculate performance as of last trade
         for perf_period in self.perf_periods: