From e9d80cc044f648569eb67e83886ee939fc2e57cf Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Wed, 8 May 2013 15:17:45 -0400 Subject: [PATCH] BUG: Fix out of order emission of performance with minutely data. With the benchmark returns marked at midnight, the performance packet for a day was emitted *before* any events for that day were processed. Fix by expecting benchmarks marked at the market close, for backtests that use minute data but emit performance results daily, so that the benchmark is handled at the end of the day. TST: Also, add test that exercises the event loop with minutely data (with benchmarks that are marked at end of day), since that combination was previously uncovered. --- tests/test_events_through_risk.py | 171 ++++++++++++++++++++++++++++++ zipline/finance/performance.py | 19 +++- 2 files changed, 189 insertions(+), 1 deletion(-) diff --git a/tests/test_events_through_risk.py b/tests/test_events_through_risk.py index ed3ddf80..6ebef976 100644 --- a/tests/test_events_through_risk.py +++ b/tests/test_events_through_risk.py @@ -20,6 +20,7 @@ import pytz import numpy as np from zipline.finance.trading import SimulationParameters +from zipline.finance import trading from zipline.algorithm import TradingAlgorithm from zipline.protocol import ( Event, @@ -161,3 +162,173 @@ class TestEventsThroughRisk(unittest.TestCase): expected_sharpe[current_dt], crm.sharpe[-1], decimal=6) + + @trading.use_environment(trading.TradingEnvironment()) + def test_minute_buy_and_hold(self): + + start_date = datetime.datetime( + year=2006, + month=1, + day=3, + hour=0, + minute=0, + tzinfo=pytz.utc) + end_date = datetime.datetime( + year=2006, + month=1, + day=5, + hour=0, + minute=0, + tzinfo=pytz.utc) + + sim_params = SimulationParameters( + period_start=start_date, + period_end=end_date, + emission_rate='daily', + data_frequency='minute') + + algo = BuyAndHoldAlgorithm( + sim_params=sim_params, + data_frequency='minute') + + first_date = datetime.datetime(2006, 1, 3, 
tzinfo=pytz.utc) + first_open, first_close = \ + trading.environment.get_open_and_close(first_date) + + second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc) + second_open, second_close = \ + trading.environment.get_open_and_close(second_date) + + third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc) + third_open, third_close = \ + trading.environment.get_open_and_close(third_date) + + benchmark_data = [ + Event({ + 'returns': 0.1, + 'dt': first_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.2, + 'dt': second_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + Event({ + 'returns': 0.4, + 'dt': third_close, + 'source_id': 'test-benchmark-source', + 'type': DATASOURCE_TYPE.BENCHMARK + }), + ] + + trade_bar_data = [ + Event({ + 'open_price': 10, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': first_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 10, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': first_open + datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 15, + 'close_price': 20, + 'price': 20, + 'volume': 2000, + 'sid': 1, + 'dt': second_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 15, + 'close_price': 20, + 'price': 20, + 'volume': 2000, + 'sid': 1, + 'dt': second_open + datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 20, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': third_open, + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + Event({ + 'open_price': 20, + 'close_price': 15, + 'price': 15, + 'volume': 1000, + 'sid': 1, + 'dt': third_open + 
datetime.timedelta(minutes=10), + 'source_id': 'test-trade-source', + 'type': DATASOURCE_TYPE.TRADE + }), + ] + + algo.benchmark_return_source = benchmark_data + algo.sources = list([trade_bar_data]) + gen = algo._create_generator(sim_params) + + crm = algo.perf_tracker.cumulative_risk_metrics + + first_msg = gen.next() + + self.assertIsNotNone(first_msg, "There should be a message emitted.") + + # Protects against bug where the positions appeared to be a day late, + # because benchmarks were triggering calculations before the events + # for the day were processed. + self.assertEqual(1, len(algo.portfolio.positions), + "There should be one position after the first day.") + + self.assertTrue( + np.isnan(crm.algorithm_volatility[-1]), + "On the first day algorithm volatility does not exist.") + + second_msg = gen.next() + + self.assertIsNotNone(second_msg, "There should be a message emitted.") + + self.assertEqual(1, len(algo.portfolio.positions), + "Number of positions should stay the same.") + + # TODO: Hand derive. Current value is just a canary to detect changes. + np.testing.assert_almost_equal( + 0.050022510129558301, + crm.algorithm_returns[-1], + decimal=6) + + third_msg = gen.next() + + self.assertEqual(1, len(algo.portfolio.positions), + "Number of positions should stay the same.") + + self.assertIsNotNone(third_msg, "There should be a message emitted.") + + # TODO: Hand derive. Current value is just a canary to detect changes. 
+ np.testing.assert_almost_equal( + -0.047639464532418657, + crm.algorithm_returns[-1], + decimal=6) diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index bb3c2d5e..19e1422a 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -311,7 +311,24 @@ class PerformanceTracker(object): elif event.type == zp.DATASOURCE_TYPE.CUSTOM: pass elif event.type == zp.DATASOURCE_TYPE.BENCHMARK: - self.all_benchmark_returns[event.dt] = event.returns + if ( + self.sim_params.data_frequency == 'minute' + and + self.sim_params.emission_rate == 'daily' + ): + # Minute data benchmarks should have a timestamp of market + # close, so that calculations are triggered at the right time. + # However, risk module uses midnight as the 'day' + # marker for returns, so adjust back to midnight. + midnight = event.dt.replace( + hour=0, + minute=0, + second=0, + microsecond=0) + else: + midnight = event.dt + + self.all_benchmark_returns[midnight] = event.returns #calculate performance as of last trade for perf_period in self.perf_periods: