From 5624e0f391959148734b7f1ff0ec8b89d635a0de Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Tue, 15 Nov 2016 11:30:53 -0500 Subject: [PATCH] BUG: Fix minute bar last traded after half day. When the following conditions occur, - a `nan` occurred after a half day (e.g. on the Monday after Thanksgiving, where the Friday would be a half day.) - data was written to the span between the early close and where the market close would have been if it were not an early close session - a `nan` also occurred on the last minute of the early market session. the existing implementation would incorrectly return a `nan` when requesting a forward filled price. The steps that caused this error were: 1. Request for `'price'` on the market open of the day after the early close. 2. `nan` is found for that minute. 3. `get_last_traded_dt` is called, and finds a volume that occurs after the early close, e.g. `18:47` when the market close was `18:00`. 4. The minute position for `18:47` is used when calling `find_position_of_minute`; since that value is after the `market_close`, the minute is set to the position of `18:00` due to the delta logic in that function. 5. Since there is also no data at `18:00`, a `nan` is returned, even though there were valid minutes earlier in the session, e.g. a non-zero volume at `16:47` should have been used, but was not. Fix by checking the current minute against the market close when searching for the last traded minute. If the minute is greater than the market close for the corresponding day, continue the search until the minute position is within the trading session. This could also be fixed by enforcing that only zeros can be written between an early close and the minute where the close would have been, but this fix allows the reader to work with existing data. 
--- tests/data/test_minute_bars.py | 65 +++++++++++++++++++++++++++ zipline/data/_minute_bar_internal.pyx | 11 ++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index 514933bc..c8760452 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -46,6 +46,7 @@ from zipline.data.minute_bars import ( ) from zipline.testing.fixtures import ( + WithAssetFinder, WithInstanceTmpDir, WithTradingCalendars, ZiplineTestCase, @@ -59,9 +60,12 @@ TEST_CALENDAR_STOP = Timestamp('2015-12-31', tz='UTC') class BcolzMinuteBarTestCase(WithTradingCalendars, + WithAssetFinder, WithInstanceTmpDir, ZiplineTestCase): + ASSET_FINDER_EQUITY_SIDS = 1, 2 + @classmethod def init_class_fixtures(cls): super(BcolzMinuteBarTestCase, cls).init_class_fixtures() @@ -1040,3 +1044,64 @@ class BcolzMinuteBarTestCase(WithTradingCalendars, _, last_close = cal.open_and_close_for_session( self.test_calendar_start) self.assertEqual(self.reader.last_available_dt, last_close) + + def test_early_market_close(self): + # Date to test is 2015-11-30 9:31 + # Early close is 2015-11-27 18:00 + friday_after_tday = Timestamp('2015-11-27', tz='UTC') + friday_after_tday_close = self.market_closes[friday_after_tday] + + before_early_close = friday_after_tday_close - timedelta(minutes=8) + after_early_close = friday_after_tday_close + timedelta(minutes=8) + + monday_after_tday = Timestamp('2015-11-30', tz='UTC') + minute = self.market_opens[monday_after_tday] + + # Test condition where there is data written after the market + # close (ideally, this should not occur in datasets, but guards + # against consumers of the minute bar writer, which do not filter + # out after close minutes. 
+ minutes = [ + before_early_close, + after_early_close, + minute, + ] + sid = 1 + data = DataFrame( + data={ + 'open': [10.0, 11.0, nan], + 'high': [20.0, 21.0, nan], + 'low': [30.0, 31.0, nan], + 'close': [40.0, 41.0, nan], + 'volume': [50, 51, 0] + }, + index=[minutes]) + self.writer.write_sid(sid, data) + + open_price = self.reader.get_value(sid, minute, 'open') + + assert_almost_equal(nan, open_price) + + high_price = self.reader.get_value(sid, minute, 'high') + + assert_almost_equal(nan, high_price) + + low_price = self.reader.get_value(sid, minute, 'low') + + assert_almost_equal(nan, low_price) + + close_price = self.reader.get_value(sid, minute, 'close') + + assert_almost_equal(nan, close_price) + + volume = self.reader.get_value(sid, minute, 'volume') + + self.assertEquals(0, volume) + + asset = self.asset_finder.retrieve_asset(sid) + last_traded_dt = self.reader.get_last_traded_dt(asset, minute) + + self.assertEquals(last_traded_dt, before_early_close, + "The last traded dt should be before the early " + "close, even when data is written between the early " + "close and the next open.") diff --git a/zipline/data/_minute_bar_internal.pyx b/zipline/data/_minute_bar_internal.pyx index ea39d72d..61818ae1 100644 --- a/zipline/data/_minute_bar_internal.pyx +++ b/zipline/data/_minute_bar_internal.pyx @@ -124,7 +124,7 @@ def find_last_traded_position_internal( ------- int: The position of the last traded minute, starting from `minute_val` """ - cdef Py_ssize_t minute_pos, current_minute + cdef Py_ssize_t minute_pos, current_minute, q minute_pos = int_min( find_position_of_minute(market_opens, market_closes, end_minute, @@ -137,6 +137,15 @@ def find_last_traded_position_internal( market_opens, minute_pos, minutes_per_day ) + q = cython.cdiv(minute_pos, minutes_per_day) + if current_minute > market_closes[q]: + minute_pos = find_position_of_minute(market_opens, + market_closes, + market_closes[q], + minutes_per_day, + False) + continue + if current_minute < 
start_minute: return -1