From ae0d41af6f3377e6fe10ddfdff6a62c1f66eca51 Mon Sep 17 00:00:00 2001 From: Jean Bredeche Date: Wed, 14 Sep 2016 22:21:43 -0400 Subject: [PATCH] ENH: Make reader.get_value raise NoDataOnDate if the date is not in the calendar. DataPortal now catches the NoDataOnDate exception and returns nan for OHLC and 0 for V. Price is still forward filled, unchanged. --- tests/data/test_minute_bars.py | 28 ++++++++++++++----------- tests/data/test_resample.py | 13 ++++++------ tests/test_bar_data.py | 30 +++++++++++++++++++++++++++ zipline/data/_minute_bar_internal.pyx | 20 ++++++++++++++++-- zipline/data/bar_reader.py | 8 ++++++- zipline/data/data_portal.py | 13 +++++++++--- zipline/data/minute_bars.py | 9 ++++++-- zipline/data/resample.py | 10 +-------- 8 files changed, 96 insertions(+), 35 deletions(-) diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index b15f16d7..b6fb7b77 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -35,6 +35,7 @@ from pandas import ( date_range, ) +from zipline.data.bar_reader import NoDataOnDate from zipline.data.minute_bars import ( BcolzMinuteBarMetadata, BcolzMinuteBarWriter, @@ -854,18 +855,19 @@ class BcolzMinuteBarTestCase(WithTradingCalendars, 'open'), 780) - self.assertEqual( + with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-06-02', tz='UTC'), - 'open'), - 390) - self.assertEqual( + 'open' + ) + + with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-06-02 20:01:00', tz='UTC'), - 'open'), - 780) + 'open' + ) def test_adjust_non_trading_minutes_half_days(self): # half day @@ -908,18 +910,20 @@ class BcolzMinuteBarTestCase(WithTradingCalendars, Timestamp('2015-11-27 18:01:00', tz='UTC'), 'open'), 210) - self.assertEqual( + + with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-11-30', tz='UTC'), - 'open'), - 210) - self.assertEqual( + 'open' + ) + + with self.assertRaises(NoDataOnDate): 
self.reader.get_value( sid, Timestamp('2015-11-30 21:01:00', tz='UTC'), - 'open'), - 600) + 'open' + ) def test_set_sid_attrs(self): """Confirm that we can set the attributes of a sid's file correctly. diff --git a/tests/data/test_resample.py b/tests/data/test_resample.py index 035354af..debd2625 100644 --- a/tests/data/test_resample.py +++ b/tests/data/test_resample.py @@ -21,6 +21,7 @@ import pandas as pd from pandas import DataFrame from six import iteritems +from zipline.data.bar_reader import NoDataOnDate from zipline.data.resample import ( minute_to_session, DailyHistoryAggregator, @@ -803,12 +804,12 @@ class TestReindexSessionBars(WithBcolzEquityDailyBarReader, err_msg="The open of the fixture data on the " "first session should be 10.") tday = pd.Timestamp('2015-11-26', tz='UTC') - assert_almost_equal(self.reader.get_value(1, tday, 'close'), nan, - err_msg="Thanksgiving is a NYSE holiday, but " - "futures trading is open. Result should be nan.") - assert_almost_equal(self.reader.get_value(1, tday, 'volume'), 0, - err_msg="Thanksgiving is a NYSE holiday, but " - "futures trading is open. 
Result should be 0.") + + with self.assertRaises(NoDataOnDate): + self.reader.get_value(1, tday, 'close') + + with self.assertRaises(NoDataOnDate): + self.reader.get_value(1, tday, 'volume') def test_last_availabe_dt(self): self.assertEqual(self.reader.last_available_dt, self.END_DATE) diff --git a/tests/test_bar_data.py b/tests/test_bar_data.py index cafcbc4d..f65a5456 100644 --- a/tests/test_bar_data.py +++ b/tests/test_bar_data.py @@ -22,6 +22,8 @@ from numpy.testing import assert_almost_equal import pandas as pd from zipline._protocol import handle_non_market_minutes + +from zipline.data.data_portal import DataPortal from zipline.protocol import BarData from zipline.testing import ( MockDailyBarReader, @@ -477,6 +479,34 @@ class TestMinuteBarData(WithBarDataChecks, bd.current(self.HILARIOUSLY_ILLIQUID_ASSET, "volume") ) + def test_get_value_during_non_market_hours(self): + # make sure that if we try to get the OHLCV values of ASSET1 during + # non-market hours, we don't get the previous market minute's values + futures_cal = get_calendar("us_futures") + + data_portal = DataPortal( + self.env.asset_finder, + futures_cal, + first_trading_day=self.DATA_PORTAL_FIRST_TRADING_DAY, + equity_minute_reader=self.bcolz_equity_minute_bar_reader, + ) + + bar_data = BarData( + data_portal, + lambda: pd.Timestamp("2016-01-06 3:15", tz="US/Eastern"), + "minute", + futures_cal + ) + + self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "open"))) + self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "high"))) + self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "low"))) + self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "close"))) + self.assertEqual(0, bar_data.current(self.ASSET1, "volume")) + + # price should still forward fill + self.assertEqual(390, bar_data.current(self.ASSET1, "price")) + def test_can_trade_equity_same_cal_outside_lifetime(self): cal = get_calendar(self.ASSET1.exchange) diff --git a/zipline/data/_minute_bar_internal.pyx 
b/zipline/data/_minute_bar_internal.pyx index 04aa7573..ea39d72d 100644 --- a/zipline/data/_minute_bar_internal.pyx +++ b/zipline/data/_minute_bar_internal.pyx @@ -38,7 +38,8 @@ def minute_value(ndarray[long_t, ndim=1] market_opens, def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, ndarray[long_t, ndim=1] market_closes, long_t minute_val, - short minutes_per_day): + short minutes_per_day, + bool forward_fill): """ Finds the position of a given minute in the given array of market opens. If not a market minute, adjusts to the last market minute. @@ -57,9 +58,21 @@ def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, minutes_per_day: int The number of minutes per day (e.g. 390 for NYSE). + forward_fill: bool + Whether to use the previous market minute if the given minute does + not fall within an open/close pair. + Returns ------- int: The position of the given minute in the market opens array. + + Raises + ------ + ValueError + If the given minute is not between a single open/close pair AND + forward_fill is False. For example, if minute_val is 17:00 Eastern + for a given day whose normal hours are 9:30 to 16:00, and we are not + forward filling, ValueError is raised. 
""" cdef Py_ssize_t market_open_loc, market_open, delta @@ -68,6 +81,9 @@ def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, market_open = market_opens[market_open_loc] market_close = market_closes[market_open_loc] + if not forward_fill and ((minute_val - market_open) >= minutes_per_day): + raise ValueError("Given minute is not between an open and a close") + delta = int_min(minute_val - market_open, market_close - market_open) return (market_open_loc * minutes_per_day) + delta @@ -112,7 +128,7 @@ def find_last_traded_position_internal( minute_pos = int_min( find_position_of_minute(market_opens, market_closes, end_minute, - minutes_per_day), + minutes_per_day, True), len(volumes) - 1 ) diff --git a/zipline/data/bar_reader.py b/zipline/data/bar_reader.py index 386da2f9..a4da6f9a 100644 --- a/zipline/data/bar_reader.py +++ b/zipline/data/bar_reader.py @@ -17,7 +17,7 @@ from six import with_metaclass class NoDataOnDate(Exception): """ - Raised when a spot price can be found for the sid and date. + Raised when a spot price cannot be found for the sid and date. """ pass @@ -106,6 +106,12 @@ class BarReader(with_metaclass(ABCMeta, object)): value : float|int The value at the given coordinates, ``float`` for OHLC, ``int`` for 'volume'. + + Raises + ------ + NoDataOnDate + If the given dt is not a valid market minute (in minute mode) or + session (in daily mode) according to this reader's trading calendar. 
""" pass diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index c66e28bf..c2efafd5 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -533,9 +533,16 @@ class DataPortal(object): def _get_minute_spot_value(self, asset, column, dt, ffill=False): reader = self._get_pricing_reader('minute') - result = reader.get_value( - asset.sid, dt, column - ) + try: + result = reader.get_value( + asset.sid, dt, column + ) + except NoDataOnDate: + if not ffill: + if column == 'volume': + return 0 + else: + return np.nan if not ffill: return result diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index 8c8947ab..893ac5c1 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -32,7 +32,7 @@ from zipline.data._minute_bar_internal import ( from zipline.gens.sim_engine import NANOS_IN_MINUTE -from zipline.data.bar_reader import BarReader +from zipline.data.bar_reader import BarReader, NoDataOnDate from zipline.utils.calendars import get_calendar from zipline.utils.cli import maybe_show_progress from zipline.utils.memoize import lazyval @@ -964,7 +964,11 @@ class BcolzMinuteBarReader(MinuteBarReader): if self._last_get_value_dt_value == dt.value: minute_pos = self._last_get_value_dt_position else: - minute_pos = self._find_position_of_minute(dt) + try: + minute_pos = self._find_position_of_minute(dt) + except ValueError: + raise NoDataOnDate() + self._last_get_value_dt_value = dt.value self._last_get_value_dt_position = minute_pos @@ -1058,6 +1062,7 @@ class BcolzMinuteBarReader(MinuteBarReader): self._market_close_values, minute_dt.value / NANOS_IN_MINUTE, self._minutes_per_day, + False, ) def load_raw_arrays(self, fields, start_dt, end_dt, sids): diff --git a/zipline/data/resample.py b/zipline/data/resample.py index 339eb82b..98a878d4 100644 --- a/zipline/data/resample.py +++ b/zipline/data/resample.py @@ -15,13 +15,11 @@ from collections import OrderedDict from abc import ABCMeta, 
abstractmethod import numpy as np -from numpy import nan import pandas as pd from pandas import DataFrame from six import with_metaclass from zipline.data.minute_bars import MinuteBarReader -from zipline.data.us_equity_pricing import NoDataOnDate from zipline.data.session_bars import SessionBarReader from zipline.utils.memoize import lazyval @@ -585,13 +583,7 @@ class ReindexBarReader(with_metaclass(ABCMeta)): return self._reader.first_trading_day def get_value(self, sid, dt, field): - try: - return self._reader.get_value(sid, dt, field) - except NoDataOnDate: - if field == 'volume': - return 0 - else: - return nan + return self._reader.get_value(sid, dt, field) @abstractmethod def _outer_dts(self, start_dt, end_dt):