From f088afc1e148ad64c98b0cd7c2cf987dd986e85c Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Fri, 28 Apr 2017 15:53:04 -0400 Subject: [PATCH 1/5] MAINT: Modify ReindexBarReader.get_value to handle missing data Instead of raising an exception, return 0.0 for volume, and nan for everything else. --- tests/data/test_resample.py | 9 +++------ zipline/data/resample.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/data/test_resample.py b/tests/data/test_resample.py index 53c5493a..4829189e 100644 --- a/tests/data/test_resample.py +++ b/tests/data/test_resample.py @@ -16,12 +16,11 @@ from numbers import Real from nose_parameterized import parameterized from numpy.testing import assert_almost_equal -from numpy import nan, array, full +from numpy import nan, array, full, isnan import pandas as pd from pandas import DataFrame from six import iteritems -from zipline.data.bar_reader import NoDataOnDate from zipline.data.resample import ( minute_frame_to_session_frame, DailyHistoryAggregator, @@ -863,11 +862,9 @@ class TestReindexSessionBars(WithBcolzEquityDailyBarReader, "first session should be 10.") tday = pd.Timestamp('2015-11-26', tz='UTC') - with self.assertRaises(NoDataOnDate): - self.reader.get_value(1, tday, 'close') + self.assertTrue(isnan(self.reader.get_value(1, tday, 'close'))) - with self.assertRaises(NoDataOnDate): - self.reader.get_value(1, tday, 'volume') + self.assertEqual(self.reader.get_value(1, tday, 'volume'), 0) def test_last_availabe_dt(self): self.assertEqual(self.reader.last_available_dt, self.END_DATE) diff --git a/zipline/data/resample.py b/zipline/data/resample.py index 4676a18f..f7257df7 100644 --- a/zipline/data/resample.py +++ b/zipline/data/resample.py @@ -25,6 +25,7 @@ from zipline.data._resample import ( _minute_to_session_close, _minute_to_session_volume, ) +from zipline.data.bar_reader import NoDataOnDate from zipline.data.minute_bars import MinuteBarReader from zipline.data.session_bars 
import SessionBarReader from zipline.utils.memoize import lazyval @@ -606,10 +607,6 @@ class ReindexBarReader(with_metaclass(ABCMeta)): Currently only supports a ``trading_calendar`` which is a superset of the ``reader``'s calendar. - Also, the currenty implementation only reindexes the results from - ``load_raw_arrays``, but in the future, `get_value` may also be made to - provide an empty result instead of raising on error. - Parameters ---------- @@ -652,7 +649,14 @@ class ReindexBarReader(with_metaclass(ABCMeta)): return self._reader.first_trading_day def get_value(self, sid, dt, field): - return self._reader.get_value(sid, dt, field) + # Give an empty result if no data is present. + try: + return self._reader.get_value(sid, dt, field) + except NoDataOnDate: + if field == 'volume': + return 0 + else: + return np.nan @abstractmethod def _outer_dts(self, start_dt, end_dt): From b2a39b4ae4e9bd5baf7552fe91bf58fc4411a76c Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Mon, 1 May 2017 17:06:49 -0400 Subject: [PATCH 2/5] TST: Adds DailyEquityHistoryOnFuturesCalendarTestCase Added as a minimal subclass of DailyEquityHistoryTestCase, swapping out just the primary calendar. This requires significant modifications to DailyEquityHistoryTestCase, to allow for a generic primary calendar. 
--- tests/test_history.py | 122 ++++++++++++++++++++++++++++++------ zipline/testing/fixtures.py | 5 +- 2 files changed, 106 insertions(+), 21 deletions(-) diff --git a/tests/test_history.py b/tests/test_history.py index a6357ce3..c4ed5130 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -235,8 +235,7 @@ class WithHistory(WithCreateBarData, WithDataPortal): 'declared_date', 'pay_date', 'amount', - 'sid', - ], + 'sid'], ) @classmethod @@ -249,6 +248,40 @@ class WithHistory(WithCreateBarData, WithDataPortal): else: freq = '1m' + cal = self.trading_calendar + equity_cal = self.trading_calendars[Equity] + + def reindex_to_primary_calendar(a, field): + """ + Reindex an array of prices from a window on the NYSE + calendar by the window on the primary calendar with the same + dt and window size. + """ + if mode == 'daily': + dts = cal.sessions_window(dt, -9) + + # `dt` may not be a session on the equity calendar, so + # find the next valid session. + equity_sess = equity_cal.minute_to_session_label(dt) + equity_dts = equity_cal.sessions_window(equity_sess, -9) + elif mode == 'minute': + dts = cal.minutes_window(dt, -10) + equity_dts = equity_cal.minutes_window(dt, -10) + + output = pd.Series( + index=equity_dts, + data=a, + ).reindex(dts) + + # Fill after reindexing, to ensure we don't forward fill + # with values that are being dropped. 
+ if field == 'volume': + return output.fillna(0) + elif field == 'price': + return output.fillna(method='ffill') + else: + return output + fields = fields if fields is not None else ALL_FIELDS assets = assets if assets is not None else [self.ASSET2, self.ASSET3] @@ -330,11 +363,19 @@ class WithHistory(WithCreateBarData, WithDataPortal): asset3_answer_key = np.full(10, np.nan) asset3_answer_key[-position_from_end] = \ value_for_asset3 + asset3_answer_key = reindex_to_primary_calendar( + asset3_answer_key, + field, + ) if asset == self.ASSET2: np.testing.assert_array_equal( - np.array( - range(base + idx - 9, base + idx + 1)), + reindex_to_primary_calendar( + np.array( + range(base + idx - 9, base + idx + 1) + ), + field, + ), asset_series ) @@ -347,12 +388,19 @@ class WithHistory(WithCreateBarData, WithDataPortal): asset3_answer_key = np.zeros(10) asset3_answer_key[-position_from_end] = \ value_for_asset3 * 100 + asset3_answer_key = reindex_to_primary_calendar( + asset3_answer_key, + field, + ) if asset == self.ASSET2: np.testing.assert_array_equal( - np.array( - range(base + idx - 9, base + idx + 1) - ) * 100, + reindex_to_primary_calendar( + np.array( + range(base + idx - 9, base + idx + 1) + ) * 100, + field, + ), asset_series ) @@ -369,20 +417,38 @@ class WithHistory(WithCreateBarData, WithDataPortal): if asset == self.ASSET2: # at idx 9, the data is 2 to 11 np.testing.assert_array_equal( - range(idx - 7, idx + 3), + reindex_to_primary_calendar( + range(idx - 7, idx + 3), + field=field, + ), asset_series ) if asset == self.ASSET3: - first_part = asset_series[0:-position_from_end] - second_part = asset_series[-position_from_end:] + # Second part begins on the session after + # `position_from_end` on the NYSE calendar. + second_begin = ( + dt - equity_cal.day * (position_from_end - 1) + ) + + # First part goes up until the start of the + # second part, because we forward-fill. 
+ first_end = second_begin - cal.day + + first_part = asset_series[:first_end] + second_part = asset_series[second_begin:] decile_count = ((idx + 1) // 10) # in our test data, asset3 prices will be nine # NaNs, then ten 11s, ten 21s, ten 31s... - if decile_count == 1: + if len(second_part) >= 10: + np.testing.assert_array_equal( + np.full(len(first_part), np.nan), + first_part + ) + elif decile_count == 1: np.testing.assert_array_equal( np.full(len(first_part), np.nan), first_part @@ -1386,7 +1452,10 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): @classmethod def create_df_for_asset(cls, start_day, end_day, interval=1, force_zeroes=False): - sessions = cls.trading_calendar.sessions_in_range(start_day, end_day) + sessions = cls.trading_calendars[Equity].sessions_in_range( + start_day, + end_day, + ) sessions_count = len(sessions) # default to 2 because the low array subtracts 1, and we don't @@ -1455,7 +1524,9 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): # get the first 30 days of 2015 jan5 = pd.Timestamp('2015-01-05') - days = self.trading_calendar.sessions_window(jan5, 30) + # Regardless of the calendar used for this test, equities will + # only have data on NYSE sessions. + days = self.trading_calendars[Equity].sessions_window(jan5, 30) for idx, day in enumerate(days): self.verify_regular_dt(idx, day, 'daily') @@ -1806,20 +1877,31 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): offsets = np.arange(4) - def assert_window_prices(window, starting_price): - np.testing.assert_almost_equal(window.loc[:, self.ASSET1], - starting_price + offsets) + def assert_window_prices(window, prices): + np.testing.assert_almost_equal(window.loc[:, self.ASSET1], prices) # Window 1 starts on the 23rd day of data for ASSET 1. - assert_window_prices(window_1, 23) + assert_window_prices(window_1, 23 + offsets) # Window 2 starts on the 21st day of data for ASSET 1. 
- assert_window_prices(window_2, 21) + assert_window_prices(window_2, 21 + offsets) # Window 3 starts on the 23rd day of data for ASSET 1. - assert_window_prices(window_3, 23) + assert_window_prices(window_3, 23 + offsets) + # Window 4 starts on the 11th day of data for ASSET 1. - assert_window_prices(window_4, 11) + if not self.trading_calendar.is_session('2014-01-20'): + assert_window_prices(window_4, 11 + offsets) + else: + # If not on the NYSE calendar, it is possible that MLK day + # (2014-01-20) is an active trading session. In that case, + # we expect a nan value for this asset. + assert_window_prices(window_4, [12, nan, 13, 14]) class NoPrefetchDailyEquityHistoryTestCase(DailyEquityHistoryTestCase): DATA_PORTAL_MINUTE_HISTORY_PREFETCH = 0 DATA_PORTAL_DAILY_HISTORY_PREFETCH = 0 + + +class DailyEquityHistoryOnFuturesCalendarTestCase(DailyEquityHistoryTestCase): + TRADING_CALENDAR_STRS = ('NYSE', 'us_futures') + TRADING_CALENDAR_PRIMARY_CAL = 'us_futures' diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py index 0f54ac12..e08ee7b7 100644 --- a/zipline/testing/fixtures.py +++ b/zipline/testing/fixtures.py @@ -433,7 +433,10 @@ class WithTradingCalendars(object): cls.trading_calendars = {} - for cal_str in cls.TRADING_CALENDAR_STRS: + for cal_str in ( + set(cls.TRADING_CALENDAR_STRS) | + {cls.TRADING_CALENDAR_PRIMARY_CAL} + ): # Set name to allow aliasing. calendar = get_calendar(cal_str) setattr(cls, From a4f1171f1f24c23fbd5f60ac35b57ea3fa6d4109 Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Tue, 2 May 2017 13:33:42 -0400 Subject: [PATCH 3/5] TST: Adds MinuteEquityHistoryFuturesCalendarTestCase Added as a subclass of MinuteEquityHistoryTestCase, where the primary calendar is 'us_futures'. Notes on modifications to MinuteEquityHistoryTestCase: - To work on generic calendars, many tests now use set minutes for window start and end, and check the values on active equity minutes. 
- test_minute_regular should test against active equity minutes - Adapts test_minute_midnight to work with futures calendar - Use a method of getting the last open minute that works with calendars that are open at midnight - Test against Sunday at midnight, since the real intention of this test is to check that given a non-open minute, we fall back to the last open minute. --- tests/test_history.py | 363 ++++++++++++++++++++++++++++++------------ 1 file changed, 264 insertions(+), 99 deletions(-) diff --git a/tests/test_history.py b/tests/test_history.py index c4ed5130..e1f197ce 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -22,7 +22,7 @@ from six import iteritems from zipline import TradingAlgorithm from zipline._protocol import handle_non_market_minutes, BarData -from zipline.assets import Asset +from zipline.assets import Asset, Equity from zipline.errors import ( HistoryInInitialize, HistoryWindowStartsBeforeData, @@ -537,19 +537,21 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): @classmethod def make_equity_minute_bar_data(cls): + equities_cal = cls.trading_calendars[Equity] + data = {} sids = {2, 5, cls.SHORT_ASSET_SID, cls.HALF_DAY_TEST_ASSET_SID} for sid in sids: asset = cls.asset_finder.retrieve_asset(sid) data[sid] = create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, asset.start_date, asset.end_date, start_val=2, ) data[1] = create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, pd.Timestamp('2014-01-03', tz='utc'), pd.Timestamp('2016-01-29', tz='utc'), start_val=2, @@ -557,9 +559,9 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): asset2 = cls.asset_finder.retrieve_asset(2) data[asset2.sid] = create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, asset2.start_date, - cls.trading_calendar.previous_session_label(asset2.end_date), + equities_cal.previous_session_label(asset2.end_date), start_val=2, minute_blacklist=[ pd.Timestamp('2015-01-08 14:31', 
tz='UTC'), @@ -574,29 +576,29 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # the thousands place. data[cls.MERGER_ASSET_SID] = data[cls.SPLIT_ASSET_SID] = pd.concat(( create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, pd.Timestamp('2015-01-05', tz='UTC'), pd.Timestamp('2015-01-05', tz='UTC'), start_val=8000), create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, pd.Timestamp('2015-01-06', tz='UTC'), pd.Timestamp('2015-01-06', tz='UTC'), start_val=2000), create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, pd.Timestamp('2015-01-07', tz='UTC'), pd.Timestamp('2015-01-07', tz='UTC'), start_val=1000), create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, pd.Timestamp('2015-01-08', tz='UTC'), pd.Timestamp('2015-01-08', tz='UTC'), start_val=1000) )) asset3 = cls.asset_finder.retrieve_asset(3) data[3] = create_minute_df_for_asset( - cls.trading_calendar, + equities_cal, asset3.start_date, asset3.end_date, start_val=2, @@ -817,7 +819,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # 10 minutes asset = self.env.asset_finder.retrieve_asset(sid) - minutes = self.trading_calendar.minutes_for_session( + # Check the first hour of equities trading. + minutes = self.trading_calendars[Equity].minutes_for_session( pd.Timestamp('2015-01-05', tz='UTC') )[0:60] @@ -826,33 +829,37 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): assets=[asset], fields=[field]) - def test_minute_midnight(self): - midnight = pd.Timestamp('2015-01-06', tz='UTC') - last_minute = self.trading_calendar.open_and_close_for_session( - self.trading_calendar.minute_to_session_label( - midnight, - direction="previous" - ) - )[1] + def test_minute_sunday_midnight(self): + # Most trading calendars aren't open at midnight on Sunday. 
+ sunday_midnight = pd.Timestamp('2015-01-09', tz='UTC') - midnight_bar_data = self.create_bardata( - lambda: midnight, - ) - yesterday_bar_data = self.create_bardata( - lambda: last_minute - ) + # Find the closest prior minute when the trading calendar was + # open (note that if the calendar is open at `sunday_midnight`, + # this will be `sunday_midnight`). + trading_minutes = self.trading_calendar.all_minutes + last_minute = trading_minutes[trading_minutes <= sunday_midnight][-1] - with handle_non_market_minutes(midnight_bar_data): + sunday_midnight_bar_data = self.create_bardata(lambda: sunday_midnight) + last_minute_bar_data = self.create_bardata(lambda: last_minute) + + # Ensure that we get the same results at midnight on Sunday as + # the last open minute. + with handle_non_market_minutes(sunday_midnight_bar_data): for field in ALL_FIELDS: np.testing.assert_array_equal( - midnight_bar_data.history(self.ASSET2, field, 30, '1m'), - yesterday_bar_data.history(self.ASSET2, field, 30, '1m') + sunday_midnight_bar_data.history( + self.ASSET2, + field, + 30, + '1m', + ), + last_minute_bar_data.history(self.ASSET2, field, 30, '1m') ) def test_minute_after_asset_stopped(self): # SHORT_ASSET's last day was 2015-01-06 # get some history windows that straddle the end - minutes = self.trading_calendar.minutes_for_session( + minutes = self.trading_calendars[Equity].minutes_for_session( pd.Timestamp('2015-01-07', tz='UTC') )[0:60] @@ -867,15 +874,6 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # Reset data portal because it has advanced past next test date. 
data_portal = self.make_data_portal() - # choose a window that contains the last minute of the asset - bar_data = BarData( - data_portal=data_portal, - simulation_dt_func=lambda: minutes[15], - data_frequency='minute', - restrictions=NoRestrictions(), - trading_calendar=self.trading_calendar, - ) - # close high low open price volume # 2015-01-06 20:47:00+00:00 768 770 767 769 768 76800 # 2015-01-06 20:48:00+00:00 769 771 768 770 769 76900 @@ -908,9 +906,29 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # 2015-01-07 14:45:00+00:00 NaN NaN NaN NaN NaN 0 # 2015-01-07 14:46:00+00:00 NaN NaN NaN NaN NaN 0 - window = bar_data.history(self.SHORT_ASSET, ALL_FIELDS, 30, '1m') + # choose a window that contains the last minute of the asset + window_start = pd.Timestamp('2015-01-06 20:47', tz='UTC') + window_end = pd.Timestamp('2015-01-07 14:46', tz='UTC') - # there should be 14 values and 16 NaNs/0s + bar_data = BarData( + data_portal=data_portal, + simulation_dt_func=lambda: minutes[15], + data_frequency='minute', + restrictions=NoRestrictions(), + trading_calendar=self.trading_calendar, + ) + + bar_count = len( + self.trading_calendar.minutes_in_range(window_start, window_end) + ) + window = bar_data.history( + self.SHORT_ASSET, + ALL_FIELDS, + bar_count, + '1m', + ) + + # Window should start with 14 values and end with 16 NaNs/0s. 
for field in ALL_FIELDS: if field == 'volume': np.testing.assert_array_equal( @@ -953,9 +971,10 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): for asset in [self.SPLIT_ASSET, self.MERGER_ASSET]: # before any of the adjustments, last 10 minutes of jan 5 + equity_cal = self.trading_calendars[Equity] window1 = self.data_portal.get_history_window( [asset], - self.trading_calendar.open_and_close_for_session(jan5)[1], + equity_cal.open_and_close_for_session(jan5)[1], 10, '1m', 'close' @@ -964,11 +983,19 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): np.testing.assert_array_equal( np.array(range(8380, 8390)), window1) - # straddling the first event + # straddling the first event - begins with the last 5 equity + # minutes on 2015-01-05, ends with the first 5 on + # 2015-01-06. + window2_start = pd.Timestamp('2015-01-05 20:56', tz='UTC') + window2_end = pd.Timestamp('2015-01-06 14:35', tz='UTC') + window2_count = len(self.trading_calendar.minutes_in_range( + window2_start, + window2_end, + )) window2 = self.data_portal.get_history_window( [asset], pd.Timestamp('2015-01-06 14:35', tz='UTC'), - 10, + window2_count, '1m', 'close' )[asset] @@ -979,22 +1006,33 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): 2096.5, 2096.75, 2097, - 2097.25, - # Split occurs. The value of the thousands place should - # match. - 2000, + 2097.25], + window2[:5], + ) + # Split occurs. The value of the thousands place should + # match. + np.testing.assert_array_equal( + [2000, 2001, 2002, 2003, 2004], - window2 + window2[-5:], ) - # straddling both events! + # straddling both events! on the equities calendar this is 5 + # minutes of 1/7, 390 of 1/6, and 5 minutes of 1/5. 
+ window3_start = pd.Timestamp('2015-01-05 20:56', tz='UTC') + window3_end = pd.Timestamp('2015-01-07 14:35', tz='UTC') + window3_minutes = self.trading_calendar.minutes_in_range( + window3_start, + window3_end, + ) + window3_count = len(window3_minutes) window3 = self.data_portal.get_history_window( [asset], pd.Timestamp('2015-01-07 14:35', tz='UTC'), - 400, # 5 minutes of 1/7, 390 of 1/6, and 5 minutes of 1/5 + window3_count, '1m', 'close' )[asset] @@ -1005,14 +1043,21 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): window3[0:5] ) - # next 390 minutes should be 2000-2390, but halved + # next 390 minutes (the 2015-01-06 session) should be + # 2000-2390, but halved + middle_day_open_i = window3_minutes.searchsorted( + pd.Timestamp('2015-01-06 14:31', tz='UTC') + ) + middle_day_close_i = window3_minutes.searchsorted( + pd.Timestamp('2015-01-06 21:00', tz='UTC') + ) np.testing.assert_array_equal( np.array(range(2000, 2390), dtype='float64') / 2, - window3[5:395] + window3[middle_day_open_i:middle_day_close_i + 1] ) # final 5 minutes should be 1000-1004 - np.testing.assert_array_equal(range(1000, 1005), window3[395:]) + np.testing.assert_array_equal(range(1000, 1005), window3[-5:]) # after last event window4 = self.data_portal.get_history_window( @@ -1040,11 +1085,16 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): np.testing.assert_array_equal(np.array(range(382, 392)), window1) - # straddling the first dividend + # straddling the first dividend (10 active equity minutes) + window2_start = pd.Timestamp('2015-01-05 20:56', tz='UTC') + window2_end = pd.Timestamp('2015-01-06 14:35', tz='UTC') + window2_count = len( + self.trading_calendar.minutes_in_range(window2_start, window2_end) + ) window2 = self.data_portal.get_history_window( [self.DIVIDEND_ASSET], - pd.Timestamp('2015-01-06 14:35', tz='UTC'), - 10, + window2_end, + window2_count, '1m', 'close' )[self.DIVIDEND_ASSET] @@ -1057,13 +1107,21 @@ class 
MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): ) # second half of window is unadjusted - np.testing.assert_array_equal(range(392, 397), window2[5:]) + np.testing.assert_array_equal(range(392, 397), window2[-5:]) - # straddling both dividends + # straddling both dividends (on the equities calendar, this is + # 5 minutes of 1/7, 390 of 1/6, and 5 minutes of 1/5). + window3_start = pd.Timestamp('2015-01-05 20:56', tz='UTC') + window3_end = pd.Timestamp('2015-01-07 14:35', tz='UTC') + window3_minutes = self.trading_calendar.minutes_in_range( + window3_start, + window3_end, + ) + window3_count = len(window3_minutes) window3 = self.data_portal.get_history_window( [self.DIVIDEND_ASSET], - pd.Timestamp('2015-01-07 14:35', tz='UTC'), - 400, # 5 minutes of 1/7, 390 of 1/6, and 5 minutes of 1/5 + window3_end, + window3_count, '1m', 'close' )[self.DIVIDEND_ASSET] @@ -1074,14 +1132,21 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): window3[0:5] ) - # next 390 minutes should be hit by 0.96 (second dividend) + # next 390 minutes (the 2015-01-06 session) should be hit by 0.96 + # (second dividend) + middle_day_open_i = window3_minutes.searchsorted( + pd.Timestamp('2015-01-06 14:31', tz='UTC') + ) + middle_day_close_i = window3_minutes.searchsorted( + pd.Timestamp('2015-01-06 21:00', tz='UTC') + ) np.testing.assert_array_almost_equal( np.array(range(392, 782), dtype='float64') * 0.96, - window3[5:395] + window3[middle_day_open_i:middle_day_close_i + 1] ) # last 5 minutes should not be adjusted - np.testing.assert_array_equal(np.array(range(782, 787)), window3[395:]) + np.testing.assert_array_equal(np.array(range(782, 787)), window3[-5:]) def test_passing_iterable_to_history_regular_hours(self): # regular hours @@ -1129,50 +1194,70 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): 'price': np.arange(382, 392) / 2.0, } + # Use a window looking back to 3:51pm from 8:45am the following day. 
+ # This contains the last ten minutes of the equity session for + # 2015-01-05. + window_start = pd.Timestamp('2015-01-05 20:51', tz='UTC') + window_end = pd.Timestamp('2015-01-06 13:44', tz='UTC') + window_length = len( + self.trading_calendar.minutes_in_range(window_start, window_end) + ) + with handle_non_market_minutes(bar_data): # Single field, single asset for field in ALL_FIELDS: - values = bar_data.history(self.SPLIT_ASSET, field, 10, '1m') - np.testing.assert_array_equal(values.values, + values = bar_data.history( + self.SPLIT_ASSET, + field, + window_length, + '1m', + ) + + # The first 10 bars of `values` correspond to the last + # 10 minutes in the 2015-01-05 session. + np.testing.assert_array_equal(values.values[:10], adj_expected[field], err_msg=field) # Multi field, single asset values = bar_data.history( - self.SPLIT_ASSET, ['open', 'volume'], 10, '1m' + self.SPLIT_ASSET, ['open', 'volume'], window_length, '1m' ) - np.testing.assert_array_equal(values.open.values, + np.testing.assert_array_equal(values.open.values[:10], adj_expected['open']) - np.testing.assert_array_equal(values.volume.values, + np.testing.assert_array_equal(values.volume.values[:10], adj_expected['volume']) # Single field, multi asset values = bar_data.history( - [self.SPLIT_ASSET, self.ASSET2], 'open', 10, '1m' + [self.SPLIT_ASSET, self.ASSET2], 'open', window_length, '1m' ) - np.testing.assert_array_equal(values[self.SPLIT_ASSET].values, + np.testing.assert_array_equal(values[self.SPLIT_ASSET].values[:10], adj_expected['open']) - np.testing.assert_array_equal(values[self.ASSET2].values, + np.testing.assert_array_equal(values[self.ASSET2].values[:10], expected['open'] * 2) # Multi field, multi asset values = bar_data.history( - [self.SPLIT_ASSET, self.ASSET2], ['open', 'volume'], 10, '1m' + [self.SPLIT_ASSET, self.ASSET2], + ['open', 'volume'], + window_length, + '1m', ) np.testing.assert_array_equal( - values.open[self.SPLIT_ASSET].values, + 
values.open[self.SPLIT_ASSET].values[:10], adj_expected['open'] ) np.testing.assert_array_equal( - values.volume[self.SPLIT_ASSET].values, + values.volume[self.SPLIT_ASSET].values[:10], adj_expected['volume'] ) np.testing.assert_array_equal( - values.open[self.ASSET2].values, + values.open[self.ASSET2].values[:10], expected['open'] * 2 ) np.testing.assert_array_equal( - values.volume[self.ASSET2].values, + values.volume[self.ASSET2].values[:10], expected['volume'] / 2 ) @@ -1182,12 +1267,16 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # # five minutes into the day after the early close, get 20 1m bars - dt = pd.Timestamp('2014-07-07 13:35:00', tz='UTC') + cal = self.trading_calendar + + window_start = pd.Timestamp('2014-07-03 16:46:00', tz='UTC') + window_end = pd.Timestamp('2014-07-07 13:35:00', tz='UTC') + bar_count = len(cal.minutes_in_range(window_start, window_end)) window = self.data_portal.get_history_window( [self.HALF_DAY_TEST_ASSET], - dt, - 20, + window_end, + bar_count, '1m', 'close' )[self.HALF_DAY_TEST_ASSET] @@ -1196,10 +1285,20 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): # first minute of 7/7 is the 600th trading minute for this asset # this asset's first minute had a close value of 2, so every value is # 2 + (minute index) - np.testing.assert_array_equal(range(587, 607), window) + expected = range(587, 607) + + # First 15 bars occur at the end of 2014-07-03. + np.testing.assert_array_equal(window[:15], expected[:15]) + # Interim bars (only on other calendars) should all be nan. + np.testing.assert_array_equal( + window[15:-5], + np.full(len(window) - 20, np.nan), + ) + # Last 5 bars occur at the start of 2014-07-07. 
+ np.testing.assert_array_equal(window[-5:], expected[-5:]) self.assertEqual( - window.index[-6], + window.index[14], pd.Timestamp('2014-07-03 17:00', tz='UTC') ) @@ -1209,34 +1308,58 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): ) def test_minute_different_lifetimes(self): + cal = self.trading_calendar + equity_cal = self.trading_calendars[Equity] + # at trading start, only asset1 existed day = self.trading_calendar.next_session_label(self.TRADING_START_DT) - asset1_minutes = \ - self.trading_calendar.minutes_for_sessions_in_range( - self.ASSET1.start_date, - self.ASSET1.end_date - ) + # Range containing 100 equity minutes, possibly more on other + # calendars (i.e. futures). + window_start = pd.Timestamp('2014-01-03 19:22', tz='UTC') + window_end = pd.Timestamp('2014-01-06 14:31', tz='UTC') + bar_count = len(cal.minutes_in_range(window_start, window_end)) - asset1_idx = asset1_minutes.searchsorted( - self.trading_calendar.open_and_close_for_session(day)[0] + equity_cal = self.trading_calendars[Equity] + first_equity_open, _ = equity_cal.open_and_close_for_session(day) + + asset1_minutes = equity_cal.minutes_for_sessions_in_range( + self.ASSET1.start_date, + self.ASSET1.end_date ) + asset1_idx = asset1_minutes.searchsorted(first_equity_open) window = self.data_portal.get_history_window( [self.ASSET1, self.ASSET2], - self.trading_calendar.open_and_close_for_session(day)[0], - 100, + first_equity_open, + bar_count, '1m', 'close' ) + expected = range(asset1_idx - 97, asset1_idx + 3) + + # First 99 bars occur on the previous day, np.testing.assert_array_equal( - range(asset1_idx - 97, asset1_idx + 3), - window[self.ASSET1] + window[self.ASSET1][:99], + expected[:99], + ) + # Any interim bars are not active equity minutes, so should all + # be nan. + np.testing.assert_array_equal( + window[self.ASSET1][99:-1], + np.full(len(window) - 100, np.nan), + ) + # Final bar in the window is the first equity bar of `day`. 
+ np.testing.assert_array_equal( + window[self.ASSET1][-1:], + expected[-1:], ) + # All NaNs for ASSET2, since it hasn't started yet. np.testing.assert_array_equal( - np.full(100, np.nan), window[self.ASSET2] + window[self.ASSET2], + np.full(len(window), np.nan), ) def test_history_window_before_first_trading_day(self): @@ -1264,11 +1387,17 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): day = pd.Timestamp('2015-01-07', tz='UTC') minutes = self.trading_calendar.minutes_for_session(day) + equity_cal = self.trading_calendars[Equity] + equity_minutes = equity_cal.minutes_for_session(day) + equity_open, equity_close = equity_minutes[0], equity_minutes[-1] + # minute data, baseline: # Jan 5: 2 to 391 # Jan 6: 392 to 781 # Jan 7: 782 to 1172 - for idx, minute in enumerate(minutes): + for minute in minutes: + idx = equity_minutes.searchsorted(min(minute, equity_close)) + for field in ALL_FIELDS: window = self.data_portal.get_history_window( @@ -1299,7 +1428,20 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): last_val = -1 - if field == 'open': + # XXX + if minute == day: + continue + + if minute < equity_open: + # If before the equity calendar open, we don't yet + # have data (but price is forward-filled). 
+ if field == 'volume': + last_val = 0 + elif field == 'price': + last_val = window[1] + else: + last_val = nan + elif field == 'open': last_val = 783 elif field == 'high': # since we increase monotonically, it's just the last @@ -1317,7 +1459,7 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): last_val = sum(np.array(range(782, 782 + idx + 1)) * 100) - self.assertEqual(window[-1], last_val) + np.testing.assert_equal(window[-1], last_val) @parameterized.expand(ALL_FIELDS) def test_daily_history_blended_gaps(self, field): @@ -1328,11 +1470,16 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): day = pd.Timestamp('2015-01-08', tz='UTC') minutes = self.trading_calendar.minutes_for_session(day) + equity_cal = self.trading_calendars[Equity] + equity_minutes = equity_cal.minutes_for_session(day) + equity_open, equity_close = equity_minutes[0], equity_minutes[-1] + # minute data, baseline: # Jan 5: 2 to 391 # Jan 6: 392 to 781 # Jan 7: 782 to 1172 - for idx, minute in enumerate(minutes): + for minute in minutes: + idx = equity_minutes.searchsorted(min(minute, equity_close)) window = self.data_portal.get_history_window( [self.ASSET2], @@ -1363,9 +1510,22 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): self.assertEqual(window[0], 22873500) self.assertEqual(window[1], 38083500) + # XXX + if minute == day: + continue + last_val = -1 - if field == 'open': + if minute < equity_open: + # If before the equity calendar open, we don't yet + # have data (but price is forward-filled). 
+ if field == 'volume': + last_val = 0 + elif field == 'price': + last_val = window[1] + else: + last_val = nan + elif field == 'open': if idx == 0: last_val = np.nan else: @@ -1902,6 +2062,11 @@ class NoPrefetchDailyEquityHistoryTestCase(DailyEquityHistoryTestCase): DATA_PORTAL_DAILY_HISTORY_PREFETCH = 0 -class DailyEquityHistoryOnFuturesCalendarTestCase(DailyEquityHistoryTestCase): +class MinuteEquityHistoryFuturesCalendarTestCase(MinuteEquityHistoryTestCase): + TRADING_CALENDAR_STRS = ('NYSE', 'us_futures') + TRADING_CALENDAR_PRIMARY_CAL = 'us_futures' + + +class DailyEquityHistoryFuturesCalendarTestCase(DailyEquityHistoryTestCase): TRADING_CALENDAR_STRS = ('NYSE', 'us_futures') TRADING_CALENDAR_PRIMARY_CAL = 'us_futures' From 423a76730c883921d3deddd352b40e019d370621 Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Tue, 2 May 2017 14:21:21 -0400 Subject: [PATCH 4/5] BUG: Fix _handle_minute_history_out_of_bounds for futures calendar Need to use minute_to_session_label to retrieve the proper session. 
--- zipline/data/data_portal.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index bd4cb61b..997cee49 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -810,23 +810,25 @@ class DataPortal(object): return daily_data def _handle_minute_history_out_of_bounds(self, bar_count): + cal = self.trading_calendar + first_trading_minute_loc = ( - self.trading_calendar.all_minutes.get_loc( + cal.all_minutes.get_loc( self._first_trading_minute ) if self._first_trading_minute is not None else None ) - suggested_start_day = ( - self.trading_calendar.all_minutes[ + suggested_start_day = cal.minute_to_session_label( + cal.all_minutes[ first_trading_minute_loc + bar_count - ] + self.trading_calendar.day - ).date() + ] + cal.day + ) raise HistoryWindowStartsBeforeData( first_trading_day=self._first_trading_day.date(), bar_count=bar_count, - suggested_start_day=suggested_start_day, + suggested_start_day=suggested_start_day.date(), ) def _get_history_minute_window(self, assets, end_dt, bar_count, From d155d894fe7175bdac7ba5b18c9562229f96b7d3 Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Thu, 4 May 2017 16:06:15 -0400 Subject: [PATCH 5/5] MAINT: Pass data_frequency to get_history_window This allows us to remove the check for whether the provided dt had a time of midnight, which was a flimsy way to infer if the data frequency was 'daily'. Besides the explicit check being preferable, this method was broken on the futures calendar, since midnight is a valid market minute. 
--- tests/test_api_shim.py | 2 + tests/test_benchmark.py | 8 +- tests/test_continuous_futures.py | 52 +++++++----- tests/test_history.py | 126 ++++++++++++++++++---------- zipline/_protocol.pyx | 9 +- zipline/algorithm.py | 2 + zipline/data/data_portal.py | 58 +++++++++---- zipline/sources/benchmark_source.py | 3 + zipline/testing/core.py | 2 +- 9 files changed, 176 insertions(+), 86 deletions(-) diff --git a/tests/test_api_shim.py b/tests/test_api_shim.py index 4773e57f..614df1e0 100644 --- a/tests/test_api_shim.py +++ b/tests/test_api_shim.py @@ -265,6 +265,7 @@ class TestAPIShim(WithCreateBarData, 5, "1m", "volume", + "minute", True ) else: @@ -274,6 +275,7 @@ class TestAPIShim(WithCreateBarData, 5, "1m", "volume", + "minute", ) test_sim_params = SimulationParameters( diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 7abe4604..fced35a9 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -101,7 +101,12 @@ class TestBenchmark(WithDataPortal, WithSimParams, WithTradingCalendars, # should be the equivalent of getting the price history, then doing # a pct_change on it manually_calculated = self.data_portal.get_history_window( - [1], days_to_use[-1], len(days_to_use), "1d", "close" + [1], + days_to_use[-1], + len(days_to_use), + "1d", + "close", + "daily", )[1].pct_change() # compare all the fields except the first one, for which we don't have @@ -187,6 +192,7 @@ class TestBenchmark(WithDataPortal, WithSimParams, WithTradingCalendars, len(days_to_use), "1d", "close", + "daily", )[2].pct_change() for idx, day in enumerate(days_to_use[1:]): diff --git a/tests/test_continuous_futures.py b/tests/test_continuous_futures.py index 4f3d73d8..b05287c3 100644 --- a/tests/test_continuous_futures.py +++ b/tests/test_continuous_futures.py @@ -670,7 +670,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 
'sid', 'minute') self.assertEqual(window.loc['2016-01-26', cf], 0, @@ -696,7 +696,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 'sid', 'minute') self.assertEqual(window.loc['2016-02-25', cf], 1, @@ -724,7 +724,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-01-11 18:01', tz='US/Eastern').tz_convert('UTC'), - 3, '1d', 'sid') + 3, '1d', 'sid', 'minute') self.assertEqual(window.loc['2016-01-08', cf], 10, @@ -745,7 +745,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 'sid', 'minute') self.assertEqual(window.loc['2016-01-26', cf], 1, @@ -771,7 +771,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 'sid', 'minute') self.assertEqual(window.loc['2016-02-25', cf], 2, @@ -799,7 +799,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 'sid', 'minute') # Volume cuts out for FOF16 on 2016-01-25 self.assertEqual(window.loc['2016-01-26', cf], @@ -826,7 +826,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1d', 'sid') + 30, '1d', 'sid', 'minute') self.assertEqual(window.loc['2016-02-25', cf], 1, @@ -863,7 +863,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf.sid], Timestamp('2016-01-26 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'sid') + 30, '1m', 'sid', 'minute') 
self.assertEqual(window.loc['2016-01-26 22:32', cf], 0, @@ -882,7 +882,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-01-27 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'sid') + 30, '1m', 'sid', 'minute') self.assertEqual(window.loc['2016-01-27 22:32', cf], 1, @@ -896,7 +896,9 @@ def record_current_contract(algo, data): cf = self.data_portal.asset_finder.create_continuous_future( 'FO', 0, 'calendar', None) window = self.data_portal.get_history_window( - [cf.sid], Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close') + [cf.sid], + Timestamp('2016-03-06', tz='UTC'), + 30, '1d', 'close', 'daily') assert_almost_equal( window.loc['2016-01-26', cf], @@ -915,7 +917,9 @@ def record_current_contract(algo, data): # Advance the window a month. window = self.data_portal.get_history_window( - [cf.sid], Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close') + [cf.sid], + Timestamp('2016-04-06', tz='UTC'), + 30, '1d', 'close', 'daily') assert_almost_equal( window.loc['2016-02-24', cf], @@ -946,7 +950,9 @@ def record_current_contract(algo, data): cf = self.data_portal.asset_finder.create_continuous_future( 'MA', 0, 'volume', None) window = self.data_portal.get_history_window( - [cf.sid], Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close') + [cf.sid], + Timestamp('2016-03-06', tz='UTC'), + 30, '1d', 'close', 'daily') assert_almost_equal( window.loc['2016-01-26', cf], @@ -965,7 +971,9 @@ def record_current_contract(algo, data): # Advance the window a month. 
window = self.data_portal.get_history_window( - [cf.sid], Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close') + [cf.sid], + Timestamp('2016-04-06', tz='UTC'), + 30, '1d', 'close', 'daily') assert_almost_equal( window.loc['2016-02-24', cf], @@ -991,7 +999,8 @@ def record_current_contract(algo, data): 'FO', 0, 'calendar', 'add') window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], - Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close') + Timestamp('2016-03-06', tz='UTC'), + 30, '1d', 'close', 'daily') # Unadjusted value is: 115011.44 # Adjustment is based on hop from 115231.44 to 125231.44 @@ -1034,7 +1043,8 @@ def record_current_contract(algo, data): # Advance the window a month. window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], - Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close') + Timestamp('2016-04-06', tz='UTC'), + 30, '1d', 'close', 'daily') # Unadjusted value: 115221.44 # Adjustments based on hops: @@ -1116,7 +1126,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf.sid], Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') self.assertEqual(window.loc['2016-02-25 22:32', cf], 115231.412, @@ -1135,7 +1145,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf], Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') self.assertEqual(window.loc['2016-02-26 22:32', cf], 125241.412, @@ -1155,7 +1165,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') # Unadjusted: 115231.412 # Adjustment based on roll: @@ -1198,7 +1208,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], 
Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') # No adjustments in this window. self.assertEqual(window.loc['2016-02-26 22:32', cf_mul], @@ -1219,7 +1229,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') # Unadjusted: 115231.412 # Adjustment based on roll: @@ -1262,7 +1272,7 @@ def record_current_contract(algo, data): window = self.data_portal.get_history_window( [cf, cf_mul, cf_add], Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'), - 30, '1m', 'close') + 30, '1m', 'close', 'minute') # No adjustments in this window. self.assertEqual(window.loc['2016-02-26 22:32', cf_mul], diff --git a/tests/test_history.py b/tests/test_history.py index e1f197ce..cdc36d02 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -654,7 +654,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): self.trading_calendar.open_and_close_for_session(jan5)[1], 2, '1d', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal(np.array([np.nan, 8389]), window1) @@ -665,7 +666,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06 14:35', tz='UTC'), 2, '1d', - 'close' + 'close', + 'minute', )[asset] # Value from 1/5 should be quartered @@ -683,7 +685,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07 14:35', tz='UTC'), 3, '1d', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal( @@ -697,7 +700,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-08 14:40', tz='UTC'), 2, '1d', - 'close' + 'close', + 'minute', )[asset] # should not be adjusted @@ -715,7 +719,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): 
self.trading_calendar.open_and_close_for_session(jan5)[1], 2, '1d', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal(np.array([nan, 391]), window1) @@ -726,7 +731,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06 14:35', tz='UTC'), 2, '1d', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal( @@ -742,7 +748,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07 14:35', tz='UTC'), 3, '1d', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal( @@ -758,7 +765,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-08 14:40', tz='UTC'), 2, '1d', - 'close' + 'close', + 'minute', )[asset] # should not be adjusted, should be 787 to 791 @@ -977,7 +985,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): equity_cal.open_and_close_for_session(jan5)[1], 10, '1m', - 'close' + 'close', + 'minute', )[asset] np.testing.assert_array_equal( @@ -997,7 +1006,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06 14:35', tz='UTC'), window2_count, '1m', - 'close' + 'close', + 'minute', )[asset] # five minutes from 1/5 should be halved @@ -1034,7 +1044,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07 14:35', tz='UTC'), window3_count, '1m', - 'close' + 'close', + 'minute', )[asset] # first five minutes should be 4385-4390, but eigthed @@ -1065,7 +1076,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07 14:40', tz='UTC'), 5, '1m', - 'close' + 'close', + 'minute', )[asset] # should not be adjusted, should be 1005 to 1009 @@ -1080,7 +1092,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-05 21:00', tz='UTC'), 10, '1m', - 'close' + 'close', + 'minute', )[self.DIVIDEND_ASSET] np.testing.assert_array_equal(np.array(range(382, 
392)), window1) @@ -1096,7 +1109,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): window2_end, window2_count, '1m', - 'close' + 'close', + 'minute', )[self.DIVIDEND_ASSET] # first dividend is 2%, so the first five values should be 2% lower @@ -1123,7 +1137,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): window3_end, window3_count, '1m', - 'close' + 'close', + 'minute', )[self.DIVIDEND_ASSET] # first five minute from 1/7 should be hit by 0.9408 (= 0.98 * 0.96) @@ -1278,7 +1293,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): window_end, bar_count, '1m', - 'close' + 'close', + 'minute', )[self.HALF_DAY_TEST_ASSET] # 390 minutes for 7/2, 210 minutes for 7/3, 7/4-7/6 closed @@ -1334,7 +1350,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): first_equity_open, bar_count, '1m', - 'close' + 'close', + 'minute', ) expected = range(asset1_idx - 97, asset1_idx + 3) @@ -1376,7 +1393,12 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): with self.assertRaisesRegexp( HistoryWindowStartsBeforeData, exp_msg): self.data_portal.get_history_window( - [self.ASSET1], first_day_minutes[5], 15, '1m', 'price' + [self.ASSET1], + first_day_minutes[5], + 15, + '1m', + 'price', + 'minute', )[self.ASSET1] def test_daily_history_blended(self): @@ -1405,7 +1427,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): minute, 3, '1d', - field + field, + 'minute', )[self.ASSET2] self.assertEqual(len(window), 3) @@ -1428,10 +1451,6 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): last_val = -1 - # XXX - if minute == day: - continue - if minute < equity_open: # If before the equity calendar open, we don't yet # have data (but price is forward-filled). 
@@ -1486,7 +1505,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): minute, 3, '1d', - field + field, + 'minute', )[self.ASSET2] self.assertEqual(len(window), 3) @@ -1510,10 +1530,6 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase): self.assertEqual(window[0], 22873500) self.assertEqual(window[1], 38083500) - # XXX - if minute == day: - continue - last_val = -1 if minute < equity_open: @@ -1779,7 +1795,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-05', tz='UTC'), 1, '1d', - 'close' + 'close', + 'daily', )[asset] np.testing.assert_array_equal(window1, [2]) @@ -1789,7 +1806,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-05', tz='UTC'), 1, '1d', - 'volume' + 'volume', + 'daily', )[asset] np.testing.assert_array_equal(window1_volume, [200]) @@ -1800,7 +1818,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06', tz='UTC'), 2, '1d', - 'close' + 'close', + 'daily', )[asset] # first value should be halved, second value unadjusted @@ -1811,7 +1830,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06', tz='UTC'), 2, '1d', - 'volume' + 'volume', + 'daily', )[asset] if asset == self.SPLIT_ASSET: @@ -1826,7 +1846,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07', tz='UTC'), 3, '1d', - 'close' + 'close', + 'daily', )[asset] np.testing.assert_array_equal([0.25, 1.5, 4], window3) @@ -1836,7 +1857,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07', tz='UTC'), 3, '1d', - 'volume' + 'volume', + 'daily', )[asset] if asset == self.SPLIT_ASSET: @@ -1853,7 +1875,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-05', tz='UTC'), 1, '1d', - 'close' + 'close', + 'daily', )[self.DIVIDEND_ASSET] np.testing.assert_array_equal(window1, [2]) @@ -1864,7 +1887,8 @@ 
class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-06', tz='UTC'), 2, '1d', - 'close' + 'close', + 'daily', )[self.DIVIDEND_ASSET] # first dividend is 2%, so the first value should be 2% lower than @@ -1877,7 +1901,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2015-01-07', tz='UTC'), 3, '1d', - 'close' + 'close', + 'daily', )[self.DIVIDEND_ASSET] # second dividend is 0.96 @@ -1937,7 +1962,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): second_day, 4, '1d', - 'price' + 'price', + 'daily', )[self.ASSET1] with self.assertRaisesRegexp(HistoryWindowStartsBeforeData, exp_msg): @@ -1946,7 +1972,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): second_day, 4, '1d', - 'volume' + 'volume', + 'daily', )[self.ASSET1] # Use a minute to force minute mode. @@ -1959,7 +1986,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): first_minute, 4, '1d', - 'close' + 'close', + 'daily', )[self.ASSET2] def test_history_window_different_order(self): @@ -1976,7 +2004,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): day, 4, "1d", - "close" + "close", + 'daily', ) window_2 = self.data_portal.get_history_window( @@ -1984,7 +2013,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): day, 4, "1d", - "close" + "close", + 'daily', ) np.testing.assert_almost_equal(window_1[self.ASSET1].values, @@ -2004,7 +2034,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2014-02-07', tz='UTC'), 4, "1d", - "close" + "close", + 'daily', ) window_2 = self.data_portal.get_history_window( @@ -2012,7 +2043,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2014-02-05', tz='UTC'), 4, "1d", - "close" + "close", + 'daily', ) window_3 = self.data_portal.get_history_window( @@ -2020,7 +2052,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2014-02-07', tz='UTC'), 
4, "1d", - "close" + "close", + 'daily', ) window_4 = self.data_portal.get_history_window( @@ -2028,7 +2061,8 @@ class DailyEquityHistoryTestCase(WithHistory, ZiplineTestCase): pd.Timestamp('2014-01-22', tz='UTC'), 4, "1d", - "close" + "close", + 'daily', ) # Calling 02-07 after resetting the window should not affect the diff --git a/zipline/_protocol.pyx b/zipline/_protocol.pyx index e6ee6a33..4b401431 100644 --- a/zipline/_protocol.pyx +++ b/zipline/_protocol.pyx @@ -648,7 +648,8 @@ cdef class BarData: self._get_current_minute(), bar_count, frequency, - fields + fields, + self.data_frequency, ) if self._adjust_minutes: @@ -680,7 +681,8 @@ cdef class BarData: self._get_current_minute(), bar_count, frequency, - field + field, + self.data_frequency, )[assets] for field in fields } @@ -708,7 +710,8 @@ cdef class BarData: self._get_current_minute(), bar_count, frequency, - field + field, + self.data_frequency, ) for field in fields } diff --git a/zipline/algorithm.py b/zipline/algorithm.py index ccbd1fae..5341c87d 100644 --- a/zipline/algorithm.py +++ b/zipline/algorithm.py @@ -2160,6 +2160,7 @@ class TradingAlgorithm(object): bar_count, frequency, field, + self.data_frequency, ffill, ) else: @@ -2176,6 +2177,7 @@ class TradingAlgorithm(object): bar_count, frequency, field, + self.data_frequency, ffill, ) diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index 997cee49..450f74ba 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -735,8 +735,12 @@ class DataPortal(object): ) return tds[start_loc:end_loc + 1] - def _get_history_daily_window(self, assets, end_dt, bar_count, - field_to_use): + def _get_history_daily_window(self, + assets, + end_dt, + bar_count, + field_to_use, + data_frequency): """ Internal method that returns a dataframe containing history bars of daily frequency for the given sids. 
@@ -750,7 +754,7 @@ class DataPortal(object): columns=None) data = self._get_history_daily_window_data( - assets, days_for_window, end_dt, field_to_use + assets, days_for_window, end_dt, field_to_use, data_frequency ) return pd.DataFrame( data, @@ -762,10 +766,9 @@ class DataPortal(object): assets, days_for_window, end_dt, - field_to_use): - ends_at_midnight = (end_dt.hour == end_dt.minute == 0) - - if ends_at_midnight: + field_to_use, + data_frequency): + if data_frequency == 'daily': # two cases where we use daily data for the whole range: # 1) the history window ends at midnight utc. # 2) the last desired day of the window is after the @@ -860,7 +863,13 @@ class DataPortal(object): columns=assets ) - def get_history_window(self, assets, end_dt, bar_count, frequency, field, + def get_history_window(self, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, ffill=True): """ Public API method that returns a dataframe containing the requested @@ -880,6 +889,10 @@ class DataPortal(object): field: string The desired field of the asset. + data_frequency: string + The frequency of the data to query; i.e. whether the data is + 'daily' or 'minute' bars. + ffill: boolean Forward-fill missing values. Only has effect if field is 'price'. @@ -894,10 +907,10 @@ class DataPortal(object): if frequency == "1d": if field == "price": df = self._get_history_daily_window(assets, end_dt, bar_count, - "close") + "close", data_frequency) else: df = self._get_history_daily_window(assets, end_dt, bar_count, - field) + field, data_frequency) elif frequency == "1m": if field == "price": df = self._get_history_minute_window(assets, end_dt, bar_count, @@ -1299,7 +1312,13 @@ class DataPortal(object): # returns is always calculated over the last 2 days, regardless # of the simulation's data frequency. 
hst = self.get_history_window( - [asset], dt, 2, "1d", "price", ffill=True + [asset], + dt, + 2, + "1d", + "price", + data_frequency, + ffill=True, )[asset] return (hst.iloc[-1] - hst.iloc[0]) / hst.iloc[0] @@ -1317,7 +1336,13 @@ class DataPortal(object): calculated_bar_count = bars price_arr = self.get_history_window( - [asset], dt, calculated_bar_count, freq_str, "price", ffill=True + [asset], + dt, + calculated_bar_count, + freq_str, + "price", + data_frequency, + ffill=True, )[asset] if transform_name == "mavg": @@ -1326,8 +1351,13 @@ class DataPortal(object): return nanstd(price_arr, ddof=1) elif transform_name == "vwap": volume_arr = self.get_history_window( - [asset], dt, calculated_bar_count, freq_str, "volume", - ffill=True + [asset], + dt, + calculated_bar_count, + freq_str, + "volume", + data_frequency, + ffill=True, )[asset] vol_sum = nansum(volume_arr) diff --git a/zipline/sources/benchmark_source.py b/zipline/sources/benchmark_source.py index b1816bcf..c20892f4 100644 --- a/zipline/sources/benchmark_source.py +++ b/zipline/sources/benchmark_source.py @@ -146,6 +146,7 @@ class BenchmarkSource(object): bar_count=len(minutes) + 1, frequency="1m", field="price", + data_frequency=self.emission_rate, ffill=True )[asset] @@ -163,6 +164,7 @@ class BenchmarkSource(object): bar_count=len(trading_days) + 1, frequency="1d", field="price", + data_frequency=self.emission_rate, ffill=True )[asset] return benchmark_series.pct_change()[1:] @@ -175,6 +177,7 @@ class BenchmarkSource(object): bar_count=len(trading_days), frequency="1d", field="price", + data_frequency=self.emission_rate, ffill=True )[asset] diff --git a/zipline/testing/core.py b/zipline/testing/core.py index 47eee138..34be29a5 100644 --- a/zipline/testing/core.py +++ b/zipline/testing/core.py @@ -714,7 +714,7 @@ class FakeDataPortal(DataPortal): return 1.0 def get_history_window(self, assets, end_dt, bar_count, frequency, field, - ffill=True): + data_frequency, ffill=True): if frequency == "1d": end_idx 
= \ self.trading_calendar.all_sessions.searchsorted(end_dt)