From e86fffc4caced2ff3e98437ef08f44bb38748733 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 21 Sep 2016 06:18:31 -0400 Subject: [PATCH] PERF: Remove or defer calls to get_loc on large indices. Mitigation for https://github.com/quantopian/zipline/issues/1503. --- zipline/data/data_portal.py | 21 +++++++------- zipline/utils/calendars/trading_calendar.py | 29 +++++++------------ .../utils/calendars/us_futures_calendar.py | 14 ++++++++- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index 5ebccf87..8e9ca304 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -213,12 +213,6 @@ class DataPortal(object): self.trading_calendar.all_sessions.get_loc(self._first_trading_day) if self._first_trading_day is not None else None ) - self._first_trading_minute_loc = ( - self.trading_calendar.all_minutes.get_loc( - self._first_trading_minute - ) - if self._first_trading_minute is not None else None - ) def _ensure_reader_aligned(self, reader): if reader is None: @@ -703,10 +697,17 @@ class DataPortal(object): return daily_data - def _handle_history_out_of_bounds(self, bar_count): + def _handle_minute_history_out_of_bounds(self, bar_count): + first_trading_minute_loc = ( + self.trading_calendar.all_minutes.get_loc( + self._first_trading_minute + ) + if self._first_trading_minute is not None else None + ) + suggested_start_day = ( self.trading_calendar.all_minutes[ - self._first_trading_minute_loc + bar_count + first_trading_minute_loc + bar_count ] + self.trading_calendar.day ).date() @@ -728,10 +729,10 @@ class DataPortal(object): end_dt, -bar_count ) except KeyError: - self._handle_history_out_of_bounds(bar_count) + self._handle_minute_history_out_of_bounds(bar_count) if minutes_for_window[0] < self._first_trading_minute: - self._handle_history_out_of_bounds(bar_count) + self._handle_minute_history_out_of_bounds(bar_count) asset_minute_data = 
self._get_minute_window_for_assets( assets, diff --git a/zipline/utils/calendars/trading_calendar.py b/zipline/utils/calendars/trading_calendar.py index 0ec1bbe9..a8e35a07 100644 --- a/zipline/utils/calendars/trading_calendar.py +++ b/zipline/utils/calendars/trading_calendar.py @@ -460,29 +460,20 @@ class TradingCalendar(with_metaclass(ABCMeta)): pd.DateTimeIndex All the minutes for the given session. """ - data = self.schedule.loc[session_label] - return self.all_minutes[ - self.all_minutes.slice_indexer( - data.market_open, - data.market_close - ) - ] + return self.minutes_in_range(*self.schedule.loc[session_label]) def minutes_window(self, start_dt, count): - try: - start_idx = self.all_minutes.get_loc(start_dt) - except KeyError: - # if this is not a market minute, go to the previous session's - # close - previous_session = self.minute_to_session_label( - start_dt, direction="previous" - ) + start_dt_nanos = start_dt.value + all_minutes_nanos = self._trading_minutes_nanos + start_idx = all_minutes_nanos.searchsorted(start_dt_nanos) - previous_close = self.open_and_close_for_session( - previous_session - )[1] + # searchsorted finds the index of the minute **on or after** start_dt. + # If the latter, push back to the prior minute. 
+ if all_minutes_nanos[start_idx] != start_dt_nanos: + start_idx -= 1 - start_idx = self.all_minutes.get_loc(previous_close) + if start_idx < 0 or start_idx >= len(all_minutes_nanos): + raise KeyError("Can't start minute window at {}".format(start_dt)) end_idx = start_idx + count diff --git a/zipline/utils/calendars/us_futures_calendar.py b/zipline/utils/calendars/us_futures_calendar.py index b6f91313..519ffd8c 100644 --- a/zipline/utils/calendars/us_futures_calendar.py +++ b/zipline/utils/calendars/us_futures_calendar.py @@ -1,10 +1,13 @@ from datetime import time +from pandas import Timestamp from pandas.tseries.holiday import GoodFriday from pytz import timezone from zipline.utils.calendars import TradingCalendar -from zipline.utils.calendars.trading_calendar import HolidayCalendar +from zipline.utils.calendars.trading_calendar import ( + HolidayCalendar, end_default +) from zipline.utils.calendars.us_holidays import ( USNewYearsDay, Christmas @@ -31,6 +34,15 @@ class QuantopianUSFuturesCalendar(TradingCalendar): In order to align the hours of each session, we ignore the Sunday CME Pre-Open hour (5-6pm). """ + # XXX: Override the default TradingCalendar start and end dates with ones + # further in the future. This is a stopgap for memory issues caused by + # upgrading to pandas 0.18. This calendar is the most severely affected, + # since it has the most total minutes of any of the zipline calendars. + def __init__(self, + start=Timestamp('2000-01-01', tz='UTC'), + end=end_default): + super(QuantopianUSFuturesCalendar, self).__init__(start=start, end=end) + @property def name(self): return "us_futures"