PERF: Remove or defer calls to get_loc on large indices.

Mitigation for https://github.com/quantopian/zipline/issues/1503.
This commit is contained in:
Scott Sanderson
2016-09-21 06:18:31 -04:00
parent 74413695f3
commit e86fffc4ca
3 changed files with 34 additions and 30 deletions
+11 -10
View File
@@ -213,12 +213,6 @@ class DataPortal(object):
self.trading_calendar.all_sessions.get_loc(self._first_trading_day)
if self._first_trading_day is not None else None
)
self._first_trading_minute_loc = (
self.trading_calendar.all_minutes.get_loc(
self._first_trading_minute
)
if self._first_trading_minute is not None else None
)
def _ensure_reader_aligned(self, reader):
if reader is None:
@@ -703,10 +697,17 @@ class DataPortal(object):
return daily_data
def _handle_history_out_of_bounds(self, bar_count):
def _handle_minute_history_out_of_bounds(self, bar_count):
first_trading_minute_loc = (
self.trading_calendar.all_minutes.get_loc(
self._first_trading_minute
)
if self._first_trading_minute is not None else None
)
suggested_start_day = (
self.trading_calendar.all_minutes[
self._first_trading_minute_loc + bar_count
first_trading_minute_loc + bar_count
] + self.trading_calendar.day
).date()
@@ -728,10 +729,10 @@ class DataPortal(object):
end_dt, -bar_count
)
except KeyError:
self._handle_history_out_of_bounds(bar_count)
self._handle_minute_history_out_of_bounds(bar_count)
if minutes_for_window[0] < self._first_trading_minute:
self._handle_history_out_of_bounds(bar_count)
self._handle_minute_history_out_of_bounds(bar_count)
asset_minute_data = self._get_minute_window_for_assets(
assets,
+10 -19
View File
@@ -460,29 +460,20 @@ class TradingCalendar(with_metaclass(ABCMeta)):
pd.DatetimeIndex
All the minutes for the given session.
"""
data = self.schedule.loc[session_label]
return self.all_minutes[
self.all_minutes.slice_indexer(
data.market_open,
data.market_close
)
]
return self.minutes_in_range(*self.schedule.loc[session_label])
def minutes_window(self, start_dt, count):
try:
start_idx = self.all_minutes.get_loc(start_dt)
except KeyError:
# if this is not a market minute, go to the previous session's
# close
previous_session = self.minute_to_session_label(
start_dt, direction="previous"
)
start_dt_nanos = start_dt.value
all_minutes_nanos = self._trading_minutes_nanos
start_idx = all_minutes_nanos.searchsorted(start_dt_nanos)
previous_close = self.open_and_close_for_session(
previous_session
)[1]
# searchsorted finds the index of the minute **on or after** start_dt.
# If the latter, push back to the prior minute.
if all_minutes_nanos[start_idx] != start_dt_nanos:
start_idx -= 1
start_idx = self.all_minutes.get_loc(previous_close)
if start_idx < 0 or start_idx >= len(all_minutes_nanos):
raise KeyError("Can't start minute window at {}".format(start_dt))
end_idx = start_idx + count
+13 -1
View File
@@ -1,10 +1,13 @@
from datetime import time
from pandas import Timestamp
from pandas.tseries.holiday import GoodFriday
from pytz import timezone
from zipline.utils.calendars import TradingCalendar
from zipline.utils.calendars.trading_calendar import HolidayCalendar
from zipline.utils.calendars.trading_calendar import (
HolidayCalendar, end_default
)
from zipline.utils.calendars.us_holidays import (
USNewYearsDay,
Christmas
@@ -31,6 +34,15 @@ class QuantopianUSFuturesCalendar(TradingCalendar):
In order to align the hours of each session, we ignore the Sunday
CME Pre-Open hour (5-6pm).
"""
# XXX: Override the default TradingCalendar start date with a later one
# (the end date still defaults to ``end_default``). This is a stopgap for
# memory issues caused by upgrading to pandas 0.18. This calendar is the
# most severely affected, since it has the most total minutes of any of
# the zipline calendars.
def __init__(self,
start=Timestamp('2000-01-01', tz='UTC'),
end=end_default):
super(QuantopianUSFuturesCalendar, self).__init__(start=start, end=end)
@property
def name(self):
return "us_futures"