From e78f4f072938a77fc4affe4a1679ca14d1cfc661 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Wed, 13 Apr 2016 23:20:41 -0400 Subject: [PATCH] PERF: Remove memoization from minute bar position. Instead of using the `remember_last` memoization on all calls to `_find_position_of_minute`, add an instance local cache which is only used by the `get_value` call. The `get_value` call is very hot, so any extra overhead (e.g. creating the WeakArgs on every invocation) becomes costly. The current usage of `get_value` also has the property that it is called with monotonically increasing dt values, but with a high repeat count on each value. (A further improvement could be making a `get_value` which supports being used by many sids, for use by the update of portfolio positions.) The caching is not done at the `_find_position_of_minute` level because `unadjusted_window` always uses two positions on the tape (start and end of range), which would cause entries to be inserted into and removed from the cache, invalidating it both between the calls for start and end and on the next call of the function. --- zipline/data/minute_bars.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index 2a0a8698..3916bd83 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -30,7 +30,7 @@ from zipline.data._minute_bar_internal import ( find_last_traded_position_internal ) -from zipline.utils.memoize import remember_last, lazyval +from zipline.utils.memoize import lazyval US_EQUITIES_MINUTES_PER_DAY = 390 @@ -618,6 +618,9 @@ class BcolzMinuteBarReader(object): 'volume': {}, } + self._last_get_value_dt_position = None + self._last_get_value_dt_value = None + def _get_metadata(self): return BcolzMinuteBarMetadata.read(self._rootdir) @@ -747,7 +750,13 @@ class BcolzMinuteBarReader(object): Returns the integer value of the volume. (A volume of 0 signifies no trades for the given dt.) 
""" - minute_pos = self._find_position_of_minute(dt) + if self._last_get_value_dt_value == dt.value: + minute_pos = self._last_get_value_dt_position + else: + minute_pos = self._find_position_of_minute(dt) + self._last_get_value_dt_value = dt.value + self._last_get_value_dt_position = minute_pos + value = self._open_minute_file(field, sid)[minute_pos] if value == 0: if field == 'volume': @@ -790,7 +799,6 @@ class BcolzMinuteBarReader(object): return pd.Timestamp(minute_epoch, tz='UTC', unit="m") - @remember_last def _find_position_of_minute(self, minute_dt): """ Internal method that returns the position of the given minute in the