From be0ecb61be954efa110cc1b026bbc11922078703 Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Thu, 22 Sep 2016 19:05:43 -0400 Subject: [PATCH] PERF: Replace get_loc calls in calc_dividend_ratios with get_indexer (#1510) We can make a single vectorized call outside of the loop, instead of repeatedly calling get_loc inside it. --- zipline/data/us_equity_pricing.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/zipline/data/us_equity_pricing.py b/zipline/data/us_equity_pricing.py index 05f0dc00..35949e21 100644 --- a/zipline/data/us_equity_pricing.py +++ b/zipline/data/us_equity_pricing.py @@ -959,7 +959,7 @@ class SQLiteAdjustmentWriter(object): - effective_date, the date in seconds on which to apply the ratio. - ratio, the ratio to apply to backwards looking pricing data. """ - if dividends is None: + if dividends is None or dividends.empty: return DataFrame(np.array( [], dtype=[ @@ -978,12 +978,26 @@ class SQLiteAdjustmentWriter(object): equity_daily_bar_reader = self._equity_daily_bar_reader effective_dates = full(len(amounts), -1, dtype=int64) + calendar = self._calendar + + # Calculate locs against a tz-naive cal, as the ex_dates are tz- + # naive. + # + # TODO: A better approach here would be to localize ex_date to + # the tz of the calendar, but currently get_indexer does not + # preserve tz of the target when method='bfill', which throws + # off the comparison. + tz_naive_calendar = calendar.tz_localize(None) + day_locs = tz_naive_calendar.get_indexer(ex_dates, method='bfill') + for i, amount in enumerate(amounts): sid = sids[i] ex_date = ex_dates[i] - day_loc = calendar.get_loc(ex_date, method='bfill') + day_loc = day_locs[i] + prev_close_date = calendar[day_loc - 1] + try: prev_close = equity_daily_bar_reader.get_value( sid, prev_close_date, 'close')