From 5547cca779b73557dcbda80e1baacb7508be6cf7 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 26 Oct 2016 23:27:33 -0400 Subject: [PATCH] PERF: Pull out loop-invariant code. This shaves off 20 out of 160 seconds for an algorithm that makes a large number of `history()` calls with a large universe and a short window_length. --- zipline/data/data_portal.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index 5b9fc3b5..abc2cb2f 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -887,11 +887,15 @@ class DataPortal(object): ) df.fillna(method='ffill', inplace=True) + # forward-filling will incorrectly produce values after the end of + # an asset's lifetime, so write NaNs back over the asset's + # end_date. + normed_index = df.index.normalize() for asset in df.columns: - if df.index[-1] >= asset.end_date: + if history_end >= asset.end_date: # if the window extends past the asset's end date, set # all post-end-date values to NaN in that asset's series - df.loc[df.index.normalize() > asset.end_date, asset] = nan + df.loc[normed_index > asset.end_date, asset] = nan return df def _get_minute_window_for_assets(self, assets, field, minutes_for_window):