From d52dc68b5ad0dbdcf9bceceb725a1eced1c587c6 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Thu, 2 Oct 2014 16:10:56 -0400 Subject: [PATCH] PERF: Use numpy functions for concatenation in history. --- zipline/history/history_container.py | 65 ++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/zipline/history/history_container.py b/zipline/history/history_container.py index 9e08d1ad..4eb6b7ee 100644 --- a/zipline/history/history_container.py +++ b/zipline/history/history_container.py @@ -134,10 +134,10 @@ class HistoryContainer(object): # The set of fields specified by all history specs self.fields = pd.Index( - set(spec.field for spec in itervalues(history_specs)) + sorted(set(spec.field for spec in itervalues(history_specs))) ) self.sids = pd.Index( - set(initial_sids) + sorted(set(initial_sids)) ) # This panel contains raw minutes for periods that haven't been fully @@ -209,14 +209,18 @@ class HistoryContainer(object): """ Add new sids to the container. """ - self.sids = self.sids + _ensure_index(to_add) + self.sids = pd.Index( + sorted(self.sids + _ensure_index(to_add)), + ) self._realign() def drop_sids(self, to_drop): """ Remove sids from the container. """ - self.sids = self.sids - _ensure_index(to_drop) + self.sids = pd.Index( + sorted(self.sids - _ensure_index(to_drop)), + ) self._realign() def _realign(self): @@ -433,15 +437,15 @@ class HistoryContainer(object): ) if field in ['price', 'close_price']: - return frame.ffill().iloc[-1] + return frame.ffill().iloc[-1].values elif field == 'open_price': - return frame.bfill().iloc[0] + return frame.bfill().iloc[0].values elif field == 'volume': - return frame.sum() + return frame.sum().values elif field == 'high': - return frame.max() + return frame.max().values elif field == 'low': - return frame.min() + return frame.min().values else: raise ValueError("Unknown field {}".format(field)) @@ -456,6 +460,7 @@ class HistoryContainer(object): for field in fields ], index=fields, + columns=ohlcv_panel.minor_axis, ) def create_new_digest_frame(self, buffer_minutes): @@ -518,8 +523,42 @@ class HistoryContainer(object): self.last_known_prior_values, ) - last_period = pd.DataFrame( - [self.frame_to_series(field, buffer_frame)], - index=[algo_dt], + last_period = self.frame_to_series(field, buffer_frame) + return fast_build_history_output(digest_frame, last_period, algo_dt) + + +def fast_build_history_output(buffer_frame, last_period, algo_dt): + """ + Optimized concatenation of DataFrame and Series for use in + HistoryContainer.get_history. + + Relies on the fact that the input arrays have compatible shapes. + """ + return pd.DataFrame( + data=np.vstack( + [ + buffer_frame.values, + last_period, + ] + ), + index=fast_append_date_to_index( + buffer_frame.index, + pd.Timestamp(algo_dt) + ), + columns=buffer_frame.columns, + ) + + +def fast_append_date_to_index(index, timestamp): + """ + Append a timestamp to a DatetimeIndex. DatetimeIndex.append throws an + error on pandas 0.12.0 + """ + return pd.DatetimeIndex( + np.hstack( + [ + index.values, + [timestamp], + ] ) - return pd.concat([digest_frame, last_period]) + )