From ca54721058afbfce8cf25713446f6c88624b7bf0 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 20 Jul 2016 20:03:35 -0400 Subject: [PATCH] MAINT: Pandas compat for rolling_*. --- tests/pipeline/test_engine.py | 36 ++++++++++----- zipline/utils/pandas_utils.py | 82 +++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/tests/pipeline/test_engine.py b/tests/pipeline/test_engine.py index e656cbae..335cda44 100644 --- a/tests/pipeline/test_engine.py +++ b/tests/pipeline/test_engine.py @@ -25,12 +25,8 @@ from pandas import ( Categorical, DataFrame, date_range, - ewma, - ewmstd, Int64Index, MultiIndex, - rolling_apply, - rolling_mean, Series, Timestamp, ) @@ -84,6 +80,12 @@ from zipline.testing.fixtures import ( ) from zipline.utils.memoize import lazyval from zipline.utils.numpy_utils import bool_dtype, datetime64ns_dtype +from zipline.utils.pandas_utils import ( + ewma, + ewmstd, + rolling_apply, + rolling_mean, +) class RollingSumDifference(CustomFactor): @@ -1009,14 +1011,16 @@ class SyntheticBcolzTestCase(WithAdjustmentReader, # computed results to be computed using values anchored on the # **previous** day's data. expected_raw = rolling_mean( - expected_bar_values_2d( - dates - self.trading_calendar.day, - self.equity_info, - 'close', + DataFrame( + expected_bar_values_2d( + dates - self.trading_calendar.day, + self.equity_info, + 'close', + ), ), window_length, min_periods=1, - ) + ).values expected = DataFrame( # Truncate off the extra rows needed to compute the SMAs. @@ -1122,19 +1126,29 @@ class ParameterizedFactorTestCase(WithTradingEnvironment, ZiplineTestCase): def expected_ewma(self, window_length, decay_rate): alpha = 1 - decay_rate span = (2 / alpha) - 1 + + # XXX: This is a comically inefficient way to compute a windowed EWMA. + # Don't use it outside of testing. 
We're using rolling-apply of an + # ewma (which is itself a rolling-window function) because we only want + # to look at ``window_length`` rows at a time. return rolling_apply( self.raw_data, window_length, - lambda window: ewma(window, span=span)[-1], + lambda window: ewma(DataFrame(window), span=span).values[-1], )[window_length:] def expected_ewmstd(self, window_length, decay_rate): alpha = 1 - decay_rate span = (2 / alpha) - 1 + + # XXX: This is a comically inefficient way to compute a windowed EWMSTD. + # Don't use it outside of testing. We're using rolling-apply of an + # ewmstd (which is itself a rolling-window function) because we only want + # to look at ``window_length`` rows at a time. return rolling_apply( self.raw_data, window_length, - lambda window: ewmstd(window, span=span)[-1], + lambda window: ewmstd(DataFrame(window), span=span).values[-1], )[window_length:] @parameterized.expand([ diff --git a/zipline/utils/pandas_utils.py b/zipline/utils/pandas_utils.py index 3e58230a..a38b437a 100644 --- a/zipline/utils/pandas_utils.py +++ b/zipline/utils/pandas_utils.py @@ -160,3 +160,85 @@ def timedelta_to_integral_minutes(delta): Convert a pd.Timedelta to a number of minutes as an int. 
""" return timedelta_to_integral_seconds(delta) // 60 + + +# Remove when we drop support for 0.17 +if pandas_version >= StrictVersion('0.18'): + def rolling_mean(arg, + window, + min_periods=None, + freq=None, + center=False, + **kwargs): + return arg.rolling( + window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs + ).mean() + + def rolling_apply(arg, + window, + func, + min_periods=None, + freq=None, + center=False, + **kwargs): + return arg.rolling( + window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs + ).apply(func) + + def ewma(arg, + com=None, + span=None, + halflife=None, + alpha=None, + min_periods=0, + freq=None, + adjust=True, + how=None, + ignore_na=False): + + return arg.ewm( + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + freq=freq, + adjust=adjust, + ignore_na=ignore_na, + ).mean() + + def ewmstd(arg, + com=None, + span=None, + halflife=None, + alpha=None, + min_periods=0, + freq=None, + adjust=True, + how=None, + ignore_na=False): + + return arg.ewm( + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + freq=freq, + adjust=adjust, + ignore_na=ignore_na, + ).std() + +else: + rolling_mean = pd.rolling_mean + rolling_apply = pd.rolling_apply + ewma = pd.ewma + ewmstd = pd.ewmstd