From 9d68ab6ba732a0deb858dbf3ff2cd029f1fbc348 Mon Sep 17 00:00:00 2001 From: Ana Ruelas Date: Wed, 9 Nov 2016 17:04:45 -0500 Subject: [PATCH 01/10] ENH: Add MACD, MA, and AnnVol as built in factors --- zipline/pipeline/factors/technical.py | 79 +++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index 9b5c480a..77bda03e 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -21,6 +21,7 @@ from numpy import ( NINF, sqrt, sum as np_sum, + nan ) from numexpr import evaluate @@ -39,6 +40,8 @@ from zipline.utils.math_utils import ( ) from .factor import CustomFactor +from talib import MACD + class Returns(CustomFactor): """ @@ -683,3 +686,79 @@ class TrueRange(CustomFactor): )), 2 ) + + + +class MovingAverageConvergenceDivergence(CustomFactor): + """ + Moving Average Convergence/Divergence (MACD) + https://en.wikipedia.org/wiki/MACD + + A technical indicator originally developed by Gerald Appel in the late + 1970's. MACD shows the relationship between two moving averages and + reveals changes in the strength, direction, momentum, and duration of a + trend in a stock's price. + + **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close` + **Default Window Length:** None + + Parameters + ---------- + fast_period : int >= 0, <= window_length + The window length for the "fast" EMA. + slow_period : int >= 0, <= window_length + The window length for the "slow" EMA. + signal_period' : int >= 0, <= slow_period + The window length for the signal line. + + Returns + ------- + MACD: The difference between "fast" EMA and "slow" EMA. + signal: The signal_period length period EMA of the MACD line. + hist: Difference between MACD and signal. 
(Divergence series) + """ + inputs = [USEquityPricing.close] + + params = {'fast_period': 12, + 'slow_period': 26, + 'signal_period': 9} + + outputs = ('MACD', 'signal', 'hist') + + def calculate_macd(self, col, fast, slow, signal): + try: + macd, sig, hist = MACD(col, + fastperiod=fast, + slowperiod=slow, + signalperiod=signal) + return macd[-1], sig[-1], hist[-1] + except: + return nan, nan, nan + + def compute(self, today, assets, out, close, fast_period, slow_period, + signal_period): + n = len(close) + macd, sig, hist = zip(*map(self.calculate_macd, + close.T, + [fast_period]*n, + [slow_period]*n, + [signal_period]*n)) + out.MACD[:] = macd + out.signal[:] = sig + out.hist[:] = hist + + +class AnnualVolatility(CustomFactor): + """ + Volatility + https://en.wikipedia.org/wiki/Volatility_(finance) + + The degree of variation of a series over time as measured by the standard + deviation of the data over the course of a year. + + **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close` + """ + inputs = [USEquityPricing.close] + + def compute(self, today, assets, out, closes): + out[:] = nanstd(closes, ddof=1, axis=0) * (252 ** 0.5) From 7f762d02bf843a2b871750a07c5398c644d017f1 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 9 Nov 2016 18:35:30 -0500 Subject: [PATCH 02/10] MAINT: Use fancy default for window_length. --- zipline/pipeline/factors/technical.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index 77bda03e..bdbc49ae 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -718,13 +718,23 @@ class MovingAverageConvergenceDivergence(CustomFactor): hist: Difference between MACD and signal. 
(Divergence series) """ inputs = [USEquityPricing.close] - - params = {'fast_period': 12, - 'slow_period': 26, - 'signal_period': 9} - + params = ('fast_period', 'slow_period', 'signal_period') outputs = ('MACD', 'signal', 'hist') + def __new__(cls, + fast_period=12, + slow_period=26, + signal_period=9, + *args, + **kwargs): + return super(MovingAverageConvergenceDivergence, cls).__new__( + fast_period=fast_period, + slow_period=slow_period, + signal_period=signal_period, + window_length=slow_period + signal_period, + *args, **kwargs + ) + def calculate_macd(self, col, fast, slow, signal): try: macd, sig, hist = MACD(col, From 10f5cc2cbb7e3d7085e7fc57ac11fe0d3cdc09b8 Mon Sep 17 00:00:00 2001 From: Ana Ruelas Date: Mon, 14 Nov 2016 16:35:56 -0500 Subject: [PATCH 03/10] ENH: Actually use rolling windows for EWMA in MACD --- tests/pipeline/test_technical.py | 77 +++++++++++++++++++++ zipline/pipeline/factors/__init__.py | 6 ++ zipline/pipeline/factors/technical.py | 97 ++++++++++++++++----------- 3 files changed, 141 insertions(+), 39 deletions(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index 7ef0e032..e49ebcf8 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -5,6 +5,7 @@ from six.moves import range import numpy as np import pandas as pd import talib +from numpy.random import random_integers from zipline.lib.adjusted_array import AdjustedArray from zipline.pipeline.data import USEquityPricing @@ -16,6 +17,7 @@ from zipline.pipeline.factors import ( LinearWeightedMovingAverage, RateOfChangePercentage, TrueRange, + MovingAverageConvergenceDivergence ) from zipline.testing import parameter_space from zipline.testing.fixtures import ZiplineTestCase @@ -403,3 +405,78 @@ class TestTrueRange(ZiplineTestCase): tr.compute(today, assets, out, highs, lows, closes) assert_equal(out, np.full((3,), 2.)) + + +class MovingAverageConvergenceDivergenceCase(ZiplineTestCase): + def 
test_MACD_window_length_generation(self): + signal_period = random_integers(1, 90) + fast_period = random_integers(signal_period+1, signal_period+100) + slow_period = random_integers(fast_period+1, fast_period+100) + ewma = MovingAverageConvergenceDivergence( + fast_period=fast_period, + slow_period=slow_period, + signal_period=signal_period, + ) + assert_equal( + ewma.window_length, + slow_period+signal_period-1, + ) + + def test_moving_average_convergence_divergence(self): + fast_period = 3 + slow_period = 8 + signal_period = 2 + + macd = MovingAverageConvergenceDivergence( + fast_period=fast_period, + slow_period=slow_period, + signal_period=signal_period, + ) + + today = pd.Timestamp('2016', tz='utc') + nassets = macd.window_length + assets = pd.Index(np.arange(nassets)) + days_col = np.arange(start=-.05, + stop=.01*nassets-.05, + step=.01)[:, np.newaxis] + close = np.logspace(start=.01, stop=.10, num=nassets) - 1 + days_col + + dtype = [ + ('macd', 'f8'), + ('signal', 'f8'), + ('hist', 'f8'), + ] + out = np.recarray( + shape=(nassets,), + dtype=dtype, + buf=np.empty(shape=(nassets,), dtype=dtype), + ) + macd.compute( + today, + assets, + out, + close, + fast_period, + slow_period, + signal_period, + ) + + expected_macd = np.array([0.01691553] * nassets) + expected_signal = np.array([0.01691553] * nassets) + expected_hist = np.array([0] * nassets) + + np.testing.assert_almost_equal( + out.macd, + expected_macd, + decimal=8 + ) + np.testing.assert_almost_equal( + out.signal, + expected_signal, + decimal=8 + ) + np.testing.assert_almost_equal( + out.hist, + expected_hist, + decimal=8 + ) diff --git a/zipline/pipeline/factors/__init__.py b/zipline/pipeline/factors/__init__.py index 0155cab6..73e8f5fe 100644 --- a/zipline/pipeline/factors/__init__.py +++ b/zipline/pipeline/factors/__init__.py @@ -32,6 +32,9 @@ from .technical import ( TrueRange, VWAP, WeightedAverageValue, + MovingAverageConvergenceDivergence, + MACD, + AnnualizedVolatility, ) __all__ = [ @@ -62,4 
+65,7 @@ __all__ = [ 'TrueRange', 'VWAP', 'WeightedAverageValue', + 'MovingAverageConvergenceDivergence', + 'MACD', + 'AnnualizedVolatility', ] diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index bdbc49ae..4cf2f328 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -21,7 +21,6 @@ from numpy import ( NINF, sqrt, sum as np_sum, - nan ) from numexpr import evaluate @@ -38,10 +37,9 @@ from zipline.utils.math_utils import ( nansum, nanmin, ) +from zipline.utils.numpy_utils import rolling_window from .factor import CustomFactor -from talib import MACD - class Returns(CustomFactor): """ @@ -448,11 +446,6 @@ class ExponentialWeightedMovingStdDev(_ExponentialWeightedFactor): out[:] = sqrt(variance * bias_correction) -# Convenience aliases. -EWMA = ExponentialWeightedMovingAverage -EWMSTD = ExponentialWeightedMovingStdDev - - class BollingerBands(CustomFactor): """ Bollinger Bands technical indicator. @@ -688,8 +681,7 @@ class TrueRange(CustomFactor): ) - -class MovingAverageConvergenceDivergence(CustomFactor): +class MovingAverageConvergenceDivergence(_ExponentialWeightedFactor): """ Moving Average Convergence/Divergence (MACD) https://en.wikipedia.org/wiki/MACD @@ -700,21 +692,22 @@ class MovingAverageConvergenceDivergence(CustomFactor): trend in a stock's price. **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close` - **Default Window Length:** None + **Default Window Length:** Window length is automatically calculated as the + sum of slow_period and signal_period. Parameters ---------- fast_period : int >= 0, <= window_length - The window length for the "fast" EMA. + The window length for the "fast" EWMA. Default is 12. slow_period : int >= 0, <= window_length - The window length for the "slow" EMA. + The window length for the "slow" EWMA. Default is 26. signal_period' : int >= 0, <= slow_period - The window length for the signal line. 
+ The window length for the signal line. Default is 9. Returns ------- - MACD: The difference between "fast" EMA and "slow" EMA. - signal: The signal_period length period EMA of the MACD line. + MACD: The difference between "fast" EWMA and "slow" EWMA. + signal: The EWMA of the MACD line using `signal_period` as span. hist: Difference between MACD and signal. (Divergence series) """ inputs = [USEquityPricing.close] @@ -728,47 +721,73 @@ class MovingAverageConvergenceDivergence(CustomFactor): *args, **kwargs): return super(MovingAverageConvergenceDivergence, cls).__new__( + cls, fast_period=fast_period, slow_period=slow_period, signal_period=signal_period, - window_length=slow_period + signal_period, + window_length=slow_period + signal_period - 1, *args, **kwargs ) - def calculate_macd(self, col, fast, slow, signal): - try: - macd, sig, hist = MACD(col, - fastperiod=fast, - slowperiod=slow, - signalperiod=signal) - return macd[-1], sig[-1], hist[-1] - except: - return nan, nan, nan + def calculate_ewma(self, data, length): + decay_rate = 1.0 - (2.0 / (1.0 + length)) + return average(data, + axis=1, + weights=self.weights(length, decay_rate)) + + def calculate_macd(self, col): + slow_EWMA = self.calculate_ewma( + rolling_window( + col, + self.params['slow_period'] + ), + self.params['slow_period']) + fast_EWMA = self.calculate_ewma( + rolling_window( + col, + self.params['fast_period'] + )[-self.params['signal_period']:], + self.params['fast_period']) + macd = fast_EWMA - slow_EWMA + signal_line = self.calculate_ewma( + macd.reshape(-1, self.params['signal_period']), + self.params['signal_period']) + hist = macd[-1] - signal_line + return macd[-1], signal_line[-1], hist[-1] def compute(self, today, assets, out, close, fast_period, slow_period, signal_period): - n = len(close) - macd, sig, hist = zip(*map(self.calculate_macd, - close.T, - [fast_period]*n, - [slow_period]*n, - [signal_period]*n)) - out.MACD[:] = macd + macd, sig, hist = zip(*map(self.calculate_macd, 
close.T)) + out.macd[:] = macd out.signal[:] = sig out.hist[:] = hist -class AnnualVolatility(CustomFactor): +class AnnualizedVolatility(CustomFactor): """ Volatility https://en.wikipedia.org/wiki/Volatility_(finance) The degree of variation of a series over time as measured by the standard - deviation of the data over the course of a year. + deviation of returns. - **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close` + **Default Inputs:** + :data:`zipline.pipeline.factors.Returns(window_length=2)` + + Parameters + ---------- + annualization_factor : + The number of time units per year. Defaults to average number of NYSE + trading days per year, 252. """ - inputs = [USEquityPricing.close] + inputs = [Returns(window_length=2)] + params = {'annualization_factor': 252} + window_length = 252 - def compute(self, today, assets, out, closes): - out[:] = nanstd(closes, ddof=1, axis=0) * (252 ** 0.5) + def compute(self, today, assets, out, returns, annualization_factor): + out[:] = nanstd(returns, ddof=0, axis=0) * (annualization_factor ** .5) + +# Convenience aliases. 
+EWMA = ExponentialWeightedMovingAverage +EWMSTD = ExponentialWeightedMovingStdDev +MACD = MovingAverageConvergenceDivergence From 435d5acd1405a94c452cdb6ad99c32f6f24b6988 Mon Sep 17 00:00:00 2001 From: Ana Ruelas Date: Mon, 21 Nov 2016 14:17:26 -0500 Subject: [PATCH 04/10] TST: Add test for annualized volatility factor --- tests/pipeline/test_technical.py | 57 ++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index e49ebcf8..896e2845 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -17,7 +17,8 @@ from zipline.pipeline.factors import ( LinearWeightedMovingAverage, RateOfChangePercentage, TrueRange, - MovingAverageConvergenceDivergence + MovingAverageConvergenceDivergence, + AnnualizedVolatility, ) from zipline.testing import parameter_space from zipline.testing.fixtures import ZiplineTestCase @@ -407,7 +408,7 @@ class TestTrueRange(ZiplineTestCase): assert_equal(out, np.full((3,), 2.)) -class MovingAverageConvergenceDivergenceCase(ZiplineTestCase): +class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): def test_MACD_window_length_generation(self): signal_period = random_integers(1, 90) fast_period = random_integers(signal_period+1, signal_period+100) @@ -480,3 +481,55 @@ class MovingAverageConvergenceDivergenceCase(ZiplineTestCase): expected_hist, decimal=8 ) + + +class AnnualizedVolatilityTestCase(ZiplineTestCase): + """ + Test Annualized Volatility + """ + def test_simple_volatility(self): + """ + Simple test for uniform returns should generate 0 volatility + """ + nassets = 3 + ann_vol = AnnualizedVolatility() + today = pd.Timestamp('2016', tz='utc') + assets = np.arange(nassets, dtype=np.float) + returns = np.full((ann_vol.window_length, nassets), + 0.004, + dtype=np.float64) + out = np.empty(shape=(nassets,), dtype=np.float64) + + ann_vol.compute(today, assets, out, returns, 252) + + expected_vol = 
np.array([0] * nassets) + np.testing.assert_almost_equal( + out, + expected_vol, + decimal=8 + ) + + def test_volatility(self): + """ + Check volatility results against values calculated manually + """ + nassets = 3 + ann_vol = AnnualizedVolatility() + today = pd.Timestamp('2016', tz='utc') + assets = np.arange(nassets, dtype=np.float) + returns = np.random.normal(loc=0.001, + scale=0.01, + size=(ann_vol.window_length, nassets)) + out = np.empty(shape=(nassets,), dtype=np.float64) + ann_vol.compute(today, assets, out, returns, 252) + + mean = returns.sum(axis=0) / returns.shape[0] + annualized_variance = ((returns - mean) ** 2).sum(axis=0) / \ + returns.shape[0] * 252 + expected_vol = np.sqrt(annualized_variance) + + np.testing.assert_almost_equal( + out, + expected_vol, + decimal=8 + ) From 3363237123f885a50ddbd3570d197aa4ecfc0ad1 Mon Sep 17 00:00:00 2001 From: Ana Ruelas Date: Wed, 23 Nov 2016 11:43:29 -0500 Subject: [PATCH 05/10] ENH: Add MACDSignal, test with random input --- tests/pipeline/test_technical.py | 68 +++++++++++---------- zipline/pipeline/factors/__init__.py | 12 ++-- zipline/pipeline/factors/technical.py | 85 ++++++++++++--------------- zipline/utils/math_utils.py | 10 +++- 4 files changed, 84 insertions(+), 91 deletions(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index 896e2845..4f62704b 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -17,13 +17,12 @@ from zipline.pipeline.factors import ( LinearWeightedMovingAverage, RateOfChangePercentage, TrueRange, - MovingAverageConvergenceDivergence, + MovingAverageConvergenceDivergenceSignal, AnnualizedVolatility, ) from zipline.testing import parameter_space from zipline.testing.fixtures import ZiplineTestCase from zipline.testing.predicates import assert_equal - from .base import BasePipelineTestCase @@ -409,11 +408,24 @@ class TestTrueRange(ZiplineTestCase): class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): 
+ + def expected_ewma(self, data_df, window): + # Comment copied from `test_engine.py`: + # XXX: This is a comically inefficient way to compute a windowed EWMA. + # Don't use it outside of testing. We're using rolling-apply of an + # ewma (which is itself a rolling-window function) because we only want + # to look at ``window_length`` rows at a time. + return data_df.rolling(window).apply( + lambda sub: pd.DataFrame(sub) + .ewm(span=window) + .mean() + .values[-1]) + def test_MACD_window_length_generation(self): signal_period = random_integers(1, 90) fast_period = random_integers(signal_period+1, signal_period+100) slow_period = random_integers(fast_period+1, fast_period+100) - ewma = MovingAverageConvergenceDivergence( + ewma = MovingAverageConvergenceDivergenceSignal( fast_period=fast_period, slow_period=slow_period, signal_period=signal_period, @@ -424,34 +436,22 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): ) def test_moving_average_convergence_divergence(self): + nassets = 3 fast_period = 3 slow_period = 8 signal_period = 2 - macd = MovingAverageConvergenceDivergence( + macd = MovingAverageConvergenceDivergenceSignal( fast_period=fast_period, slow_period=slow_period, signal_period=signal_period, ) today = pd.Timestamp('2016', tz='utc') - nassets = macd.window_length assets = pd.Index(np.arange(nassets)) - days_col = np.arange(start=-.05, - stop=.01*nassets-.05, - step=.01)[:, np.newaxis] - close = np.logspace(start=.01, stop=.10, num=nassets) - 1 + days_col + out = np.empty(shape=(nassets,), dtype=np.float64) + close = np.random.rand(macd.window_length, nassets) - dtype = [ - ('macd', 'f8'), - ('signal', 'f8'), - ('hist', 'f8'), - ] - out = np.recarray( - shape=(nassets,), - dtype=dtype, - buf=np.empty(shape=(nassets,), dtype=dtype), - ) macd.compute( today, assets, @@ -462,25 +462,23 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): signal_period, ) - expected_macd = np.array([0.01691553] * nassets) - 
expected_signal = np.array([0.01691553] * nassets) - expected_hist = np.array([0] * nassets) + close_df = pd.DataFrame(close) + fast_ewma = self.expected_ewma( + close_df, + fast_period) + slow_ewma = self.expected_ewma( + close_df, + slow_period) + expected_signal = self.expected_ewma( + fast_ewma-slow_ewma, + signal_period + ).values[-1] np.testing.assert_almost_equal( - out.macd, - expected_macd, - decimal=8 - ) - np.testing.assert_almost_equal( - out.signal, + out, expected_signal, decimal=8 ) - np.testing.assert_almost_equal( - out.hist, - expected_hist, - decimal=8 - ) class AnnualizedVolatilityTestCase(ZiplineTestCase): @@ -502,7 +500,7 @@ class AnnualizedVolatilityTestCase(ZiplineTestCase): ann_vol.compute(today, assets, out, returns, 252) - expected_vol = np.array([0] * nassets) + expected_vol = np.zeros(nassets) np.testing.assert_almost_equal( out, expected_vol, @@ -523,7 +521,7 @@ class AnnualizedVolatilityTestCase(ZiplineTestCase): out = np.empty(shape=(nassets,), dtype=np.float64) ann_vol.compute(today, assets, out, returns, 252) - mean = returns.sum(axis=0) / returns.shape[0] + mean = np.mean(returns, axis=0) annualized_variance = ((returns - mean) ** 2).sum(axis=0) / \ returns.shape[0] * 252 expected_vol = np.sqrt(annualized_variance) diff --git a/zipline/pipeline/factors/__init__.py b/zipline/pipeline/factors/__init__.py index 73e8f5fe..0cd539c8 100644 --- a/zipline/pipeline/factors/__init__.py +++ b/zipline/pipeline/factors/__init__.py @@ -14,6 +14,7 @@ from .statistical import ( RollingSpearmanOfReturns, ) from .technical import ( + AnnualizedVolatility, Aroon, AverageDollarVolume, BollingerBands, @@ -24,7 +25,9 @@ from .technical import ( FastStochasticOscillator, IchimokuKinkoHyo, LinearWeightedMovingAverage, + MACDSignal, MaxDrawdown, + MovingAverageConvergenceDivergenceSignal, RateOfChangePercentage, Returns, RSI, @@ -32,12 +35,10 @@ from .technical import ( TrueRange, VWAP, WeightedAverageValue, - MovingAverageConvergenceDivergence, - MACD, - 
AnnualizedVolatility, ) __all__ = [ + 'AnnualizedVolatility', 'Aroon', 'AverageDollarVolume', 'BollingerBands', @@ -53,7 +54,9 @@ __all__ = [ 'IchimokuKinkoHyo', 'Latest', 'LinearWeightedMovingAverage', + 'MACDSignal', 'MaxDrawdown', + 'MovingAverageConvergenceDivergenceSignal', 'RateOfChangePercentage', 'RecarrayField', 'Returns', @@ -65,7 +68,4 @@ __all__ = [ 'TrueRange', 'VWAP', 'WeightedAverageValue', - 'MovingAverageConvergenceDivergence', - 'MACD', - 'AnnualizedVolatility', ] diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index 4cf2f328..ef495aea 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -14,7 +14,6 @@ from numpy import ( dstack, exp, fmax, - full, inf, isnan, log, @@ -36,6 +35,7 @@ from zipline.utils.math_utils import ( nanstd, nansum, nanmin, + exponential_weights, ) from zipline.utils.numpy_utils import rolling_window from .factor import CustomFactor @@ -192,14 +192,6 @@ class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor): """ params = ('decay_rate',) - @staticmethod - def weights(length, decay_rate): - """ - Return weighting vector for an exponential moving statistic on `length` - rows with a decay rate of `decay_rate`. 
- """ - return full(length, decay_rate, float) ** arange(length + 1, 1, -1) - @classmethod @expect_types(span=Number) def from_span(cls, inputs, window_length, span, **kwargs): @@ -369,7 +361,7 @@ class ExponentialWeightedMovingAverage(_ExponentialWeightedFactor): out[:] = average( data, axis=0, - weights=self.weights(len(data), decay_rate), + weights=exponential_weights(len(data), decay_rate), ) @@ -434,7 +426,7 @@ class ExponentialWeightedMovingStdDev(_ExponentialWeightedFactor): """ def compute(self, today, assets, out, data, decay_rate): - weights = self.weights(len(data), decay_rate) + weights = exponential_weights(len(data), decay_rate) mean = average(data, axis=0, weights=weights) variance = average((data - mean) ** 2, axis=0, weights=weights) @@ -681,9 +673,9 @@ class TrueRange(CustomFactor): ) -class MovingAverageConvergenceDivergence(_ExponentialWeightedFactor): +class MovingAverageConvergenceDivergenceSignal(CustomFactor): """ - Moving Average Convergence/Divergence (MACD) + Moving Average Convergence/Divergence (MACD) Signal line https://en.wikipedia.org/wiki/MACD A technical indicator originally developed by Gerald Appel in the late @@ -697,22 +689,21 @@ class MovingAverageConvergenceDivergence(_ExponentialWeightedFactor): Parameters ---------- - fast_period : int >= 0, <= window_length + fast_period : int > 0 The window length for the "fast" EWMA. Default is 12. - slow_period : int >= 0, <= window_length + slow_period : int > 0, > fast_period The window length for the "slow" EWMA. Default is 26. - signal_period' : int >= 0, <= slow_period + signal_period' : int > 0, < fast_period The window length for the signal line. Default is 9. Returns ------- - MACD: The difference between "fast" EWMA and "slow" EWMA. - signal: The EWMA of the MACD line using `signal_period` as span. - hist: Difference between MACD and signal. (Divergence series) + The EWMA of the difference between "fast" EWMA and "slow" EWMA line using + `signal_period` as span. 
""" + inputs = [USEquityPricing.close] params = ('fast_period', 'slow_period', 'signal_period') - outputs = ('MACD', 'signal', 'hist') def __new__(cls, fast_period=12, @@ -720,7 +711,16 @@ class MovingAverageConvergenceDivergence(_ExponentialWeightedFactor): signal_period=9, *args, **kwargs): - return super(MovingAverageConvergenceDivergence, cls).__new__( + + if signal_period <= 0: + raise ValueError("'signal_period' must be larger than 0.") + if slow_period <= fast_period or fast_period <= signal_period: + raise ValueError( + "'slow_period' must be larger than 'fast_period'." + "'fast_period' must be larger than 'signal_period'." + ) + + return super(MovingAverageConvergenceDivergenceSignal, cls).__new__( cls, fast_period=fast_period, slow_period=slow_period, @@ -729,38 +729,25 @@ class MovingAverageConvergenceDivergence(_ExponentialWeightedFactor): *args, **kwargs ) - def calculate_ewma(self, data, length): + def _ewma(self, data, length): decay_rate = 1.0 - (2.0 / (1.0 + length)) return average(data, axis=1, - weights=self.weights(length, decay_rate)) - - def calculate_macd(self, col): - slow_EWMA = self.calculate_ewma( - rolling_window( - col, - self.params['slow_period'] - ), - self.params['slow_period']) - fast_EWMA = self.calculate_ewma( - rolling_window( - col, - self.params['fast_period'] - )[-self.params['signal_period']:], - self.params['fast_period']) - macd = fast_EWMA - slow_EWMA - signal_line = self.calculate_ewma( - macd.reshape(-1, self.params['signal_period']), - self.params['signal_period']) - hist = macd[-1] - signal_line - return macd[-1], signal_line[-1], hist[-1] + weights=exponential_weights(length, decay_rate) + ) def compute(self, today, assets, out, close, fast_period, slow_period, signal_period): - macd, sig, hist = zip(*map(self.calculate_macd, close.T)) - out.macd[:] = macd - out.signal[:] = sig - out.hist[:] = hist + slow_EWMA = self._ewma( + rolling_window(close, slow_period), + slow_period + ) + fast_EWMA = self._ewma( + 
rolling_window(close, fast_period)[-signal_period:], + fast_period + ) + macd = fast_EWMA - slow_EWMA + out[:] = self._ewma(macd.T, signal_period) class AnnualizedVolatility(CustomFactor): @@ -785,9 +772,9 @@ class AnnualizedVolatility(CustomFactor): window_length = 252 def compute(self, today, assets, out, returns, annualization_factor): - out[:] = nanstd(returns, ddof=0, axis=0) * (annualization_factor ** .5) + out[:] = nanstd(returns, axis=0) * (annualization_factor ** .5) # Convenience aliases. EWMA = ExponentialWeightedMovingAverage EWMSTD = ExponentialWeightedMovingStdDev -MACD = MovingAverageConvergenceDivergence +MACDSignal = MovingAverageConvergenceDivergenceSignal diff --git a/zipline/utils/math_utils.py b/zipline/utils/math_utils.py index da99900d..1bcc93c5 100644 --- a/zipline/utils/math_utils.py +++ b/zipline/utils/math_utils.py @@ -14,7 +14,7 @@ # limitations under the License. import math -from numpy import isnan +from numpy import isnan, full, arange def tolerant_equals(a, b, atol=10e-7, rtol=10e-7, equal_nan=False): @@ -77,3 +77,11 @@ def round_if_near_integer(a, epsilon=1e-4): return round(a) else: return a + + +def exponential_weights(length, decay_rate): + """ + Return weighting vector for an exponential moving statistic on `length` + rows with a decay rate of `decay_rate`. + """ + return full(length, decay_rate, float) ** arange(length + 1, 1, -1) From 52958a0340241091d17ac17e669eaa0017f07c48 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 28 Nov 2016 12:53:06 -0500 Subject: [PATCH 06/10] TEST: Use parameter_space for randomized tests. - Use a RandomState with a seed so that we have repeatable results. - Use `randint` instead of `random_integers`. `random_integers` is deprecated. - Use `parameter_space` to test multiple period lengths. 
--- tests/pipeline/test_technical.py | 33 ++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index 4f62704b..9a019459 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -5,7 +5,7 @@ from six.moves import range import numpy as np import pandas as pd import talib -from numpy.random import random_integers +from numpy.random import RandomState from zipline.lib.adjusted_array import AdjustedArray from zipline.pipeline.data import USEquityPricing @@ -421,10 +421,13 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): .mean() .values[-1]) - def test_MACD_window_length_generation(self): - signal_period = random_integers(1, 90) - fast_period = random_integers(signal_period+1, signal_period+100) - slow_period = random_integers(fast_period+1, fast_period+100) + @parameter_space(seed=range(5)) + def test_MACD_window_length_generation(self, seed): + rng = RandomState(seed) + + signal_period = rng.randint(1, 90) + fast_period = rng.randint(signal_period + 1, signal_period + 100) + slow_period = rng.randint(fast_period + 1, fast_period + 100) ewma = MovingAverageConvergenceDivergenceSignal( fast_period=fast_period, slow_period=slow_period, @@ -435,11 +438,21 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): slow_period+signal_period-1, ) - def test_moving_average_convergence_divergence(self): + @parameter_space( + seed=range(2), + fast_period=[3, 5], + slow_period=[8, 10], + signal_period=[3, 9], + __fail_fast=True, + ) + def test_moving_average_convergence_divergence(self, + seed, + fast_period, + slow_period, + signal_period): + rng = RandomState(seed) + nassets = 3 - fast_period = 3 - slow_period = 8 - signal_period = 2 macd = MovingAverageConvergenceDivergenceSignal( fast_period=fast_period, @@ -450,7 +463,7 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): today = 
pd.Timestamp('2016', tz='utc') assets = pd.Index(np.arange(nassets)) out = np.empty(shape=(nassets,), dtype=np.float64) - close = np.random.rand(macd.window_length, nassets) + close = rng.rand(macd.window_length, nassets) macd.compute( today, From c05635333e8e763c9189b1de34758f4ea0a6cdf4 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 28 Nov 2016 12:54:50 -0500 Subject: [PATCH 07/10] STY: Put whitespace between operators. --- tests/pipeline/test_technical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index 9a019459..cd5ada22 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -435,7 +435,7 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): ) assert_equal( ewma.window_length, - slow_period+signal_period-1, + slow_period + signal_period - 1, ) @parameter_space( From d140d585c27dcdf5d34aba271798877624475efe Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 28 Nov 2016 12:58:18 -0500 Subject: [PATCH 08/10] MAINT: Put exponential_weights where it's used. `math_utils` is mostly a shim around bottleneck imports. If we need this somewhere else, it probably belongs in `numpy_utils`. 
--- zipline/pipeline/factors/technical.py | 24 +++++++++++++++++++++++- zipline/utils/math_utils.py | 10 +--------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index ef495aea..947b7495 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -14,6 +14,7 @@ from numpy import ( dstack, exp, fmax, + full, inf, isnan, log, @@ -35,7 +36,6 @@ from zipline.utils.math_utils import ( nanstd, nansum, nanmin, - exponential_weights, ) from zipline.utils.numpy_utils import rolling_window from .factor import CustomFactor @@ -161,6 +161,28 @@ class AverageDollarVolume(CustomFactor): out[:] = nansum(close * volume, axis=0) / len(close) +def exponential_weights(length, decay_rate): + """ + Build a weight vector for an exponentially-weighted statistic. + + The resulting ndarray is of the form:: + + [decay_rate ** length, ..., decay_rate ** 2, decay_rate] + + Parameters + ---------- + length : int + The length of the desired weight vector. + decay_rate : float + The rate at which entries in the weight vector increase or decrease. + + Returns + ------- + weights : ndarray[float64] + """ + return full(length, decay_rate, float64_dtype) ** arange(length + 1, 1, -1) + + class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor): """ Base class for factors implementing exponential-weighted operations. diff --git a/zipline/utils/math_utils.py b/zipline/utils/math_utils.py index 1bcc93c5..da99900d 100644 --- a/zipline/utils/math_utils.py +++ b/zipline/utils/math_utils.py @@ -14,7 +14,7 @@ # limitations under the License. 
import math -from numpy import isnan, full, arange +from numpy import isnan def tolerant_equals(a, b, atol=10e-7, rtol=10e-7, equal_nan=False): @@ -77,11 +77,3 @@ def round_if_near_integer(a, epsilon=1e-4): return round(a) else: return a - - -def exponential_weights(length, decay_rate): - """ - Return weighting vector for an exponential moving statistic on `length` - rows with a decay rate of `decay_rate`. - """ - return full(length, decay_rate, float) ** arange(length + 1, 1, -1) From 4210a2f80f333455efa938102e07efeb554ae7be Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 28 Nov 2016 13:00:24 -0500 Subject: [PATCH 09/10] MAINT: Tweaks/cleanups in technical.py. - Use `expect_bounded` to check inputs. - Add tests for expected failures from `MACDSignal`. - Use `float64` instead of `float` in a few places. This prevents diverging behavior on 32-bit systems. - Docstring edits. --- tests/pipeline/test_technical.py | 52 +++++++++++++++++--- zipline/pipeline/factors/technical.py | 68 ++++++++++++++++----------- 2 files changed, 85 insertions(+), 35 deletions(-) diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py index cd5ada22..78976893 100644 --- a/tests/pipeline/test_technical.py +++ b/tests/pipeline/test_technical.py @@ -438,6 +438,35 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): slow_period + signal_period - 1, ) + def test_bad_inputs(self): + template = ( + "MACDSignal() expected a value greater than or equal to 1" + " for argument %r, but got 0 instead." 
+ ) + with self.assertRaises(ValueError) as e: + MovingAverageConvergenceDivergenceSignal(fast_period=0) + self.assertEqual(template % 'fast_period', str(e.exception)) + + with self.assertRaises(ValueError) as e: + MovingAverageConvergenceDivergenceSignal(slow_period=0) + self.assertEqual(template % 'slow_period', str(e.exception)) + + with self.assertRaises(ValueError) as e: + MovingAverageConvergenceDivergenceSignal(signal_period=0) + self.assertEqual(template % 'signal_period', str(e.exception)) + + with self.assertRaises(ValueError) as e: + MovingAverageConvergenceDivergenceSignal( + fast_period=5, + slow_period=4, + ) + + expected = ( + "'slow_period' must be greater than 'fast_period', but got\n" + "slow_period=4, fast_period=5" + ) + self.assertEqual(expected, str(e.exception)) + @parameter_space( seed=range(2), fast_period=[3, 5], @@ -478,14 +507,23 @@ class MovingAverageConvergenceDivergenceTestCase(ZiplineTestCase): close_df = pd.DataFrame(close) fast_ewma = self.expected_ewma( close_df, - fast_period) + fast_period, + ) slow_ewma = self.expected_ewma( close_df, - slow_period) - expected_signal = self.expected_ewma( - fast_ewma-slow_ewma, + slow_period, + ) + signal_ewma = self.expected_ewma( + fast_ewma - slow_ewma, signal_period - ).values[-1] + ) + + # Everything but the last row should be NaN. + self.assertTrue(signal_ewma.iloc[:-1].isnull().all().all()) + + # We're testing a single compute call, which we expect to be equivalent + # to the last row of the frame we calculated with pandas. 
+ expected_signal = signal_ewma.values[-1] np.testing.assert_almost_equal( out, @@ -505,7 +543,7 @@ class AnnualizedVolatilityTestCase(ZiplineTestCase): nassets = 3 ann_vol = AnnualizedVolatility() today = pd.Timestamp('2016', tz='utc') - assets = np.arange(nassets, dtype=np.float) + assets = np.arange(nassets, dtype=np.float64) returns = np.full((ann_vol.window_length, nassets), 0.004, dtype=np.float64) @@ -527,7 +565,7 @@ class AnnualizedVolatilityTestCase(ZiplineTestCase): nassets = 3 ann_vol = AnnualizedVolatility() today = pd.Timestamp('2016', tz='utc') - assets = np.arange(nassets, dtype=np.float) + assets = np.arange(nassets, dtype=np.float64) returns = np.random.normal(loc=0.001, scale=0.01, size=(ann_vol.window_length, nassets)) diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index 947b7495..c7363bba 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -26,8 +26,7 @@ from numexpr import evaluate from zipline.pipeline.data import USEquityPricing from zipline.pipeline.mixins import SingleInputMixin -from zipline.utils.numpy_utils import ignore_nanwarnings -from zipline.utils.input_validation import expect_types +from zipline.utils.input_validation import expect_bounded, expect_types from zipline.utils.math_utils import ( nanargmax, nanargmin, @@ -37,7 +36,11 @@ from zipline.utils.math_utils import ( nansum, nanmin, ) -from zipline.utils.numpy_utils import rolling_window +from zipline.utils.numpy_utils import ( + float64_dtype, + ignore_nanwarnings, + rolling_window, +) from .factor import CustomFactor @@ -401,13 +404,13 @@ class LinearWeightedMovingAverage(CustomFactor, SingleInputMixin): ctx = ignore_nanwarnings() def compute(self, today, assets, out, data): - num_days = data.shape[0] + ndays = data.shape[0] # Initialize weights array - weights = arange(1, num_days + 1, dtype=float).reshape(num_days, 1) + weights = arange(1, ndays + 1, dtype=float64_dtype).reshape(ndays, 
1) # Compute normalizer - normalizer = (num_days * (num_days + 1)) / 2 + normalizer = (ndays * (ndays + 1)) / 2 # Weight the data weighted_data = data * weights @@ -706,8 +709,6 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor): trend in a stock's price. **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close` - **Default Window Length:** Window length is automatically calculated as the - sum of slow_period and signal_period. Parameters ---------- @@ -718,15 +719,24 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor): signal_period' : int > 0, < fast_period The window length for the signal line. Default is 9. - Returns - ------- - The EWMA of the difference between "fast" EWMA and "slow" EWMA line using - `signal_period` as span. + Notes + ----- + Unlike most Factors, MovingAverageConvergenceDivergence does not accept a + ``window_length`` parameter. ``window_length`` is inferred from + ``slow_period`` and ``signal_period``. """ - - inputs = [USEquityPricing.close] + inputs = (USEquityPricing.close,) + # We don't use the default form of `params` here because we want to + # dynamically calculate `window_length` from the period lengths in our + # __new__. params = ('fast_period', 'slow_period', 'signal_period') + @expect_bounded( + __funcname='MACDSignal', + fast_period=(1, None), # These must all be >= 1. + slow_period=(1, None), + signal_period=(1, None), + ) def __new__(cls, fast_period=12, slow_period=26, @@ -734,12 +744,13 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor): *args, **kwargs): - if signal_period <= 0: - raise ValueError("'signal_period' must be larger than 0.") - if slow_period <= fast_period or fast_period <= signal_period: + if slow_period <= fast_period: raise ValueError( - "'slow_period' must be larger than 'fast_period'." - "'fast_period' must be larger than 'signal_period'." 
+ "'slow_period' must be greater than 'fast_period', but got\n" + "slow_period={slow}, fast_period={fast}".format( + slow=slow_period, + fast=fast_period, + ) ) return super(MovingAverageConvergenceDivergenceSignal, cls).__new__( @@ -753,10 +764,11 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor): def _ewma(self, data, length): decay_rate = 1.0 - (2.0 / (1.0 + length)) - return average(data, - axis=1, - weights=exponential_weights(length, decay_rate) - ) + return average( + data, + axis=1, + weights=exponential_weights(length, decay_rate) + ) def compute(self, today, assets, out, close, fast_period, slow_period, signal_period): @@ -778,19 +790,19 @@ class AnnualizedVolatility(CustomFactor): https://en.wikipedia.org/wiki/Volatility_(finance) The degree of variation of a series over time as measured by the standard - deviation of returns. + deviation of daily returns. **Default Inputs:** :data:`zipline.pipeline.factors.Returns(window_length=2)` Parameters ---------- - annualization_factor : - The number of time units per year. Defaults to average number of NYSE - trading days per year, 252. + annualization_factor : float, optional + The number of time units per year. Default is 252, the number of NYSE + trading days in a normal year. """ inputs = [Returns(window_length=2)] - params = {'annualization_factor': 252} + params = {'annualization_factor': 252.0} window_length = 252 def compute(self, today, assets, out, returns, annualization_factor): From ee230c807fbbd820fe19733cc07d934d0ea2dccd Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 28 Nov 2016 15:29:36 -0500 Subject: [PATCH 10/10] DOC: Docstring updates.
--- zipline/pipeline/factors/technical.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index c7363bba..c96a4fd1 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -712,16 +712,16 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor): Parameters ---------- - fast_period : int > 0 + fast_period : int > 0, optional The window length for the "fast" EWMA. Default is 12. - slow_period : int > 0, > fast_period + slow_period : int > 0, > fast_period, optional The window length for the "slow" EWMA. Default is 26. - signal_period' : int > 0, < fast_period + signal_period : int > 0, < fast_period, optional The window length for the signal line. Default is 9. Notes ----- - Unlike most Factors, MovingAverageConvergenceDivergence does not accept a + Unlike most pipeline expressions, this factor does not accept a ``window_length`` parameter. ``window_length`` is inferred from ``slow_period`` and ``signal_period``. """