From 694d9e952a6febf837d6934ee5898cd9f91e4450 Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 08:20:10 -0400 Subject: [PATCH 1/7] ENH: added smoothing to zipline --- tests/pipeline/test_filter.py | 30 ++++++++++++++++++++++++++++ zipline/pipeline/filters/__init__.py | 2 ++ zipline/pipeline/filters/filter.py | 16 +++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index ccf6211f..1c14ded2 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -395,6 +395,36 @@ class FilterTestCase(BasePipelineTestCase): ) check_arrays(results['isfinite'], isfinite(data)) + def test_smoothing_filter(self): + from zipline.pipeline.filters import SmoothingFilter + + data = full(self.default_shape, True, dtype=bool) + # one column all false + data[0, 0] = False + data[1, 1] = False + + class InputFilter(Filter): + inputs = () + window_length = 0 + + smoothing_filter = SmoothingFilter( + inputs=[InputFilter()], + window_length=self.default_shape[0] + ) + + results = self.run_graph( + TermGraph({'smoothing': smoothing_filter}), + initial_workspace={InputFilter(): data} + ) + + expected_result = full(self.default_shape[1], True, dtype=bool) + expected_result[0] = False + expected_result[1] = False + check_arrays( + results['smoothing'].flatten(), + expected_result, + ) + @parameter_space(factor_len=[2, 3, 4]) def test_window_safe(self, factor_len): # all true data set of (days, securities) diff --git a/zipline/pipeline/filters/__init__.py b/zipline/pipeline/filters/__init__.py index b1c06ec4..0f0bb52f 100644 --- a/zipline/pipeline/filters/__init__.py +++ b/zipline/pipeline/filters/__init__.py @@ -8,6 +8,7 @@ from .filter import ( NumExprFilter, PercentileFilter, SingleAsset, + SmoothingFilter, ) __all__ = [ @@ -20,4 +21,5 @@ __all__ = [ 'NumExprFilter', 'PercentileFilter', 'SingleAsset', + 'SmoothingFilter', ] diff --git a/zipline/pipeline/filters/filter.py 
b/zipline/pipeline/filters/filter.py index 502fd031..3cf8ae10 100644 --- a/zipline/pipeline/filters/filter.py +++ b/zipline/pipeline/filters/filter.py @@ -8,6 +8,7 @@ from numpy import ( float64, nan, nanpercentile, + sum as sum_ ) from zipline.errors import ( @@ -488,3 +489,18 @@ class SingleAsset(Filter): asset=self._asset, start_date=dates[0], end_date=dates[-1], ) return out + + +class SmoothingFilter(CustomFilter): + """ + A Filter that requires its inputs to have + been True for the last `window_length` days. + An integral part of the Q500US methodology + + **Default Inputs**: None + + **Default Window Length**: None + """ + + def compute(self, today, assets, out, arg): + out[:] = (sum_(arg, axis=0) == self.window_length) From 2d5a2055d00f3f6a7ff394b8de1dd96e1fb90366 Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 08:24:20 -0400 Subject: [PATCH 2/7] DOC: Added smoothing filter to whatsnew --- docs/source/whatsnew/1.0.2.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/whatsnew/1.0.2.txt b/docs/source/whatsnew/1.0.2.txt index b27b8b5b..f92ad68d 100644 --- a/docs/source/whatsnew/1.0.2.txt +++ b/docs/source/whatsnew/1.0.2.txt @@ -26,6 +26,9 @@ Enhancements :meth:`~zipline.pipeline.factors.Factor.top`, and :meth:`~zipline.pipeline.factors.Factor.bottom`. (:issue:`1349`). +- Added a smoothing filter that adds 'stickiness' to its input, + making boolean designations less volatile over time. 
(:issue:`1358`) + Bug Fixes ~~~~~~~~~ From 73de8e61820e5c042033e1e7f894ca47fc00b994 Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 11:16:02 -0400 Subject: [PATCH 3/7] STY: style changes and strictly_true_filter --- tests/pipeline/test_filter.py | 10 +++++----- zipline/pipeline/filters/__init__.py | 4 ++-- zipline/pipeline/filters/filter.py | 6 ++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index 1c14ded2..859fc144 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -395,8 +395,8 @@ class FilterTestCase(BasePipelineTestCase): ) check_arrays(results['isfinite'], isfinite(data)) - def test_smoothing_filter(self): - from zipline.pipeline.filters import SmoothingFilter + def test_strictly_true_filter(self): + from zipline.pipeline.filters import StrictlyTrueFilter data = full(self.default_shape, True, dtype=bool) # one column all false @@ -407,13 +407,13 @@ class FilterTestCase(BasePipelineTestCase): inputs = () window_length = 0 - smoothing_filter = SmoothingFilter( + strictly_true_filter = StrictlyTrueFilter( inputs=[InputFilter()], window_length=self.default_shape[0] ) results = self.run_graph( - TermGraph({'smoothing': smoothing_filter}), + TermGraph({'Filter': strictly_true_filter}), initial_workspace={InputFilter(): data} ) @@ -421,7 +421,7 @@ class FilterTestCase(BasePipelineTestCase): expected_result[0] = False expected_result[1] = False check_arrays( - results['smoothing'].flatten(), + results['Filter'].flatten(), expected_result, ) diff --git a/zipline/pipeline/filters/__init__.py b/zipline/pipeline/filters/__init__.py index 0f0bb52f..86813eff 100644 --- a/zipline/pipeline/filters/__init__.py +++ b/zipline/pipeline/filters/__init__.py @@ -8,7 +8,7 @@ from .filter import ( NumExprFilter, PercentileFilter, SingleAsset, - SmoothingFilter, + StrictlyTrueFilter, ) __all__ = [ @@ -21,5 +21,5 @@ __all__ = [ 'NumExprFilter', 
'PercentileFilter', 'SingleAsset', - 'SmoothingFilter', + 'StrictlyTrueFilter', ] diff --git a/zipline/pipeline/filters/filter.py b/zipline/pipeline/filters/filter.py index 3cf8ae10..ba0e0985 100644 --- a/zipline/pipeline/filters/filter.py +++ b/zipline/pipeline/filters/filter.py @@ -8,7 +8,6 @@ from numpy import ( float64, nan, nanpercentile, - sum as sum_ ) from zipline.errors import ( @@ -491,11 +490,10 @@ class SingleAsset(Filter): return out -class SmoothingFilter(CustomFilter): +class StrictlyTrueFilter(CustomFilter): """ A Filter that requires its inputs to have been True for the last `window_length` days. - An integral part of the Q500US methodology **Default Inputs**: None @@ -503,4 +501,4 @@ class SmoothingFilter(CustomFilter): """ def compute(self, today, assets, out, arg): - out[:] = (sum_(arg, axis=0) == self.window_length) + out[:] = (sum(arg) == self.window_length) From c10af2a0b9d9ba1a0ee2452d5626b6ea7e059945 Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 11:40:14 -0400 Subject: [PATCH 4/7] TEST: more thorough testing --- tests/pipeline/test_filter.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index 859fc144..c235b215 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -21,6 +21,7 @@ from numpy import ( ones, ones_like, putmask, + reshape, rot90, sum as np_sum ) @@ -398,10 +399,10 @@ class FilterTestCase(BasePipelineTestCase): def test_strictly_true_filter(self): from zipline.pipeline.filters import StrictlyTrueFilter - data = full(self.default_shape, True, dtype=bool) - # one column all false - data[0, 0] = False - data[1, 1] = False + data = ~eye(N=self.default_shape[0], + M=self.default_shape[1], + k=1, + dtype=bool) class InputFilter(Filter): inputs = () @@ -417,11 +418,10 @@ class FilterTestCase(BasePipelineTestCase): initial_workspace={InputFilter(): data} ) - expected_result = 
full(self.default_shape[1], True, dtype=bool) - expected_result[0] = False - expected_result[1] = False + expected_result = full(self.default_shape[1], False, dtype=bool) + expected_result[0] = True check_arrays( - results['Filter'].flatten(), + reshape(results['Filter'], expected_result.shape[0]), expected_result, ) From 7623c0f6ebe1e5184335db9fc3cb34ffc6b01b16 Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 13:48:14 -0400 Subject: [PATCH 5/7] MAINT: .sum() behaviour --- tests/pipeline/test_filter.py | 2 +- zipline/pipeline/filters/filter.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index c235b215..461f5beb 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -409,7 +409,7 @@ class FilterTestCase(BasePipelineTestCase): window_length = 0 strictly_true_filter = StrictlyTrueFilter( - inputs=[InputFilter()], + inputs=(InputFilter(), ), window_length=self.default_shape[0] ) diff --git a/zipline/pipeline/filters/filter.py b/zipline/pipeline/filters/filter.py index ba0e0985..065c2e4a 100644 --- a/zipline/pipeline/filters/filter.py +++ b/zipline/pipeline/filters/filter.py @@ -499,6 +499,5 @@ class StrictlyTrueFilter(CustomFilter): **Default Window Length**: None """ - def compute(self, today, assets, out, arg): - out[:] = (sum(arg) == self.window_length) + out[:] = (arg.sum(axis=0) == self.window_length) From 574d7b197f87d4388ff92abd69ebf828a8fd757e Mon Sep 17 00:00:00 2001 From: Gil Wassermann Date: Mon, 1 Aug 2016 15:35:22 -0400 Subject: [PATCH 6/7] TEST: test for rolling nature of smoothing filter --- tests/pipeline/test_filter.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index 461f5beb..a3e090f6 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -410,7 +410,7 @@ class 
FilterTestCase(BasePipelineTestCase): strictly_true_filter = StrictlyTrueFilter( inputs=(InputFilter(), ), - window_length=self.default_shape[0] + window_length=(self.default_shape[0]-1) ) results = self.run_graph( @@ -418,11 +418,18 @@ class FilterTestCase(BasePipelineTestCase): initial_workspace={InputFilter(): data} ) - expected_result = full(self.default_shape[1], False, dtype=bool) - expected_result[0] = True + expected_result_0 = full(self.default_shape[1], False, dtype=bool) + expected_result_0[0] = True check_arrays( - reshape(results['Filter'], expected_result.shape[0]), - expected_result, + reshape(results['Filter'][0], expected_result_0.shape[0]), + expected_result_0, + ) + + expected_result_1 = full(self.default_shape[1], False, dtype=bool) + expected_result_1[:2] = True + check_arrays( + reshape(results['Filter'][1], expected_result_1.shape[0]), + expected_result_1, ) @parameter_space(factor_len=[2, 3, 4]) From f13294de4e37eede58c3467944eeb4616fba15b6 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Mon, 1 Aug 2016 21:59:58 -0400 Subject: [PATCH 7/7] ENH: Rename StrictlyTrue to All and add Any(). Also, moved All() and Any() to `zipline.pipeline.filters.smoothing`. --- docs/source/whatsnew/1.0.2.txt | 6 +- tests/pipeline/test_filter.py | 118 ++++++++++++++++++++------ zipline/pipeline/filters/__init__.py | 5 +- zipline/pipeline/filters/filter.py | 13 --- zipline/pipeline/filters/smoothing.py | 35 ++++++++ 5 files changed, 134 insertions(+), 43 deletions(-) create mode 100644 zipline/pipeline/filters/smoothing.py diff --git a/docs/source/whatsnew/1.0.2.txt b/docs/source/whatsnew/1.0.2.txt index f92ad68d..83a54071 100644 --- a/docs/source/whatsnew/1.0.2.txt +++ b/docs/source/whatsnew/1.0.2.txt @@ -26,8 +26,10 @@ Enhancements :meth:`~zipline.pipeline.factors.Factor.top`, and :meth:`~zipline.pipeline.factors.Factor.bottom`. (:issue:`1349`). 
-- Added a smoothing filter that adds 'stickiness' to its input, - making boolean designations less volatile over time. (:issue:`1358`) +- Added new pipeline filters, :class:`~zipline.pipeline.filters.All` and + :class:`~zipline.pipeline.filters.Any`, which take another filter and + return True if an asset produced a True for all/any days in the previous + ``window_length`` days (:issue:`1358`). Bug Fixes ~~~~~~~~~ diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index a3e090f6..ecd232a2 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -21,7 +21,6 @@ from numpy import ( ones, ones_like, putmask, - reshape, rot90, sum as np_sum ) @@ -31,6 +30,7 @@ from zipline.errors import BadPercentileBounds from zipline.pipeline import Filter, Factor, TermGraph from zipline.pipeline.classifiers import Classifier from zipline.pipeline.factors import CustomFactor +from zipline.pipeline.filters import All, Any from zipline.testing import check_arrays, parameter_space, permute_rows from zipline.utils.numpy_utils import float64_dtype, int64_dtype from .base import BasePipelineTestCase, with_default_shape @@ -396,41 +396,107 @@ class FilterTestCase(BasePipelineTestCase): ) check_arrays(results['isfinite'], isfinite(data)) - def test_strictly_true_filter(self): - from zipline.pipeline.filters import StrictlyTrueFilter + def test_all(self): - data = ~eye(N=self.default_shape[0], - M=self.default_shape[1], - k=1, - dtype=bool) + data = array([[1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1], + [1, 0, 1, 1, 1, 1], + [1, 1, 0, 1, 1, 1], + [1, 1, 1, 0, 1, 1], + [1, 1, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 0]], dtype=bool) - class InputFilter(Filter): + # With a window_length of N, 0's should be "sticky" for the (N - 1) + # days after the 0 in the base data. 
+ + # Note that, the way ``self.run_graph`` works, we compute the same + # number of output rows for all inputs, so we only get the last 4 + # outputs for expected_3 even though we have enough input data to + # compute 5 rows. + expected_3 = array([[0, 0, 0, 1, 1, 1], + [1, 0, 0, 0, 1, 1], + [1, 1, 0, 0, 0, 1], + [1, 1, 1, 0, 0, 0]], dtype=bool) + + expected_4 = array([[0, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 1, 1], + [1, 0, 0, 0, 0, 1], + [1, 1, 0, 0, 0, 0]], dtype=bool) + + class Input(Filter): inputs = () window_length = 0 - strictly_true_filter = StrictlyTrueFilter( - inputs=(InputFilter(), ), - window_length=(self.default_shape[0]-1) + results = self.run_graph( + TermGraph({ + '3': All(inputs=[Input()], window_length=3), + '4': All(inputs=[Input()], window_length=4), + }), + initial_workspace={Input(): data}, + mask=self.build_mask(ones(shape=data.shape)), ) + check_arrays(results['3'], expected_3) + check_arrays(results['4'], expected_4) + + def test_any(self): + + # FUN FACT: The inputs and outputs here are exactly the negation of + # the inputs and outputs for test_all above. This isn't a coincidence. + # + # By de Morgan's Laws, we have:: + # + # ~(a & b) == (~a | ~b) + # + # negating both sides, we have:: + # + # (a & b) == ~(~a | ~b) + # + # Since all(a, b) is isomorphic to (a & b), and any(a, b) is isomorphic + # to (a | b), we have:: + # + # all(a, b) == ~(any(~a, ~b)) + # + data = array([[0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]], dtype=bool) + + # With a window_length of N, 1's should be "sticky" for the (N - 1) + # days after the 1 in the base data. + + # Note that, the way ``self.run_graph`` works, we compute the same + # number of output rows for all inputs, so we only get the last 4 + # outputs for expected_3 even though we have enough input data to + # compute 5 rows. 
+ expected_3 = array([[1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 0, 1, 1, 1]], dtype=bool) + + expected_4 = array([[1, 1, 1, 0, 0, 0], + [1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 1]], dtype=bool) + + class Input(Filter): + inputs = () + window_length = 0 + results = self.run_graph( - TermGraph({'Filter': strictly_true_filter}), - initial_workspace={InputFilter(): data} + TermGraph({ + '3': Any(inputs=[Input()], window_length=3), + '4': Any(inputs=[Input()], window_length=4), + }), + initial_workspace={Input(): data}, + mask=self.build_mask(ones(shape=data.shape)), ) - expected_result_0 = full(self.default_shape[1], False, dtype=bool) - expected_result_0[0] = True - check_arrays( - reshape(results['Filter'][0], expected_result_0.shape[0]), - expected_result_0, - ) - - expected_result_1 = full(self.default_shape[1], False, dtype=bool) - expected_result_1[:2] = True - check_arrays( - reshape(results['Filter'][1], expected_result_1.shape[0]), - expected_result_1, - ) + check_arrays(results['3'], expected_3) + check_arrays(results['4'], expected_4) @parameter_space(factor_len=[2, 3, 4]) def test_window_safe(self, factor_len): diff --git a/zipline/pipeline/filters/__init__.py b/zipline/pipeline/filters/__init__.py index 86813eff..86ad7476 100644 --- a/zipline/pipeline/filters/__init__.py +++ b/zipline/pipeline/filters/__init__.py @@ -8,10 +8,12 @@ from .filter import ( NumExprFilter, PercentileFilter, SingleAsset, - StrictlyTrueFilter, ) +from .smoothing import All, Any __all__ = [ + 'All', + 'Any', 'ArrayPredicate', 'CustomFilter', 'Filter', @@ -21,5 +23,4 @@ __all__ = [ 'NumExprFilter', 'PercentileFilter', 'SingleAsset', - 'StrictlyTrueFilter', ] diff --git a/zipline/pipeline/filters/filter.py b/zipline/pipeline/filters/filter.py index 065c2e4a..502fd031 100644 --- a/zipline/pipeline/filters/filter.py +++ b/zipline/pipeline/filters/filter.py @@ -488,16 +488,3 @@ class SingleAsset(Filter): asset=self._asset, 
start_date=dates[0], end_date=dates[-1], ) return out - - -class StrictlyTrueFilter(CustomFilter): - """ - A Filter that requires its inputs to have - been True for the last `window_length` days. - - **Default Inputs**: None - - **Default Window Length**: None - """ - def compute(self, today, assets, out, arg): - out[:] = (arg.sum(axis=0) == self.window_length) diff --git a/zipline/pipeline/filters/smoothing.py b/zipline/pipeline/filters/smoothing.py new file mode 100644 index 00000000..f48e2976 --- /dev/null +++ b/zipline/pipeline/filters/smoothing.py @@ -0,0 +1,35 @@ +""" +Filters that apply smoothing operations on other filters. + +These are generally useful for controlling/minimizing turnover on existing +Filters. +""" +from .filter import CustomFilter + + +class All(CustomFilter): + """ + A Filter requiring that assets produce True for ``window_length`` + consecutive days. + + **Default Inputs:** None + + **Default Window Length:** None + """ + + def compute(self, today, assets, out, arg): + out[:] = (arg.sum(axis=0) == self.window_length) + + +class Any(CustomFilter): + """ + A Filter requiring that assets produce True for at least one day in the + last ``window_length`` days. + + **Default Inputs:** None + + **Default Window Length:** None + """ + + def compute(self, today, assets, out, arg): + out[:] = (arg.sum(axis=0) > 0)