diff --git a/docs/source/whatsnew/1.0.2.txt b/docs/source/whatsnew/1.0.2.txt index b27b8b5b..83a54071 100644 --- a/docs/source/whatsnew/1.0.2.txt +++ b/docs/source/whatsnew/1.0.2.txt @@ -26,6 +26,11 @@ Enhancements :meth:`~zipline.pipeline.factors.Factor.top`, and :meth:`~zipline.pipeline.factors.Factor.bottom`. (:issue:`1349`). +- Added new pipeline filters, :class:`~zipline.pipeline.filters.All` and + :class:`~zipline.pipeline.filters.Any`, which take another filter and + return True if an asset produced a True for all/any days in the previous + ``window_length`` days (:issue:`1358`). + Bug Fixes ~~~~~~~~~ diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py index ccf6211f..ecd232a2 100644 --- a/tests/pipeline/test_filter.py +++ b/tests/pipeline/test_filter.py @@ -30,6 +30,7 @@ from zipline.errors import BadPercentileBounds from zipline.pipeline import Filter, Factor, TermGraph from zipline.pipeline.classifiers import Classifier from zipline.pipeline.factors import CustomFactor +from zipline.pipeline.filters import All, Any from zipline.testing import check_arrays, parameter_space, permute_rows from zipline.utils.numpy_utils import float64_dtype, int64_dtype from .base import BasePipelineTestCase, with_default_shape @@ -395,6 +396,108 @@ class FilterTestCase(BasePipelineTestCase): ) check_arrays(results['isfinite'], isfinite(data)) + def test_all(self): + + data = array([[1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1], + [1, 0, 1, 1, 1, 1], + [1, 1, 0, 1, 1, 1], + [1, 1, 1, 0, 1, 1], + [1, 1, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 0]], dtype=bool) + + # With a window_length of N, 0's should be "sticky" for the (N - 1) + # days after the 0 in the base data. + + # Note that, the way ``self.run_graph`` works, we compute the same + # number of output rows for all inputs, so we only get the last 4 + # outputs for expected_3 even though we have enough input data to + # compute 5 rows. 
+ expected_3 = array([[0, 0, 0, 1, 1, 1], + [1, 0, 0, 0, 1, 1], + [1, 1, 0, 0, 0, 1], + [1, 1, 1, 0, 0, 0]], dtype=bool) + + expected_4 = array([[0, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 1, 1], + [1, 0, 0, 0, 0, 1], + [1, 1, 0, 0, 0, 0]], dtype=bool) + + class Input(Filter): + inputs = () + window_length = 0 + + results = self.run_graph( + TermGraph({ + '3': All(inputs=[Input()], window_length=3), + '4': All(inputs=[Input()], window_length=4), + }), + initial_workspace={Input(): data}, + mask=self.build_mask(ones(shape=data.shape)), + ) + + check_arrays(results['3'], expected_3) + check_arrays(results['4'], expected_4) + + def test_any(self): + + # FUN FACT: The inputs and outputs here are exactly the negation of + # the inputs and outputs for test_all above. This isn't a coincidence. + # + # By de Morgan's Laws, we have:: + # + # ~(a & b) == (~a | ~b) + # + # negating both sides, we have:: + # + # (a & b) == ~(~a | ~b) + # + # Since all(a, b) is isomorphic to (a & b), and any(a, b) is isomorphic + # to (a | b), we have:: + # + # all(a, b) == ~(any(~a, ~b)) + # + data = array([[0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]], dtype=bool) + + # With a window_length of N, 1's should be "sticky" for the (N - 1) + # days after the 1 in the base data. + + # Note that, the way ``self.run_graph`` works, we compute the same + # number of output rows for all inputs, so we only get the last 4 + # outputs for expected_3 even though we have enough input data to + # compute 5 rows. 
+ expected_3 = array([[1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 0, 1, 1, 1]], dtype=bool) + + expected_4 = array([[1, 1, 1, 0, 0, 0], + [1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 1]], dtype=bool) + + class Input(Filter): + inputs = () + window_length = 0 + + results = self.run_graph( + TermGraph({ + '3': Any(inputs=[Input()], window_length=3), + '4': Any(inputs=[Input()], window_length=4), + }), + initial_workspace={Input(): data}, + mask=self.build_mask(ones(shape=data.shape)), + ) + + check_arrays(results['3'], expected_3) + check_arrays(results['4'], expected_4) + @parameter_space(factor_len=[2, 3, 4]) def test_window_safe(self, factor_len): # all true data set of (days, securities) diff --git a/zipline/pipeline/filters/__init__.py b/zipline/pipeline/filters/__init__.py index b1c06ec4..86ad7476 100644 --- a/zipline/pipeline/filters/__init__.py +++ b/zipline/pipeline/filters/__init__.py @@ -9,8 +9,11 @@ from .filter import ( PercentileFilter, SingleAsset, ) +from .smoothing import All, Any __all__ = [ + 'All', + 'Any', 'ArrayPredicate', 'CustomFilter', 'Filter', diff --git a/zipline/pipeline/filters/smoothing.py b/zipline/pipeline/filters/smoothing.py new file mode 100644 index 00000000..f48e2976 --- /dev/null +++ b/zipline/pipeline/filters/smoothing.py @@ -0,0 +1,35 @@ +""" +Filters that apply smoothing operations on other filters. + +These are generally useful for controlling/minimizing turnover on existing +Filters. +""" +from .filter import CustomFilter + + +class All(CustomFilter): + """ + A Filter requiring that assets produce True for ``window_length`` + consecutive days. + + **Default Inputs:** None + + **Default Window Length:** None + """ + + def compute(self, today, assets, out, arg): + out[:] = (arg.sum(axis=0) == self.window_length) + + +class Any(CustomFilter): + """ + A Filter requiring that assets produce True for at least one day in the + last ``window_length`` days. 
+ + **Default Inputs:** None + + **Default Window Length:** None + """ + + def compute(self, today, assets, out, arg): + out[:] = (arg.sum(axis=0) > 0)