ENH: Don't allow length=1 regressions/correlations.

They're not meaningful, and they cause warnings from numpy. Implemented in terms of a new preprocessor, `expect_bounded`, which takes a `(lower_bound, upper_bound)` tuple for each argument; either bound may be `None` to leave that side unbounded.
2026-06-28 21:10:25 +08:00 · 2016-09-02 12:49:09 -04:00
parent 959baf7fe0
commit dee715cff2
3 changed files with 122 additions and 3 deletions
@@ -362,6 +362,30 @@ class StatisticalBuiltInsTestCase(WithTradingEnvironment, ZiplineTestCase):
                    end_date,
                )

+    def test_require_length_greater_than_one(self):
+        my_asset = Equity(0, exchange="TEST")
+
+        with self.assertRaises(ValueError):
+            RollingPearsonOfReturns(
+                target=my_asset,
+                returns_length=3,
+                correlation_length=1,
+            )
+
+        with self.assertRaises(ValueError):
+            spearman_factor = RollingSpearmanOfReturns(
+                target=my_asset,
+                returns_length=3,
+                correlation_length=1,
+            )
+
+        with self.assertRaises(ValueError):
+            regression_factor = RollingLinearRegressionOfReturns(
+                target=my_asset,
+                returns_length=3,
+                regression_length=1,
+            )
+

 class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
                                 ZiplineTestCase):
@@ -581,7 +605,7 @@ class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
                regression_length=regression_length,
            )

-    @parameter_space(correlation_length=[1, 2, 3, 4])
+    @parameter_space(correlation_length=[2, 3, 4])
    def test_factor_correlation_methods_two_factors(self, correlation_length):
        """
        Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
@@ -682,7 +706,7 @@ class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)

-    @parameter_space(regression_length=[1, 2, 3, 4])
+    @parameter_space(regression_length=[2, 3, 4])
    def test_factor_regression_method_two_factors(self, regression_length):
        """
        Tests for `Factor.linear_regression` when passed another 2D factor
@@ -12,7 +12,7 @@ from zipline.pipeline.filters import SingleAsset
 from zipline.pipeline.mixins import SingleInputMixin
 from zipline.pipeline.sentinels import NotSpecified
 from zipline.pipeline.term import AssetExists
-from zipline.utils.input_validation import expect_dtypes
+from zipline.utils.input_validation import expect_bounded, expect_dtypes
 from zipline.utils.numpy_utils import float64_dtype, int64_dtype

 from .technical import Returns
@@ -24,6 +24,7 @@ ALLOWED_DTYPES = (float64_dtype, int64_dtype)
 class _RollingCorrelation(CustomFactor, SingleInputMixin):

    @expect_dtypes(base_factor=ALLOWED_DTYPES, target=ALLOWED_DTYPES)
+    @expect_bounded(correlation_length=(2, None))
    def __new__(cls,
                base_factor,
                target,
@@ -31,6 +32,7 @@ class _RollingCorrelation(CustomFactor, SingleInputMixin):
                mask=NotSpecified):
        if target.ndim == 2 and base_factor.mask is not target.mask:
            raise IncompatibleTerms(term_1=base_factor, term_2=target)
+
        return super(_RollingCorrelation, cls).__new__(
            cls,
            inputs=[base_factor, target],
@@ -167,6 +169,7 @@ class RollingLinearRegression(CustomFactor, SingleInputMixin):
    outputs = ['alpha', 'beta', 'r_value', 'p_value', 'stderr']

    @expect_dtypes(dependent=ALLOWED_DTYPES, independent=ALLOWED_DTYPES)
+    @expect_bounded(regression_length=(2, None))
    def __new__(cls,
                dependent,
                independent,
@@ -174,6 +177,7 @@ class RollingLinearRegression(CustomFactor, SingleInputMixin):
                mask=NotSpecified):
        if independent.ndim == 2 and dependent.mask is not independent.mask:
            raise IncompatibleTerms(term_1=dependent, term_2=independent)
+
        return super(RollingLinearRegression, cls).__new__(
            cls,
            inputs=[dependent, independent],
@@ -503,6 +503,97 @@ def expect_element(*_pos, **named):
    return preprocess(**valmap(_expect_element, named))


+def expect_bounded(**named):
+    """
+    Preprocessing decorator that verifies inputs fall between upper and lower
+    bounds.
+
+    Usage
+    -----
+    >>> @expect_bounded(x=(1, 5))
+    ... def foo(x):
+    ...    return x + 1
+    ...
+    >>> foo(1)
+    2
+    >>> foo(5)
+    6
+    >>> foo(6)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    Traceback (most recent call last):
+       ...
+    ValueError: ...foo() expected a value between 1 and 5 for argument 'x',
+    but got 6 instead.
+
+    Notes
+    -----
+    None can be passed as the lower or upper bound to signify that a value only
+    has an upper or lower bound.
+
+    >>> @expect_bounded(x=(2, None))
+    ... def foo(x):
+    ...    return x
+    ...
+    >>> foo(100000)
+    100000
+    >>> foo(1)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    Traceback (most recent call last):
+       ...
+    ValueError: ...foo() expected a value greater than or equal to 2 for
+    argument 'x', but got 1 instead.
+
+    >>> @expect_bounded(x=(None, 5))
+    ... def foo(x):
+    ...    return x
+    ...
+    >>> foo(6)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    Traceback (most recent call last):
+       ...
+    ValueError: ...foo() expected a value less than or equal to 5 for
+    argument 'x', but got 6 instead.
+    """
+    def valid_bounds(t):
+        return (
+            isinstance(t, tuple)
+            and len(t) == 2
+            and t != (None, None)
+        )
+
+    for name, bounds in iteritems(named):
+        if not valid_bounds(bounds):
+            raise TypeError(
+                "expect_bounded() expected a tuple of bounds for"
+                " argument '{name}', but got {bounds} instead.".format(
+                    name=name,
+                    bounds=bounds,
+                )
+            )
+
+    def _expect_bounded(bounds):
+        (lower, upper) = bounds
+        if lower is None:
+            should_fail = lambda value: value > upper
+            predicate_descr = "less than or equal to " + str(upper)
+        elif upper is None:
+            should_fail = lambda value: value < lower
+            predicate_descr = "greater than or equal to " + str(lower)
+        else:
+            should_fail = lambda value: not (lower <= value <= upper)
+            predicate_descr = "between %s and %s" % bounds
+
+        template = (
+            "%(funcname)s() expected a value {predicate}"
+            " for argument '%(argname)s', but got %(actual)s instead."
+        ).format(predicate=predicate_descr)
+
+        return make_check(
+            exc_type=ValueError,
+            template=template,
+            pred=should_fail,
+            actual=repr,
+        )
+    return preprocess(**valmap(_expect_bounded, named))
+
+
 def expect_dimensions(**dimensions):
    """
    Preprocessing decorator that verifies inputs are numpy arrays with a