ENH: Don't allow length=1 regressions/correlations.

They're not meaningful, and they cause warnings from numpy.

Implemented in terms of a new preprocessor, `expect_bounded`, which
takes a `(lower_bound, upper_bound)` tuple for each checked argument.
This commit is contained in:
Scott Sanderson
2016-09-02 12:49:09 -04:00
parent 959baf7fe0
commit dee715cff2
3 changed files with 122 additions and 3 deletions
+26 -2
View File
@@ -362,6 +362,30 @@ class StatisticalBuiltInsTestCase(WithTradingEnvironment, ZiplineTestCase):
end_date,
)
def test_require_length_greater_than_one(self):
    """
    Constructing a rolling correlation or regression of returns with a
    window length of 1 must raise ``ValueError``.

    Covers the Pearson, Spearman and linear-regression variants; each is
    built against the same single target asset with ``returns_length=3``.
    """
    my_asset = Equity(0, exchange="TEST")

    # The constructed factors are never used: construction itself is
    # expected to raise, so the results are deliberately not bound.
    with self.assertRaises(ValueError):
        RollingPearsonOfReturns(
            target=my_asset,
            returns_length=3,
            correlation_length=1,
        )

    with self.assertRaises(ValueError):
        RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=3,
            correlation_length=1,
        )

    with self.assertRaises(ValueError):
        RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=3,
            regression_length=1,
        )
class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
ZiplineTestCase):
@@ -581,7 +605,7 @@ class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
regression_length=regression_length,
)
@parameter_space(correlation_length=[1, 2, 3, 4])
@parameter_space(correlation_length=[2, 3, 4])
def test_factor_correlation_methods_two_factors(self, correlation_length):
"""
Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
@@ -682,7 +706,7 @@ class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
)
assert_frame_equal(spearman_results, expected_spearman_results)
@parameter_space(regression_length=[1, 2, 3, 4])
@parameter_space(regression_length=[2, 3, 4])
def test_factor_regression_method_two_factors(self, regression_length):
"""
Tests for `Factor.linear_regression` when passed another 2D factor
+5 -1
View File
@@ -12,7 +12,7 @@ from zipline.pipeline.filters import SingleAsset
from zipline.pipeline.mixins import SingleInputMixin
from zipline.pipeline.sentinels import NotSpecified
from zipline.pipeline.term import AssetExists
from zipline.utils.input_validation import expect_dtypes
from zipline.utils.input_validation import expect_bounded, expect_dtypes
from zipline.utils.numpy_utils import float64_dtype, int64_dtype
from .technical import Returns
@@ -24,6 +24,7 @@ ALLOWED_DTYPES = (float64_dtype, int64_dtype)
class _RollingCorrelation(CustomFactor, SingleInputMixin):
@expect_dtypes(base_factor=ALLOWED_DTYPES, target=ALLOWED_DTYPES)
@expect_bounded(correlation_length=(2, None))
def __new__(cls,
base_factor,
target,
@@ -31,6 +32,7 @@ class _RollingCorrelation(CustomFactor, SingleInputMixin):
mask=NotSpecified):
if target.ndim == 2 and base_factor.mask is not target.mask:
raise IncompatibleTerms(term_1=base_factor, term_2=target)
return super(_RollingCorrelation, cls).__new__(
cls,
inputs=[base_factor, target],
@@ -167,6 +169,7 @@ class RollingLinearRegression(CustomFactor, SingleInputMixin):
outputs = ['alpha', 'beta', 'r_value', 'p_value', 'stderr']
@expect_dtypes(dependent=ALLOWED_DTYPES, independent=ALLOWED_DTYPES)
@expect_bounded(regression_length=(2, None))
def __new__(cls,
dependent,
independent,
@@ -174,6 +177,7 @@ class RollingLinearRegression(CustomFactor, SingleInputMixin):
mask=NotSpecified):
if independent.ndim == 2 and dependent.mask is not independent.mask:
raise IncompatibleTerms(term_1=dependent, term_2=independent)
return super(RollingLinearRegression, cls).__new__(
cls,
inputs=[dependent, independent],
+91
View File
@@ -503,6 +503,97 @@ def expect_element(*_pos, **named):
return preprocess(**valmap(_expect_element, named))
def expect_bounded(**named):
    """
    Preprocessing decorator that checks arguments against inclusive bounds.

    Each keyword maps an argument name to a ``(lower, upper)`` tuple. A
    ``ValueError`` is raised by the decorated function when the named
    argument compares below ``lower`` or above ``upper``.

    Usage
    -----
    >>> @expect_bounded(x=(1, 5))
    ... def foo(x):
    ...    return x + 1
    ...
    >>> foo(1)
    2
    >>> foo(5)
    6
    >>> foo(6)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    Traceback (most recent call last):
       ...
    ValueError: ...foo() expected a value between 1 and 5 for argument 'x',
    but got 6 instead.

    Notes
    -----
    Either end of the tuple may be None to leave that side unbounded; the
    tuple ``(None, None)`` is rejected with a ``TypeError`` at decoration
    time, as is anything that is not a 2-tuple.

    >>> @expect_bounded(x=(2, None))
    ... def foo(x):
    ...    return x
    ...
    >>> foo(100000)
    100000
    >>> foo(1)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    Traceback (most recent call last):
       ...
    ValueError: ...foo() expected a value greater than or equal to 2 for
    argument 'x', but got 1 instead.

    >>> @expect_bounded(x=(None, 5))
    ... def foo(x):
    ...    return x
    ...
    >>> foo(6)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    Traceback (most recent call last):
       ...
    ValueError: ...foo() expected a value less than or equal to 5 for
    argument 'x', but got 6 instead.
    """
    # Reject malformed bound specifications up front, before any check is
    # built, so the error surfaces where the decorator is applied.
    for argname, spec in iteritems(named):
        is_pair = isinstance(spec, tuple) and len(spec) == 2
        if is_pair and spec != (None, None):
            continue
        raise TypeError(
            "expect_bounded() expected a tuple of bounds for"
            " argument '{name}', but got {bounds} instead.".format(
                name=argname,
                bounds=spec,
            )
        )

    def _build_check(spec):
        # Turn one (lower, upper) pair into a failure predicate plus the
        # human-readable description used in the error message.
        lower, upper = spec

        if lower is None:
            def out_of_range(value):
                return value > upper
            description = "less than or equal to " + str(upper)
        elif upper is None:
            def out_of_range(value):
                return value < lower
            description = "greater than or equal to " + str(lower)
        else:
            def out_of_range(value):
                return not (lower <= value <= upper)
            description = "between %s and %s" % (lower, upper)

        # Only the predicate text is filled in here; the %-style fields are
        # left in the template that is handed to make_check.
        message = (
            "%(funcname)s() expected a value {predicate}"
            " for argument '%(argname)s', but got %(actual)s instead."
        ).format(predicate=description)

        return make_check(
            exc_type=ValueError,
            template=message,
            pred=out_of_range,
            actual=repr,
        )

    checks = {
        argname: _build_check(spec)
        for argname, spec in iteritems(named)
    }
    return preprocess(**checks)
def expect_dimensions(**dimensions):
"""
Preprocessing decorator that verifies inputs are numpy arrays with a