mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 00:37:43 +08:00
8eb09f70d7
Previously, this raised a KeyError because the error message failed to format properly.
449 lines
13 KiB
Python
449 lines
13 KiB
Python
"""
|
|
factor.py
|
|
"""
|
|
from operator import attrgetter

from numpy import (
    apply_along_axis,
    float64,
    isnan,
    nan,
)
from scipy.stats import rankdata

from zipline.errors import (
    UnknownRankMethod,
    UnsupportedDataType,
)
from zipline.lib.rank import rankdata_2d_ordinal
from zipline.modelling.term import (
    CustomTermMixin,
    NotSpecified,
    RequiredWindowLengthMixin,
    SingleInputMixin,
    Term,
)
from zipline.modelling.expression import (
    BadBinaryOperator,
    COMPARISONS,
    is_comparison,
    MATH_BINOPS,
    method_name_for_op,
    NUMERIC_TYPES,
    NumericalExpression,
    NUMEXPR_MATH_FUNCS,
    UNARY_OPS,
)
from zipline.modelling.filter import (
    NumExprFilter,
    PercentileFilter,
)
from zipline.utils.control_flow import nullctx
|
|
|
|
|
|
_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])
|
|
|
|
|
|
def binop_return_type(op):
    """
    Select the term class used to wrap the result of binary operator `op`.

    Comparison operators produce a NumExprFilter; every other operator
    produces a NumExprFactor.
    """
    return NumExprFilter if is_comparison(op) else NumExprFactor
|
|
|
|
|
|
def binary_operator(op):
    """
    Build the method implementing binary operator `op` on a Factor subclass.

    The returned function, named "binary_operator", is meant to be installed
    under a magic-method name such as __add__.
    """
    # Getter for the commuted form of `op`.  It is applied to the right-hand
    # operand when that operand is a NumExprFactor, so that the
    # NumericalExpression implementation of the operator does the work.
    get_commuted_method = attrgetter(method_name_for_op(op, commute=True))

    def binary_operator(self, other):
        # Resolved on every call rather than once in the enclosing scope:
        # the classes returned by binop_return_type do not exist yet when
        # this factory is invoked from the class body of Factor.
        result_type = binop_return_type(op)

        if isinstance(self, NumExprFactor):
            lhs, rhs, merged_inputs = self.build_binary_op(op, other)
            expr = "({left}) {op} ({right})".format(
                left=lhs,
                op=op,
                right=rhs,
            )
            return result_type(expr, merged_inputs)

        if isinstance(other, NumExprFactor):
            # NumericalExpression overrides the operators so that inputs are
            # merged correctly; call its reflected operator with ourself as
            # the argument.
            return get_commuted_method(other)(self)

        if isinstance(other, Factor):
            # A factor combined with itself is bound only once.
            if self is other:
                return result_type("x_0 {op} x_0".format(op=op), (self,))
            return result_type("x_0 {op} x_1".format(op=op), (self, other))

        if isinstance(other, NUMERIC_TYPES):
            # Numeric constants are written directly into the expression.
            return result_type(
                "x_0 {op} ({constant})".format(op=op, constant=other),
                binds=(self,),
            )

        raise BadBinaryOperator(op, self, other)

    return binary_operator
|
|
|
|
|
|
def reflected_binary_operator(op):
    """
    Build the method implementing the reflected form of binary operator `op`.

    The returned function, named "reflected_binary_operator", is meant to be
    installed under a magic-method name such as __radd__.  Comparison
    operators are not accepted.
    """
    assert not is_comparison(op)

    def reflected_binary_operator(self, other):
        if isinstance(self, NumericalExpression):
            own_expr, their_expr, merged_inputs = self.build_binary_op(
                op, other
            )
            # Reflected: `other` is the left operand and `self` the right.
            expr = "({left}) {op} ({right})".format(
                left=their_expr,
                right=own_expr,
                op=op,
            )
            return NumExprFactor(expr, merged_inputs)

        # Numeric constants are the only remaining case to handle here: for
        # every other valid operand the corresponding left-binding method is
        # the one that gets called.
        if isinstance(other, NUMERIC_TYPES):
            return NumExprFactor(
                "{constant} {op} x_0".format(op=op, constant=other),
                binds=(self,),
            )

        raise BadBinaryOperator(op, other, self)

    return reflected_binary_operator
|
|
|
|
|
|
def unary_operator(op):
    """
    Build the method implementing unary operator `op` for Factors.

    Raises ValueError when `op` is anything other than '-'.
    """
    # Negation is currently the only unary operation supported for all of our
    # possible input types.
    supported = {'-'}
    if op not in supported:
        raise ValueError("Invalid unary operator %s." % op)

    def unary_operator(self):
        # NumExprFactor is looked up at call time: it is defined later in the
        # module than the class body that invokes this factory.
        if not isinstance(self, NumericalExpression):
            return NumExprFactor("{op}x_0".format(op=op), (self,))
        # Wrap the existing expression text and reuse its inputs as-is.
        return NumExprFactor(
            "{op}({expr})".format(op=op, expr=self._expr),
            self.inputs,
        )

    return unary_operator
|
|
|
|
|
|
def function_application(func):
    """
    Build the method applying the named math function `func` to a Factor.

    Raises ValueError when `func` is not a member of NUMEXPR_MATH_FUNCS.
    """
    if func not in NUMEXPR_MATH_FUNCS:
        raise ValueError("Unsupported mathematical function '%s'" % func)

    def mathfunc(self):
        if not isinstance(self, NumericalExpression):
            return NumExprFactor("{func}(x_0)".format(func=func), (self,))
        # Wrap the existing expression text and reuse its inputs as-is.
        return NumExprFactor(
            "{func}({expr})".format(func=func, expr=self._expr),
            self.inputs,
        )

    return mathfunc
|
|
|
|
|
|
class Factor(Term):
    """
    A transformation yielding a timeseries of scalar values associated with an
    Asset.

    Operator methods and math-function methods are generated in the class body
    from MATH_BINOPS, COMPARISONS, UNARY_OPS and NUMEXPR_MATH_FUNCS.  The `==`
    operator is deliberately not overloaded; use `eq` to build an equality
    comparison instead.
    """
    dtype = float64

    # Dynamically add functions for creating NumExprFactor/NumExprFilter
    # instances.
    # NOTE(review): this relies on writes to the dict returned by locals()
    # inside a class body ending up as class attributes -- confirm for the
    # interpreters this must support.
    clsdict = locals()
    clsdict.update(
        {
            method_name_for_op(op): binary_operator(op)
            # Don't override __eq__ because it breaks comparisons on tuples of
            # Factors.
            for op in MATH_BINOPS.union(COMPARISONS - {'=='})
        }
    )
    # Reflected variants exist only for the math operators;
    # reflected_binary_operator asserts that it is not given a comparison.
    clsdict.update(
        {
            method_name_for_op(op, commute=True): reflected_binary_operator(op)
            for op in MATH_BINOPS
        }
    )
    # Every entry of UNARY_OPS is installed under the same '__neg__' key;
    # unary_operator itself only accepts '-'.
    clsdict.update(
        {
            '__neg__': unary_operator(op)
            for op in UNARY_OPS
        }
    )
    # One method per supported math function, named after the function.
    clsdict.update(
        {
            funcname: function_application(funcname)
            for funcname in NUMEXPR_MATH_FUNCS
        }
    )

    # Alias the true-division hooks to the generated division methods.
    # Presumably method_name_for_op('/') yields '__div__' -- verify against
    # zipline.modelling.expression.
    __truediv__ = clsdict['__div__']
    __rtruediv__ = clsdict['__rdiv__']

    # Method-style equality comparison, provided because __eq__ is
    # intentionally left alone above.
    eq = binary_operator('==')

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
        """
        Construct a new Factor representing the sorted rank of each column
        within each row.

        Parameters
        ----------
        method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
            The method used to assign ranks to tied elements. See
            `scipy.stats.rankdata` for a full description of the semantics for
            each ranking method. Default is 'ordinal'.
        ascending : bool, optional
            Whether to return sorted rank in ascending or descending order.
            When False, the ranks are computed over `-self`.
            Default is True.
        mask : optional
            Forwarded unchanged to `Rank` as its `mask` argument.
            Default is NotSpecified.

        Returns
        -------
        ranks : zipline.modelling.factor.Rank
            A new factor that will compute the sorted indices of the data
            produced by `self`.

        Notes
        -----
        The default value for `method` is different from the default for
        `scipy.stats.rankdata`. See that function's documentation for a full
        description of the valid inputs to `method`.

        Missing or non-existent data on a given day will cause an asset to be
        given a rank of NaN for that day.

        See Also
        --------
        scipy.stats.rankdata
        zipline.lib.rank
        zipline.modelling.factor.Rank
        """
        return Rank(self if ascending else -self, method=method, mask=mask)

    def top(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the top N asset values of self each day.

        Implemented as a descending rank of `self` compared `<= N`.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : optional
            Forwarded unchanged to `rank`. Default is NotSpecified.

        Returns
        -------
        filter : zipline.modelling.filter.Filter
        """
        return self.rank(ascending=False, mask=mask) <= N

    def bottom(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the bottom N asset values of self each day.

        Implemented as an ascending rank of `self` compared `<= N`.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : optional
            Forwarded unchanged to `rank`. Default is NotSpecified.

        Returns
        -------
        filter : zipline.modelling.filter.Filter
        """
        return self.rank(ascending=True, mask=mask) <= N

    def percentile_between(self,
                           min_percentile,
                           max_percentile,
                           mask=NotSpecified):
        """
        Construct a new Filter representing entries from the output of this
        Factor that fall within the percentile range defined by min_percentile
        and max_percentile.

        Parameters
        ----------
        min_percentile : float [0.0, 100.0]
        max_percentile : float [0.0, 100.0]
        mask : optional
            Forwarded unchanged to `PercentileFilter` as its `mask` argument.
            Default is NotSpecified.

        Returns
        -------
        out : zipline.modelling.filter.PercentileFilter
            A new filter that will compute the specified percentile-range mask.

        See Also
        --------
        zipline.modelling.filter.PercentileFilter
        """
        return PercentileFilter(
            self,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
            mask=mask,
        )
|
|
|
|
|
|
class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values are computed from a numexpr expression.

    Parameters
    ----------
    expr : string
        An expression string suitable for passing to numexpr. Every variable
        in 'expr' should be named "x_i", where i is the index of the
        corresponding factor input in 'binds'.
    binds : tuple
        The factors used as inputs to the expression.

    Notes
    -----
    Instances are produced by numerical operators such as `+` and `-`, so
    users should rarely need to construct a NumExprFactor directly.
    """
|
|
|
|
|
|
class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.modelling.factor.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements. See
        `scipy.stats.rankdata` for a full description of the semantics for each
        ranking method.
    mask
        Forwarded unchanged to the parent constructor as `mask`.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.modelling.factor.Factor.rank : Method-style interface to same
        functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64

    def __new__(cls, factor, method, mask):
        return super(Rank, cls).__new__(
            cls,
            inputs=(factor,),
            method=method,
            mask=mask,
        )

    def _init(self, method, *args, **kwargs):
        """
        Store the ranking method, then defer to the parent `_init`.
        """
        self._method = method
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, *args, **kwargs):
        """
        Return the parent's identity paired with the ranking method.
        """
        return (
            super(Rank, cls).static_identity(*args, **kwargs),
            method,
        )

    def _validate(self):
        """
        Verify that the stored rank method is valid.

        Raises UnknownRankMethod when it is not one of _RANK_METHODS.
        """
        if self._method not in _RANK_METHODS:
            raise UnknownRankMethod(
                method=self._method,
                choices=set(_RANK_METHODS),
            )
        return super(Rank, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.

        Cells where `mask` is False, and cells whose input value is NaN, are
        NaN in the result. The result is always a float64 array.
        """
        # Rank a copy so the input array is left untouched, blanking out the
        # masked-off cells first.
        data = arrays[0].copy()
        data[~mask] = nan

        # Every cell that has to come back as NaN: the masked-off cells just
        # written above plus any cell that was already NaN in the input.
        missing = isnan(data)

        # OPTIMIZATION: Fast path the default case with our own specialized
        # Cython implementation.
        if self._method == 'ordinal':
            result = rankdata_2d_ordinal(data)
        else:
            # FUTURE OPTIMIZATION:
            # Write a less general "apply to rows" method that doesn't do all
            # the extra work that apply_along_axis does.
            result = apply_along_axis(rankdata, 1, data, method=self._method)

        # scipy's rankdata may return integer ranks (e.g. for 'min', 'max'
        # and 'dense'); NaN cannot be stored in an integer array, so make
        # sure we hold floats before re-applying the missing cells.
        if result.dtype != float64:
            result = result.astype(float64)

        # rankdata will sort nan values into last place, but we want our
        # nans to propagate, so explicitly re-apply.
        result[missing] = nan
        return result

    def __repr__(self):
        return "{type}({input_}, method='{method}', mask={mask})".format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )
|
|
|
|
|
|
class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    """
    Base class for user-defined Factors operating on windows of raw data.

    TODO: This is basically the most important class to document in the whole
    FFC API...

    Only CustomFactors of type float64 are supported at present.
    """
    ctx = nullctx()

    def _validate(self):
        """
        Reject any dtype other than float64, then defer to the parent checks.
        """
        dtype = self.dtype
        if dtype != float64:
            raise UnsupportedDataType(dtype=dtype)
        return super(CustomFactor, self)._validate()
|