mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 02:54:16 +08:00
26fd6fda8b
- Fixes an error where Modeling API data known as of the close of `day N` would be shown to algorithms during `before_trading_start` as of the close of the same day. Algorithms should now only receive data during `before_trading_start/handle_data` that was known as of the simulation time at which the function would be called. - All Term instances now have a `mask` attribute that must be a `Filter` or an instance of `AssetExists()`. `mask` can be used to specify that a Factor should be computed in a manner that ignores the values that were not `True` in the mask. - Changed the interface for `FFCLoader.load_adjusted_array` and `Term._compute` from `(columns, mask)`, with mask as a DataFrame, to `(columns, dates, assets, mask)`, where mask is a numpy array. This is primarily to avoid having to reconstruct extra DataFrames when using masks produced by non `AssetExists` filters. - Adds `BoundColumn.latest`, which gives the most-recently-known value of a column.
191 lines
5.7 KiB
Python
191 lines
5.7 KiB
Python
"""
|
|
filter.py
|
|
"""
|
|
from numpy import (
|
|
bool_,
|
|
float64,
|
|
nan,
|
|
nanpercentile,
|
|
)
|
|
from itertools import chain
|
|
from operator import attrgetter
|
|
|
|
from zipline.errors import (
|
|
BadPercentileBounds,
|
|
)
|
|
from zipline.modelling.term import (
|
|
SingleInputMixin,
|
|
Term,
|
|
)
|
|
from zipline.modelling.expression import (
|
|
BadBinaryOperator,
|
|
FILTER_BINOPS,
|
|
method_name_for_op,
|
|
NumericalExpression,
|
|
)
|
|
|
|
|
|
def concat_tuples(*tuples):
    """
    Flatten the given tuples, in argument order, into a single tuple.

    Calling with no arguments yields the empty tuple.
    """
    flattened = chain.from_iterable(tuples)
    return tuple(flattened)
|
|
|
|
|
|
def binary_operator(op):
    """
    Build a binary operator method for use on a Filter subclass.

    Parameters
    ----------
    op
        The operator to implement.  It is passed to `method_name_for_op`
        and interpolated into the text of the generated expression.

    Returns
    -------
    A function named "binary_operator" taking ``(self, other)``, suitable
    for implementing methods like __and__ or __or__.  The returned function
    raises BadBinaryOperator when ``other`` is not a NumericalExpression, a
    Filter, or an int.
    """
    # Looked up once per operator.  When a plain Filter is combined with a
    # NumericalExpression, this getter retrieves the commuted version of the
    # operator from the NumericalExpression so that it can do the work.
    get_commuted_method = attrgetter(method_name_for_op(op, commute=True))

    def binary_operator(self, other):
        # Case 1: we are ourselves a NumericalExpression, so let
        # build_binary_op produce both sub-expressions and the inputs.
        if isinstance(self, NumericalExpression):
            lhs, rhs, merged_inputs = self.build_binary_op(op, other)
            expr = "({left}) {op} ({right})".format(
                left=lhs,
                op=op,
                right=rhs,
            )
            return NumExprFilter(expr, merged_inputs)

        # Case 2: only the other operand is a NumericalExpression.  It
        # overrides the numerical ops to handle merging of inputs, so call
        # its right-binding operator with ourself as the argument.
        if isinstance(other, NumericalExpression):
            commuted_method = get_commuted_method(other)
            return commuted_method(self)

        # Case 3: two plain Filters.  An operand combined with itself is
        # bound only once.
        if isinstance(other, Filter):
            if self is other:
                template, inputs = "x_0 {op} x_0", (self,)
            else:
                template, inputs = "x_0 {op} x_1", (self, other)
            return NumExprFilter(template.format(op=op), inputs)

        # Case 4: an integer constant (bool is a subclass of int, so it
        # lands here too).
        if isinstance(other, int):
            return NumExprFilter(
                "x_0 {op} ({constant})".format(op=op, constant=int(other)),
                binds=(self,),
            )

        raise BadBinaryOperator(op, self, other)

    return binary_operator
|
|
|
|
|
|
class Filter(Term):
    """
    A boolean-valued predicate over a universe of Assets.
    """
    dtype = bool_

    # Install one generated operator method per entry of FILTER_BINOPS
    # directly into the namespace of the class being defined.
    clsdict = locals()
    clsdict.update(
        (method_name_for_op(op), binary_operator(op))
        for op in FILTER_BINOPS
    )
|
|
|
|
|
|
class NumExprFilter(NumericalExpression, Filter):
    """
    A Filter whose values are produced by a numexpr expression.
    """

    def _compute(self, arrays, dates, assets, mask):
        """
        Evaluate the expression through the parent implementation, then AND
        the result with `mask` so the mask is re-applied to the output.
        """
        parent = super(NumExprFilter, self)
        unmasked = parent._compute(arrays, dates, assets, mask)
        return unmasked & mask
|
|
|
|
|
|
class PercentileFilter(SingleInputMixin, Filter):
    """
    A Filter representing assets falling between percentile bounds of a Factor.

    Parameters
    ----------
    factor : zipline.modelling.factor.Factor
        The factor over which to compute percentile bounds.
    min_percentile : float [0.0, 100.0]
        The minimum percentile rank of an asset that will pass the filter.
    max_percentile : float [0.0, 100.0]
        The maximum percentile rank of an asset that will pass the filter.
        Must be strictly greater than `min_percentile`.
    mask
        Forwarded unchanged to the parent constructor as the term's `mask`.

    Notes
    -----
    Percentiles are expressed on a 0-100 scale (the scale used by
    `numpy.nanpercentile`), not as fractions between 0 and 1.
    """
    window_length = 0

    def __new__(cls, factor, min_percentile, max_percentile, mask):
        return super(PercentileFilter, cls).__new__(
            cls,
            inputs=(factor,),
            mask=mask,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
        )

    def _init(self, min_percentile, max_percentile, *args, **kwargs):
        """
        Record the percentile bounds, then defer to the parent `_init` with
        the remaining arguments.
        """
        self._min_percentile = min_percentile
        self._max_percentile = max_percentile
        return super(PercentileFilter, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, min_percentile, max_percentile, *args, **kwargs):
        """
        Return the parent's static identity extended with both percentile
        bounds, so that filters with different bounds have different
        identities.
        """
        return (
            super(PercentileFilter, cls).static_identity(*args, **kwargs),
            min_percentile,
            max_percentile,
        )

    def _validate(self):
        """
        Ensure that our percentile bounds are well-formed.

        Raises
        ------
        BadPercentileBounds
            Unless ``0.0 <= min_percentile < max_percentile <= 100.0``.
        """
        if not 0.0 <= self._min_percentile < self._max_percentile <= 100.0:
            raise BadPercentileBounds(
                min_percentile=self._min_percentile,
                max_percentile=self._max_percentile,
            )
        return super(PercentileFilter, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a mask of all values falling between
        the given percentiles.

        Only ``arrays[0]`` is read.  Entries where `mask` is False are
        replaced with NaN before the bounds are computed; they are therefore
        ignored by `nanpercentile` and are always False in the result.
        """
        # TODO: Review whether there's a better way of handling small numbers
        # of columns.

        # `astype` already returns a freshly allocated array, so the input is
        # never mutated and no separate `.copy()` is needed.  `order='C'`
        # keeps the row-major layout that copy-then-cast used to produce.
        data = arrays[0].astype(float64, order='C')
        data[~mask] = nan

        # FIXME: np.nanpercentile **should** support computing multiple bounds
        # at once, but there's a bug in the logic for multiple bounds in numpy
        # 1.9.2.  It will be fixed in 1.10.
        # c.f. https://github.com/numpy/numpy/pull/5981
        lower_bounds = nanpercentile(
            data,
            self._min_percentile,
            axis=1,
            keepdims=True,
        )
        upper_bounds = nanpercentile(
            data,
            self._max_percentile,
            axis=1,
            keepdims=True,
        )
        # Comparisons against NaN are False, which is what excludes the
        # masked-out entries from the result.
        return (lower_bounds <= data) & (data <= upper_bounds)
|