mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 16:46:59 +08:00
53d3b0855b
Classifiers are computations that represent grouping keys. They can be used in conjuction with normalization functions like ``zscore`` or ``demean`` to perform normalizations over subsets of a dataset. Notable changes: - Added ``demean()`` and ``zscore()`` methods to ``Factor``. - Added a classifier versions of ``Latest`` and ``CustomTermMixin``. The .latest attribute of int64 dataset columns no produces a classifier by default. - Added ``Everything``, a classifier that maps all data to the same value. - Added ``zipline.lib.normalize``, which implements a naive, pure-Python grouped normalize function. This will likely be moved to Cython in a subsequent PR.
46 lines
1.6 KiB
Python
46 lines
1.6 KiB
Python
import numpy as np
|
|
|
|
|
|
def naive_grouped_rowwise_apply(data, group_labels, func, out=None):
|
|
"""
|
|
Simple implementation of grouped row-wise function application.
|
|
|
|
Parameters
|
|
----------
|
|
data : ndarray[ndim=2]
|
|
Input array over which to apply a grouped function.
|
|
group_labels : ndarray[ndim=2, dtype=int64]
|
|
Labels to use to bucket inputs from array.
|
|
Should be the same shape as array.
|
|
func : function[ndarray[ndim=1]] -> function[ndarray[ndim=1]]
|
|
Function to apply to pieces of each row in array.
|
|
out : ndarray, optional
|
|
Array into which to write output. If not supplied, a new array of the
|
|
same shape as ``data`` is allocated and returned.
|
|
|
|
Example
|
|
-------
|
|
>>> data = np.array([[1., 2., 3.],
|
|
... [2., 3., 4.],
|
|
... [5., 6., 7.]])
|
|
>>> labels = np.array([[0, 0, 1],
|
|
... [0, 1, 0],
|
|
... [1, 0, 2]])
|
|
>>> naive_grouped_rowwise_apply(data, labels, lambda row: row - row.min())
|
|
array([[ 0., 1., 0.],
|
|
[ 0., 0., 2.],
|
|
[ 0., 0., 0.]])
|
|
>>> naive_grouped_rowwise_apply(data, labels, lambda row: row / row.sum())
|
|
array([[ 0.33333333, 0.66666667, 1. ],
|
|
[ 0.33333333, 1. , 0.66666667],
|
|
[ 1. , 1. , 1. ]])
|
|
"""
|
|
if out is None:
|
|
out = np.empty_like(data)
|
|
|
|
for (row, label_row, out_row) in zip(data, group_labels, out):
|
|
for label in np.unique(label_row):
|
|
locs = (label_row == label)
|
|
out_row[locs] = func(row[locs])
|
|
return out
|