mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 21:06:54 +08:00
5f190395ad
- Adds a new class, ``LabelArray``, which is a subclass of np.ndarray. LabelArray is conceptually similar to pandas.Categorical, in that it stores data with many duplicate values as indices into an array of unique values. For string data with many duplicates (e.g. time-series of tickers or industry classifications), this provides multiple orders of magnitude of improvement when doing string operations, especially string comparison/matching operations. - Adds a new generic object "specialization" for `AdjustedArrayWindow`, and a corresponding ObjectOverwrite adjustment. - Adds a new ``postprocess`` method to ``zipline.pipeline.term.Term``. This method is called on the final result of any pipeline expression after screen filtering has occurred. The default implementation of ``postprocess`` is identity, but Classifier overrides it to coerce string columns into pandas.Categoricals before presenting them to the user.
77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
"""
|
|
Tests BoundColumn attributes and methods.
|
|
"""
|
|
from contextlib2 import ExitStack
|
|
from unittest import TestCase
|
|
|
|
from pandas import date_range, DataFrame
|
|
from pandas.util.testing import assert_frame_equal
|
|
|
|
from zipline.lib.labelarray import LabelArray
|
|
from zipline.pipeline import Pipeline
|
|
from zipline.pipeline.data.testing import TestingDataSet as TDS
|
|
from zipline.testing import chrange, temp_pipeline_engine
|
|
|
|
|
|
class LatestTestCase(TestCase):
    """
    Compare ``column.latest`` for every ``TestingDataSet`` column against
    the values served by the engine's loader for that column.
    """

    @classmethod
    def setUpClass(cls):
        """
        Build the calendar, sids, pipeline engine and assets shared by the
        tests in this class.

        The engine is obtained by entering ``temp_pipeline_engine`` on an
        ``ExitStack`` that is stored on ``cls._stack`` and closed in
        ``tearDownClass``.
        """
        cls.calendar = cal = date_range('2014', '2015', freq='D', tz='UTC')
        cls.sids = list(range(5))

        # unittest does not run tearDownClass when setUpClass raises, so do
        # the setup inside a ``with`` block: a failure part-way through
        # still unwinds whatever was entered.  Ownership of the entered
        # contexts is handed to ``cls._stack`` only once setup succeeded.
        with ExitStack() as stack:
            cls.engine = stack.enter_context(
                temp_pipeline_engine(
                    cal,
                    cls.sids,
                    random_seed=100,
                    symbols=chrange('A', 'E'),
                ),
            )
            cls.assets = cls.engine._finder.retrieve_all(cls.sids)
            cls._stack = stack.pop_all()

    @classmethod
    def tearDownClass(cls):
        """Close every context that ``setUpClass`` entered."""
        cls._stack.close()

    def expected_latest(self, column, slice_):
        """
        Build the frame that ``column.latest`` is expected to produce.

        Parameters
        ----------
        column
            Column under test.  Its ``dtype`` is passed to the loader and
            selects the output format; ``missing_value`` is read for
            string-like dtypes only.
        slice_
            Index applied to ``self.calendar`` and to the loader's values
            to select the rows under test.

        Returns
        -------
        expected
            Frame indexed by ``self.calendar[slice_]`` with ``self.assets``
            as columns.  For dtype kinds 'O', 'S' and 'U' it is built via
            ``LabelArray.as_categorical_frame``; otherwise it is a plain
            ``DataFrame``.
        """
        loader = self.engine.get_loader(column)

        index = self.calendar[slice_]
        columns = self.assets
        values = loader.values(column.dtype, self.calendar, self.sids)[slice_]

        if column.dtype.kind in ('O', 'S', 'U'):
            # For string columns, we expect a categorical in the output.
            return LabelArray(
                values,
                missing_value=column.missing_value,
            ).as_categorical_frame(
                index=index,
                columns=columns,
            )

        # Reuse the values, index and columns computed above instead of
        # querying the loader and slicing the calendar a second time.
        return DataFrame(values, index=index, columns=columns)

    def test_latest(self):
        """
        Run a pipeline holding ``latest`` for every ``TestingDataSet``
        column and check each output column against ``expected_latest``.
        """
        columns = TDS.columns
        pipe = Pipeline(
            columns={c.name: c.latest for c in columns},
        )

        cal_slice = slice(20, 40)
        dates_to_test = self.calendar[cal_slice]
        result = self.engine.run_pipeline(
            pipe,
            dates_to_test[0],
            dates_to_test[-1],
        )
        for column in columns:
            # Unstack the column's result so it has the same layout as the
            # frame built by ``expected_latest``.
            col_result = result[column.name].unstack()
            expected_col_result = self.expected_latest(column, cal_slice)
            assert_frame_equal(col_result, expected_col_result)
|