mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 22:36:38 +08:00
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
"""
|
|
Tests BoundColumn attributes and methods.
|
|
"""
|
|
from contextlib2 import ExitStack
|
|
from unittest import TestCase
|
|
|
|
from pandas import date_range, DataFrame
|
|
from pandas.util.testing import assert_frame_equal
|
|
|
|
from catalyst.lib.labelarray import LabelArray
|
|
from catalyst.pipeline import Pipeline
|
|
from catalyst.pipeline.data.testing import TestingDataSet as TDS
|
|
from catalyst.testing import chrange, temp_pipeline_engine
|
|
from catalyst.utils.pandas_utils import ignore_pandas_nan_categorical_warning
|
|
|
|
|
|
class LatestTestCase(TestCase):
    """
    Checks that ``column.latest`` for every column of the testing dataset
    yields the frame we expect when run through a pipeline engine.
    """

    @classmethod
    def setUpClass(cls):
        """
        Build the shared fixtures: a daily UTC calendar, five sids, and a
        temporary pipeline engine.

        The engine is a context manager; it is entered on an ``ExitStack``
        stored on the class so that ``tearDownClass`` can release it.
        """
        cls._stack = stack = ExitStack()
        cls.calendar = cal = date_range('2014', '2015', freq='D', tz='UTC')
        cls.sids = list(range(5))
        cls.engine = stack.enter_context(
            temp_pipeline_engine(
                cal,
                cls.sids,
                random_seed=100,
                symbols=chrange('A', 'E'),
            ),
        )
        cls.assets = cls.engine._finder.retrieve_all(cls.sids)

    @classmethod
    def tearDownClass(cls):
        """Release everything that ``setUpClass`` entered on the stack."""
        cls._stack.close()

    def expected_latest(self, column, slice_):
        """
        Build the frame that ``column.latest`` is expected to produce.

        Parameters
        ----------
        column
            The dataset column under test; its ``dtype`` (and, for string
            columns, its ``missing_value``) are read here.
        slice_ : slice
            The portion of ``self.calendar`` covered by the pipeline run.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``self.calendar[slice_]`` with ``self.assets`` as
            columns. For string-like dtypes (kind ``O``, ``S`` or ``U``) the
            result comes from ``LabelArray.as_categorical_frame``.
        """
        loader = self.engine.get_loader(column)

        index = self.calendar[slice_]
        columns = self.assets
        values = loader.values(column.dtype, self.calendar, self.sids)[slice_]

        if column.dtype.kind in ('O', 'S', 'U'):
            # For string columns, we expect a categorical in the output.
            return LabelArray(
                values,
                missing_value=column.missing_value,
            ).as_categorical_frame(
                index=index,
                columns=columns,
            )

        # Reuse the values/index/columns computed above rather than asking
        # the loader for the same data a second time.
        return DataFrame(values, index=index, columns=columns)

    def test_latest(self):
        """
        Run a pipeline containing ``latest`` for every ``TDS`` column over
        calendar positions 20 through 39 and compare each unstacked result
        column against ``expected_latest``.
        """
        columns = TDS.columns
        pipe = Pipeline(
            columns={c.name: c.latest for c in columns},
        )

        cal_slice = slice(20, 40)
        dates_to_test = self.calendar[cal_slice]
        result = self.engine.run_pipeline(
            pipe,
            dates_to_test[0],
            dates_to_test[-1],
        )
        for column in columns:
            with ignore_pandas_nan_categorical_warning():
                col_result = result[column.name].unstack()

            expected_col_result = self.expected_latest(column, cal_slice)
            assert_frame_equal(col_result, expected_col_result)
|