Files
2017-06-19 14:43:10 -07:00

80 lines
2.5 KiB
Python

"""
Tests BoundColumn attributes and methods.
"""
from contextlib2 import ExitStack
from unittest import TestCase
from pandas import date_range, DataFrame
from pandas.util.testing import assert_frame_equal
from catalyst.lib.labelarray import LabelArray
from catalyst.pipeline import Pipeline
from catalyst.pipeline.data.testing import TestingDataSet as TDS
from catalyst.testing import chrange, temp_pipeline_engine
from catalyst.utils.pandas_utils import ignore_pandas_nan_categorical_warning
class LatestTestCase(TestCase):
@classmethod
def setUpClass(cls):
cls._stack = stack = ExitStack()
cls.calendar = cal = date_range('2014', '2015', freq='D', tz='UTC')
cls.sids = list(range(5))
cls.engine = stack.enter_context(
temp_pipeline_engine(
cal,
cls.sids,
random_seed=100,
symbols=chrange('A', 'E'),
),
)
cls.assets = cls.engine._finder.retrieve_all(cls.sids)
@classmethod
def tearDownClass(cls):
cls._stack.close()
def expected_latest(self, column, slice_):
loader = self.engine.get_loader(column)
index = self.calendar[slice_]
columns = self.assets
values = loader.values(column.dtype, self.calendar, self.sids)[slice_]
if column.dtype.kind in ('O', 'S', 'U'):
# For string columns, we expect a categorical in the output.
return LabelArray(
values,
missing_value=column.missing_value,
).as_categorical_frame(
index=index,
columns=columns,
)
return DataFrame(
loader.values(column.dtype, self.calendar, self.sids)[slice_],
index=self.calendar[slice_],
columns=self.assets,
)
def test_latest(self):
columns = TDS.columns
pipe = Pipeline(
columns={c.name: c.latest for c in columns},
)
cal_slice = slice(20, 40)
dates_to_test = self.calendar[cal_slice]
result = self.engine.run_pipeline(
pipe,
dates_to_test[0],
dates_to_test[-1],
)
for column in columns:
with ignore_pandas_nan_categorical_warning():
col_result = result[column.name].unstack()
expected_col_result = self.expected_latest(column, cal_slice)
assert_frame_equal(col_result, expected_col_result)