Files
catalyst/tests/modelling/test_engine.py
T
Scott Sanderson 7bb20eb297 MAINT: Check dates before computing factor_matrix.
In SimpleFFCEngine.factor_matrix barf with a useful error if end_date <=
start_date.
2015-08-03 12:06:24 -04:00

433 lines
14 KiB
Python

"""
Tests for SimpleFFCEngine
"""
from __future__ import division
from unittest import TestCase
from numpy import (
full,
isnan,
nan,
)
from numpy.testing import assert_array_equal
from pandas import (
DataFrame,
date_range,
Int64Index,
rolling_mean,
Timestamp,
)
from pandas.util.testing import assert_frame_equal
from testfixtures import TempDirectory
from zipline.assets import AssetFinder
from zipline.data.equities import USEquityPricing
from zipline.data.ffc.synthetic import (
ConstantLoader,
MultiColumnLoader,
NullAdjustmentReader,
SyntheticDailyBarWriter,
)
from zipline.data.ffc.frame import (
DataFrameFFCLoader,
MULTIPLY,
)
from zipline.data.ffc.loaders.us_equity_pricing import (
BcolzDailyBarReader,
USEquityPricingLoader,
)
from zipline.finance.trading import TradingEnvironment
from zipline.modelling.engine import SimpleFFCEngine
from zipline.modelling.factor import TestingFactor
from zipline.modelling.factor.technical import (
MaxDrawdown,
SimpleMovingAverage,
)
from zipline.utils.lazyval import lazyval
from zipline.utils.test_utils import (
make_rotating_asset_info,
make_simple_asset_info,
product_upper_triangle,
)
class RollingSumDifference(TestingFactor):
    """
    Testing factor that subtracts its second input window from its first
    and sums the differences along axis 0.
    """
    inputs = [USEquityPricing.open, USEquityPricing.close]
    window_length = 3

    def from_windows(self, open, close):
        # The parameter names mirror the default ``inputs`` above; the tests
        # in this module also instantiate the factor with other input pairs.
        difference = open - close
        return difference.sum(axis=0)
class ConstantInputTestCase(TestCase):
    """
    Tests for SimpleFFCEngine fed by a ConstantLoader that is configured
    with one fixed value per USEquityPricing column.
    """

    def setUp(self):
        # Every day, assume every stock starts at 2, goes down to 1,
        # goes up to 4, and finishes at 3.
        self.constants = {
            USEquityPricing.low: 1,
            USEquityPricing.open: 2,
            USEquityPricing.close: 3,
            USEquityPricing.high: 4,
        }
        self.assets = [1, 2, 3]
        self.dates = date_range('2014-01-01', '2014-02-01', freq='D', tz='UTC')
        first_day, last_day = self.dates[0], self.dates[-1]
        self.loader = ConstantLoader(
            constants=self.constants,
            dates=self.dates,
            assets=self.assets,
        )
        self.asset_info = make_simple_asset_info(
            self.assets,
            start_date=first_day,
            end_date=last_day,
        )
        self.asset_finder = AssetFinder(self.asset_info)

    def _new_engine(self):
        # Build a fresh engine over the fixtures created in setUp.
        return SimpleFFCEngine(self.loader, self.dates, self.asset_finder)

    def _query_dates(self, num_dates):
        # Slice of the calendar used as the query range, starting at index 10.
        return self.dates[10:10 + num_dates]

    def test_bad_dates(self):
        """factor_matrix must reject a start date that is not before the end."""
        engine = self._new_engine()
        pattern = "start_date must be before end_date .*"

        # First pair: start after end.  Second pair: start equal to end.
        bad_ranges = (
            (self.dates[2], self.dates[1]),
            (self.dates[2], self.dates[2]),
        )
        for start, end in bad_ranges:
            with self.assertRaisesRegexp(ValueError, pattern):
                engine.factor_matrix({}, start, end)

    def test_single_factor(self):
        """A single RollingSumDifference over constant inputs."""
        engine = self._new_engine()
        num_dates = 5
        expected_shape = (num_dates, len(self.assets))
        query_dates = self._query_dates(num_dates)
        factor = RollingSumDifference()

        result = engine.factor_matrix(
            {'f': factor},
            query_dates[0],
            query_dates[-1],
        )
        self.assertEqual(set(result.columns), {'f'})

        # open - close is (2 - 3) in every cell, so the sum over a window is
        # the negated window length.
        observed = result['f'].unstack().values
        assert_array_equal(
            observed,
            full(expected_shape, -factor.window_length),
        )

    def test_multiple_rolling_factors(self):
        """Several factors with differing windows/inputs in one call."""
        engine = self._new_engine()
        num_dates = 5
        shape = (num_dates, len(self.assets))
        query_dates = self._query_dates(num_dates)

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        results = engine.factor_matrix(
            {'short': short_factor, 'long': long_factor, 'high': high_factor},
            query_dates[0],
            query_dates[-1],
        )
        self.assertEqual(set(results.columns), {'short', 'high', 'long'})

        expected_fill = {
            # sum over a window whose values are all (2 - 3), i.e. open - close
            'short': -short_factor.window_length,
            'long': -long_factor.window_length,
            # sum over a window whose values are all (2 - 4), i.e. open - high
            'high': -2 * high_factor.window_length,
        }
        for name in ('short', 'long', 'high'):
            assert_array_equal(
                results[name].unstack().values,
                full(shape, expected_fill[name]),
            )

    def test_numeric_factor(self):
        """Arithmetic on factors yields a factor with the combined value."""
        constants = self.constants
        engine = self._new_engine()
        query_dates = self._query_dates(5)

        high, low = USEquityPricing.high, USEquityPricing.low
        open_, close_ = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open_, close_])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.factor_matrix(
            {
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            },
            query_dates[0],
            query_dates[-1],
        )

        # 3.0 matches RollingSumDifference's default window_length.
        expected_high_low = 3.0 * (constants[high] - constants[low])
        expected_open_close = 3.0 * (constants[open_] - constants[close_])
        expected_avg = (expected_high_low + expected_open_close) / 2.0

        def constant_frame(value):
            return DataFrame(value, index=query_dates, columns=self.assets)

        checks = (
            ('high_low', expected_high_low),
            ('open_close', expected_open_close),
            ('avg', expected_avg),
        )
        for column, expected_value in checks:
            assert_frame_equal(
                results[column].unstack(),
                constant_frame(expected_value),
            )
class FrameInputTestCase(TestCase):
    """
    Tests for SimpleFFCEngine whose inputs come from DataFrameFFCLoader
    instances wrapping in-memory DataFrames.
    """

    def setUp(self):
        trading_day = TradingEnvironment.instance().trading_day
        self.assets = Int64Index([1, 2, 3])
        self.dates = date_range(
            '2015-01-01',
            '2015-01-31',
            freq=trading_day,
            tz='UTC',
        )
        first_day, last_day = self.dates[0], self.dates[-1]
        asset_info = make_simple_asset_info(
            self.assets,
            start_date=first_day,
            end_date=last_day,
        )
        self.asset_finder = AssetFinder(asset_info)

    @lazyval
    def base_mask(self):
        # Frame of True shaped like the test's dates x assets grid.
        return self.make_frame(True)

    def make_frame(self, data):
        # Build a dates x assets DataFrame from ``data``.
        return DataFrame(data, columns=self.assets, index=self.dates)

    def test_compute_with_adjustments(self):
        """
        Compare SimpleMovingAverage results for an unadjusted column and an
        adjusted column against their baseline frames, for window lengths
        1 through 3 and every (start, stop) pair from product_upper_triangle.
        """
        dates, assets = self.dates, self.assets
        low, high = USEquityPricing.low, USEquityPricing.high

        # Positions in ``dates`` at which each adjustment is applied, paired
        # with the multiplier applied there.
        apply_idxs = [3, 10, 16]
        multipliers = [2.0, 3.0, 5.0]

        # Each adjustment targets the second asset and ends the day before
        # its apply date.
        adjustments = DataFrame.from_records(
            [
                dict(
                    kind=MULTIPLY,
                    sid=assets[1],
                    value=multiplier,
                    start_date=None,
                    end_date=dates[apply_idx - 1],
                    apply_date=dates[apply_idx],
                )
                for apply_idx, multiplier in zip(apply_idxs, multipliers)
            ]
        )

        low_base = DataFrame(self.make_frame(30.0))
        low_loader = DataFrameFFCLoader(low, low_base.copy(), adjustments=None)

        # Pre-apply inverse of adjustments to the baseline.
        high_base = DataFrame(self.make_frame(30.0))
        for apply_idx, multiplier in zip(apply_idxs, multipliers):
            high_base.iloc[:apply_idx, 1] /= multiplier
        high_loader = DataFrameFFCLoader(high, high_base, adjustments)

        loader = MultiColumnLoader({low: low_loader, high: high_loader})
        engine = SimpleFFCEngine(loader, self.dates, self.asset_finder)

        num_dates = len(dates)
        for window_length in range(1, 4):
            low_mavg = SimpleMovingAverage(
                inputs=[low],
                window_length=window_length,
            )
            high_mavg = SimpleMovingAverage(
                inputs=[high],
                window_length=window_length,
            )
            bounds = product_upper_triangle(range(window_length, num_dates))
            for start, stop in bounds:
                results = engine.factor_matrix(
                    {'low': low_mavg, 'high': high_mavg},
                    dates[start],
                    dates[stop],
                )
                self.assertEqual(set(results.columns), {'low', 'high'})

                # stop + 1 so that the end date itself is included.
                expected_rows = slice(start, stop + 1)
                by_asset = results.unstack()
                assert_frame_equal(
                    by_asset['low'],
                    low_base.iloc[expected_rows],
                )
                assert_frame_equal(
                    by_asset['high'],
                    high_base.iloc[expected_rows],
                )
class SyntheticBcolzTestCase(TestCase):
    """
    Tests for SimpleFFCEngine reading daily bars that SyntheticDailyBarWriter
    wrote to a temporary bcolz table shared by every test in the class.
    """

    @classmethod
    def setUpClass(cls):
        cls.first_asset_start = Timestamp('2015-04-01', tz='UTC')
        cls.env = TradingEnvironment.instance()
        cls.trading_day = cls.env.trading_day
        cls.asset_info = make_rotating_asset_info(
            num_assets=6,
            first_start=cls.first_asset_start,
            frequency=cls.trading_day,
            periods_between_starts=4,
            asset_lifetime=8,
        )
        cls.all_assets = cls.asset_info.index
        cls.all_dates = date_range(
            start=cls.first_asset_start,
            end=cls.asset_info['end_date'].max(),
            freq=cls.trading_day,
        )
        cls.finder = AssetFinder(cls.asset_info)

        cls.temp_dir = TempDirectory()
        cls.temp_dir.create()
        try:
            cls.writer = SyntheticDailyBarWriter(
                asset_info=cls.asset_info[['start_date', 'end_date']],
                calendar=cls.all_dates,
            )
            table = cls.writer.write(
                cls.temp_dir.getpath('testdata.bcolz'),
                cls.all_dates,
                cls.all_assets,
            )
            cls.ffc_loader = USEquityPricingLoader(
                BcolzDailyBarReader(table),
                NullAdjustmentReader(),
            )
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so
            # remove the temporary directory here before propagating the
            # original error.
            cls.temp_dir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.temp_dir.cleanup()

    def _make_engine(self):
        # Engine over the class-level loader, trading calendar and finder.
        return SimpleFFCEngine(
            self.ffc_loader,
            self.env.trading_days,
            self.finder,
        )

    def test_SMA(self):
        """SimpleMovingAverage of close matches a pandas rolling mean."""
        engine = self._make_engine()
        dates, assets = self.all_dates, self.all_assets
        window_length = 5
        SMA = SimpleMovingAverage(
            inputs=(USEquityPricing.close,),
            window_length=window_length,
        )

        results = engine.factor_matrix(
            {'sma': SMA},
            dates[window_length],
            dates[-1],
        )

        raw_closes = self.writer.expected_values_2d(dates, assets, 'close')
        expected_sma_result = rolling_mean(
            raw_closes,
            window_length,
            min_periods=1,
        )
        # Cells that are NaN in the raw closes are expected to be NaN in the
        # result as well.
        expected_sma_result[isnan(raw_closes)] = nan
        # The query starts at dates[window_length], so drop the leading rows.
        expected_sma_result = expected_sma_result[window_length:]

        sma_result = results['sma'].unstack()
        assert_frame_equal(
            sma_result,
            DataFrame(
                expected_sma_result,
                index=dates[window_length:],
                columns=assets,
            ),
        )

    def test_drawdown(self):
        """MaxDrawdown of close over the synthetic data is NaN or zero."""
        # The monotonically-increasing data produced by SyntheticDailyBarWriter
        # exercises two pathological cases for MaxDrawdown.  The actual
        # computed results are pretty much useless (everything is either NaN
        # or zero), but verifying we correctly handle those corner cases is
        # valuable.
        engine = self._make_engine()
        dates, assets = self.all_dates, self.all_assets
        window_length = 5
        drawdown = MaxDrawdown(
            inputs=(USEquityPricing.close,),
            window_length=window_length,
        )

        results = engine.factor_matrix(
            {'drawdown': drawdown},
            dates[window_length],
            dates[-1],
        )
        dd_result = results['drawdown']

        # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
        # since the input is always increasing.
        expected = self.writer.expected_values_2d(dates, assets, 'close')
        expected[~isnan(expected)] = 0
        expected = expected[window_length:]

        assert_frame_equal(
            dd_result.unstack(),
            DataFrame(
                expected,
                index=dates[window_length:],
                columns=assets,
            ),
        )