Merge pull request #1680 from quantopian/rolls-bias

Eliminate Possible Bias in Volume Rolls
This commit is contained in:
David Michalowicz
2017-02-16 09:26:32 -05:00
committed by GitHub
2 changed files with 151 additions and 15 deletions
+97 -5
View File
@@ -66,9 +66,9 @@ class ContinuousFuturesTestCase(WithCreateBarData,
@classmethod
def make_root_symbols_info(self):
return pd.DataFrame({
'root_symbol': ['FO', 'BA', 'BZ', 'MA'],
'root_symbol_id': [1, 2, 3, 4],
'exchange': ['CME', 'CME', 'CME', 'CME']})
'root_symbol': ['FO', 'BA', 'BZ', 'MA', 'DF'],
'root_symbol_id': [1, 2, 3, 4, 5],
'exchange': ['CME', 'CME', 'CME', 'CME', 'CME']})
@classmethod
def make_futures_info(self):
@@ -205,7 +205,35 @@ class ContinuousFuturesTestCase(WithCreateBarData,
'exchange': ['CME'] * 3,
})
return pd.concat([fo_frame, ba_frame, bz_frame, ma_frame])
# DF is set up to have a double volume flip between the 'F' and 'G'
# contracts, and then a really early temporary volume flip between the
# 'G' and 'H' contracts.
df_frame = DataFrame({
'symbol': ['DFF16', 'DFG16', 'DFH16'],
'root_symbol': ['DF'] * 3,
'asset_name': ['Double Flip'] * 3,
'sid': range(17, 20),
'start_date': [Timestamp('2005-01-01', tz='UTC'),
Timestamp('2005-02-01', tz='UTC'),
Timestamp('2005-03-01', tz='UTC')],
'end_date': [Timestamp('2016-08-19', tz='UTC'),
Timestamp('2016-09-19', tz='UTC'),
Timestamp('2016-10-19', tz='UTC')],
'notice_date': [Timestamp('2016-02-19', tz='UTC'),
Timestamp('2016-03-18', tz='UTC'),
Timestamp('2016-04-22', tz='UTC')],
'expiration_date': [Timestamp('2016-02-19', tz='UTC'),
Timestamp('2016-03-18', tz='UTC'),
Timestamp('2016-04-22', tz='UTC')],
'auto_close_date': [Timestamp('2016-02-17', tz='UTC'),
Timestamp('2016-03-16', tz='UTC'),
Timestamp('2016-04-20', tz='UTC')],
'tick_size': [0.001] * 3,
'multiplier': [1000.0] * 3,
'exchange': ['CME'] * 3,
})
return pd.concat([fo_frame, ba_frame, bz_frame, ma_frame, df_frame])
@classmethod
def make_future_minute_bar_data(cls):
@@ -265,7 +293,7 @@ class ContinuousFuturesTestCase(WithCreateBarData,
3: Timestamp('2016-04-20', tz='UTC'),
6: Timestamp('2016-01-27', tz='UTC'),
}
for i in range(17):
for i in range(20):
df = base_df.copy()
df += i * 10000
if i in sid_to_vol_stop_session:
@@ -288,8 +316,72 @@ class ContinuousFuturesTestCase(WithCreateBarData,
df.volume.values[:loc + 1] = 10
if i == 15: # No volume for MAH16
df.volume.values[:] = 0
if i == 17:
end_loc = dts.searchsorted('2016-02-16 23:00:00+00:00')
df.volume.values[:end_loc] = 10
df.volume.values[end_loc:] = 0
if i == 18:
cross_loc_1 = dts.searchsorted('2016-02-09 23:01:00+00:00')
cross_loc_2 = dts.searchsorted('2016-02-11 23:01:00+00:00')
cross_loc_3 = dts.searchsorted('2016-02-15 23:01:00+00:00')
end_loc = dts.searchsorted('2016-03-15 23:01:00+00:00')
df.volume.values[:cross_loc_1] = 5
df.volume.values[cross_loc_1:cross_loc_2] = 15
df.volume.values[cross_loc_2:cross_loc_3] = 5
df.volume.values[cross_loc_3:end_loc] = 15
df.volume.values[end_loc:] = 0
if i == 19:
early_cross_1 = dts.searchsorted('2016-03-01 23:01:00+00:00')
early_cross_2 = dts.searchsorted('2016-03-03 23:01:00+00:00')
end_loc = dts.searchsorted('2016-04-19 23:01:00+00:00')
df.volume.values[:early_cross_1] = 1
df.volume.values[early_cross_1:early_cross_2] = 20
df.volume.values[early_cross_2:end_loc] = 10
df.volume.values[end_loc:] = 0
yield i, df
def test_double_volume_switch(self):
"""
Test that when a double volume switch occurs we treat the first switch
as the roll, assuming it is within a certain distance of the next auto
close date. See `VolumeRollFinder._active_contract` for a full
explanation and example.
"""
cf = self.asset_finder.create_continuous_future('DF', 0, 'volume')
sessions = self.trading_calendar.sessions_in_range(
'2016-02-09', '2016-02-17',
)
for session in sessions:
bar_data = self.create_bardata(lambda: session)
contract = bar_data.current(cf, 'contract')
# The 'G' contract surpasses the 'F' contract in volume on
# 2016-02-10, which means that the 'G' contract should become the
# front contract starting on 2016-02-11.
if session < pd.Timestamp('2016-02-11', tz='UTC'):
self.assertEqual(contract.symbol, 'DFF16')
else:
self.assertEqual(contract.symbol, 'DFG16')
# TODO: This test asserts behavior about a back contract briefly
# spiking in volume, but more than a week before the front contract's
# auto close date, meaning it does not fall in the 'grace' period used
# by `VolumeRollFinder._active_contract`. The current behavior is that
# during the spike, the back contract is considered current, but it may
# be worth changing that behavior in the future.
# sessions = self.trading_calendar.sessions_in_range(
# '2016-03-01', '2016-03-21',
# )
# for session in sessions:
# bar_data = self.create_bardata(lambda: session)
# contract = bar_data.current(cf, 'contract')
# if session < pd.Timestamp('2016-03-16', tz='UTC'):
# self.assertEqual(contract.symbol, 'DFG16')
# else:
# self.assertEqual(contract.symbol, 'DFH16')
def test_create_continuous_future(self):
cf_primary = self.asset_finder.create_continuous_future(
'FO', 0, 'calendar')
+54 -10
View File
@@ -48,7 +48,6 @@ class RollFinder(with_metaclass(ABCMeta, object)):
back = oc.contract_at_offset(front, 1, dt.value)
if back is None:
return front
session = self.trading_calendar.minute_to_session_label(dt)
primary = self._active_contract(oc, front, back, session)
return oc.contract_at_offset(primary, offset, session.value)
@@ -106,6 +105,9 @@ class RollFinder(with_metaclass(ABCMeta, object)):
if prev < prev_c.contract.auto_close_date:
break
if back != self._active_contract(oc, front, back, prev):
# TODO: Instead of listing each contract with its roll date
# as tuples, create a series which maps every day to the
# active contract on that day.
rolls.insert(0, ((curr >> offset).contract.sid, session))
break
session = prev
@@ -137,7 +139,7 @@ class VolumeRollFinder(RollFinder):
The CalendarRollFinder calculates contract rolls based on when
volume activity transfers from one contract to another.
"""
GRACE_DAYS = 7
THRESHOLD = 0.10
def __init__(self, trading_calendar, asset_finder, session_reader):
@@ -146,12 +148,54 @@ class VolumeRollFinder(RollFinder):
self.session_reader = session_reader
def _active_contract(self, oc, front, back, dt):
prev = dt - self.trading_calendar.day
front_vol = self.session_reader.get_value(front, prev, 'volume')
back_vol = self.session_reader.get_value(back, prev, 'volume')
if back_vol > front_vol:
"""
Return the active contract based on the previous trading day's volume.
In the rare case that a double volume switch occurs we treat the first
switch as the roll. Take the following case for example:
| +++++ _____
| + __ / <--- 'G'
| ++/++\++++/++
| _/ \__/ +
| / +
| ____/ + <--- 'F'
|_________|__|___|________
a b c <--- Switches
We should treat 'a' as the roll date rather than 'c' because from the
perspective of 'a', if a switch happens and we are pretty close to the
auto-close date, we would probably assume it is time to roll. This
means that for every date after 'a', `data.current(cf, 'contract')`
should return the 'G' contract.
"""
tc = self.trading_calendar
trading_day = tc.day
prev = dt - trading_day
get_value = self.session_reader.get_value
front_vol = get_value(front, prev, 'volume')
back_vol = get_value(back, prev, 'volume')
front_contract = oc.sid_to_contract[front].contract
if dt >= front_contract.auto_close_date or back_vol > front_vol:
return back
else:
contract = oc.sid_to_contract[front].contract
auto_closed = dt >= contract.auto_close_date
return back if auto_closed else front
gap_start = \
front_contract.auto_close_date - (trading_day * self.GRACE_DAYS)
gap_end = prev - trading_day
if dt < gap_start:
return front
# If we are within `self.GRACE_DAYS` of the front contract's auto close
# date, and a volume flip happened during that period, return the back
# contract as the active one.
sessions = tc.sessions_in_range(
tc.minute_to_session_label(gap_start),
tc.minute_to_session_label(gap_end),
)
for session in sessions:
front_vol = get_value(front, session, 'volume')
back_vol = get_value(back, session, 'volume')
if back_vol > front_vol:
return back
return front