Merge pull request #1785 from quantopian/reindex-reader-get-value

Fixes equity history calls on the futures calendar
This commit is contained in:
Andrew Daniels
2017-05-09 10:33:58 -04:00
committed by GitHub
12 changed files with 557 additions and 214 deletions
+3 -6
View File
@@ -16,12 +16,11 @@ from numbers import Real
from nose_parameterized import parameterized
from numpy.testing import assert_almost_equal
from numpy import nan, array, full
from numpy import nan, array, full, isnan
import pandas as pd
from pandas import DataFrame
from six import iteritems
from zipline.data.bar_reader import NoDataOnDate
from zipline.data.resample import (
minute_frame_to_session_frame,
DailyHistoryAggregator,
@@ -863,11 +862,9 @@ class TestReindexSessionBars(WithBcolzEquityDailyBarReader,
"first session should be 10.")
tday = pd.Timestamp('2015-11-26', tz='UTC')
with self.assertRaises(NoDataOnDate):
self.reader.get_value(1, tday, 'close')
self.assertTrue(isnan(self.reader.get_value(1, tday, 'close')))
with self.assertRaises(NoDataOnDate):
self.reader.get_value(1, tday, 'volume')
self.assertEqual(self.reader.get_value(1, tday, 'volume'), 0)
def test_last_availabe_dt(self):
self.assertEqual(self.reader.last_available_dt, self.END_DATE)
+2
View File
@@ -265,6 +265,7 @@ class TestAPIShim(WithCreateBarData,
5,
"1m",
"volume",
"minute",
True
)
else:
@@ -274,6 +275,7 @@ class TestAPIShim(WithCreateBarData,
5,
"1m",
"volume",
"minute",
)
test_sim_params = SimulationParameters(
+7 -1
View File
@@ -101,7 +101,12 @@ class TestBenchmark(WithDataPortal, WithSimParams, WithTradingCalendars,
# should be the equivalent of getting the price history, then doing
# a pct_change on it
manually_calculated = self.data_portal.get_history_window(
[1], days_to_use[-1], len(days_to_use), "1d", "close"
[1],
days_to_use[-1],
len(days_to_use),
"1d",
"close",
"daily",
)[1].pct_change()
# compare all the fields except the first one, for which we don't have
@@ -187,6 +192,7 @@ class TestBenchmark(WithDataPortal, WithSimParams, WithTradingCalendars,
len(days_to_use),
"1d",
"close",
"daily",
)[2].pct_change()
for idx, day in enumerate(days_to_use[1:]):
+31 -21
View File
@@ -670,7 +670,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-01-26', cf],
0,
@@ -696,7 +696,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-02-25', cf],
1,
@@ -724,7 +724,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-01-11 18:01', tz='US/Eastern').tz_convert('UTC'),
3, '1d', 'sid')
3, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-01-08', cf],
10,
@@ -745,7 +745,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-01-26', cf],
1,
@@ -771,7 +771,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-02-25', cf],
2,
@@ -799,7 +799,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-03-04 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
# Volume cuts out for FOF16 on 2016-01-25
self.assertEqual(window.loc['2016-01-26', cf],
@@ -826,7 +826,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-04-06 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1d', 'sid')
30, '1d', 'sid', 'minute')
self.assertEqual(window.loc['2016-02-25', cf],
1,
@@ -863,7 +863,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf.sid],
Timestamp('2016-01-26 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'sid')
30, '1m', 'sid', 'minute')
self.assertEqual(window.loc['2016-01-26 22:32', cf],
0,
@@ -882,7 +882,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-01-27 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'sid')
30, '1m', 'sid', 'minute')
self.assertEqual(window.loc['2016-01-27 22:32', cf],
1,
@@ -896,7 +896,9 @@ def record_current_contract(algo, data):
cf = self.data_portal.asset_finder.create_continuous_future(
'FO', 0, 'calendar', None)
window = self.data_portal.get_history_window(
[cf.sid], Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close')
[cf.sid],
Timestamp('2016-03-06', tz='UTC'),
30, '1d', 'close', 'daily')
assert_almost_equal(
window.loc['2016-01-26', cf],
@@ -915,7 +917,9 @@ def record_current_contract(algo, data):
# Advance the window a month.
window = self.data_portal.get_history_window(
[cf.sid], Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close')
[cf.sid],
Timestamp('2016-04-06', tz='UTC'),
30, '1d', 'close', 'daily')
assert_almost_equal(
window.loc['2016-02-24', cf],
@@ -946,7 +950,9 @@ def record_current_contract(algo, data):
cf = self.data_portal.asset_finder.create_continuous_future(
'MA', 0, 'volume', None)
window = self.data_portal.get_history_window(
[cf.sid], Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close')
[cf.sid],
Timestamp('2016-03-06', tz='UTC'),
30, '1d', 'close', 'daily')
assert_almost_equal(
window.loc['2016-01-26', cf],
@@ -965,7 +971,9 @@ def record_current_contract(algo, data):
# Advance the window a month.
window = self.data_portal.get_history_window(
[cf.sid], Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close')
[cf.sid],
Timestamp('2016-04-06', tz='UTC'),
30, '1d', 'close', 'daily')
assert_almost_equal(
window.loc['2016-02-24', cf],
@@ -991,7 +999,8 @@ def record_current_contract(algo, data):
'FO', 0, 'calendar', 'add')
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-03-06', tz='UTC'), 30, '1d', 'close')
Timestamp('2016-03-06', tz='UTC'),
30, '1d', 'close', 'daily')
# Unadjusted value is: 115011.44
# Adjustment is based on hop from 115231.44 to 125231.44
@@ -1034,7 +1043,8 @@ def record_current_contract(algo, data):
# Advance the window a month.
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-04-06', tz='UTC'), 30, '1d', 'close')
Timestamp('2016-04-06', tz='UTC'),
30, '1d', 'close', 'daily')
# Unadjusted value: 115221.44
# Adjustments based on hops:
@@ -1116,7 +1126,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf.sid],
Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
self.assertEqual(window.loc['2016-02-25 22:32', cf],
115231.412,
@@ -1135,7 +1145,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf],
Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
self.assertEqual(window.loc['2016-02-26 22:32', cf],
125241.412,
@@ -1155,7 +1165,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
# Unadjusted: 115231.412
# Adjustment based on roll:
@@ -1198,7 +1208,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
# No adjustments in this window.
self.assertEqual(window.loc['2016-02-26 22:32', cf_mul],
@@ -1219,7 +1229,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-02-25 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
# Unadjusted: 115231.412
# Adjustment based on roll:
@@ -1262,7 +1272,7 @@ def record_current_contract(algo, data):
window = self.data_portal.get_history_window(
[cf, cf_mul, cf_add],
Timestamp('2016-02-28 18:01', tz='US/Eastern').tz_convert('UTC'),
30, '1m', 'close')
30, '1m', 'close', 'minute')
# No adjustments in this window.
self.assertEqual(window.loc['2016-02-26 22:32', cf_mul],
+437 -156
View File
File diff suppressed because it is too large Load Diff
+6 -3
View File
@@ -648,7 +648,8 @@ cdef class BarData:
self._get_current_minute(),
bar_count,
frequency,
fields
fields,
self.data_frequency,
)
if self._adjust_minutes:
@@ -680,7 +681,8 @@ cdef class BarData:
self._get_current_minute(),
bar_count,
frequency,
field
field,
self.data_frequency,
)[assets] for field in fields
}
@@ -708,7 +710,8 @@ cdef class BarData:
self._get_current_minute(),
bar_count,
frequency,
field
field,
self.data_frequency,
) for field in fields
}
+2
View File
@@ -2160,6 +2160,7 @@ class TradingAlgorithm(object):
bar_count,
frequency,
field,
self.data_frequency,
ffill,
)
else:
@@ -2176,6 +2177,7 @@ class TradingAlgorithm(object):
bar_count,
frequency,
field,
self.data_frequency,
ffill,
)
+52 -20
View File
@@ -735,8 +735,12 @@ class DataPortal(object):
)
return tds[start_loc:end_loc + 1]
def _get_history_daily_window(self, assets, end_dt, bar_count,
field_to_use):
def _get_history_daily_window(self,
assets,
end_dt,
bar_count,
field_to_use,
data_frequency):
"""
Internal method that returns a dataframe containing history bars
of daily frequency for the given sids.
@@ -750,7 +754,7 @@ class DataPortal(object):
columns=None)
data = self._get_history_daily_window_data(
assets, days_for_window, end_dt, field_to_use
assets, days_for_window, end_dt, field_to_use, data_frequency
)
return pd.DataFrame(
data,
@@ -762,10 +766,9 @@ class DataPortal(object):
assets,
days_for_window,
end_dt,
field_to_use):
ends_at_midnight = (end_dt.hour == end_dt.minute == 0)
if ends_at_midnight:
field_to_use,
data_frequency):
if data_frequency == 'daily':
# two cases where we use daily data for the whole range:
# 1) the history window ends at midnight utc.
# 2) the last desired day of the window is after the
@@ -810,23 +813,25 @@ class DataPortal(object):
return daily_data
def _handle_minute_history_out_of_bounds(self, bar_count):
cal = self.trading_calendar
first_trading_minute_loc = (
self.trading_calendar.all_minutes.get_loc(
cal.all_minutes.get_loc(
self._first_trading_minute
)
if self._first_trading_minute is not None else None
)
suggested_start_day = (
self.trading_calendar.all_minutes[
suggested_start_day = cal.minute_to_session_label(
cal.all_minutes[
first_trading_minute_loc + bar_count
] + self.trading_calendar.day
).date()
] + cal.day
)
raise HistoryWindowStartsBeforeData(
first_trading_day=self._first_trading_day.date(),
bar_count=bar_count,
suggested_start_day=suggested_start_day,
suggested_start_day=suggested_start_day.date(),
)
def _get_history_minute_window(self, assets, end_dt, bar_count,
@@ -858,7 +863,13 @@ class DataPortal(object):
columns=assets
)
def get_history_window(self, assets, end_dt, bar_count, frequency, field,
def get_history_window(self,
assets,
end_dt,
bar_count,
frequency,
field,
data_frequency,
ffill=True):
"""
Public API method that returns a dataframe containing the requested
@@ -878,6 +889,10 @@ class DataPortal(object):
field: string
The desired field of the asset.
data_frequency: string
The frequency of the data to query; i.e. whether the data is
'daily' or 'minute' bars.
ffill: boolean
Forward-fill missing values. Only has effect if field
is 'price'.
@@ -892,10 +907,10 @@ class DataPortal(object):
if frequency == "1d":
if field == "price":
df = self._get_history_daily_window(assets, end_dt, bar_count,
"close")
"close", data_frequency)
else:
df = self._get_history_daily_window(assets, end_dt, bar_count,
field)
field, data_frequency)
elif frequency == "1m":
if field == "price":
df = self._get_history_minute_window(assets, end_dt, bar_count,
@@ -1297,7 +1312,13 @@ class DataPortal(object):
# returns is always calculated over the last 2 days, regardless
# of the simulation's data frequency.
hst = self.get_history_window(
[asset], dt, 2, "1d", "price", ffill=True
[asset],
dt,
2,
"1d",
"price",
data_frequency,
ffill=True,
)[asset]
return (hst.iloc[-1] - hst.iloc[0]) / hst.iloc[0]
@@ -1315,7 +1336,13 @@ class DataPortal(object):
calculated_bar_count = bars
price_arr = self.get_history_window(
[asset], dt, calculated_bar_count, freq_str, "price", ffill=True
[asset],
dt,
calculated_bar_count,
freq_str,
"price",
data_frequency,
ffill=True,
)[asset]
if transform_name == "mavg":
@@ -1324,8 +1351,13 @@ class DataPortal(object):
return nanstd(price_arr, ddof=1)
elif transform_name == "vwap":
volume_arr = self.get_history_window(
[asset], dt, calculated_bar_count, freq_str, "volume",
ffill=True
[asset],
dt,
calculated_bar_count,
freq_str,
"volume",
data_frequency,
ffill=True,
)[asset]
vol_sum = nansum(volume_arr)
+9 -5
View File
@@ -25,6 +25,7 @@ from zipline.data._resample import (
_minute_to_session_close,
_minute_to_session_volume,
)
from zipline.data.bar_reader import NoDataOnDate
from zipline.data.minute_bars import MinuteBarReader
from zipline.data.session_bars import SessionBarReader
from zipline.utils.memoize import lazyval
@@ -606,10 +607,6 @@ class ReindexBarReader(with_metaclass(ABCMeta)):
Currently only supports a ``trading_calendar`` which is a superset of the
``reader``'s calendar.
Also, the currenty implementation only reindexes the results from
``load_raw_arrays``, but in the future, `get_value` may also be made to
provide an empty result instead of raising on error.
Parameters
----------
@@ -652,7 +649,14 @@ class ReindexBarReader(with_metaclass(ABCMeta)):
return self._reader.first_trading_day
def get_value(self, sid, dt, field):
return self._reader.get_value(sid, dt, field)
# Give an empty result if no data is present.
try:
return self._reader.get_value(sid, dt, field)
except NoDataOnDate:
if field == 'volume':
return 0
else:
return np.nan
@abstractmethod
def _outer_dts(self, start_dt, end_dt):
+3
View File
@@ -146,6 +146,7 @@ class BenchmarkSource(object):
bar_count=len(minutes) + 1,
frequency="1m",
field="price",
data_frequency=self.emission_rate,
ffill=True
)[asset]
@@ -163,6 +164,7 @@ class BenchmarkSource(object):
bar_count=len(trading_days) + 1,
frequency="1d",
field="price",
data_frequency=self.emission_rate,
ffill=True
)[asset]
return benchmark_series.pct_change()[1:]
@@ -175,6 +177,7 @@ class BenchmarkSource(object):
bar_count=len(trading_days),
frequency="1d",
field="price",
data_frequency=self.emission_rate,
ffill=True
)[asset]
+1 -1
View File
@@ -714,7 +714,7 @@ class FakeDataPortal(DataPortal):
return 1.0
def get_history_window(self, assets, end_dt, bar_count, frequency, field,
ffill=True):
data_frequency, ffill=True):
if frequency == "1d":
end_idx = \
self.trading_calendar.all_sessions.searchsorted(end_dt)
+4 -1
View File
@@ -433,7 +433,10 @@ class WithTradingCalendars(object):
cls.trading_calendars = {}
for cal_str in cls.TRADING_CALENDAR_STRS:
for cal_str in (
set(cls.TRADING_CALENDAR_STRS) |
{cls.TRADING_CALENDAR_PRIMARY_CAL}
):
# Set name to allow aliasing.
calendar = get_calendar(cal_str)
setattr(cls,