Merge pull request #1432 from quantopian/reindex-reader

ENH: Add a reader base which reindexes results.
This commit is contained in:
Eddie Hebert
2016-08-25 09:34:06 -04:00
committed by GitHub
2 changed files with 256 additions and 1 deletions
+125 -1
View File
@@ -16,7 +16,7 @@ from numbers import Real
from nose_parameterized import parameterized
from numpy.testing import assert_almost_equal
from numpy import nan, array
from numpy import nan, array, full
import pandas as pd
from pandas import DataFrame
from six import iteritems
@@ -25,11 +25,14 @@ from zipline.data.resample import (
minute_to_session,
DailyHistoryAggregator,
MinuteResampleSessionBarReader,
ReindexMinuteBarReader,
ReindexSessionBarReader,
)
from zipline.testing.fixtures import (
WithEquityMinuteBarData,
WithBcolzEquityMinuteBarReader,
WithBcolzEquityDailyBarReader,
WithBcolzFutureMinuteBarReader,
ZiplineTestCase,
)
@@ -527,3 +530,124 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader,
assert_almost_equal(values[col], result,
err_msg="sid={0} col={1} dt={2}".
format(sid, col, dt))
class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader,
ZiplineTestCase):
TRADING_CALENDAR_STRS = ('CME', 'NYSE')
TRADING_CALENDAR_PRIMARY_CAL = 'CME'
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3
START_DATE = pd.Timestamp('2015-12-01', tz='UTC')
END_DATE = pd.Timestamp('2015-12-31', tz='UTC')
def test_load_raw_arrays(self):
reindex_reader = ReindexMinuteBarReader(
self.trading_calendar,
self.bcolz_equity_minute_bar_reader,
self.START_DATE,
self.END_DATE,
)
m_open, m_close = self.trading_calendar.open_and_close_for_session(
self.START_DATE)
outer_minutes = self.trading_calendar.minutes_in_range(m_open, m_close)
result = reindex_reader.load_raw_arrays(
OHLCV, m_open, m_close, [1, 2])
opens = DataFrame(data=result[0], index=outer_minutes,
columns=[1, 2])
opens_with_price = opens.dropna()
self.assertEqual(
1440,
len(opens),
"The result should have 1440 bars, the number of minutes in a "
"trading session on the target calendar."
)
self.assertEqual(
390,
len(opens_with_price),
"The result, after dropping nans, should have 390 bars, the "
" number of bars in a trading session in the reader's calendar."
)
slicer = outer_minutes.slice_indexer(
end=pd.Timestamp('2015-12-01 14:30', tz='UTC'))
assert_almost_equal(
opens[1][slicer],
full(slicer.stop, nan),
err_msg="All values before the NYSE market open should be nan.")
slicer = outer_minutes.slice_indexer(
start=pd.Timestamp('2015-12-01 21:01', tz='UTC'))
assert_almost_equal(
opens[1][slicer],
full(slicer.stop - slicer.start, nan),
err_msg="All values after the NYSE market close should be nan.")
first_minute_loc = outer_minutes.get_loc(pd.Timestamp(
'2015-12-01 14:31', tz='UTC'))
# Spot check a value.
# The value is the autogenerated value from test fixtures.
assert_almost_equal(
10.0,
opens[1][first_minute_loc],
err_msg="The value for Equity 1, should be 10.0, at NYSE open.")
class TestReindexSessionBars(WithBcolzEquityDailyBarReader,
ZiplineTestCase):
TRADING_CALENDAR_STRS = ('CME', 'NYSE')
TRADING_CALENDAR_PRIMARY_CAL = 'CME'
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3
# Dates are chosen to span Thanksgiving, which is not a Holiday on CME.
START_DATE = pd.Timestamp('2015-11-01', tz='UTC')
END_DATE = pd.Timestamp('2015-11-30', tz='UTC')
def test_load_raw_arrays(self):
reindex_reader = ReindexSessionBarReader(
self.trading_calendar,
self.bcolz_equity_daily_bar_reader,
self.START_DATE,
self.END_DATE,
)
outer_sessions = self.trading_calendar.sessions_in_range(
self.START_DATE, self.END_DATE)
result = reindex_reader.load_raw_arrays(
OHLCV, self.START_DATE, self.END_DATE, [1, 2])
opens = DataFrame(data=result[0], index=outer_sessions,
columns=[1, 2])
opens_with_price = opens.dropna()
self.assertEqual(
21,
len(opens),
"The reindexed result should have 21 days, which is the number of "
"business days in 2015-11")
self.assertEqual(
20,
len(opens_with_price),
"The reindexed result after dropping nans should have 20 days, "
"because Thanksgiving is a NYSE holiday.")
# Thanksgiving, 2015-11-26.
# Is a holiday in NYSE, but not in CME.
tday_loc = outer_sessions.get_loc(pd.Timestamp('2015-11-26', tz='UTC'))
assert_almost_equal(
nan,
opens[1][tday_loc],
err_msg="2015-11-26 should be `nan`, since Thanksgiving is a "
"holiday in the reader's calendar.")
+131
View File
@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from abc import ABCMeta, abstractmethod
import numpy as np
import pandas as pd
from pandas import DataFrame
from six import with_metaclass
from zipline.data.minute_bars import MinuteBarReader
from zipline.data.session_bars import SessionBarReader
from zipline.utils.memoize import lazyval
@@ -498,3 +501,131 @@ class MinuteResampleSessionBarReader(SessionBarReader):
return self.trading_calendar.minute_to_session_label(
self._minute_bar_reader.last_available_dt
)
class ReindexBarReader(with_metaclass(ABCMeta)):
"""
A base class for readers which reindexes results, filling in the additional
indices with empty data.
Used to align the reading assets which trade on different calendars.
Currently only supports a ``trading_calendar`` which is a superset of the
``reader``'s calendar.
Also, the currenty implementation only reindexes the results from
``load_raw_arrays``, but in the future, `get_value` may also be made to
provide an empty result instead of raising on error.
Parameters
----------
- trading_calendar : zipline.utils.trading_calendar.TradingCalendar
The calendar to use when indexing results from the reader.
- reader : MinuteBarReader|SessionBarReader
The reader which has a calendar that is a subset of the desired
``trading_calendar``.
- first_trading_session : pd.Timestamp
The first trading session the reader should provide. Must be specified,
since the ``reader``'s first session may not exactly align with the
desired calendar. Specifically, in the case where the first session
on the target calendar is a holiday on the ``reader``'s calendar.
- last_trading_session : pd.Timestamp
The last trading session the reader should provide. Must be specified,
since the ``reader``'s last session may not exactly align with the
desired calendar. Specifically, in the case where the last session
on the target calendar is a holiday on the ``reader``'s calendar.
"""
def __init__(self,
trading_calendar,
reader,
first_trading_session,
last_trading_session):
self._trading_calendar = trading_calendar
self._reader = reader
self._first_trading_session = first_trading_session
self._last_trading_session = last_trading_session
@property
def last_available_dt(self):
return self._reader.last_available_dt
def get_last_traded_dt(self, sid, dt):
return self._reader.get_last_traded_dt(sid, dt)
@property
def first_trading_day(self):
return self._reader.first_trading_day
def get_value(self, sid, dt, field):
return self._reader.get_value(sid, dt, field)
@abstractmethod
def _outer_dts(self, start_dt, end_dt):
pass
@abstractmethod
def _inner_dts(self, start_dt, end_dt):
pass
@property
def trading_calendar(self):
return self._trading_calendar
@lazyval
def sessions(self):
return self.trading_calendar.sessions_in_range(
self._first_trading_session,
self._last_trading_session
)
def load_raw_arrays(self, fields, start_dt, end_dt, sids):
outer_dts = self._outer_dts(start_dt, end_dt)
inner_dts = self._inner_dts(start_dt, end_dt)
indices = outer_dts.searchsorted(inner_dts)
shape = len(outer_dts), len(sids)
outer_results = []
inner_results = self._reader.load_raw_arrays(
fields, inner_dts[0], inner_dts[-1], sids)
for i, field in enumerate(fields):
if field != 'volume':
out = np.full(shape, np.nan)
else:
out = np.zeros(shape, dtype=np.uint32)
out[indices] = inner_results[i]
outer_results.append(out)
return outer_results
class ReindexMinuteBarReader(ReindexBarReader, MinuteBarReader):
"""
See: ``ReindexBarReader``
"""
def _outer_dts(self, start_dt, end_dt):
return self._trading_calendar.minutes_in_range(start_dt, end_dt)
def _inner_dts(self, start_dt, end_dt):
return self._reader.calendar.minutes_in_range(start_dt, end_dt)
class ReindexSessionBarReader(ReindexBarReader, SessionBarReader):
"""
See: ``ReindexBarReader``
"""
def _outer_dts(self, start_dt, end_dt):
return self.trading_calendar.sessions_in_range(start_dt, end_dt)
def _inner_dts(self, start_dt, end_dt):
return self._reader.trading_calendar.sessions_in_range(
start_dt, end_dt)