From 562098dbf831b2e3199c951aa06c71468a127676 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Thu, 18 Aug 2016 16:51:03 -0400 Subject: [PATCH] ENH: Add a reader base which reindexes results. Working towards history results which contain mixed asset types, add a reader which makes `load_raw_arrays` return results indexed on the session/minute ranges specified by the specified `trading_calendar` instead of the calendar of the backing reader. This reader will be used to make Equity readers align with Future readers. It is intended for use as part of another reader (which will dispatch queries based on asset type and then recombined results) which will be passed to the `[Minute|Session]HistoryLoaders in the data portal. --- tests/data/test_resample.py | 126 +++++++++++++++++++++++++++++++++- zipline/data/resample.py | 131 ++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+), 1 deletion(-) diff --git a/tests/data/test_resample.py b/tests/data/test_resample.py index 5d7a5168..2629009f 100644 --- a/tests/data/test_resample.py +++ b/tests/data/test_resample.py @@ -16,7 +16,7 @@ from numbers import Real from nose_parameterized import parameterized from numpy.testing import assert_almost_equal -from numpy import nan, array +from numpy import nan, array, full import pandas as pd from pandas import DataFrame from six import iteritems @@ -25,11 +25,14 @@ from zipline.data.resample import ( minute_to_session, DailyHistoryAggregator, MinuteResampleSessionBarReader, + ReindexMinuteBarReader, + ReindexSessionBarReader, ) from zipline.testing.fixtures import ( WithEquityMinuteBarData, WithBcolzEquityMinuteBarReader, + WithBcolzEquityDailyBarReader, WithBcolzFutureMinuteBarReader, ZiplineTestCase, ) @@ -527,3 +530,124 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader, assert_almost_equal(values[col], result, err_msg="sid={0} col={1} dt={2}". format(sid, col, dt)) + + +class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader, + ZiplineTestCase): + + TRADING_CALENDAR_STRS = ('CME', 'NYSE') + TRADING_CALENDAR_PRIMARY_CAL = 'CME' + + ASSET_FINDER_EQUITY_SIDS = 1, 2, 3 + + START_DATE = pd.Timestamp('2015-12-01', tz='UTC') + END_DATE = pd.Timestamp('2015-12-31', tz='UTC') + + def test_load_raw_arrays(self): + reindex_reader = ReindexMinuteBarReader( + self.trading_calendar, + self.bcolz_equity_minute_bar_reader, + self.START_DATE, + self.END_DATE, + ) + m_open, m_close = self.trading_calendar.open_and_close_for_session( + self.START_DATE) + outer_minutes = self.trading_calendar.minutes_in_range(m_open, m_close) + result = reindex_reader.load_raw_arrays( + OHLCV, m_open, m_close, [1, 2]) + + opens = DataFrame(data=result[0], index=outer_minutes, + columns=[1, 2]) + opens_with_price = opens.dropna() + + self.assertEqual( + 1440, + len(opens), + "The result should have 1440 bars, the number of minutes in a " + "trading session on the target calendar." + ) + + self.assertEqual( + 390, + len(opens_with_price), + "The result, after dropping nans, should have 390 bars, the " + " number of bars in a trading session in the reader's calendar." + ) + + slicer = outer_minutes.slice_indexer( + end=pd.Timestamp('2015-12-01 14:30', tz='UTC')) + + assert_almost_equal( + opens[1][slicer], + full(slicer.stop, nan), + err_msg="All values before the NYSE market open should be nan.") + + slicer = outer_minutes.slice_indexer( + start=pd.Timestamp('2015-12-01 21:01', tz='UTC')) + + assert_almost_equal( + opens[1][slicer], + full(slicer.stop - slicer.start, nan), + err_msg="All values after the NYSE market close should be nan.") + + first_minute_loc = outer_minutes.get_loc(pd.Timestamp( + '2015-12-01 14:31', tz='UTC')) + + # Spot check a value. + # The value is the autogenerated value from test fixtures. + assert_almost_equal( + 10.0, + opens[1][first_minute_loc], + err_msg="The value for Equity 1, should be 10.0, at NYSE open.") + + +class TestReindexSessionBars(WithBcolzEquityDailyBarReader, + ZiplineTestCase): + + TRADING_CALENDAR_STRS = ('CME', 'NYSE') + TRADING_CALENDAR_PRIMARY_CAL = 'CME' + + ASSET_FINDER_EQUITY_SIDS = 1, 2, 3 + + # Dates are chosen to span Thanksgiving, which is not a Holiday on CME. + START_DATE = pd.Timestamp('2015-11-01', tz='UTC') + END_DATE = pd.Timestamp('2015-11-30', tz='UTC') + + def test_load_raw_arrays(self): + reindex_reader = ReindexSessionBarReader( + self.trading_calendar, + self.bcolz_equity_daily_bar_reader, + self.START_DATE, + self.END_DATE, + ) + + outer_sessions = self.trading_calendar.sessions_in_range( + self.START_DATE, self.END_DATE) + + result = reindex_reader.load_raw_arrays( + OHLCV, self.START_DATE, self.END_DATE, [1, 2]) + + opens = DataFrame(data=result[0], index=outer_sessions, + columns=[1, 2]) + opens_with_price = opens.dropna() + + self.assertEqual( + 21, + len(opens), + "The reindexed result should have 21 days, which is the number of " + "business days in 2015-11") + self.assertEqual( + 20, + len(opens_with_price), + "The reindexed result after dropping nans should have 20 days, " + "because Thanksgiving is a NYSE holiday.") + + # Thanksgiving, 2015-11-26. + # Is a holiday in NYSE, but not in CME. + tday_loc = outer_sessions.get_loc(pd.Timestamp('2015-11-26', tz='UTC')) + + assert_almost_equal( + nan, + opens[1][tday_loc], + err_msg="2015-11-26 should be `nan`, since Thanksgiving is a " + "holiday in the reader's calendar.") diff --git a/zipline/data/resample.py b/zipline/data/resample.py index fd147366..06af49db 100644 --- a/zipline/data/resample.py +++ b/zipline/data/resample.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections import OrderedDict +from abc import ABCMeta, abstractmethod import numpy as np import pandas as pd from pandas import DataFrame +from six import with_metaclass +from zipline.data.minute_bars import MinuteBarReader from zipline.data.session_bars import SessionBarReader from zipline.utils.memoize import lazyval @@ -498,3 +501,131 @@ class MinuteResampleSessionBarReader(SessionBarReader): return self.trading_calendar.minute_to_session_label( self._minute_bar_reader.last_available_dt ) + + +class ReindexBarReader(with_metaclass(ABCMeta)): + """ + A base class for readers which reindexes results, filling in the additional + indices with empty data. + + Used to align the reading assets which trade on different calendars. + + Currently only supports a ``trading_calendar`` which is a superset of the + ``reader``'s calendar. + + Also, the currenty implementation only reindexes the results from + ``load_raw_arrays``, but in the future, `get_value` may also be made to + provide an empty result instead of raising on error. + + Parameters + ---------- + + - trading_calendar : zipline.utils.trading_calendar.TradingCalendar + The calendar to use when indexing results from the reader. + - reader : MinuteBarReader|SessionBarReader + The reader which has a calendar that is a subset of the desired + ``trading_calendar``. + - first_trading_session : pd.Timestamp + The first trading session the reader should provide. Must be specified, + since the ``reader``'s first session may not exactly align with the + desired calendar. Specifically, in the case where the first session + on the target calendar is a holiday on the ``reader``'s calendar. + - last_trading_session : pd.Timestamp + The last trading session the reader should provide. Must be specified, + since the ``reader``'s last session may not exactly align with the + desired calendar. Specifically, in the case where the last session + on the target calendar is a holiday on the ``reader``'s calendar. + """ + + def __init__(self, + trading_calendar, + reader, + first_trading_session, + last_trading_session): + self._trading_calendar = trading_calendar + self._reader = reader + self._first_trading_session = first_trading_session + self._last_trading_session = last_trading_session + + @property + def last_available_dt(self): + return self._reader.last_available_dt + + def get_last_traded_dt(self, sid, dt): + return self._reader.get_last_traded_dt(sid, dt) + + @property + def first_trading_day(self): + return self._reader.first_trading_day + + def get_value(self, sid, dt, field): + return self._reader.get_value(sid, dt, field) + + @abstractmethod + def _outer_dts(self, start_dt, end_dt): + pass + + @abstractmethod + def _inner_dts(self, start_dt, end_dt): + pass + + @property + def trading_calendar(self): + return self._trading_calendar + + @lazyval + def sessions(self): + return self.trading_calendar.sessions_in_range( + self._first_trading_session, + self._last_trading_session + ) + + def load_raw_arrays(self, fields, start_dt, end_dt, sids): + outer_dts = self._outer_dts(start_dt, end_dt) + inner_dts = self._inner_dts(start_dt, end_dt) + + indices = outer_dts.searchsorted(inner_dts) + + shape = len(outer_dts), len(sids) + + outer_results = [] + + inner_results = self._reader.load_raw_arrays( + fields, inner_dts[0], inner_dts[-1], sids) + + for i, field in enumerate(fields): + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.uint32) + + out[indices] = inner_results[i] + + outer_results.append(out) + + return outer_results + + +class ReindexMinuteBarReader(ReindexBarReader, MinuteBarReader): + """ + See: ``ReindexBarReader`` + """ + + def _outer_dts(self, start_dt, end_dt): + return self._trading_calendar.minutes_in_range(start_dt, end_dt) + + def _inner_dts(self, start_dt, end_dt): + return self._reader.calendar.minutes_in_range(start_dt, end_dt) + + +class ReindexSessionBarReader(ReindexBarReader, SessionBarReader): + """ + See: ``ReindexBarReader`` + """ + + def _outer_dts(self, start_dt, end_dt): + return self.trading_calendar.sessions_in_range(start_dt, end_dt) + + def _inner_dts(self, start_dt, end_dt): + return self._reader.trading_calendar.sessions_in_range( + start_dt, end_dt)