diff --git a/tests/data/test_resample.py b/tests/data/test_resample.py index 5d7a5168..2629009f 100644 --- a/tests/data/test_resample.py +++ b/tests/data/test_resample.py @@ -16,7 +16,7 @@ from numbers import Real from nose_parameterized import parameterized from numpy.testing import assert_almost_equal -from numpy import nan, array +from numpy import nan, array, full import pandas as pd from pandas import DataFrame from six import iteritems @@ -25,11 +25,14 @@ from zipline.data.resample import ( minute_to_session, DailyHistoryAggregator, MinuteResampleSessionBarReader, + ReindexMinuteBarReader, + ReindexSessionBarReader, ) from zipline.testing.fixtures import ( WithEquityMinuteBarData, WithBcolzEquityMinuteBarReader, + WithBcolzEquityDailyBarReader, WithBcolzFutureMinuteBarReader, ZiplineTestCase, ) @@ -527,3 +530,124 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader, assert_almost_equal(values[col], result, err_msg="sid={0} col={1} dt={2}". format(sid, col, dt)) + + +class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader, + ZiplineTestCase): + + TRADING_CALENDAR_STRS = ('CME', 'NYSE') + TRADING_CALENDAR_PRIMARY_CAL = 'CME' + + ASSET_FINDER_EQUITY_SIDS = 1, 2, 3 + + START_DATE = pd.Timestamp('2015-12-01', tz='UTC') + END_DATE = pd.Timestamp('2015-12-31', tz='UTC') + + def test_load_raw_arrays(self): + reindex_reader = ReindexMinuteBarReader( + self.trading_calendar, + self.bcolz_equity_minute_bar_reader, + self.START_DATE, + self.END_DATE, + ) + m_open, m_close = self.trading_calendar.open_and_close_for_session( + self.START_DATE) + outer_minutes = self.trading_calendar.minutes_in_range(m_open, m_close) + result = reindex_reader.load_raw_arrays( + OHLCV, m_open, m_close, [1, 2]) + + opens = DataFrame(data=result[0], index=outer_minutes, + columns=[1, 2]) + opens_with_price = opens.dropna() + + self.assertEqual( + 1440, + len(opens), + "The result should have 1440 bars, the number of minutes in a " + "trading session on the target calendar." + ) + + self.assertEqual( + 390, + len(opens_with_price), + "The result, after dropping nans, should have 390 bars, the " + " number of bars in a trading session in the reader's calendar." + ) + + slicer = outer_minutes.slice_indexer( + end=pd.Timestamp('2015-12-01 14:30', tz='UTC')) + + assert_almost_equal( + opens[1][slicer], + full(slicer.stop, nan), + err_msg="All values before the NYSE market open should be nan.") + + slicer = outer_minutes.slice_indexer( + start=pd.Timestamp('2015-12-01 21:01', tz='UTC')) + + assert_almost_equal( + opens[1][slicer], + full(slicer.stop - slicer.start, nan), + err_msg="All values after the NYSE market close should be nan.") + + first_minute_loc = outer_minutes.get_loc(pd.Timestamp( + '2015-12-01 14:31', tz='UTC')) + + # Spot check a value. + # The value is the autogenerated value from test fixtures. + assert_almost_equal( + 10.0, + opens[1][first_minute_loc], + err_msg="The value for Equity 1, should be 10.0, at NYSE open.") + + +class TestReindexSessionBars(WithBcolzEquityDailyBarReader, + ZiplineTestCase): + + TRADING_CALENDAR_STRS = ('CME', 'NYSE') + TRADING_CALENDAR_PRIMARY_CAL = 'CME' + + ASSET_FINDER_EQUITY_SIDS = 1, 2, 3 + + # Dates are chosen to span Thanksgiving, which is not a Holiday on CME. + START_DATE = pd.Timestamp('2015-11-01', tz='UTC') + END_DATE = pd.Timestamp('2015-11-30', tz='UTC') + + def test_load_raw_arrays(self): + reindex_reader = ReindexSessionBarReader( + self.trading_calendar, + self.bcolz_equity_daily_bar_reader, + self.START_DATE, + self.END_DATE, + ) + + outer_sessions = self.trading_calendar.sessions_in_range( + self.START_DATE, self.END_DATE) + + result = reindex_reader.load_raw_arrays( + OHLCV, self.START_DATE, self.END_DATE, [1, 2]) + + opens = DataFrame(data=result[0], index=outer_sessions, + columns=[1, 2]) + opens_with_price = opens.dropna() + + self.assertEqual( + 21, + len(opens), + "The reindexed result should have 21 days, which is the number of " + "business days in 2015-11") + self.assertEqual( + 20, + len(opens_with_price), + "The reindexed result after dropping nans should have 20 days, " + "because Thanksgiving is a NYSE holiday.") + + # Thanksgiving, 2015-11-26. + # Is a holiday in NYSE, but not in CME. + tday_loc = outer_sessions.get_loc(pd.Timestamp('2015-11-26', tz='UTC')) + + assert_almost_equal( + nan, + opens[1][tday_loc], + err_msg="2015-11-26 should be `nan`, since Thanksgiving is a " + "holiday in the reader's calendar.") diff --git a/zipline/data/resample.py b/zipline/data/resample.py index fd147366..06af49db 100644 --- a/zipline/data/resample.py +++ b/zipline/data/resample.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections import OrderedDict +from abc import ABCMeta, abstractmethod import numpy as np import pandas as pd from pandas import DataFrame +from six import with_metaclass +from zipline.data.minute_bars import MinuteBarReader from zipline.data.session_bars import SessionBarReader from zipline.utils.memoize import lazyval @@ -498,3 +501,131 @@ class MinuteResampleSessionBarReader(SessionBarReader): return self.trading_calendar.minute_to_session_label( self._minute_bar_reader.last_available_dt ) + + +class ReindexBarReader(with_metaclass(ABCMeta)): + """ + A base class for readers which reindexes results, filling in the additional + indices with empty data. + + Used to align the reading assets which trade on different calendars. + + Currently only supports a ``trading_calendar`` which is a superset of the + ``reader``'s calendar. + + Also, the currenty implementation only reindexes the results from + ``load_raw_arrays``, but in the future, `get_value` may also be made to + provide an empty result instead of raising on error. + + Parameters + ---------- + + - trading_calendar : zipline.utils.trading_calendar.TradingCalendar + The calendar to use when indexing results from the reader. + - reader : MinuteBarReader|SessionBarReader + The reader which has a calendar that is a subset of the desired + ``trading_calendar``. + - first_trading_session : pd.Timestamp + The first trading session the reader should provide. Must be specified, + since the ``reader``'s first session may not exactly align with the + desired calendar. Specifically, in the case where the first session + on the target calendar is a holiday on the ``reader``'s calendar. + - last_trading_session : pd.Timestamp + The last trading session the reader should provide. Must be specified, + since the ``reader``'s last session may not exactly align with the + desired calendar. Specifically, in the case where the last session + on the target calendar is a holiday on the ``reader``'s calendar. + """ + + def __init__(self, + trading_calendar, + reader, + first_trading_session, + last_trading_session): + self._trading_calendar = trading_calendar + self._reader = reader + self._first_trading_session = first_trading_session + self._last_trading_session = last_trading_session + + @property + def last_available_dt(self): + return self._reader.last_available_dt + + def get_last_traded_dt(self, sid, dt): + return self._reader.get_last_traded_dt(sid, dt) + + @property + def first_trading_day(self): + return self._reader.first_trading_day + + def get_value(self, sid, dt, field): + return self._reader.get_value(sid, dt, field) + + @abstractmethod + def _outer_dts(self, start_dt, end_dt): + pass + + @abstractmethod + def _inner_dts(self, start_dt, end_dt): + pass + + @property + def trading_calendar(self): + return self._trading_calendar + + @lazyval + def sessions(self): + return self.trading_calendar.sessions_in_range( + self._first_trading_session, + self._last_trading_session + ) + + def load_raw_arrays(self, fields, start_dt, end_dt, sids): + outer_dts = self._outer_dts(start_dt, end_dt) + inner_dts = self._inner_dts(start_dt, end_dt) + + indices = outer_dts.searchsorted(inner_dts) + + shape = len(outer_dts), len(sids) + + outer_results = [] + + inner_results = self._reader.load_raw_arrays( + fields, inner_dts[0], inner_dts[-1], sids) + + for i, field in enumerate(fields): + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.uint32) + + out[indices] = inner_results[i] + + outer_results.append(out) + + return outer_results + + +class ReindexMinuteBarReader(ReindexBarReader, MinuteBarReader): + """ + See: ``ReindexBarReader`` + """ + + def _outer_dts(self, start_dt, end_dt): + return self._trading_calendar.minutes_in_range(start_dt, end_dt) + + def _inner_dts(self, start_dt, end_dt): + return self._reader.calendar.minutes_in_range(start_dt, end_dt) + + +class ReindexSessionBarReader(ReindexBarReader, SessionBarReader): + """ + See: ``ReindexBarReader`` + """ + + def _outer_dts(self, start_dt, end_dt): + return self.trading_calendar.sessions_in_range(start_dt, end_dt) + + def _inner_dts(self, start_dt, end_dt): + return self._reader.trading_calendar.sessions_in_range( + start_dt, end_dt)