mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 03:19:15 +08:00
Merge pull request #1432 from quantopian/reindex-reader
ENH: Add a reader base which reindexes results.
This commit is contained in:
+125
-1
@@ -16,7 +16,7 @@ from numbers import Real
|
||||
|
||||
from nose_parameterized import parameterized
|
||||
from numpy.testing import assert_almost_equal
|
||||
from numpy import nan, array
|
||||
from numpy import nan, array, full
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from six import iteritems
|
||||
@@ -25,11 +25,14 @@ from zipline.data.resample import (
|
||||
minute_to_session,
|
||||
DailyHistoryAggregator,
|
||||
MinuteResampleSessionBarReader,
|
||||
ReindexMinuteBarReader,
|
||||
ReindexSessionBarReader,
|
||||
)
|
||||
|
||||
from zipline.testing.fixtures import (
|
||||
WithEquityMinuteBarData,
|
||||
WithBcolzEquityMinuteBarReader,
|
||||
WithBcolzEquityDailyBarReader,
|
||||
WithBcolzFutureMinuteBarReader,
|
||||
ZiplineTestCase,
|
||||
)
|
||||
@@ -527,3 +530,124 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader,
|
||||
assert_almost_equal(values[col], result,
|
||||
err_msg="sid={0} col={1} dt={2}".
|
||||
format(sid, col, dt))
|
||||
|
||||
|
||||
class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader,
|
||||
ZiplineTestCase):
|
||||
|
||||
TRADING_CALENDAR_STRS = ('CME', 'NYSE')
|
||||
TRADING_CALENDAR_PRIMARY_CAL = 'CME'
|
||||
|
||||
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3
|
||||
|
||||
START_DATE = pd.Timestamp('2015-12-01', tz='UTC')
|
||||
END_DATE = pd.Timestamp('2015-12-31', tz='UTC')
|
||||
|
||||
def test_load_raw_arrays(self):
|
||||
reindex_reader = ReindexMinuteBarReader(
|
||||
self.trading_calendar,
|
||||
self.bcolz_equity_minute_bar_reader,
|
||||
self.START_DATE,
|
||||
self.END_DATE,
|
||||
)
|
||||
m_open, m_close = self.trading_calendar.open_and_close_for_session(
|
||||
self.START_DATE)
|
||||
outer_minutes = self.trading_calendar.minutes_in_range(m_open, m_close)
|
||||
result = reindex_reader.load_raw_arrays(
|
||||
OHLCV, m_open, m_close, [1, 2])
|
||||
|
||||
opens = DataFrame(data=result[0], index=outer_minutes,
|
||||
columns=[1, 2])
|
||||
opens_with_price = opens.dropna()
|
||||
|
||||
self.assertEqual(
|
||||
1440,
|
||||
len(opens),
|
||||
"The result should have 1440 bars, the number of minutes in a "
|
||||
"trading session on the target calendar."
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
390,
|
||||
len(opens_with_price),
|
||||
"The result, after dropping nans, should have 390 bars, the "
|
||||
" number of bars in a trading session in the reader's calendar."
|
||||
)
|
||||
|
||||
slicer = outer_minutes.slice_indexer(
|
||||
end=pd.Timestamp('2015-12-01 14:30', tz='UTC'))
|
||||
|
||||
assert_almost_equal(
|
||||
opens[1][slicer],
|
||||
full(slicer.stop, nan),
|
||||
err_msg="All values before the NYSE market open should be nan.")
|
||||
|
||||
slicer = outer_minutes.slice_indexer(
|
||||
start=pd.Timestamp('2015-12-01 21:01', tz='UTC'))
|
||||
|
||||
assert_almost_equal(
|
||||
opens[1][slicer],
|
||||
full(slicer.stop - slicer.start, nan),
|
||||
err_msg="All values after the NYSE market close should be nan.")
|
||||
|
||||
first_minute_loc = outer_minutes.get_loc(pd.Timestamp(
|
||||
'2015-12-01 14:31', tz='UTC'))
|
||||
|
||||
# Spot check a value.
|
||||
# The value is the autogenerated value from test fixtures.
|
||||
assert_almost_equal(
|
||||
10.0,
|
||||
opens[1][first_minute_loc],
|
||||
err_msg="The value for Equity 1, should be 10.0, at NYSE open.")
|
||||
|
||||
|
||||
class TestReindexSessionBars(WithBcolzEquityDailyBarReader,
|
||||
ZiplineTestCase):
|
||||
|
||||
TRADING_CALENDAR_STRS = ('CME', 'NYSE')
|
||||
TRADING_CALENDAR_PRIMARY_CAL = 'CME'
|
||||
|
||||
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3
|
||||
|
||||
# Dates are chosen to span Thanksgiving, which is not a Holiday on CME.
|
||||
START_DATE = pd.Timestamp('2015-11-01', tz='UTC')
|
||||
END_DATE = pd.Timestamp('2015-11-30', tz='UTC')
|
||||
|
||||
def test_load_raw_arrays(self):
|
||||
reindex_reader = ReindexSessionBarReader(
|
||||
self.trading_calendar,
|
||||
self.bcolz_equity_daily_bar_reader,
|
||||
self.START_DATE,
|
||||
self.END_DATE,
|
||||
)
|
||||
|
||||
outer_sessions = self.trading_calendar.sessions_in_range(
|
||||
self.START_DATE, self.END_DATE)
|
||||
|
||||
result = reindex_reader.load_raw_arrays(
|
||||
OHLCV, self.START_DATE, self.END_DATE, [1, 2])
|
||||
|
||||
opens = DataFrame(data=result[0], index=outer_sessions,
|
||||
columns=[1, 2])
|
||||
opens_with_price = opens.dropna()
|
||||
|
||||
self.assertEqual(
|
||||
21,
|
||||
len(opens),
|
||||
"The reindexed result should have 21 days, which is the number of "
|
||||
"business days in 2015-11")
|
||||
self.assertEqual(
|
||||
20,
|
||||
len(opens_with_price),
|
||||
"The reindexed result after dropping nans should have 20 days, "
|
||||
"because Thanksgiving is a NYSE holiday.")
|
||||
|
||||
# Thanksgiving, 2015-11-26.
|
||||
# Is a holiday in NYSE, but not in CME.
|
||||
tday_loc = outer_sessions.get_loc(pd.Timestamp('2015-11-26', tz='UTC'))
|
||||
|
||||
assert_almost_equal(
|
||||
nan,
|
||||
opens[1][tday_loc],
|
||||
err_msg="2015-11-26 should be `nan`, since Thanksgiving is a "
|
||||
"holiday in the reader's calendar.")
|
||||
|
||||
@@ -12,11 +12,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from collections import OrderedDict
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from six import with_metaclass
|
||||
|
||||
from zipline.data.minute_bars import MinuteBarReader
|
||||
from zipline.data.session_bars import SessionBarReader
|
||||
from zipline.utils.memoize import lazyval
|
||||
|
||||
@@ -498,3 +501,131 @@ class MinuteResampleSessionBarReader(SessionBarReader):
|
||||
return self.trading_calendar.minute_to_session_label(
|
||||
self._minute_bar_reader.last_available_dt
|
||||
)
|
||||
|
||||
|
||||
class ReindexBarReader(with_metaclass(ABCMeta)):
|
||||
"""
|
||||
A base class for readers which reindexes results, filling in the additional
|
||||
indices with empty data.
|
||||
|
||||
Used to align the reading assets which trade on different calendars.
|
||||
|
||||
Currently only supports a ``trading_calendar`` which is a superset of the
|
||||
``reader``'s calendar.
|
||||
|
||||
Also, the currenty implementation only reindexes the results from
|
||||
``load_raw_arrays``, but in the future, `get_value` may also be made to
|
||||
provide an empty result instead of raising on error.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
- trading_calendar : zipline.utils.trading_calendar.TradingCalendar
|
||||
The calendar to use when indexing results from the reader.
|
||||
- reader : MinuteBarReader|SessionBarReader
|
||||
The reader which has a calendar that is a subset of the desired
|
||||
``trading_calendar``.
|
||||
- first_trading_session : pd.Timestamp
|
||||
The first trading session the reader should provide. Must be specified,
|
||||
since the ``reader``'s first session may not exactly align with the
|
||||
desired calendar. Specifically, in the case where the first session
|
||||
on the target calendar is a holiday on the ``reader``'s calendar.
|
||||
- last_trading_session : pd.Timestamp
|
||||
The last trading session the reader should provide. Must be specified,
|
||||
since the ``reader``'s last session may not exactly align with the
|
||||
desired calendar. Specifically, in the case where the last session
|
||||
on the target calendar is a holiday on the ``reader``'s calendar.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
trading_calendar,
|
||||
reader,
|
||||
first_trading_session,
|
||||
last_trading_session):
|
||||
self._trading_calendar = trading_calendar
|
||||
self._reader = reader
|
||||
self._first_trading_session = first_trading_session
|
||||
self._last_trading_session = last_trading_session
|
||||
|
||||
@property
|
||||
def last_available_dt(self):
|
||||
return self._reader.last_available_dt
|
||||
|
||||
def get_last_traded_dt(self, sid, dt):
|
||||
return self._reader.get_last_traded_dt(sid, dt)
|
||||
|
||||
@property
|
||||
def first_trading_day(self):
|
||||
return self._reader.first_trading_day
|
||||
|
||||
def get_value(self, sid, dt, field):
|
||||
return self._reader.get_value(sid, dt, field)
|
||||
|
||||
@abstractmethod
|
||||
def _outer_dts(self, start_dt, end_dt):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _inner_dts(self, start_dt, end_dt):
|
||||
pass
|
||||
|
||||
@property
|
||||
def trading_calendar(self):
|
||||
return self._trading_calendar
|
||||
|
||||
@lazyval
|
||||
def sessions(self):
|
||||
return self.trading_calendar.sessions_in_range(
|
||||
self._first_trading_session,
|
||||
self._last_trading_session
|
||||
)
|
||||
|
||||
def load_raw_arrays(self, fields, start_dt, end_dt, sids):
|
||||
outer_dts = self._outer_dts(start_dt, end_dt)
|
||||
inner_dts = self._inner_dts(start_dt, end_dt)
|
||||
|
||||
indices = outer_dts.searchsorted(inner_dts)
|
||||
|
||||
shape = len(outer_dts), len(sids)
|
||||
|
||||
outer_results = []
|
||||
|
||||
inner_results = self._reader.load_raw_arrays(
|
||||
fields, inner_dts[0], inner_dts[-1], sids)
|
||||
|
||||
for i, field in enumerate(fields):
|
||||
if field != 'volume':
|
||||
out = np.full(shape, np.nan)
|
||||
else:
|
||||
out = np.zeros(shape, dtype=np.uint32)
|
||||
|
||||
out[indices] = inner_results[i]
|
||||
|
||||
outer_results.append(out)
|
||||
|
||||
return outer_results
|
||||
|
||||
|
||||
class ReindexMinuteBarReader(ReindexBarReader, MinuteBarReader):
|
||||
"""
|
||||
See: ``ReindexBarReader``
|
||||
"""
|
||||
|
||||
def _outer_dts(self, start_dt, end_dt):
|
||||
return self._trading_calendar.minutes_in_range(start_dt, end_dt)
|
||||
|
||||
def _inner_dts(self, start_dt, end_dt):
|
||||
return self._reader.calendar.minutes_in_range(start_dt, end_dt)
|
||||
|
||||
|
||||
class ReindexSessionBarReader(ReindexBarReader, SessionBarReader):
|
||||
"""
|
||||
See: ``ReindexBarReader``
|
||||
"""
|
||||
|
||||
def _outer_dts(self, start_dt, end_dt):
|
||||
return self.trading_calendar.sessions_in_range(start_dt, end_dt)
|
||||
|
||||
def _inner_dts(self, start_dt, end_dt):
|
||||
return self._reader.trading_calendar.sessions_in_range(
|
||||
start_dt, end_dt)
|
||||
|
||||
Reference in New Issue
Block a user