MAINT: Add BarReader base class for both minute and session readers

This commit is contained in:
Jean Bredeche
2016-09-14 10:31:52 -04:00
parent 1f859d34e5
commit 2856fd0ecf
6 changed files with 186 additions and 205 deletions
+25
View File
@@ -518,6 +518,19 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader,
END_DATE = pd.Timestamp('2016-03-17', tz='UTC')
NUM_SESSIONS = 2
@classmethod
def make_futures_info(cls):
future_dict = {}
for future_sid in cls.ASSET_FINDER_FUTURE_SIDS:
future_dict[future_sid] = {
'multiplier': 1000,
'exchange': 'CME',
'root_symbol': "ABC"
}
return pd.DataFrame.from_dict(future_dict, orient='index')
@classmethod
def make_future_minute_bar_data(cls):
for sid in cls.ASSET_FINDER_FUTURE_SIDS:
@@ -582,6 +595,18 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader,
self.assertEqual(self.START_DATE,
self.session_bar_reader.first_trading_day)
def test_get_last_traded_dt(self):
future = self.asset_finder.retrieve_asset(
self.ASSET_FINDER_FUTURE_SIDS[0]
)
self.assertEqual(
self.trading_calendar.open_and_close_for_session(
self.trading_calendar.previous_session_label(self.END_DATE)
)[1],
self.session_bar_reader.get_last_traded_dt(future, self.END_DATE)
)
class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader,
ZiplineTestCase):
+132
View File
@@ -0,0 +1,132 @@
# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod, abstractproperty
from six import with_metaclass
class NoDataOnDate(Exception):
"""
Raised when a spot price can be found for the sid and date.
"""
pass
class NoDataBeforeDate(Exception):
pass
class NoDataAfterDate(Exception):
pass
class BarReader(with_metaclass(ABCMeta, object)):
@abstractproperty
def data_frequency(self):
pass
@abstractmethod
def load_raw_arrays(self, columns, start_date, end_date, assets):
"""
Parameters
----------
fields : list of str
'open', 'high', 'low', 'close', or 'volume'
start_dt: Timestamp
Beginning of the window range.
end_dt: Timestamp
End of the window range.
sids : list of int
The asset identifiers in the window.
Returns
-------
list of np.ndarray
A list with an entry per field of ndarrays with shape
(minutes in range, sids) with a dtype of float64, containing the
values for the respective field over start and end dt range.
"""
pass
@abstractproperty
def last_available_dt(self):
"""
Returns
-------
dt : pd.Timestamp
The last session for which the reader can provide data.
"""
pass
@abstractproperty
def trading_calendar(self):
"""
Returns the zipline.utils.calendar.trading_calendar used to read
the data. Can be None (if the writer didn't specify it).
"""
pass
@abstractproperty
def first_trading_day(self):
"""
Returns
-------
dt : pd.Timestamp
The first trading day (session) for which the reader can provide
data.
"""
pass
@abstractmethod
def get_value(self, sid, dt, field):
"""
Retrieve the value at the given coordinates.
Parameters
----------
sid : int
The asset identifier.
dt : pd.Timestamp
The timestamp for the desired data point.
field : string
The OHLVC name for the desired data point.
Returns
-------
value : float|int
The value at the given coordinates, ``float`` for OHLC, ``int``
for 'volume'.
"""
pass
@abstractmethod
def get_last_traded_dt(self, asset, dt):
"""
Get the latest minute on or before ``dt`` in which ``asset`` traded.
If there are no trades on or before ``dt``, returns ``pd.NaT``.
Parameters
----------
asset : zipline.asset.Asset
The asset for which to get the last traded minute.
dt : pd.Timestamp
The minute at which to start searching for the last traded minute.
Returns
-------
last_traded : pd.Timestamp
The dt of the last trade for the given asset, using the input
dt as a vantage point.
"""
pass
+4 -100
View File
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod, abstractproperty
import json
import os
from os.path import join
@@ -23,7 +22,6 @@ from bcolz import ctable
from intervaltree import IntervalTree
import numpy as np
import pandas as pd
from six import with_metaclass
from toolz import keymap, valmap
from zipline.data._minute_bar_internal import (
@@ -33,6 +31,8 @@ from zipline.data._minute_bar_internal import (
)
from zipline.gens.sim_engine import NANOS_IN_MINUTE
from zipline.data.bar_reader import BarReader
from zipline.utils.calendars import get_calendar
from zipline.utils.cli import maybe_show_progress
from zipline.utils.memoize import lazyval
@@ -53,106 +53,10 @@ class BcolzMinuteWriterColumnMismatch(Exception):
pass
class MinuteBarReader(with_metaclass(ABCMeta)):
_data_frequency = 'minute'
class MinuteBarReader(BarReader):
@property
def data_frequency(self):
return self._data_frequency
@abstractproperty
def last_available_dt(self):
"""
Returns
-------
dt : pd.Timestamp
The last minute for which the reader can provide data.
"""
pass
@abstractproperty
def first_trading_day(self):
"""
Returns
-------
dt : pd.Timestamp
The first trading day (session) for which the reader can provide
data.
"""
pass
@abstractmethod
def get_value(self, sid, dt, field):
"""
Retrieve the value at the given coordinates.
Parameters
----------
sid : int
The asset identifier.
dt : pd.Timestamp
The minute label for the desired data point.
field : string
The OHLVC name for the desired data point.
Returns
-------
value : float|int
The value at the given coordinates, ``float`` for OHLC, ``int``
for 'volume'.
"""
pass
@abstractmethod
def get_last_traded_dt(self, asset, dt):
"""
Get the latest minute on or before ``dt`` in which ``asset`` traded.
If there are no trades on or before ``dt`` returns ``pd.NaT``
Parameters
----------
asset : zipline.asset.Asset
The asset for which to get the last traded minute.
dt : pd.Timestamp
The minute at which to start searching for the last traded minute.
Returns
-------
last_traded : pd.Timestamp
The minute of the last trade for the given asset, using the input
dt as a vantage point.
"""
pass
@abstractmethod
def load_raw_arrays(self, fields, start_dt, end_dt, sids):
"""
Retrieve the arrays of pricing data for the given coordinates of
``fields`` (OHLCV), minute range [``start_dt``, ``end_dt``] and sids.
Parameters
----------
fields : iterable of str
The OHLCV fields ('open', 'high', 'low', 'close', 'volume') for
which to read data.
start_dt : pd.Timestamp
The first minute of the date range for which to read data.
end_dt : pd.Timestamp
The last minute of the date range for which to read data.
sids : iterable of int
The sid identifiers for which to retrieve data.
Returns
-------
raw_arrays : list of ndarray
A list where each item corresponds with the fields in the order
the fields are given.
Each item is a 2D array with a shape of (minutes_in_range, sids)
The OHLC arrays are floats; the 'volume' array is ints.
"""
pass
return "minute"
def _calc_minute_index(market_opens, minutes_per_day):
+3
View File
@@ -525,6 +525,9 @@ class MinuteResampleSessionBarReader(SessionBarReader):
def first_trading_day(self):
return self._minute_bar_reader.first_trading_day
def get_last_traded_dt(self, asset, dt):
return self._minute_bar_reader.get_last_traded_dt(asset, dt)
class ReindexBarReader(with_metaclass(ABCMeta)):
"""
+5 -92
View File
@@ -11,87 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod, abstractproperty
from six import with_metaclass
from abc import abstractproperty
from zipline.data.bar_reader import BarReader
class NoDataOnDate(Exception):
"""
Raised when a spot price can be found for the sid and date.
"""
pass
class NoDataBeforeDate(Exception):
pass
class NoDataAfterDate(Exception):
pass
class SessionBarReader(with_metaclass(ABCMeta)):
class SessionBarReader(BarReader):
"""
Reader for OHCLV pricing data at a session frequency.
"""
_data_frequency = 'session'
@property
def data_frequency(self):
return self._data_frequency
@abstractmethod
def load_raw_arrays(self, columns, start_date, end_date, assets):
"""
Parameters
----------
fields : list of str
'open', 'high', 'low', 'close', or 'volume'
start_dt: Timestamp
Beginning of the window range.
end_dt: Timestamp
End of the window range.
sids : list of int
The asset identifiers in the window.
Returns
-------
list of np.ndarray
A list with an entry per field of ndarrays with shape
(minutes in range, sids) with a dtype of float64, containing the
values for the respective field over start and end dt range.
"""
pass
@abstractmethod
def get_value(self, sid, session, colname):
"""
Retrieve the value at the given coordinates.
This function shares the same input and semantics as the
``MinuteBarReaders``'s ``get_value``, in the future, the names may be
made consistent.
Parameters
----------
sid : int
The asset identifier.
session : pd.Timestamp
The session label for the desired data point.
colname : string
The OHLVC name for the desired data point.
Returns
-------
value : float|int
The value at the given coordinates, ``float`` for OHLC, ``int``
for 'volume'.
See Also
--------
zipline.minute_bars.MinuteBarReader.get_value
"""
pass
return 'session'
@abstractproperty
def sessions(self):
@@ -103,21 +34,3 @@ class SessionBarReader(with_metaclass(ABCMeta)):
reader can provide.
"""
pass
@abstractproperty
def last_available_dt(self):
"""
Returns
-------
dt : pd.Timestamp
The last session for which the reader can provide data.
"""
pass
@abstractproperty
def trading_calendar(self):
"""
Returns the zipline.utils.calendar.trading_calendar used to read
the data. Can be None (if the writer didn't specify it).
"""
pass
+17 -13
View File
@@ -49,8 +49,8 @@ from six import (
string_types,
)
from zipline.data.session_bars import (
SessionBarReader,
from zipline.data.session_bars import SessionBarReader
from zipline.data.bar_reader import (
NoDataAfterDate,
NoDataBeforeDate,
NoDataOnDate,
@@ -684,7 +684,7 @@ class BcolzDailyBarReader(SessionBarReader):
day, sid))
return ix
def get_value(self, sid, day, colname):
def get_value(self, sid, dt, field):
"""
Parameters
----------
@@ -704,11 +704,11 @@ class BcolzDailyBarReader(SessionBarReader):
Returns -1 if the day is within the date range, but the price is
0.
"""
ix = self.sid_day_index(sid, day)
price = self._spot_col(colname)[ix]
ix = self.sid_day_index(sid, dt)
price = self._spot_col(field)[ix]
if price == 0:
return -1
if colname != 'volume':
if field != 'volume':
return price * 0.001
else:
return price
@@ -747,7 +747,7 @@ class PanelBarReader(SessionBarReader):
panel.loc[:, :, 'volume'] = int(1e9)
self.trading_calendar = trading_calendar
self.first_trading_day = trading_calendar.minute_to_session_label(
self._first_trading_day = trading_calendar.minute_to_session_label(
panel.major_axis[0]
)
last_trading_day = trading_calendar.minute_to_session_label(
@@ -785,7 +785,7 @@ class PanelBarReader(SessionBarReader):
list(columns)
].reindex(major_axis=cal[cal.slice_indexer(start_dt, end_dt)]).values.T
def get_value(self, sid, dt, colname):
def get_value(self, sid, dt, field):
"""
Parameters
----------
@@ -793,7 +793,7 @@ class PanelBarReader(SessionBarReader):
The asset identifier.
day : datetime64-like
Midnight of the day for which data is requested.
colname : string
field : string
The price field. e.g. ('open', 'high', 'low', 'close', 'volume')
Returns
@@ -805,13 +805,13 @@ class PanelBarReader(SessionBarReader):
Returns -1 if the day is within the date range, but the price is
0.
"""
return self.panel.loc[sid, dt, colname]
return self.panel.loc[sid, dt, field]
def get_last_traded_dt(self, sid, dt):
def get_last_traded_dt(self, asset, dt):
"""
Parameters
----------
sid : int
asset : zipline.asset.Asset
The asset identifier.
dt : datetime64-like
Midnight of the day for which data is requested.
@@ -822,10 +822,14 @@ class PanelBarReader(SessionBarReader):
NaT if no trade is found before the given dt.
"""
try:
return self.panel.loc[sid, :dt, 'close'].last_valid_index()
return self.panel.loc[int(asset), :dt, 'close'].last_valid_index()
except IndexError:
return NaT
@property
def first_trading_day(self):
return self._first_trading_day
class SQLiteAdjustmentWriter(object):
"""