From 2856fd0ecf0c266d46294ddda73f01fd84420b2f Mon Sep 17 00:00:00 2001 From: Jean Bredeche Date: Wed, 14 Sep 2016 10:31:52 -0400 Subject: [PATCH] MAINT: Add BarReader base class for both minute and session readers --- tests/data/test_resample.py | 25 ++++++ zipline/data/bar_reader.py | 132 ++++++++++++++++++++++++++++++ zipline/data/minute_bars.py | 104 +---------------------- zipline/data/resample.py | 3 + zipline/data/session_bars.py | 97 ++-------------------- zipline/data/us_equity_pricing.py | 30 ++++--- 6 files changed, 186 insertions(+), 205 deletions(-) create mode 100644 zipline/data/bar_reader.py diff --git a/tests/data/test_resample.py b/tests/data/test_resample.py index 4e10af16..035354af 100644 --- a/tests/data/test_resample.py +++ b/tests/data/test_resample.py @@ -518,6 +518,19 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader, END_DATE = pd.Timestamp('2016-03-17', tz='UTC') NUM_SESSIONS = 2 + @classmethod + def make_futures_info(cls): + future_dict = {} + + for future_sid in cls.ASSET_FINDER_FUTURE_SIDS: + future_dict[future_sid] = { + 'multiplier': 1000, + 'exchange': 'CME', + 'root_symbol': "ABC" + } + + return pd.DataFrame.from_dict(future_dict, orient='index') + @classmethod def make_future_minute_bar_data(cls): for sid in cls.ASSET_FINDER_FUTURE_SIDS: @@ -582,6 +595,18 @@ class TestResampleSessionBars(WithBcolzFutureMinuteBarReader, self.assertEqual(self.START_DATE, self.session_bar_reader.first_trading_day) + def test_get_last_traded_dt(self): + future = self.asset_finder.retrieve_asset( + self.ASSET_FINDER_FUTURE_SIDS[0] + ) + + self.assertEqual( + self.trading_calendar.open_and_close_for_session( + self.trading_calendar.previous_session_label(self.END_DATE) + )[1], + self.session_bar_reader.get_last_traded_dt(future, self.END_DATE) + ) + class TestReindexMinuteBars(WithBcolzEquityMinuteBarReader, ZiplineTestCase): diff --git a/zipline/data/bar_reader.py b/zipline/data/bar_reader.py new file mode 100644 index 00000000..386da2f9 --- /dev/null +++ b/zipline/data/bar_reader.py @@ -0,0 +1,132 @@ +# Copyright 2016 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABCMeta, abstractmethod, abstractproperty +from six import with_metaclass + + +class NoDataOnDate(Exception): + """ + Raised when a spot price can be found for the sid and date. + """ + pass + + +class NoDataBeforeDate(Exception): + pass + + +class NoDataAfterDate(Exception): + pass + + +class BarReader(with_metaclass(ABCMeta, object)): + @abstractproperty + def data_frequency(self): + pass + + @abstractmethod + def load_raw_arrays(self, columns, start_date, end_date, assets): + """ + Parameters + ---------- + fields : list of str + 'open', 'high', 'low', 'close', or 'volume' + start_dt: Timestamp + Beginning of the window range. + end_dt: Timestamp + End of the window range. + sids : list of int + The asset identifiers in the window. + + Returns + ------- + list of np.ndarray + A list with an entry per field of ndarrays with shape + (minutes in range, sids) with a dtype of float64, containing the + values for the respective field over start and end dt range. + """ + pass + + @abstractproperty + def last_available_dt(self): + """ + Returns + ------- + dt : pd.Timestamp + The last session for which the reader can provide data. + """ + pass + + @abstractproperty + def trading_calendar(self): + """ + Returns the zipline.utils.calendar.trading_calendar used to read + the data. Can be None (if the writer didn't specify it). + """ + pass + + @abstractproperty + def first_trading_day(self): + """ + Returns + ------- + dt : pd.Timestamp + The first trading day (session) for which the reader can provide + data. + """ + pass + + @abstractmethod + def get_value(self, sid, dt, field): + """ + Retrieve the value at the given coordinates. + + Parameters + ---------- + sid : int + The asset identifier. + dt : pd.Timestamp + The timestamp for the desired data point. + field : string + The OHLVC name for the desired data point. + + Returns + ------- + value : float|int + The value at the given coordinates, ``float`` for OHLC, ``int`` + for 'volume'. + """ + pass + + @abstractmethod + def get_last_traded_dt(self, asset, dt): + """ + Get the latest minute on or before ``dt`` in which ``asset`` traded. + + If there are no trades on or before ``dt``, returns ``pd.NaT``. + + Parameters + ---------- + asset : zipline.asset.Asset + The asset for which to get the last traded minute. + dt : pd.Timestamp + The minute at which to start searching for the last traded minute. + + Returns + ------- + last_traded : pd.Timestamp + The dt of the last trade for the given asset, using the input + dt as a vantage point. + """ + pass diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index d85c48aa..8c8947ab 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from abc import ABCMeta, abstractmethod, abstractproperty import json import os from os.path import join @@ -23,7 +22,6 @@ from bcolz import ctable from intervaltree import IntervalTree import numpy as np import pandas as pd -from six import with_metaclass from toolz import keymap, valmap from zipline.data._minute_bar_internal import ( @@ -33,6 +31,8 @@ from zipline.data._minute_bar_internal import ( ) from zipline.gens.sim_engine import NANOS_IN_MINUTE + +from zipline.data.bar_reader import BarReader from zipline.utils.calendars import get_calendar from zipline.utils.cli import maybe_show_progress from zipline.utils.memoize import lazyval @@ -53,106 +53,10 @@ class BcolzMinuteWriterColumnMismatch(Exception): pass -class MinuteBarReader(with_metaclass(ABCMeta)): - - _data_frequency = 'minute' - +class MinuteBarReader(BarReader): @property def data_frequency(self): - return self._data_frequency - - @abstractproperty - def last_available_dt(self): - """ - Returns - ------- - dt : pd.Timestamp - The last minute for which the reader can provide data. - """ - pass - - @abstractproperty - def first_trading_day(self): - """ - Returns - ------- - dt : pd.Timestamp - The first trading day (session) for which the reader can provide - data. - """ - pass - - @abstractmethod - def get_value(self, sid, dt, field): - """ - Retrieve the value at the given coordinates. - - Parameters - ---------- - sid : int - The asset identifier. - dt : pd.Timestamp - The minute label for the desired data point. - field : string - The OHLVC name for the desired data point. - - Returns - ------- - value : float|int - The value at the given coordinates, ``float`` for OHLC, ``int`` - for 'volume'. - """ - pass - - @abstractmethod - def get_last_traded_dt(self, asset, dt): - """ - Get the latest minute on or before ``dt`` in which ``asset`` traded. - - If there are no trades on or before ``dt`` returns ``pd.NaT`` - - Parameters - ---------- - asset : zipline.asset.Asset - The asset for which to get the last traded minute. - dt : pd.Timestamp - The minute at which to start searching for the last traded minute. - - Returns - ------- - last_traded : pd.Timestamp - The minute of the last trade for the given asset, using the input - dt as a vantage point. - """ - pass - - @abstractmethod - def load_raw_arrays(self, fields, start_dt, end_dt, sids): - """ - Retrieve the arrays of pricing data for the given coordinates of - ``fields`` (OHLCV), minute range [``start_dt``, ``end_dt``] and sids. - - Parameters - ---------- - fields : iterable of str - The OHLCV fields ('open', 'high', 'low', 'close', 'volume') for - which to read data. - start_dt : pd.Timestamp - The first minute of the date range for which to read data. - end_dt : pd.Timestamp - The last minute of the date range for which to read data. - sids : iterable of int - The sid identifiers for which to retrieve data. - - Returns - ------- - raw_arrays : list of ndarray - A list where each item corresponds with the fields in the order - the fields are given. - Each item is a 2D array with a shape of (minutes_in_range, sids) - The OHLC arrays are floats; the 'volume' array is ints. - """ - pass + return "minute" def _calc_minute_index(market_opens, minutes_per_day): diff --git a/zipline/data/resample.py b/zipline/data/resample.py index bb0551c8..339eb82b 100644 --- a/zipline/data/resample.py +++ b/zipline/data/resample.py @@ -525,6 +525,9 @@ class MinuteResampleSessionBarReader(SessionBarReader): def first_trading_day(self): return self._minute_bar_reader.first_trading_day + def get_last_traded_dt(self, asset, dt): + return self._minute_bar_reader.get_last_traded_dt(asset, dt) + class ReindexBarReader(with_metaclass(ABCMeta)): """ diff --git a/zipline/data/session_bars.py b/zipline/data/session_bars.py index fb0585bf..cac3fa0f 100644 --- a/zipline/data/session_bars.py +++ b/zipline/data/session_bars.py @@ -11,87 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from abc import ABCMeta, abstractmethod, abstractproperty -from six import with_metaclass +from abc import abstractproperty + +from zipline.data.bar_reader import BarReader -class NoDataOnDate(Exception): - """ - Raised when a spot price can be found for the sid and date. - """ - pass - - -class NoDataBeforeDate(Exception): - pass - - -class NoDataAfterDate(Exception): - pass - - -class SessionBarReader(with_metaclass(ABCMeta)): +class SessionBarReader(BarReader): """ Reader for OHCLV pricing data at a session frequency. """ - _data_frequency = 'session' - @property def data_frequency(self): - return self._data_frequency - - @abstractmethod - def load_raw_arrays(self, columns, start_date, end_date, assets): - """ - Parameters - ---------- - fields : list of str - 'open', 'high', 'low', 'close', or 'volume' - start_dt: Timestamp - Beginning of the window range. - end_dt: Timestamp - End of the window range. - sids : list of int - The asset identifiers in the window. - - Returns - ------- - list of np.ndarray - A list with an entry per field of ndarrays with shape - (minutes in range, sids) with a dtype of float64, containing the - values for the respective field over start and end dt range. - """ - pass - - @abstractmethod - def get_value(self, sid, session, colname): - """ - Retrieve the value at the given coordinates. - - This function shares the same input and semantics as the - ``MinuteBarReaders``'s ``get_value``, in the future, the names may be - made consistent. - - Parameters - ---------- - sid : int - The asset identifier. - session : pd.Timestamp - The session label for the desired data point. - colname : string - The OHLVC name for the desired data point. - - Returns - ------- - value : float|int - The value at the given coordinates, ``float`` for OHLC, ``int`` - for 'volume'. - - See Also - -------- - zipline.minute_bars.MinuteBarReader.get_value - """ - pass + return 'session' @abstractproperty def sessions(self): @@ -103,21 +34,3 @@ class SessionBarReader(with_metaclass(ABCMeta)): reader can provide. """ pass - - @abstractproperty - def last_available_dt(self): - """ - Returns - ------- - dt : pd.Timestamp - The last session for which the reader can provide data. - """ - pass - - @abstractproperty - def trading_calendar(self): - """ - Returns the zipline.utils.calendar.trading_calendar used to read - the data. Can be None (if the writer didn't specify it). - """ - pass diff --git a/zipline/data/us_equity_pricing.py b/zipline/data/us_equity_pricing.py index dc76dc3d..16771cc1 100644 --- a/zipline/data/us_equity_pricing.py +++ b/zipline/data/us_equity_pricing.py @@ -49,8 +49,8 @@ from six import ( string_types, ) -from zipline.data.session_bars import ( - SessionBarReader, +from zipline.data.session_bars import SessionBarReader +from zipline.data.bar_reader import ( NoDataAfterDate, NoDataBeforeDate, NoDataOnDate, @@ -684,7 +684,7 @@ class BcolzDailyBarReader(SessionBarReader): day, sid)) return ix - def get_value(self, sid, day, colname): + def get_value(self, sid, dt, field): """ Parameters ---------- @@ -704,11 +704,11 @@ class BcolzDailyBarReader(SessionBarReader): Returns -1 if the day is within the date range, but the price is 0. """ - ix = self.sid_day_index(sid, day) - price = self._spot_col(colname)[ix] + ix = self.sid_day_index(sid, dt) + price = self._spot_col(field)[ix] if price == 0: return -1 - if colname != 'volume': + if field != 'volume': return price * 0.001 else: return price @@ -747,7 +747,7 @@ class PanelBarReader(SessionBarReader): panel.loc[:, :, 'volume'] = int(1e9) self.trading_calendar = trading_calendar - self.first_trading_day = trading_calendar.minute_to_session_label( + self._first_trading_day = trading_calendar.minute_to_session_label( panel.major_axis[0] ) last_trading_day = trading_calendar.minute_to_session_label( @@ -785,7 +785,7 @@ class PanelBarReader(SessionBarReader): list(columns) ].reindex(major_axis=cal[cal.slice_indexer(start_dt, end_dt)]).values.T - def get_value(self, sid, dt, colname): + def get_value(self, sid, dt, field): """ Parameters ---------- @@ -793,7 +793,7 @@ class PanelBarReader(SessionBarReader): The asset identifier. day : datetime64-like Midnight of the day for which data is requested. - colname : string + field : string The price field. e.g. ('open', 'high', 'low', 'close', 'volume') Returns @@ -805,13 +805,13 @@ class PanelBarReader(SessionBarReader): Returns -1 if the day is within the date range, but the price is 0. """ - return self.panel.loc[sid, dt, colname] + return self.panel.loc[sid, dt, field] - def get_last_traded_dt(self, sid, dt): + def get_last_traded_dt(self, asset, dt): """ Parameters ---------- - sid : int + asset : zipline.asset.Asset The asset identifier. dt : datetime64-like Midnight of the day for which data is requested. @@ -822,10 +822,14 @@ class PanelBarReader(SessionBarReader): NaT if no trade is found before the given dt. """ try: - return self.panel.loc[sid, :dt, 'close'].last_valid_index() + return self.panel.loc[int(asset), :dt, 'close'].last_valid_index() except IndexError: return NaT + @property + def first_trading_day(self): + return self._first_trading_day + class SQLiteAdjustmentWriter(object): """