ENH: Add initial commit for DataPortal and readers

Moved from the `lazy-mainline` branch,
https://github.com/quantopian/zipline/pull/858

The intent of this patch to provide the basic class and readers
interfaces, developed on that branch, so that the use of creating the
object and opening paths etc. can be tested internally.

Additional changes beyond the lazy-mainline branch, addition of future
minute reader, and daily bar reader.

Also allow an argument of the future_daily_reader, though no such reader
yet exists.

It may be that future and equity readers share an interface, and a
further improvement would be providing an abstract base class.

co-author: @jbredeche <jean@quantopian.com>
This commit is contained in:
Eddie Hebert
2015-12-08 13:23:18 -05:00
parent 84d8cc4d41
commit e5b5023d42
3 changed files with 310 additions and 0 deletions
+190
View File
@@ -0,0 +1,190 @@
#
# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logbook import Logger
log = Logger('DataPortal')
BASE_FIELDS = {
'open': 'open',
'open_price': 'open',
'high': 'high',
'low': 'low',
'close': 'close',
'close_price': 'close',
'volume': 'volume',
'price': 'close'
}
class DataPortal(object):
def __init__(self,
env,
sim_params=None,
equity_daily_reader=None,
equity_minute_reader=None,
future_daily_reader=None,
future_minute_reader=None,
adjustment_reader=None):
self._adjustment_reader = adjustment_reader
self._equity_daily_reader = equity_daily_reader
self._equity_minute_reader = equity_minute_reader
self._future_daily_reader = future_daily_reader
self._future_minute_reader = future_minute_reader
def get_previous_value(self, asset, field, dt, data_frequency):
"""
Given an asset and a column and a dt, returns the previous value for
the same asset/column pair. If this data portal is in minute mode,
it's the previous minute value, otherwise it's the previous day's
value.
Parameters
---------
asset : Asset
The asset whose data is desired.
field: string
The desired field of the asset. Valid values are "open",
"open_price", "high", "low", "close", "close_price", "volume", and
"price".
dt: pd.Timestamp
The timestamp from which to go back in time one slot.
data_frequency: string
The frequency of the data to query; i.e. whether the data is
'daily' or 'minute' bars
Returns
-------
The value of the desired field at the desired time.
"""
NotImplementedError
def get_spot_value(self, asset, field, dt, data_frequency):
"""
Public API method that returns a scalar value representing the value
of the desired asset's field at either the given dt.
Parameters
---------
asset : Asset
The asset whose data is desired.gith
field: string
The desired field of the asset. Valid values are "open",
"open_price", "high", "low", "close", "close_price", "volume", and
"price".
dt: pd.Timestamp
The timestamp for the desired value.
data_frequency: string
The frequency of the data to query; i.e. whether the data is
'daily' or 'minute' bars
Returns
-------
The value of the desired field at the desired time.
"""
NotImplementedError
def get_history_window(self, assets, end_dt, bar_count, frequency, field,
ffill=True):
"""
Public API method that returns a dataframe containing the requested
history window. Data is fully adjusted.
Parameters
---------
assets : list of zipline.data.Asset objects
The assets whose data is desired.
bar_count: int
The number of bars desired.
frequency: string
"1d" or "1m"
field: string
The desired field of the asset.
ffill: boolean
Forward-fill missing values. Only has effect if field
is 'price'.
Returns
-------
A dataframe containing the requested data.
"""
NotImplementedError
def get_splits(self, sids, dt):
"""
Returns any splits for the given sids and the given dt.
Parameters
----------
sids : list
Sids for which we want splits.
dt: pd.Timestamp
The date for which we are checking for splits. Note: this is
expected to be midnight UTC.
Returns
-------
list: List of splits, where each split is a (sid, ratio) tuple.
"""
NotImplementedError
def get_stock_dividends(self, sid, trading_days):
"""
Returns all the stock dividends for a specific sid that occur
in the given trading range.
Parameters
----------
sid: int
The asset whose stock dividends should be returned.
trading_days: pd.DatetimeIndex
The trading range.
Returns
-------
list: A list of objects with all relevant attributes populated.
All timestamp fields are converted to pd.Timestamps.
"""
NotImplementedError
def get_fetcher_assets(self, day):
"""
Returns a list of assets for the current date, as defined by the
fetcher data.
Notes
-----
Data is forward-filled. If there is no fetcher data defined for day
N, we use day N-1's data (if available, otherwise we keep going back).
Returns
-------
list: a list of Asset objects.
"""
NotImplementedError
+27
View File
@@ -0,0 +1,27 @@
# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class FutureDailyReader(object):
"""
Stubbed out. Currently unimplemented.
"""
pass
class FutureMinuteReader(object):
def __init__(self, rootdir, sid_path_func=None):
self.rootdir = rootdir
self.sid_path_func = sid_path_func
+93
View File
@@ -0,0 +1,93 @@
# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bcolz
import json
import os
import pandas as pd
MINUTES_PER_DAY = 390
METADATA_FILENAME = 'metadata.json'
class BcolzMinuteBarReader(object):
def __init__(self, rootdir, sid_path_func=None):
self.rootdir = rootdir
metadata = self._get_metadata()
self.first_trading_day = pd.Timestamp(
metadata['first_trading_day'], tz='UTC')
self._sid_path_func = sid_path_func
self._carrays = {
'open': {},
'high': {},
'low': {},
'close': {},
'volume': {},
'sid': {},
'dt': {},
}
def _get_metadata(self):
with open(os.path.join(self.rootdir, METADATA_FILENAME)) as fp:
return json.load(fp)
def _get_ctable(self, asset):
sid = int(asset)
if self._sid_path_func is not None:
path = self._sid_path_func(self.rootdir, sid)
else:
path = "{0}/{1}.bcolz".format(self.rootdir, sid)
return bcolz.open(path, mode='r')
def _find_position_of_minute(self, minute_dt):
"""
Internal method that returns the position of the given minute in the
list of every trading minute since market open of the first trading
day.
IMPORTANT: This method assumes every day is 390 minutes long, even
early closes. Our minute bcolz files are generated like this to
support fast lookup.
ex. this method would return 2 for 1/2/2002 9:32 AM Eastern, if
1/2/2002 is the first trading day of the dataset.
Parameters
----------
minute_dt: pd.Timestamp
The minute whose position should be calculated.
Returns
-------
The position of the given minute in the list of all trading minutes
since market open on the first trading day.
"""
NotImplementedError
def _open_minute_file(self, field, asset):
sid_str = str(int(asset))
try:
carray = self._carrays[field][sid_str]
except KeyError:
carray = self._carrays[field][sid_str] = \
self._get_ctable(asset)[field]
return carray