diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py new file mode 100644 index 00000000..d691fc5b --- /dev/null +++ b/zipline/data/data_portal.py @@ -0,0 +1,190 @@ +# +# Copyright 2015 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logbook import Logger + +log = Logger('DataPortal') + +BASE_FIELDS = { + 'open': 'open', + 'open_price': 'open', + 'high': 'high', + 'low': 'low', + 'close': 'close', + 'close_price': 'close', + 'volume': 'volume', + 'price': 'close' +} + + +class DataPortal(object): + def __init__(self, + env, + sim_params=None, + equity_daily_reader=None, + equity_minute_reader=None, + future_daily_reader=None, + future_minute_reader=None, + adjustment_reader=None): + + self._adjustment_reader = adjustment_reader + + self._equity_daily_reader = equity_daily_reader + self._equity_minute_reader = equity_minute_reader + self._future_daily_reader = future_daily_reader + self._future_minute_reader = future_minute_reader + + def get_previous_value(self, asset, field, dt, data_frequency): + """ + Given an asset and a column and a dt, returns the previous value for + the same asset/column pair. If this data portal is in minute mode, + it's the previous minute value, otherwise it's the previous day's + value. + + Parameters + --------- + asset : Asset + The asset whose data is desired. + + field: string + The desired field of the asset. Valid values are "open", + "open_price", "high", "low", "close", "close_price", "volume", and + "price". + + dt: pd.Timestamp + The timestamp from which to go back in time one slot. + + data_frequency: string + The frequency of the data to query; i.e. whether the data is + 'daily' or 'minute' bars + + Returns + ------- + The value of the desired field at the desired time. + """ + NotImplementedError + + def get_spot_value(self, asset, field, dt, data_frequency): + """ + Public API method that returns a scalar value representing the value + of the desired asset's field at either the given dt. + + Parameters + --------- + asset : Asset + The asset whose data is desired.gith + + field: string + The desired field of the asset. Valid values are "open", + "open_price", "high", "low", "close", "close_price", "volume", and + "price". + + dt: pd.Timestamp + The timestamp for the desired value. + + data_frequency: string + The frequency of the data to query; i.e. whether the data is + 'daily' or 'minute' bars + + Returns + ------- + The value of the desired field at the desired time. + """ + NotImplementedError + + def get_history_window(self, assets, end_dt, bar_count, frequency, field, + ffill=True): + """ + Public API method that returns a dataframe containing the requested + history window. Data is fully adjusted. + + Parameters + --------- + assets : list of zipline.data.Asset objects + The assets whose data is desired. + + bar_count: int + The number of bars desired. + + frequency: string + "1d" or "1m" + + field: string + The desired field of the asset. + + ffill: boolean + Forward-fill missing values. Only has effect if field + is 'price'. + + Returns + ------- + A dataframe containing the requested data. + """ + NotImplementedError + + def get_splits(self, sids, dt): + """ + Returns any splits for the given sids and the given dt. + + Parameters + ---------- + sids : list + Sids for which we want splits. + + dt: pd.Timestamp + The date for which we are checking for splits. Note: this is + expected to be midnight UTC. + + Returns + ------- + list: List of splits, where each split is a (sid, ratio) tuple. + """ + NotImplementedError + + def get_stock_dividends(self, sid, trading_days): + """ + Returns all the stock dividends for a specific sid that occur + in the given trading range. + + Parameters + ---------- + sid: int + The asset whose stock dividends should be returned. + + trading_days: pd.DatetimeIndex + The trading range. + + Returns + ------- + list: A list of objects with all relevant attributes populated. + All timestamp fields are converted to pd.Timestamps. + """ + NotImplementedError + + def get_fetcher_assets(self, day): + """ + Returns a list of assets for the current date, as defined by the + fetcher data. + + Notes + ----- + Data is forward-filled. If there is no fetcher data defined for day + N, we use day N-1's data (if available, otherwise we keep going back). + + Returns + ------- + list: a list of Asset objects. + """ + NotImplementedError diff --git a/zipline/data/future_pricing.py b/zipline/data/future_pricing.py new file mode 100644 index 00000000..a485f78c --- /dev/null +++ b/zipline/data/future_pricing.py @@ -0,0 +1,27 @@ +# Copyright 2015 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class FutureDailyReader(object): + """ + Stubbed out. Currently unimplemented. + """ + pass + + +class FutureMinuteReader(object): + + def __init__(self, rootdir, sid_path_func=None): + self.rootdir = rootdir + self.sid_path_func = sid_path_func diff --git a/zipline/data/us_equity_minutes.py b/zipline/data/us_equity_minutes.py new file mode 100644 index 00000000..c57036b8 --- /dev/null +++ b/zipline/data/us_equity_minutes.py @@ -0,0 +1,93 @@ +# Copyright 2015 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import bcolz +import json +import os +import pandas as pd + +MINUTES_PER_DAY = 390 + +METADATA_FILENAME = 'metadata.json' + + +class BcolzMinuteBarReader(object): + + def __init__(self, rootdir, sid_path_func=None): + self.rootdir = rootdir + + metadata = self._get_metadata() + + self.first_trading_day = pd.Timestamp( + metadata['first_trading_day'], tz='UTC') + + self._sid_path_func = sid_path_func + + self._carrays = { + 'open': {}, + 'high': {}, + 'low': {}, + 'close': {}, + 'volume': {}, + 'sid': {}, + 'dt': {}, + } + + def _get_metadata(self): + with open(os.path.join(self.rootdir, METADATA_FILENAME)) as fp: + return json.load(fp) + + def _get_ctable(self, asset): + sid = int(asset) + if self._sid_path_func is not None: + path = self._sid_path_func(self.rootdir, sid) + else: + path = "{0}/{1}.bcolz".format(self.rootdir, sid) + + return bcolz.open(path, mode='r') + + def _find_position_of_minute(self, minute_dt): + """ + Internal method that returns the position of the given minute in the + list of every trading minute since market open of the first trading + day. + + IMPORTANT: This method assumes every day is 390 minutes long, even + early closes. Our minute bcolz files are generated like this to + support fast lookup. + + ex. this method would return 2 for 1/2/2002 9:32 AM Eastern, if + 1/2/2002 is the first trading day of the dataset. + + Parameters + ---------- + minute_dt: pd.Timestamp + The minute whose position should be calculated. + + Returns + ------- + The position of the given minute in the list of all trading minutes + since market open on the first trading day. + """ + NotImplementedError + + def _open_minute_file(self, field, asset): + sid_str = str(int(asset)) + + try: + carray = self._carrays[field][sid_str] + except KeyError: + carray = self._carrays[field][sid_str] = \ + self._get_ctable(asset)[field] + + return carray