MAINT: Use TradingCalendar objects for bundles (#1397)

* MAINT: Use TradingCalendar objects for bundles

Instead of trading days, opens, and closes, register now takes a
TradingCalendar object, along with a start_session and end_session. The
ingest function is now passed these values instead as well.

* Accept calendar name in addition to the actual object

* Updates bundles documentation for changes

* Fix typo in docs

* Use class formatting

* Force start_session and end_session within the bounds of the calendar

* Use UTC timestamps in test_core

* Document Trading Calendar API in appendix.rst
This commit is contained in:
Andrew Daniels
2016-08-17 13:37:07 -04:00
committed by GitHub
parent deac07d264
commit 440806ad60
8 changed files with 142 additions and 75 deletions
+19
View File
@@ -261,6 +261,24 @@ Asset Metadata
.. autoclass:: zipline.assets.AssetConvertible
:members:
Trading Calendar API
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: zipline.utils.calendars.get_calendar
.. autoclass:: zipline.utils.calendars.TradingCalendar
:members:
.. autofunction:: zipline.utils.calendars.register_calendar
.. autofunction:: zipline.utils.calendars.register_calendar_type
.. autofunction:: zipline.utils.calendars.deregister_calendar
.. autofunction:: zipline.utils.calendars.clear_calendars
Data API
~~~~~~~~
@@ -318,6 +336,7 @@ Bundles
.. autofunction:: zipline.data.bundles.yahoo_equities
Utilities
~~~~~~~~~
+17 -3
View File
@@ -216,6 +216,8 @@ The signature of the ingest function should be:
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir)
@@ -300,9 +302,21 @@ have.
``calendar``
````````````
``calendar`` is a ``pandas.DatetimeIndex`` object holding all of the trading
days that the bundle should load data for. The calendar is provided to help some
bundles generate queries for the days needed.
``calendar`` is an instance of
:class:`zipline.utils.calendars.TradingCalendar`. The calendar is provided to
help some bundles generate queries for the days needed.
``start_session``
````````````
``start_session`` is a :class:`pandas.Timestamp` object indicating the first
day that the bundle should load data for.
``end_session``
````````````
``end_session`` is a :class:`pandas.Timestamp` object indicating the last day
that the bundle should load data for.
``cache``
`````````
+25 -20
View File
@@ -30,7 +30,7 @@ from zipline.testing.predicates import (
)
from zipline.utils.cache import dataframe_cache
from zipline.utils.functional import apply
from zipline.utils.calendars import get_calendar
from zipline.utils.calendars import TradingCalendar, get_calendar
import zipline.utils.paths as pth
@@ -96,6 +96,8 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
@@ -111,20 +113,19 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
def test_ingest(self):
start = pd.Timestamp('2014-01-06', tz='utc')
end = pd.Timestamp('2014-01-10', tz='utc')
trading_days = get_calendar('NYSE').all_sessions
calendar = trading_days[trading_days.slice_indexer(start, end)]
minutes = get_calendar('NYSE').minutes_for_sessions_in_range(
calendar[0], calendar[-1]
)
calendar = get_calendar('NYSE')
sessions = calendar.sessions_in_range(start, end)
minutes = calendar.minutes_for_sessions_in_range(start, end)
sids = tuple(range(3))
equities = make_simple_equity_info(
sids,
calendar[0],
calendar[-1],
start,
end,
)
daily_bar_data = make_bar_data(equities, calendar)
daily_bar_data = make_bar_data(equities, sessions)
minute_bar_data = make_bar_data(equities, minutes)
first_split_ratio = 0.5
second_split_ratio = 0.1
@@ -141,13 +142,11 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
},
])
schedule = get_calendar('NYSE').schedule
@self.register(
'bundle',
calendar=calendar,
opens=schedule.market_open[calendar[0]:calendar[-1]],
closes=schedule.market_close[calendar[0]: calendar[-1]],
start_session=start,
end_session=end,
)
def bundle_ingest(environ,
asset_db_writer,
@@ -155,6 +154,8 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
@@ -165,7 +166,7 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
daily_bar_writer.write(daily_bar_data)
adjustment_writer.write(splits=splits)
assert_is_instance(calendar, pd.DatetimeIndex)
assert_is_instance(calendar, TradingCalendar)
assert_is_instance(cache, dataframe_cache)
assert_is_instance(show_progress, bool)
@@ -192,19 +193,19 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
actual = bundle.equity_daily_bar_reader.load_raw_arrays(
columns,
calendar[0],
calendar[-1],
start,
end,
sids,
)
for actual_column, colname in zip(actual, columns):
assert_equal(
actual_column,
expected_bar_values_2d(calendar, equities, colname),
expected_bar_values_2d(sessions, equities, colname),
msg=colname,
)
adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
columns,
calendar,
sessions,
pd.Index(sids),
)
for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
@@ -263,7 +264,7 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
# register but do not ingest data
self.register('bundle', lambda *args: None)
ts = pd.Timestamp('2014')
ts = pd.Timestamp('2014', tz='UTC')
with assert_raises(ValueError) as e:
self.load('bundle', timestamp=ts, environ=self.environ)
@@ -291,13 +292,17 @@ class BundleCoreTestCase(WithInstanceTmpDir, ZiplineTestCase):
"""
if not self.bundles:
@self.register('bundle',
calendar=pd.DatetimeIndex([pd.Timestamp('2014')]))
calendar=get_calendar('NYSE'),
start_session=pd.Timestamp('2014', tz='UTC'),
end_session=pd.Timestamp('2014', tz='UTC'))
def _(environ,
asset_db_writer,
minute_bar_writer,
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
+11 -8
View File
@@ -27,9 +27,10 @@ class QuandlBundleTestCase(ZiplineTestCase):
symbols = 'AAPL', 'BRK_A', 'MSFT', 'ZEN'
asset_start = pd.Timestamp('2014-01', tz='utc')
asset_end = pd.Timestamp('2015-01', tz='utc')
calendar = bundles['quandl'].calendar
start_date = calendar[0]
end_date = calendar[-1]
bundle = bundles['quandl']
calendar = bundle.calendar
start_date = bundle.start_session
end_date = bundle.end_session
api_key = 'ayylmao'
columns = 'open', 'high', 'low', 'close', 'volume'
@@ -87,7 +88,9 @@ class QuandlBundleTestCase(ZiplineTestCase):
yield vs
# the first index our written data will appear in the files on disk
start_idx = self.calendar.get_loc(self.asset_start, 'ffill') + 1
start_idx = (
self.calendar.all_sessions.get_loc(self.asset_start, 'ffill') + 1
)
# convert an index into the raw dataframe into an index into the
# final data
@@ -215,11 +218,11 @@ class QuandlBundleTestCase(ZiplineTestCase):
assert_equal(equity.start_date, self.asset_start, msg=equity)
assert_equal(equity.end_date, self.asset_end, msg=equity)
cal = self.calendar
sessions = self.calendar.all_sessions
actual = bundle.equity_daily_bar_reader.load_raw_arrays(
self.columns,
cal[cal.get_loc(self.asset_start, 'bfill')],
cal[cal.get_loc(self.asset_end, 'ffill')],
sessions[sessions.get_loc(self.asset_start, 'bfill')],
sessions[sessions.get_loc(self.asset_end, 'ffill')],
sids,
)
expected_pricing, expected_adjustments = self._expected_data(
@@ -229,7 +232,7 @@ class QuandlBundleTestCase(ZiplineTestCase):
adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
self.columns,
cal,
sessions,
pd.Index(sids),
)
+9 -10
View File
@@ -20,11 +20,8 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
columns = 'open', 'high', 'low', 'close', 'volume'
asset_start = pd.Timestamp('2014-01-02', tz='utc')
asset_end = pd.Timestamp('2014-12-31', tz='utc')
trading_days = get_calendar('NYSE').all_sessions
calendar = trading_days[
(trading_days >= asset_start) &
(trading_days <= asset_end)
]
calendar = get_calendar('NYSE')
sessions = calendar.sessions_in_range(asset_start, asset_end)
@classmethod
def init_class_fixtures(cls):
@@ -157,11 +154,12 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
adjustments_callback,
)
cal = self.calendar
self.register(
'bundle',
yahoo_equities(self.symbols),
calendar=cal,
calendar=self.calendar,
start_session=self.asset_start,
end_session=self.asset_end,
)
zipline_root = self.enter_instance_context(tmp_dir()).path
@@ -181,10 +179,11 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
assert_equal(equity.start_date, self.asset_start, msg=equity)
assert_equal(equity.end_date, self.asset_end, msg=equity)
sessions = self.sessions
actual = bundle.equity_daily_bar_reader.load_raw_arrays(
self.columns,
cal[cal.get_loc(self.asset_start, 'bfill')],
cal[cal.get_loc(self.asset_end, 'ffill')],
sessions[sessions.get_loc(self.asset_start, 'bfill')],
sessions[sessions.get_loc(self.asset_end, 'ffill')],
sids,
)
expected_pricing, expected_adjustments = self._expected_data()
@@ -192,7 +191,7 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
self.columns,
cal,
self.sessions,
pd.Index(sids),
)
+45 -28
View File
@@ -30,10 +30,6 @@ import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar
nyse_cal = get_calendar('NYSE')
trading_days = nyse_cal.all_sessions
open_and_closes = nyse_cal.schedule
def asset_db_path(bundle_name, timestr, environ=None):
return pth.data_path(
@@ -125,7 +121,7 @@ def from_bundle_ingest_dirname(cs):
_BundlePayload = namedtuple(
'_BundlePayload',
'calendar opens closes minutes_per_day ingest create_writers',
'calendar start_session end_session minutes_per_day ingest create_writers',
)
BundleData = namedtuple(
@@ -210,9 +206,9 @@ def _make_bundle_core():
@curry
def register(name,
f,
calendar=trading_days,
opens=open_and_closes['market_open'],
closes=open_and_closes['market_close'],
calendar='NYSE',
start_session=None,
end_session=None,
minutes_per_day=390,
create_writers=True):
"""Register a data bundle ingest function.
@@ -234,8 +230,12 @@ def _make_bundle_core():
The daily bar writer to write into.
adjustment_writer : SQLiteAdjustmentWriter
The adjustment db writer to write into.
calendar : pd.DatetimeIndex
calendar : zipline.utils.calendars.TradingCalendar
The trading calendar to ingest for.
start_session : pd.Timestamp
The first session of data to ingest.
end_session : pd.Timestamp
The last session of data to ingest.
cache : DataFrameCache
A mapping object to temporarily store dataframes.
This should be used to cache intermediates in case the load
@@ -243,15 +243,18 @@ def _make_bundle_core():
successful load.
show_progress : bool
Show the progress for the current load where possible.
calendar : pd.DatetimeIndex, optional
The exchange calendar to align the data to. This defaults to the
NYSE calendar.
market_open : pd.DatetimeIndex, optional
The minute when the market opens each day. This defaults to the
NYSE calendar.
market_close : pd.DatetimeIndex, optional
The minute when the market closes each day. This defaults to the
NYSE calendar.
calendar : zipline.utils.calendars.TradingCalendar or str, optional
The trading calendar to align the data to, or the name of a trading
calendar. This defaults to 'NYSE', in which case we use the NYSE
calendar.
start_session : pd.Timestamp, optional
The first session for which we want data. If not provided,
or if the date lies outside the range supported by the
calendar, the first_session of the calendar is used.
end_session : pd.Timestamp, optional
The last session for which we want data. If not provided,
or if the date lies outside the range supported by the
calendar, the last_session of the calendar is used.
minutes_per_day : int, optional
The number of minutes in each normal trading day.
create_writers : bool, optional
@@ -278,10 +281,22 @@ def _make_bundle_core():
'Overwriting bundle with name %r' % name,
stacklevel=3,
)
if isinstance(calendar, str):
calendar = get_calendar(calendar)
# If the start and end sessions are not provided or lie outside
# the bounds of the calendar being used, set them to the first
# and last sessions of the calendar.
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session
_bundles[name] = _BundlePayload(
calendar,
opens,
closes,
start_session,
end_session,
minutes_per_day,
f,
create_writers,
@@ -355,9 +370,9 @@ def _make_bundle_core():
)
daily_bar_writer = BcolzDailyBarWriter(
daily_bars_path,
nyse_cal,
bundle.calendar[0],
bundle.calendar[-1]
bundle.calendar,
bundle.start_session,
bundle.end_session,
)
# Do an empty write to ensure that the daily ctables exist
# when we create the SQLiteAdjustmentWriter below. The
@@ -366,12 +381,12 @@ def _make_bundle_core():
daily_bar_writer.write(())
minute_bar_writer = BcolzMinuteBarWriter(
bundle.calendar[0],
bundle.start_session,
wd.ensure_dir(*minute_equity_relative(
name, timestr, environ=environ)
),
bundle.opens,
bundle.closes,
bundle.calendar.schedule['market_open'],
bundle.calendar.schedule['market_close'],
minutes_per_day=bundle.minutes_per_day,
)
asset_db_writer = AssetDBWriter(
@@ -385,7 +400,7 @@ def _make_bundle_core():
wd.getpath(*adjustment_db_relative(
name, timestr, environ=environ)),
BcolzDailyBarReader(daily_bars_path),
bundle.calendar,
bundle.calendar.all_sessions,
overwrite=True,
)
)
@@ -401,6 +416,8 @@ def _make_bundle_core():
daily_bar_writer,
adjustment_db_writer,
bundle.calendar,
bundle.start_session,
bundle.end_session,
cache,
show_progress,
pth.data_path([name, timestr], environ=environ),
@@ -471,7 +488,7 @@ def _make_bundle_core():
asset_db_path(name, timestr, environ=environ),
),
equity_minute_bar_reader=BcolzMinuteBarReader(
minute_equity_path(name, timestr, environ=environ),
minute_equity_path(name, timestr, environ=environ),
),
equity_daily_bar_reader=BcolzDailyBarReader(
daily_equity_path(name, timestr, environ=environ),
+13 -5
View File
@@ -234,11 +234,11 @@ def gen_symbol_data(api_key,
cache,
symbol_map,
calendar,
start_session,
end_session,
splits,
dividends,
retries):
start_date = calendar[0]
end_date = calendar[-1]
for asset_id, symbol in symbol_map.iteritems():
start_time = time()
try:
@@ -250,16 +250,18 @@ def gen_symbol_data(api_key,
raw_data = cache[symbol] = fetch_single_equity(
api_key,
symbol,
start_date=start_date,
end_date=end_date,
start_date=start_session,
end_date=end_session,
)
should_sleep = True
_update_splits(splits, asset_id, raw_data)
_update_dividends(dividends, asset_id, raw_data)
sessions = calendar.sessions_in_range(start_session, end_session)
raw_data = raw_data.reindex(
calendar.tz_localize(None),
sessions.tz_localize(None),
copy=False,
).fillna(0.0)
yield asset_id, raw_data
@@ -277,6 +279,8 @@ def quandl_bundle(environ,
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
@@ -301,6 +305,8 @@ def quandl_bundle(environ,
cache,
symbol_map,
calendar,
start_session,
end_session,
splits,
dividends,
environ.get('QUANDL_DOWNLOAD_ATTEMPTS', 5),
@@ -378,6 +384,8 @@ def quantopian_quandl_bundle(environ,
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir):
+3 -1
View File
@@ -61,6 +61,8 @@ def yahoo_equities(symbols, start=None, end=None):
daily_bar_writer,
adjustment_writer,
calendar,
start_session,
end_session,
cache,
show_progress,
output_dir,
@@ -68,7 +70,7 @@ def yahoo_equities(symbols, start=None, end=None):
start=start,
end=end):
if start is None:
start = calendar[0]
start = start_session
if end is None:
end = None