Merge pull request #1471 from quantopian/fix-slow-startup

PERF: Remove import-time calendar creations.
This commit is contained in:
Scott Sanderson
2016-09-08 10:21:00 -04:00
committed by GitHub
13 changed files with 345 additions and 109 deletions
@@ -0,0 +1,95 @@
"""
Tests for TradingCalendarDispatcher.
"""
from zipline.errors import (
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.testing import ZiplineTestCase
from zipline.utils.calendars.calendar_utils import TradingCalendarDispatcher
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar
class CalendarAliasTestCase(ZiplineTestCase):
    """
    Exercise alias registration, resolution and removal on a
    TradingCalendarDispatcher that starts with one real calendar ('ICE')
    and a two-step alias chain pointing at it.
    """

    @classmethod
    def init_class_fixtures(cls):
        super(CalendarAliasTestCase, cls).init_class_fixtures()
        # Build the ICE calendar a single time for the whole class; the
        # per-test dispatchers below all share this one instance.
        cls.dispatcher_kwargs = {
            'calendars': {'ICE': ICEExchangeCalendar()},
            'calendar_factories': {},
            'aliases': {
                'ICE_ALIAS': 'ICE',
                'ICE_ALIAS_ALIAS': 'ICE_ALIAS',
            },
        }

    def init_instance_fixtures(self):
        super(CalendarAliasTestCase, self).init_instance_fixtures()
        # Hand every test its own copies of the registry dicts so that a test
        # which mutates the dispatcher cannot leak state into another test.
        fresh_kwargs = {
            key: registry.copy()
            for key, registry in self.dispatcher_kwargs.items()
        }
        self.dispatcher = TradingCalendarDispatcher(**fresh_kwargs)

    def test_follow_alias_chain(self):
        lookup = self.dispatcher.get_calendar
        # Both the direct alias and the alias-of-an-alias must resolve to the
        # very same calendar object as the canonical name.
        for alias in ('ICE_ALIAS', 'ICE_ALIAS_ALIAS'):
            self.assertIs(lookup(alias), lookup('ICE'))

    def test_add_new_aliases(self):
        dispatcher = self.dispatcher

        # Unknown until it is registered.
        with self.assertRaises(InvalidCalendarName):
            dispatcher.get_calendar('NOT_ICE')

        dispatcher.register_calendar_alias('NOT_ICE', 'ICE')
        self.assertIs(
            dispatcher.get_calendar('NOT_ICE'),
            dispatcher.get_calendar('ICE'),
        )

        # Extending the existing chain by one more link also works.
        dispatcher.register_calendar_alias(
            'ICE_ALIAS_ALIAS_ALIAS',
            'ICE_ALIAS_ALIAS'
        )
        self.assertIs(
            dispatcher.get_calendar('ICE_ALIAS_ALIAS_ALIAS'),
            dispatcher.get_calendar('ICE'),
        )

    def test_remove_aliases(self):
        self.dispatcher.deregister_calendar('ICE_ALIAS_ALIAS')
        with self.assertRaises(InvalidCalendarName):
            self.dispatcher.get_calendar('ICE_ALIAS_ALIAS')

    def test_reject_alias_that_already_exists(self):
        register = self.dispatcher.register_calendar_alias

        # Colliding with a registered calendar name.
        with self.assertRaises(CalendarNameCollision):
            register('ICE', 'NOT_ICE')

        # Colliding with an already-registered alias.
        with self.assertRaises(CalendarNameCollision):
            register('ICE_ALIAS', 'NOT_ICE')

    def test_allow_alias_override_with_force(self):
        # Forcing replaces the 'ICE' calendar with an alias to a name that
        # does not exist, so looking up 'ICE' must now fail.
        self.dispatcher.register_calendar_alias('ICE', 'NOT_ICE', force=True)
        with self.assertRaises(InvalidCalendarName):
            self.dispatcher.get_calendar('ICE')

    def test_reject_cyclic_aliases(self):
        add_alias = self.dispatcher.register_calendar_alias

        add_alias('A', 'B')
        add_alias('B', 'C')

        # Closing the loop C -> A must be refused.
        with self.assertRaises(CyclicCalendarAlias) as caught:
            add_alias('C', 'A')

        expected = "Cycle in calendar aliases: ['C' -> 'A' -> 'B' -> 'C']"
        self.assertEqual(str(caught.exception), expected)
+8 -3
View File
@@ -39,8 +39,12 @@ from zipline.utils.calendars import(
deregister_calendar,
get_calendar,
)
from zipline.utils.calendars.calendar_utils import register_calendar_type, \
_default_calendar_factories
from zipline.utils.calendars.calendar_utils import (
_default_calendar_aliases,
_default_calendar_factories,
register_calendar_type,
)
from zipline.utils.calendars.trading_calendar import days_at_time, \
TradingCalendar
@@ -123,7 +127,8 @@ class CalendarRegistrationTestCase(TestCase):
class DefaultsTestCase(TestCase):
def test_default_calendars(self):
for name in concat(_default_calendar_factories):
for name in concat([_default_calendar_factories,
_default_calendar_aliases]):
self.assertIsNotNone(get_calendar(name),
"get_calendar(%r) returned None" % name)
+2 -2
View File
@@ -155,7 +155,7 @@ class BundleCoreTestCase(WithInstanceTmpDir,
@self.register(
'bundle',
calendar=calendar,
calendar_name='NYSE',
start_session=self.START_DATE,
end_session=self.END_DATE,
)
@@ -369,7 +369,7 @@ class BundleCoreTestCase(WithInstanceTmpDir,
"""
if not self.bundles:
@self.register('bundle',
calendar=get_calendar('NYSE'),
calendar_name='NYSE',
start_session=pd.Timestamp('2014', tz='UTC'),
end_session=pd.Timestamp('2014', tz='UTC'))
def _(environ,
+4 -3
View File
@@ -5,6 +5,7 @@ import pandas as pd
from toolz import merge
import toolz.curried.operator as op
from zipline import get_calendar
from zipline.data.bundles import ingest, load, bundles
from zipline.data.bundles.quandl import (
format_wiki_url,
@@ -28,9 +29,9 @@ class QuandlBundleTestCase(ZiplineTestCase):
asset_start = pd.Timestamp('2014-01', tz='utc')
asset_end = pd.Timestamp('2015-01', tz='utc')
bundle = bundles['quandl']
calendar = bundle.calendar
start_date = bundle.start_session
end_date = bundle.end_session
calendar = get_calendar(bundle.calendar_name)
start_date = calendar.first_session
end_date = calendar.last_session
api_key = 'ayylmao'
columns = 'open', 'high', 'low', 'close', 'volume'
+1 -1
View File
@@ -157,7 +157,7 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
self.register(
'bundle',
yahoo_equities(self.symbols),
calendar=self.calendar,
calendar_name='NYSE',
start_session=self.asset_start,
end_session=self.asset_end,
)
+18
View File
@@ -38,12 +38,15 @@ from . import data
from . import finance
from . import gens
from . import utils
from .utils.calendars import get_calendar
from .utils.run_algo import run_algorithm
from ._version import get_versions
# These need to happen after the other imports.
from . algorithm import TradingAlgorithm
from . import api
__version__ = get_versions()['version']
del get_versions
@@ -53,11 +56,26 @@ def load_ipython_extension(ipython):
ipython.register_magic_function(zipline_magic, 'line_cell', 'zipline')
# PERF: Fire a warning if calendars were instantiated during zipline import.
# Having calendars doesn't break anything per se, but it makes zipline imports
# noticeably slower, which is particularly apparent in the Zipline CLI.
from zipline.utils.calendars.calendar_utils import global_calendar_dispatcher
if global_calendar_dispatcher._calendars:
import warnings
warnings.warn(
"Found TradingCalendar instances after zipline import.\n"
"Zipline startup will be much slower until this is fixed!",
)
del warnings
del global_calendar_dispatcher
__all__ = [
'TradingAlgorithm',
'api',
'data',
'finance',
'get_calendar',
'gens',
'run_algorithm',
'utils',
+46 -41
View File
@@ -7,7 +7,6 @@ import warnings
from contextlib2 import ExitStack
import click
import pandas as pd
from six import string_types
from toolz import curry, complement, take
from ..us_equity_pricing import (
@@ -31,7 +30,7 @@ from zipline.utils.compat import mappingproxy
from zipline.utils.input_validation import ensure_timestamp, optionally
import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar
from zipline.utils.calendars import get_calendar
def asset_db_path(bundle_name, timestr, environ=None, db_version=None):
@@ -133,9 +132,14 @@ def ingestions_for_bundle(bundle, environ=None):
)
_BundlePayload = namedtuple(
'_BundlePayload',
'calendar start_session end_session minutes_per_day ingest create_writers',
RegisteredBundle = namedtuple(
'RegisteredBundle',
['calendar_name',
'start_session',
'end_session',
'minutes_per_day',
'ingest',
'create_writers']
)
BundleData = namedtuple(
@@ -220,7 +224,7 @@ def _make_bundle_core():
@curry
def register(name,
f,
calendar='NYSE',
calendar_name='NYSE',
start_session=None,
end_session=None,
minutes_per_day=390,
@@ -257,10 +261,9 @@ def _make_bundle_core():
successful load.
show_progress : bool
Show the progress for the current load where possible.
calendar : zipline.utils.calendars.TradingCalendar or str, optional
The trading calendar to align the data to, or the name of a trading
calendar. This defaults to 'NYSE', in which case we use the NYSE
calendar.
calendar_name : str, optional
The name of a calendar used to align bundle data.
Default is 'NYSE'.
start_session : pd.Timestamp, optional
The first session for which we want data. If not provided,
or if the date lies outside the range supported by the
@@ -296,24 +299,17 @@ def _make_bundle_core():
stacklevel=3,
)
if isinstance(calendar, string_types):
calendar = get_calendar(calendar)
# If the start and end sessions are not provided or lie outside
# the bounds of the calendar being used, set them to the first
# and last sessions of the calendar.
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session
_bundles[name] = _BundlePayload(
calendar,
start_session,
end_session,
minutes_per_day,
f,
create_writers,
# NOTE: We don't eagerly compute calendar values here because
# `register` is called at module scope in zipline, and creating a
# calendar currently takes between 0.5 and 1 seconds, which causes a
# noticeable delay on the zipline CLI.
_bundles[name] = RegisteredBundle(
calendar_name=calendar_name,
start_session=start_session,
end_session=end_session,
minutes_per_day=minutes_per_day,
ingest=f,
create_writers=create_writers,
)
return f
@@ -365,9 +361,21 @@ def _make_bundle_core():
except KeyError:
raise UnknownBundle(name)
calendar = get_calendar(bundle.calendar_name)
start_session = bundle.start_session
end_session = bundle.end_session
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session
if timestamp is None:
timestamp = pd.Timestamp.utcnow()
timestamp = timestamp.tz_convert('utc').tz_localize(None)
timestr = to_bundle_ingest_dirname(timestamp)
cachepath = cache_path(name, environ=environ)
pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
@@ -387,9 +395,9 @@ def _make_bundle_core():
)
daily_bar_writer = BcolzDailyBarWriter(
daily_bars_path,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
)
# Do an empty write to ensure that the daily ctables exist
# when we create the SQLiteAdjustmentWriter below. The
@@ -401,9 +409,9 @@ def _make_bundle_core():
wd.ensure_dir(*minute_equity_relative(
name, timestr, environ=environ)
),
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
minutes_per_day=bundle.minutes_per_day,
)
assets_db_path = wd.getpath(*asset_db_relative(
@@ -416,7 +424,7 @@ def _make_bundle_core():
wd.getpath(*adjustment_db_relative(
name, timestr, environ=environ)),
BcolzDailyBarReader(daily_bars_path),
bundle.calendar.all_sessions,
calendar.all_sessions,
overwrite=True,
)
)
@@ -435,9 +443,9 @@ def _make_bundle_core():
minute_bar_writer,
daily_bar_writer,
adjustment_db_writer,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
cache,
show_progress,
pth.data_path([name, timestr], environ=environ),
@@ -611,6 +619,3 @@ def _make_bundle_core():
return BundleCore(bundles, register, unregister, ingest, load, clean)
bundles, register, unregister, ingest, load, clean = _make_bundle_core()
register_calendar("YAHOO", get_calendar("NYSE"))
register_calendar("QUANDL", get_calendar("NYSE"))
+6 -1
View File
@@ -12,9 +12,11 @@ import pandas as pd
import requests
from six.moves.urllib.parse import urlencode
from . import core as bundles
from zipline.utils.calendars import register_calendar_alias
from zipline.utils.cli import maybe_show_progress
from . import core as bundles
log = Logger(__name__)
seconds_per_call = (pd.Timedelta('10 minutes') / 2000).total_seconds()
# Invalid symbols that quandl has had in its metadata:
@@ -402,3 +404,6 @@ def quantopian_quandl_bundle(environ,
if show_progress:
print("Writing data to %s." % output_dir)
tar.extractall(output_dir)
register_calendar_alias("QUANDL", "NYSE")
+3
View File
@@ -5,6 +5,7 @@ import pandas as pd
from pandas_datareader.data import DataReader
import requests
from zipline.utils.calendars import register_calendar_alias
from zipline.utils.cli import maybe_show_progress
from .core import register
@@ -198,3 +199,5 @@ register(
pd.Timestamp('2015-01-01', tz='utc'),
),
)
register_calendar_alias("YAHOO", "NYSE")
+6 -7
View File
@@ -45,10 +45,6 @@ INDEX_MAPPING = {
ONE_HOUR = pd.Timedelta(hours=1)
nyse_cal = get_calendar('NYSE')
trading_day_nyse = nyse_cal.day
trading_days_nyse = nyse_cal.all_sessions
def last_modified_time(path):
"""
@@ -95,9 +91,7 @@ def has_data_for_dates(series_or_df, first_date, last_date):
return (first <= first_date) and (last >= last_date)
def load_market_data(trading_day=trading_day_nyse,
trading_days=trading_days_nyse,
bm_symbol='^GSPC'):
def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC'):
"""
Load benchmark returns and treasury yield curves for the given calendar and
benchmark symbol.
@@ -136,6 +130,11 @@ def load_market_data(trading_day=trading_day_nyse,
'1month', '3month', '6month',
'1year','2year','3year','5year','7year','10year','20year','30year'
"""
# Resolve the NYSE defaults lazily, at call time, so that importing this
# module does not have to construct a calendar.
if trading_day is None:
    # The module-level default this replaces was read from the calendar's
    # ``day`` attribute, so keep using that attribute name here.
    trading_day = get_calendar('NYSE').day
if trading_days is None:
    trading_days = get_calendar('NYSE').all_sessions
first_date = trading_days[0]
now = pd.Timestamp.utcnow()
+7
View File
@@ -680,6 +680,13 @@ class CalendarNameCollision(ZiplineError):
)
class CyclicCalendarAlias(ZiplineError):
    """
    Raised when calendar aliases form a cycle.

    The ``cycle`` keyword supplied by the raiser is interpolated into the
    message between square brackets; the dispatcher passes the visited alias
    names joined by ``' -> '``.
    """
    msg = "Cycle in calendar aliases: [{cycle}]"
class ScheduleFunctionWithoutCalendar(ZiplineError):
"""
Raised when schedule_function is called but there is not a calendar to be
+10 -2
View File
@@ -16,11 +16,19 @@
from .trading_calendar import TradingCalendar
from .calendar_utils import (
get_calendar,
register_calendar_alias,
register_calendar,
register_calendar_type,
deregister_calendar,
clear_calendars
)
__all__ = ['get_calendar', 'TradingCalendar', 'register_calendar',
'register_calendar_type', 'deregister_calendar', 'clear_calendars']
__all__ = [
'TradingCalendar',
'clear_calendars',
'deregister_calendar',
'get_calendar',
'register_calendar',
'register_calendar_alias',
'register_calendar_type',
]
+139 -49
View File
@@ -1,6 +1,7 @@
from zipline.errors import (
InvalidCalendarName,
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.utils.calendars.exchange_calendar_cfe import CFEExchangeCalendar
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar
@@ -13,38 +14,24 @@ from zipline.utils.calendars.us_futures_calendar import (
QuantopianUSFuturesCalendar,
)
NYSE_CALENDAR_EXCHANGE_NAMES = frozenset([
"NYSE",
"NASDAQ",
"BATS",
])
CME_CALENDAR_EXCHANGE_NAMES = frozenset([
"CBOT",
"CME",
"COMEX",
"NYMEX",
])
ICE_CALENDAR_EXCHANGE_NAMES = frozenset([
"ICEUS",
"NYFE",
])
CFE_CALENDAR_EXCHANGE_NAMES = frozenset(["CFE"])
BMF_CALENDAR_EXCHANGE_NAMES = frozenset(["BMF"])
LSE_CALENDAR_EXCHANGE_NAMES = frozenset(["LSE"])
TSX_CALENDAR_EXCHANGE_NAMES = frozenset(["TSX"])
US_FUTURES_CALENDAR_NAMES = frozenset(["us_futures"])
_default_calendar_factories = {
NYSE_CALENDAR_EXCHANGE_NAMES: NYSEExchangeCalendar,
CME_CALENDAR_EXCHANGE_NAMES: CMEExchangeCalendar,
ICE_CALENDAR_EXCHANGE_NAMES: ICEExchangeCalendar,
CFE_CALENDAR_EXCHANGE_NAMES: CFEExchangeCalendar,
BMF_CALENDAR_EXCHANGE_NAMES: BMFExchangeCalendar,
LSE_CALENDAR_EXCHANGE_NAMES: LSEExchangeCalendar,
TSX_CALENDAR_EXCHANGE_NAMES: TSXExchangeCalendar,
US_FUTURES_CALENDAR_NAMES: QuantopianUSFuturesCalendar,
'NYSE': NYSEExchangeCalendar,
'CME': CMEExchangeCalendar,
'ICE': ICEExchangeCalendar,
'CFE': CFEExchangeCalendar,
'BMF': BMFExchangeCalendar,
'LSE': LSEExchangeCalendar,
'TSX': TSXExchangeCalendar,
'us_futures': QuantopianUSFuturesCalendar,
}
_default_calendar_aliases = {
'NASDAQ': 'NYSE',
'BATS': 'NYSE',
'CBOT': 'CME',
'COMEX': 'CME',
'NYMEX': 'CME',
'ICEUS': 'ICE',
'NYFE': 'ICE',
}
@@ -54,10 +41,20 @@ class TradingCalendarDispatcher(object):
Methods of a global instance of this class are provided by
zipline.utils.calendar_utils.
Parameters
----------
calendars : dict[str -> TradingCalendar]
Initial set of calendars.
calendar_factories : dict[str -> function]
Factories for lazy calendar creation.
aliases : dict[str -> str]
Calendar name aliases.
"""
def __init__(self, calendar_factories):
self._calendars = {}
def __init__(self, calendars, calendar_factories, aliases):
self._calendars = calendars
self._calendar_factories = calendar_factories
self._aliases = aliases
def get_calendar(self, name):
"""
@@ -73,20 +70,33 @@ class TradingCalendarDispatcher(object):
TradingCalendar
The desired calendar.
"""
canonical_name = self.resolve_alias(name)
try:
return self._calendars[name]
return self._calendars[canonical_name]
except KeyError:
# We haven't loaded this calendar yet, so make a new one.
pass
for names, factory in self._calendar_factories.items():
if name in names:
# Use the same calendar for all exchanges that share the same
# factory.
calendar = factory()
self._calendars.update({n: calendar for n in names})
return calendar
try:
factory = self._calendar_factories[canonical_name]
except KeyError:
# We don't have a factory registered for this name. Barf.
raise InvalidCalendarName(calendar_name=name)
raise InvalidCalendarName(calendar_name=name)
# Cache the calendar for future use.
calendar = self._calendars[canonical_name] = factory()
return calendar
def has_calendar(self, name):
    """
    Report whether ``name`` is known to this dispatcher.

    A name counts as known if it is an instantiated calendar, a registered
    calendar factory, or an alias.

    Parameters
    ----------
    name : str
        The calendar name to look for.

    Returns
    -------
    known : bool
        True if ``name`` appears in any of the three registries.
    """
    # Checked in the same order as before: calendars, factories, aliases.
    registries = (
        self._calendars,
        self._calendar_factories,
        self._aliases,
    )
    return any(name in registry for registry in registries)
def register_calendar(self, name, calendar, force=False):
"""
@@ -100,7 +110,8 @@ class TradingCalendarDispatcher(object):
The calendar to be registered for retrieval.
force : bool, optional
If True, old calendars will be overwritten on a name collision.
If False, name collisions will raise an exception. Default: False.
If False, name collisions will raise an exception.
Default is False.
Raises
------
@@ -110,7 +121,7 @@ class TradingCalendarDispatcher(object):
if force:
self.deregister_calendar(name)
if name in self._calendars or name in self._calendar_factories:
if self.has_calendar(name):
raise CalendarNameCollision(calendar_name=name)
self._calendars[name] = calendar
@@ -119,6 +130,9 @@ class TradingCalendarDispatcher(object):
"""
Registers a calendar by type.
This is useful for registering a new calendar to be lazily instantiated
at some future point in time.
Parameters
----------
name: str
@@ -127,7 +141,8 @@ class TradingCalendarDispatcher(object):
The type of the calendar to register.
force : bool, optional
If True, old calendars will be overwritten on a name collision.
If False, name collisions will raise an exception. Default: False.
If False, name collisions will raise an exception.
Default is False.
Raises
------
@@ -135,13 +150,83 @@ class TradingCalendarDispatcher(object):
If a calendar is already registered with the given calendar's name.
"""
if force:
self._calendar_factories.pop(name, None)
self.deregister_calendar(name)
if name in self._calendars or name in self._calendar_factories:
if self.has_calendar(name):
raise CalendarNameCollision(calendar_name=name)
self._calendar_factories[name] = calendar_type
def register_calendar_alias(self, alias, real_name, force=False):
    """
    Register an alias for a calendar.

    This is useful when multiple exchanges should share a calendar, or when
    there are multiple ways to refer to the same exchange.

    After calling ``register_calendar_alias('alias', 'real_name')``,
    subsequent calls to ``get_calendar('alias')`` will return the same
    result as ``get_calendar('real_name')``.

    Parameters
    ----------
    alias : str
        The name to be used to refer to a calendar.
    real_name : str
        The canonical name of the registered calendar.
    force : bool, optional
        If True, old calendars will be overwritten on a name collision.
        If False, name collisions will raise an exception.
        Default is False.

    Raises
    ------
    CalendarNameCollision
        If ``alias`` is already in use and ``force`` is False.
    CyclicCalendarAlias
        If the new alias would create a cycle. In that case the dispatcher
        is left exactly as it was before the call, including anything that
        ``force`` had displaced.
    """
    # Remember whatever currently lives under ``alias`` so that a forced
    # registration which turns out to be cyclic can be undone completely
    # instead of silently destroying the previous registration.
    missing = object()
    registries = (
        self._calendars,
        self._calendar_factories,
        self._aliases,
    )
    displaced = [registry.get(alias, missing) for registry in registries]

    if force:
        self.deregister_calendar(alias)

    if self.has_calendar(alias):
        raise CalendarNameCollision(calendar_name=alias)

    self._aliases[alias] = real_name

    # Ensure that the new alias doesn't create a cycle, and back it out if
    # it did.
    try:
        self.resolve_alias(alias)
    except CyclicCalendarAlias:
        del self._aliases[alias]
        # Put back anything ``force`` removed above. Without ``force``
        # nothing was registered under ``alias``, so this is a no-op.
        for registry, old_value in zip(registries, displaced):
            if old_value is not missing:
                registry[alias] = old_value
        raise
def resolve_alias(self, name):
    """
    Follow the alias chain starting at ``name`` to its canonical name.

    Parameters
    ----------
    name : str
        The name of the requested calendar.

    Returns
    -------
    canonical_name : str
        The real name of the calendar to create/return. A name that is not
        an alias is returned unchanged.

    Raises
    ------
    CyclicCalendarAlias
        If following the aliases leads back to a name already visited.
    """
    aliases = self._aliases

    # Names visited so far, kept in traversal order so that a cycle can be
    # reported in the order it was walked.
    visited = []
    current = name

    while current in aliases:
        visited.append(current)
        current = aliases[current]

        # Membership in a list makes the walk quadratic in the chain length,
        # which is fine because alias chains are expected to be very short.
        if current in visited:
            visited.append(current)
            raise CyclicCalendarAlias(
                cycle=" -> ".join(map(repr, visited))
            )

    return current
def deregister_calendar(self, name):
"""
If a calendar is registered with the given name, it is de-registered.
@@ -153,6 +238,7 @@ class TradingCalendarDispatcher(object):
"""
self._calendars.pop(name, None)
self._calendar_factories.pop(name, None)
self._aliases.pop(name, None)
def clear_calendars(self):
"""
@@ -160,13 +246,16 @@ class TradingCalendarDispatcher(object):
"""
self._calendars.clear()
self._calendar_factories.clear()
self._aliases.clear()
# We maintain a global calendar dispatcher so that users can just do
# `register_calendar('my_calendar', calendar)` and then use `get_calendar`
# without having to thread around a dispatcher.
global_calendar_dispatcher = TradingCalendarDispatcher(
_default_calendar_factories
calendars={},
calendar_factories=_default_calendar_factories,
aliases=_default_calendar_aliases,
)
get_calendar = global_calendar_dispatcher.get_calendar
@@ -174,3 +263,4 @@ clear_calendars = global_calendar_dispatcher.clear_calendars
deregister_calendar = global_calendar_dispatcher.deregister_calendar
register_calendar = global_calendar_dispatcher.register_calendar
register_calendar_type = global_calendar_dispatcher.register_calendar_type
register_calendar_alias = global_calendar_dispatcher.register_calendar_alias