PERF: Remove module-scope calendar creations.

Remove module scope invocations of `get_calendar('NYSE')`, which cuts
zipline import time in half on my machine. This makes the zipline CLI
noticeably more responsive, and it reduces memory consumed at import
time from 130MB to 90MB.

Before:

$ time python -c 'import zipline'

real    0m1.262s
user    0m1.128s
sys     0m0.120s

After:

$ time python -c 'import zipline'

real    0m0.676s
user    0m0.536s
sys     0m0.132s
This commit is contained in:
Scott Sanderson
2016-09-05 17:02:37 -04:00
parent 548e0675be
commit a8a2cc1582
10 changed files with 330 additions and 107 deletions
@@ -0,0 +1,95 @@
"""
Tests for TradingCalendarDispatcher.
"""
from zipline.errors import (
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.testing import ZiplineTestCase
from zipline.utils.calendars.calendar_utils import TradingCalendarDispatcher
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar
class CalendarAliasTestCase(ZiplineTestCase):
    """
    Tests for alias handling in TradingCalendarDispatcher: following alias
    chains, adding and removing aliases, name collisions, forced overrides
    and rejection of cyclic aliases.
    """

    @classmethod
    def init_class_fixtures(cls):
        super(CalendarAliasTestCase, cls).init_class_fixtures()
        # Build the calendar a single time for the whole class so that
        # individual tests don't pay for instantiating calendars.
        cls.dispatcher_kwargs = {
            'calendars': {'ICE': ICEExchangeCalendar()},
            'calendar_factories': {},
            'aliases': {
                'ICE_ALIAS': 'ICE',
                'ICE_ALIAS_ALIAS': 'ICE_ALIAS',
            },
        }

    def init_instance_fixtures(self):
        super(CalendarAliasTestCase, self).init_instance_fixtures()
        # Give every test its own copies of the dicts, so a test that
        # mutates the dispatcher cannot affect any other test.
        fresh_kwargs = {
            key: mapping.copy()
            for key, mapping in self.dispatcher_kwargs.items()
        }
        self.dispatcher = TradingCalendarDispatcher(**fresh_kwargs)

    def test_follow_alias_chain(self):
        # Both a direct alias and an alias-of-an-alias must yield the very
        # same calendar object as the canonical name.
        get = self.dispatcher.get_calendar
        for alias in ('ICE_ALIAS', 'ICE_ALIAS_ALIAS'):
            self.assertIs(get('ICE'), get(alias))

    def test_add_new_aliases(self):
        dispatcher = self.dispatcher

        # Unknown until it is registered.
        with self.assertRaises(InvalidCalendarName):
            dispatcher.get_calendar('NOT_ICE')

        dispatcher.register_calendar_alias('NOT_ICE', 'ICE')
        self.assertIs(
            dispatcher.get_calendar('ICE'),
            dispatcher.get_calendar('NOT_ICE'),
        )

        # A new alias may itself point at an existing alias.
        dispatcher.register_calendar_alias(
            'ICE_ALIAS_ALIAS_ALIAS',
            'ICE_ALIAS_ALIAS'
        )
        self.assertIs(
            dispatcher.get_calendar('ICE'),
            dispatcher.get_calendar('ICE_ALIAS_ALIAS_ALIAS'),
        )

    def test_remove_aliases(self):
        dispatcher = self.dispatcher
        dispatcher.deregister_calendar('ICE_ALIAS_ALIAS')
        with self.assertRaises(InvalidCalendarName):
            dispatcher.get_calendar('ICE_ALIAS_ALIAS')

    def test_reject_alias_that_already_exists(self):
        # Neither a calendar name nor an existing alias may be re-used as a
        # new alias without ``force``.
        for taken_name in ('ICE', 'ICE_ALIAS'):
            with self.assertRaises(CalendarNameCollision):
                self.dispatcher.register_calendar_alias(taken_name, 'NOT_ICE')

    def test_allow_alias_override_with_force(self):
        dispatcher = self.dispatcher
        dispatcher.register_calendar_alias('ICE', 'NOT_ICE', force=True)
        # 'ICE' now aliases a name with no calendar behind it.
        with self.assertRaises(InvalidCalendarName):
            dispatcher.get_calendar('ICE')

    def test_reject_cyclic_aliases(self):
        register = self.dispatcher.register_calendar_alias
        register('A', 'B')
        register('B', 'C')

        # Closing the loop C -> A must be refused.
        with self.assertRaises(CyclicCalendarAlias) as caught:
            register('C', 'A')

        expected = "Cycle in calendar aliases: ['C' -> 'A' -> 'B' -> 'C']"
        self.assertEqual(str(caught.exception), expected)
+2 -2
View File
@@ -155,7 +155,7 @@ class BundleCoreTestCase(WithInstanceTmpDir,
@self.register(
'bundle',
calendar=calendar,
calendar_name='NYSE',
start_session=self.START_DATE,
end_session=self.END_DATE,
)
@@ -369,7 +369,7 @@ class BundleCoreTestCase(WithInstanceTmpDir,
"""
if not self.bundles:
@self.register('bundle',
calendar=get_calendar('NYSE'),
calendar_name=('NYSE'),
start_session=pd.Timestamp('2014', tz='UTC'),
end_session=pd.Timestamp('2014', tz='UTC'))
def _(environ,
+4 -3
View File
@@ -5,6 +5,7 @@ import pandas as pd
from toolz import merge
import toolz.curried.operator as op
from zipline import get_calendar
from zipline.data.bundles import ingest, load, bundles
from zipline.data.bundles.quandl import (
format_wiki_url,
@@ -28,9 +29,9 @@ class QuandlBundleTestCase(ZiplineTestCase):
asset_start = pd.Timestamp('2014-01', tz='utc')
asset_end = pd.Timestamp('2015-01', tz='utc')
bundle = bundles['quandl']
calendar = bundle.calendar
start_date = bundle.start_session
end_date = bundle.end_session
calendar = get_calendar(bundle.calendar_name)
start_date = calendar.first_session
end_date = calendar.last_session
api_key = 'ayylmao'
columns = 'open', 'high', 'low', 'close', 'volume'
+1 -1
View File
@@ -157,7 +157,7 @@ class YahooBundleTestCase(WithResponses, ZiplineTestCase):
self.register(
'bundle',
yahoo_equities(self.symbols),
calendar=self.calendar,
calendar_name='NYSE',
start_session=self.asset_start,
end_session=self.asset_end,
)
+19 -3
View File
@@ -38,26 +38,42 @@ from . import data
from . import finance
from . import gens
from . import utils
from .utils.calendars import get_calendar
from .utils.run_algo import run_algorithm
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
# These need to happen after the other imports.
from . algorithm import TradingAlgorithm
from . import api
__version__ = get_versions()['version']
del get_versions
def load_ipython_extension(ipython):
    """
    Register the ``zipline`` magic function on ``ipython``.

    Parameters
    ----------
    ipython : object
        Object exposing ``register_magic_function``; the magic is registered
        with kind 'line_cell' under the name 'zipline'.
    """
    # Imported at call time rather than at module import.
    from .__main__ import zipline_magic as magic
    ipython.register_magic_function(magic, 'line_cell', 'zipline')
# PERF: Fire a warning if calendars were instantiated during zipline import.
# Having calendars doesn't break anything per-se, but it makes zipline imports
# noticeably slower, which becomes particularly noticeable in the Zipline CLI.
from zipline.utils.calendars.calendar_utils import global_calendar_dispatcher
if global_calendar_dispatcher._calendars:
import warnings
warnings.warn(
"Found TradingCalendar instances after zipline import.\n"
"Zipline startup will be much slower until this is fixed!",
)
del warnings
del global_calendar_dispatcher
__all__ = [
'TradingAlgorithm',
'api',
'data',
'finance',
'get_calendar',
'gens',
'run_algorithm',
'utils',
+48 -40
View File
@@ -7,7 +7,6 @@ import warnings
from contextlib2 import ExitStack
import click
import pandas as pd
from six import string_types
from toolz import curry, complement, take
from ..us_equity_pricing import (
@@ -31,7 +30,7 @@ from zipline.utils.compat import mappingproxy
from zipline.utils.input_validation import ensure_timestamp, optionally
import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar
from zipline.utils.calendars import get_calendar, register_calendar_alias
def asset_db_path(bundle_name, timestr, environ=None, db_version=None):
@@ -133,9 +132,14 @@ def ingestions_for_bundle(bundle, environ=None):
)
_BundlePayload = namedtuple(
'_BundlePayload',
'calendar start_session end_session minutes_per_day ingest create_writers',
RegisteredBundle = namedtuple(
'RegisteredBundle',
['calendar_name',
'start_session',
'end_session',
'minutes_per_day',
'ingest',
'create_writers']
)
BundleData = namedtuple(
@@ -220,7 +224,7 @@ def _make_bundle_core():
@curry
def register(name,
f,
calendar='NYSE',
calendar_name='NYSE',
start_session=None,
end_session=None,
minutes_per_day=390,
@@ -257,10 +261,9 @@ def _make_bundle_core():
successful load.
show_progress : bool
Show the progress for the current load where possible.
calendar : zipline.utils.calendars.TradingCalendar or str, optional
The trading calendar to align the data to, or the name of a trading
calendar. This defaults to 'NYSE', in which case we use the NYSE
calendar.
calendar_name : str, optional
The name of a calendar used to align bundle data.
Default is 'NYSE'.
start_session : pd.Timestamp, optional
The first session for which we want data. If not provided,
or if the date lies outside the range supported by the
@@ -296,24 +299,17 @@ def _make_bundle_core():
stacklevel=3,
)
if isinstance(calendar, string_types):
calendar = get_calendar(calendar)
# If the start and end sessions are not provided or lie outside
# the bounds of the calendar being used, set them to the first
# and last sessions of the calendar.
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session
_bundles[name] = _BundlePayload(
calendar,
start_session,
end_session,
minutes_per_day,
f,
create_writers,
# NOTE: We don't eagerly compute calendar values here because
# `register` is called at module scope in zipline, and creating a
# calendar currently takes between 0.5 and 1 seconds, which causes a
# noticeable delay on the zipline CLI.
_bundles[name] = RegisteredBundle(
calendar_name=calendar_name,
start_session=start_session,
end_session=end_session,
minutes_per_day=minutes_per_day,
ingest=f,
create_writers=create_writers,
)
return f
@@ -365,9 +361,21 @@ def _make_bundle_core():
except KeyError:
raise UnknownBundle(name)
calendar = get_calendar(bundle.calendar_name)
start_session = bundle.start_session
end_session = bundle.end_session
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session
if timestamp is None:
timestamp = pd.Timestamp.utcnow()
timestamp = timestamp.tz_convert('utc').tz_localize(None)
timestr = to_bundle_ingest_dirname(timestamp)
cachepath = cache_path(name, environ=environ)
pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
@@ -387,9 +395,9 @@ def _make_bundle_core():
)
daily_bar_writer = BcolzDailyBarWriter(
daily_bars_path,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
)
# Do an empty write to ensure that the daily ctables exist
# when we create the SQLiteAdjustmentWriter below. The
@@ -401,9 +409,9 @@ def _make_bundle_core():
wd.ensure_dir(*minute_equity_relative(
name, timestr, environ=environ)
),
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
minutes_per_day=bundle.minutes_per_day,
)
assets_db_path = wd.getpath(*asset_db_relative(
@@ -416,7 +424,7 @@ def _make_bundle_core():
wd.getpath(*adjustment_db_relative(
name, timestr, environ=environ)),
BcolzDailyBarReader(daily_bars_path),
bundle.calendar.all_sessions,
calendar.all_sessions,
overwrite=True,
)
)
@@ -435,9 +443,9 @@ def _make_bundle_core():
minute_bar_writer,
daily_bar_writer,
adjustment_db_writer,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
cache,
show_progress,
pth.data_path([name, timestr], environ=environ),
@@ -612,5 +620,5 @@ def _make_bundle_core():
bundles, register, unregister, ingest, load, clean = _make_bundle_core()
register_calendar("YAHOO", get_calendar("NYSE"))
register_calendar("QUANDL", get_calendar("NYSE"))
register_calendar_alias("YAHOO", "NYSE")
register_calendar_alias("QUANDL", "NYSE")
+6 -7
View File
@@ -45,10 +45,6 @@ INDEX_MAPPING = {
ONE_HOUR = pd.Timedelta(hours=1)
nyse_cal = get_calendar('NYSE')
trading_day_nyse = nyse_cal.day
trading_days_nyse = nyse_cal.all_sessions
def last_modified_time(path):
"""
@@ -95,9 +91,7 @@ def has_data_for_dates(series_or_df, first_date, last_date):
return (first <= first_date) and (last >= last_date)
def load_market_data(trading_day=trading_day_nyse,
trading_days=trading_days_nyse,
bm_symbol='^GSPC'):
def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC'):
"""
Load benchmark returns and treasury yield curves for the given calendar and
benchmark symbol.
@@ -136,6 +130,11 @@ def load_market_data(trading_day=trading_day_nyse,
'1month', '3month', '6month',
'1year','2year','3year','5year','7year','10year','20year','30year'
"""
if trading_day is None:
trading_day = get_calendar('NYSE').trading_day
if trading_days is None:
trading_days = get_calendar('NYSE').all_sessions
first_date = trading_days[0]
now = pd.Timestamp.utcnow()
+7
View File
@@ -680,6 +680,13 @@ class CalendarNameCollision(ZiplineError):
)
class CyclicCalendarAlias(ZiplineError):
    """
    Raised when a chain of calendar aliases loops back on itself.

    The message template takes a single field, ``cycle``, which is rendered
    inside the square brackets of the message.
    """
    msg = "Cycle in calendar aliases: [{cycle}]"
class ScheduleFunctionWithoutCalendar(ZiplineError):
"""
Raised when schedule_function is called but there is not a calendar to be
+10 -2
View File
@@ -16,11 +16,19 @@
from .trading_calendar import TradingCalendar
from .calendar_utils import (
get_calendar,
register_calendar_alias,
register_calendar,
register_calendar_type,
deregister_calendar,
clear_calendars
)
__all__ = ['get_calendar', 'TradingCalendar', 'register_calendar',
'register_calendar_type', 'deregister_calendar', 'clear_calendars']
__all__ = [
'TradingCalendar',
'clear_calendars',
'deregister_calendar',
'get_calendar',
'register_calendar',
'register_calendar_alias',
'register_calendar_type',
]
+138 -49
View File
@@ -1,6 +1,8 @@
from collections import OrderedDict
from zipline.errors import (
InvalidCalendarName,
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.utils.calendars.exchange_calendar_cfe import CFEExchangeCalendar
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar
@@ -13,38 +15,24 @@ from zipline.utils.calendars.us_futures_calendar import (
QuantopianUSFuturesCalendar,
)
NYSE_CALENDAR_EXCHANGE_NAMES = frozenset([
"NYSE",
"NASDAQ",
"BATS",
])
CME_CALENDAR_EXCHANGE_NAMES = frozenset([
"CBOT",
"CME",
"COMEX",
"NYMEX",
])
ICE_CALENDAR_EXCHANGE_NAMES = frozenset([
"ICEUS",
"NYFE",
])
CFE_CALENDAR_EXCHANGE_NAMES = frozenset(["CFE"])
BMF_CALENDAR_EXCHANGE_NAMES = frozenset(["BMF"])
LSE_CALENDAR_EXCHANGE_NAMES = frozenset(["LSE"])
TSX_CALENDAR_EXCHANGE_NAMES = frozenset(["TSX"])
US_FUTURES_CALENDAR_NAMES = frozenset(["us_futures"])
_default_calendar_factories = {
NYSE_CALENDAR_EXCHANGE_NAMES: NYSEExchangeCalendar,
CME_CALENDAR_EXCHANGE_NAMES: CMEExchangeCalendar,
ICE_CALENDAR_EXCHANGE_NAMES: ICEExchangeCalendar,
CFE_CALENDAR_EXCHANGE_NAMES: CFEExchangeCalendar,
BMF_CALENDAR_EXCHANGE_NAMES: BMFExchangeCalendar,
LSE_CALENDAR_EXCHANGE_NAMES: LSEExchangeCalendar,
TSX_CALENDAR_EXCHANGE_NAMES: TSXExchangeCalendar,
US_FUTURES_CALENDAR_NAMES: QuantopianUSFuturesCalendar,
'NYSE': NYSEExchangeCalendar,
'CME': CMEExchangeCalendar,
'ICE': ICEExchangeCalendar,
'CFE': CFEExchangeCalendar,
'BMF': BMFExchangeCalendar,
'LSE': LSEExchangeCalendar,
'TSX': TSXExchangeCalendar,
'us_futures': QuantopianUSFuturesCalendar,
}
_default_aliases = {
'NASDAQ': 'NYSE',
'BATS': 'NYSE',
'CBOT': 'CME',
'COMEX': 'CME',
'NYMEX': 'CME',
'ICEUS': 'ICE',
'NYFE': 'ICE',
}
@@ -54,10 +42,19 @@ class TradingCalendarDispatcher(object):
Methods of a global instance of this class are provided by
zipline.utils.calendar_utils.
Parameters
----------
calendars : dict[str -> TradingCalendar]
Initial set of calendars.
calendar_factories : dict[str -> function]
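Factories for lazily creating calendars, keyed by calendar name.
aliases : dict[str -> str]
Mapping from alias to the name it refers to, which may be a calendar
name or another alias.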
"""
def __init__(self, calendar_factories):
self._calendars = {}
def __init__(self, calendars, calendar_factories, aliases):
self._calendars = calendars
self._calendar_factories = calendar_factories
self._aliases = aliases
def get_calendar(self, name):
"""
@@ -73,20 +70,33 @@ class TradingCalendarDispatcher(object):
TradingCalendar
The desired calendar.
"""
canonical_name = self.resolve_alias(name)
try:
return self._calendars[name]
return self._calendars[canonical_name]
except KeyError:
# We haven't loaded this calendar yet, so make a new one.
pass
for names, factory in self._calendar_factories.items():
if name in names:
# Use the same calendar for all exchanges that share the same
# factory.
calendar = factory()
self._calendars.update({n: calendar for n in names})
return calendar
try:
factory = self._calendar_factories[canonical_name]
except KeyError:
# We don't have a factory registered for this name. Barf.
raise InvalidCalendarName(calendar_name=name)
raise InvalidCalendarName(calendar_name=name)
# Cache the calendar for future use.
calendar = self._calendars[canonical_name] = factory()
return calendar
def has_calendar(self, name):
    """
    Report whether ``name`` is known to this dispatcher.

    A name counts as known if it is an instantiated calendar, has a
    registered factory, or is registered as an alias.

    Parameters
    ----------
    name : str
        The name to look up.

    Returns
    -------
    bool
        True if ``name`` appears in any of the three registries.
    """
    registries = (
        self._calendars,
        self._calendar_factories,
        self._aliases,
    )
    return any(name in registry for registry in registries)
def register_calendar(self, name, calendar, force=False):
"""
@@ -100,7 +110,8 @@ class TradingCalendarDispatcher(object):
The calendar to be registered for retrieval.
force : bool, optional
If True, old calendars will be overwritten on a name collision.
If False, name collisions will raise an exception. Default: False.
If False, name collisions will raise an exception.
Default is False.
Raises
------
@@ -110,7 +121,7 @@ class TradingCalendarDispatcher(object):
if force:
self.deregister_calendar(name)
if name in self._calendars or name in self._calendar_factories:
if self.has_calendar(name):
raise CalendarNameCollision(calendar_name=name)
self._calendars[name] = calendar
@@ -119,6 +130,9 @@ class TradingCalendarDispatcher(object):
"""
Registers a calendar by type.
This is useful for registering a new calendar to be lazily instantiated
at some future point in time.
Parameters
----------
name: str
@@ -127,7 +141,8 @@ class TradingCalendarDispatcher(object):
The type of the calendar to register.
force : bool, optional
If True, old calendars will be overwritten on a name collision.
If False, name collisions will raise an exception. Default: False.
If False, name collisions will raise an exception.
Default is False.
Raises
------
@@ -135,13 +150,82 @@ class TradingCalendarDispatcher(object):
If a calendar is already registered with the given calendar's name.
"""
if force:
self._calendar_factories.pop(name, None)
self.deregister_calendar(name)
if name in self._calendars or name in self._calendar_factories:
if self.has_calendar(name):
raise CalendarNameCollision(calendar_name=name)
self._calendar_factories[name] = calendar_type
def register_calendar_alias(self, alias, real_name, force=False):
    """
    Register ``alias`` as another name for ``real_name``.

    Use this when several exchanges share one calendar, or when one
    exchange goes by several names. After
    ``register_calendar_alias('alias', 'real_name')``, calls to
    ``get_calendar('alias')`` return the same result as
    ``get_calendar('real_name')``.

    Parameters
    ----------
    alias : str
        The new name to be used to refer to a calendar.
    real_name : str
        The name ``alias`` should resolve to. It is not checked for
        existence here.
    force : bool, optional
        If True, anything already registered under ``alias`` is
        de-registered first. If False, a name collision raises.
        Default is False.

    Raises
    ------
    CalendarNameCollision
        If ``alias`` is already in use after the optional forced
        de-registration.
    CyclicCalendarAlias
        If the new alias would create a cycle; the alias is removed again
        before the error propagates.
    """
    if force:
        self.deregister_calendar(alias)

    if self.has_calendar(alias):
        raise CalendarNameCollision(calendar_name=alias)

    aliases = self._aliases
    aliases[alias] = real_name

    # Resolving the freshly added alias walks its whole chain, which is
    # where a cycle is detected. On failure, undo the insertion so the
    # alias table is left without the offending entry.
    try:
        self.resolve_alias(alias)
    except CyclicCalendarAlias:
        aliases.pop(alias)
        raise
def resolve_alias(self, name):
    """
    Resolve a calendar alias for retrieval.

    Follows the alias table from ``name`` until reaching a name that is
    not itself an alias.

    Parameters
    ----------
    name : str
        The name of the requested calendar.

    Returns
    -------
    canonical_name : str
        The real name of the calendar to create/return. If ``name`` is not
        an alias it is returned unchanged.

    Raises
    ------
    CyclicCalendarAlias
        If following the aliases revisits a name. The ``cycle`` field lists
        the names in the order they were visited, ending with the repeated
        name.
    """
    # Use an OrderedDict as an ordered set so that we can report the order
    # of aliases in the event of a cycle.
    seen = OrderedDict([(name, None)])
    aliases = self._aliases

    while name in aliases:
        name = aliases[name]
        if name in seen:
            # ``keys()`` is a list on Python 2 but a view with no ``append``
            # on Python 3, so copy the visited names into a real list
            # before adding the name that closes the loop.
            cycle = list(seen)
            cycle.append(name)
            raise CyclicCalendarAlias(
                cycle=" -> ".join([repr(k) for k in cycle])
            )
        seen[name] = None

    return name
def deregister_calendar(self, name):
"""
If a calendar is registered with the given name, it is de-registered.
@@ -153,6 +237,7 @@ class TradingCalendarDispatcher(object):
"""
self._calendars.pop(name, None)
self._calendar_factories.pop(name, None)
self._aliases.pop(name, None)
def clear_calendars(self):
"""
@@ -160,13 +245,16 @@ class TradingCalendarDispatcher(object):
"""
self._calendars.clear()
self._calendar_factories.clear()
self._aliases.clear()
# We maintain a global calendar dispatcher so that users can just do
# `register_calendar('my_calendar', calendar) and then use `get_calendar`
# without having to thread around a dispatcher.
global_calendar_dispatcher = TradingCalendarDispatcher(
_default_calendar_factories
calendars={},
calendar_factories=_default_calendar_factories,
aliases=_default_aliases,
)
get_calendar = global_calendar_dispatcher.get_calendar
@@ -174,3 +262,4 @@ clear_calendars = global_calendar_dispatcher.clear_calendars
deregister_calendar = global_calendar_dispatcher.deregister_calendar
register_calendar = global_calendar_dispatcher.register_calendar
register_calendar_type = global_calendar_dispatcher.register_calendar_type
register_calendar_alias = global_calendar_dispatcher.register_calendar_alias