mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 03:28:20 +08:00
BUG: Clear asset caches when mapping DataFrame.
Our DataFrame index resolution logic relies on failed lookups **not** being cached, but not caching failed lookups is a nontrivial performance hit when repeatedly looking up sids. The "solution" here is to clear the caches after writing in new assets. The real fix for this is either: 1. Don't construct an AssetFinder until we have the datasource in hand in run(), or 2. Don't symbol-map the user's input source if it's a DataFrame. Instead we should make our data loaders pre-map the data.
This commit is contained in:
+11
-4
@@ -21,8 +21,10 @@ from pandas.tseries.tools import normalize_date
|
||||
import numpy as np
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from itertools import groupby, chain, repeat
|
||||
from numbers import Integral
|
||||
from operator import attrgetter
|
||||
|
||||
from six.moves import filter
|
||||
from six import (
|
||||
exec_,
|
||||
@@ -30,7 +32,6 @@ from six import (
|
||||
itervalues,
|
||||
string_types,
|
||||
)
|
||||
from operator import attrgetter
|
||||
|
||||
|
||||
from zipline.errors import (
|
||||
@@ -96,6 +97,7 @@ from zipline.utils.events import (
|
||||
TimeRuleFactory,
|
||||
)
|
||||
from zipline.utils.factory import create_simulation_parameters
|
||||
from zipline.utils.functional import is_a
|
||||
from zipline.utils.math_utils import tolerant_equals
|
||||
from zipline.utils.preprocess import preprocess
|
||||
|
||||
@@ -608,8 +610,7 @@ class TradingAlgorithm(object):
|
||||
if isinstance(identifier, Asset):
|
||||
asset = self.asset_finder.retrieve_asset(sid=identifier.sid,
|
||||
default_none=True)
|
||||
|
||||
elif hasattr(identifier, '__int__'):
|
||||
elif isinstance(identifier, Integral):
|
||||
asset = self.asset_finder.retrieve_asset(sid=identifier,
|
||||
default_none=True)
|
||||
if asset is None:
|
||||
@@ -618,6 +619,12 @@ class TradingAlgorithm(object):
|
||||
self.trading_environment.write_data(
|
||||
equities_identifiers=identifiers_to_build)
|
||||
|
||||
# We need to clear out any cache misses that were stored while trying
|
||||
# to do lookups. The real fix for this problem is to not construct an
|
||||
# AssetFinder until we `run()` when we actually have all the data we
|
||||
# need to do so.
|
||||
self.asset_finder._reset_caches()
|
||||
|
||||
return self.asset_finder.map_identifier_index_to_sids(
|
||||
identifiers, as_of_date,
|
||||
)
|
||||
|
||||
@@ -99,12 +99,23 @@ class AssetFinder(object):
|
||||
#
|
||||
# The caches are read through, i.e. accessing an asset through
|
||||
# retrieve_asset will populate the cache on first retrieval.
|
||||
self._asset_cache = {}
|
||||
self._asset_type_cache = {}
|
||||
self._caches = (self._asset_cache, self._asset_type_cache) = {}, {}
|
||||
|
||||
# Populated on first call to `lifetimes`.
|
||||
self._asset_lifetimes = None
|
||||
|
||||
def _reset_caches(self):
|
||||
"""
|
||||
Reset our asset caches.
|
||||
|
||||
You probably shouldn't call this method.
|
||||
"""
|
||||
# This method exists as a workaround for the in-place mutating behavior
|
||||
# of `TradingAlgorithm._write_and_map_id_index_to_sids`. No one else
|
||||
# should be calling this.
|
||||
for cache in self._caches:
|
||||
cache.clear()
|
||||
|
||||
def lookup_asset_types(self, sids):
|
||||
"""
|
||||
Retrieve asset types for a list of sids.
|
||||
|
||||
Reference in New Issue
Block a user