diff --git a/tests/test_assets.py b/tests/test_assets.py index 8fd41cc7..07597bdd 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -34,10 +34,11 @@ import pandas as pd from nose_parameterized import parameterized from zipline.finance.trading import with_environment -from zipline.assets import Asset, Future, AssetFinder +from zipline.assets import Asset, Equity, Future, AssetFinder from zipline.errors import ( SymbolNotFound, MultipleSymbolsFound, + SidAssignmentError, ) @@ -565,3 +566,47 @@ class AssetFinderTestCase(TestCase): self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name']) # Check that old data survived self.assertEqual('equity', finder.metadata_cache[0]['asset_type']) + + def test_consume_asset_as_identifier(self): + + # Build some end dates + eq_end = pd.Timestamp('2012-01-01', tz='UTC') + fut_end = pd.Timestamp('2008-01-01', tz='UTC') + + # Build some simple Assets + equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end) + future_asset = Future(200, symbol="TESTFUT", end_date=fut_end) + + # Consume the Assets + finder = AssetFinder() + finder.consume_identifiers([equity_asset, future_asset]) + finder.populate_cache() + + # Test equality with newly built Assets + self.assertEqual(equity_asset, finder.retrieve_asset(1)) + self.assertEqual(future_asset, finder.retrieve_asset(200)) + self.assertEqual(eq_end, finder.retrieve_asset(1).end_date) + self.assertEqual(fut_end, finder.retrieve_asset(200).end_date) + + def test_sid_assignment(self): + + # This metadata does not contain SIDs + metadata = {'PLAY': {'symbol': 'PLAY'}, + 'MSFT': {'symbol': 'MSFT'}} + + # Build a finder that is allowed to assign sids + finder = AssetFinder(metadata=metadata, allow_sid_assignment=True) + + # Verify that Assets were built + play = finder.retrieve_asset_by_identifier('PLAY') + self.assertEqual('PLAY', play.symbol) + + def test_sid_assignment_failure(self): + + # This metadata does not contain SIDs + metadata = {'PLAY': {'symbol': 'PLAY'}, + 
'MSFT': {'symbol': 'MSFT'}} + + # Build a finder that is not allowed to assign sids, asserting failure + with self.assertRaises(SidAssignmentError): + AssetFinder(metadata=metadata, allow_sid_assignment=False) diff --git a/zipline/assets/assets.py b/zipline/assets/assets.py index 8cc234b3..c2483a2f 100644 --- a/zipline/assets/assets.py +++ b/zipline/assets/assets.py @@ -25,12 +25,13 @@ from pandas.tseries.tools import normalize_date from six import with_metaclass, string_types from zipline.errors import ( - SymbolNotFound, - MultipleSymbolsFound, - SidNotFound, - IdentifierNotFound, ConsumeAssetMetaDataError, + IdentifierNotFound, InvalidAssetType, + MultipleSymbolsFound, + SidAssignmentError, + SidNotFound, + SymbolNotFound, ) from zipline.utils import tradingcalendar from zipline.assets._assets import ( @@ -64,7 +65,8 @@ class AssetFinder(object): def __init__(self, metadata=None, - trading_calendar=tradingcalendar): + trading_calendar=tradingcalendar, + allow_sid_assignment=True): # Any particular instance of AssetFinder should be # consistent throughout its lifetime, so we grab a reference @@ -75,6 +77,11 @@ class AssetFinder(object): self.identifier_cache = {} self.fuzzy_match = {} + # This flag controls if the AssetFinder is allowed to generate its own + # sids. If False, metadata that does not contain a sid will raise an + # exception when building assets. 
+ self.allow_sid_assignment = allow_sid_assignment + # The AssetFinder also holds a nested-dict of all metadata for # reference when building Assets self.metadata_cache = {} @@ -92,6 +99,8 @@ class AssetFinder(object): def _assign_sid(self, identifier): if hasattr(identifier, '__int__'): return identifier.__int__() + if not self.allow_sid_assignment: + raise SidAssignmentError(identifier=identifier) if isinstance(identifier, string_types): return self._next_free_sid() @@ -253,14 +262,10 @@ class AssetFinder(object): kwargs['sid'] pass except KeyError: - # Assign the identifier as the sid, if applicable - if isinstance(identifier, int): - kwargs['sid'] = identifier # If the identifier is not a sid, assign one - else: - kwargs['sid'] = self._assign_sid(identifier) - # Update the metadata object with the new sid - self.insert_metadata(identifier=identifier, sid=kwargs['sid']) + kwargs['sid'] = self._assign_sid(identifier) + # Update the metadata object with the new sid + self.insert_metadata(identifier=identifier, sid=kwargs['sid']) # If the file_name is in the kwargs, it may be the symbol try: @@ -329,7 +334,7 @@ class AssetFinder(object): except KeyError: pass - # Build an Asset of the appropriate type + # Build an Asset of the appropriate type, default to Equity asset_type = kwargs.pop('asset_type', 'equity') if asset_type.lower() == 'equity': asset = Equity(**kwargs) @@ -340,7 +345,7 @@ class AssetFinder(object): self.cache[asset.sid] = asset self.identifier_cache[identifier] = asset - if asset.symbol is not "": + if asset.symbol != '': self.sym_cache.setdefault(asset.symbol, []).append(asset) return asset @@ -456,6 +461,9 @@ class AssetFinder(object): # Do not accept Nones if value is None: continue + # Do not accept empty strings + if value == '': + continue # Do not accept nans from dataframes if isinstance(value, float) and np.isnan(value): continue @@ -464,8 +472,21 @@ class AssetFinder(object): self.metadata_cache[identifier] = entry def 
consume_identifiers(self, identifiers): + """ + Consumes the given identifiers into the metadata cache of this + AssetFinder. + """ for identifier in identifiers: - self.insert_metadata(identifier) + # Handle case where full Assets are passed in + # For example, in the creation of a DataFrameSource, the source's + # 'sid' args may be full Assets + if isinstance(identifier, Asset): + sid = identifier.sid + metadata = identifier.to_dict() + metadata['asset_type'] = identifier.__class__.__name__ + self.insert_metadata(identifier=sid, **metadata) + else: + self.insert_metadata(identifier) def consume_metadata(self, metadata): """ diff --git a/zipline/errors.py b/zipline/errors.py index 43a2939f..3632bf18 100644 --- a/zipline/errors.py +++ b/zipline/errors.py @@ -263,6 +263,16 @@ AssetMetaData can not consume {obj}. MetaData must be a dict, a DataFrame, or """.strip() +class SidAssignmentError(ZiplineError): + """ + Raised when an AssetFinder tries to build an Asset that does not have a sid + and that AssetFinder is not permitted to assign sids. + """ + msg = """ +AssetFinder metadata is missing a SID for identifier '{identifier}'. +""".strip() + + class NoSourceError(ZiplineError): """ Raised when no source is given to the pipeline