From 876b9c74c90d475c989aef54205e4e1c5d47bfa3 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 28 Dec 2016 11:46:43 -0500 Subject: [PATCH 1/3] ENH: Add vectorized lookup_symbol. Currently only supports one as_of date. --- tests/test_assets.py | 71 ++++++++++++++++++++++++++++++++++++++++ zipline/assets/assets.py | 28 ++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/tests/test_assets.py b/tests/test_assets.py index ec317435..557638dd 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -74,6 +74,7 @@ from zipline.errors import ( from zipline.testing import ( all_subindices, empty_assets_db, + parameter_space, tmp_assets_db, ) from zipline.testing.predicates import assert_equal @@ -1475,3 +1476,73 @@ class TestAssetDBVersioning(ZiplineTestCase): )) assert_equal(expected_data, actual_data) + + +class TestVectorizedSymbolLookup(WithAssetFinder, ZiplineTestCase): + + @classmethod + def make_equity_info(cls): + T = partial(pd.Timestamp, tz='UTC') + + def asset(sid, symbol, start_date, end_date): + return dict( + sid=sid, + symbol=symbol, + start_date=T(start_date), + end_date=T(end_date), + exchange='NYSE', + exchange_full='NYSE', + ) + + records = [ + asset(1, 'A', '2014-01-02', '2014-01-31'), + asset(2, 'A', '2014-02-03', '2015-01-02'), + asset(3, 'B', '2014-01-02', '2014-01-15'), + asset(4, 'B', '2014-01-17', '2015-01-02'), + asset(5, 'C', '2001-01-02', '2015-01-02'), + asset(6, 'D', '2001-01-02', '2015-01-02'), + asset(7, 'FUZZY', '2001-01-02', '2015-01-02'), + ] + return pd.DataFrame.from_records(records) + + @parameter_space( + as_of=pd.to_datetime([ + '2014-01-02', + '2014-01-15', + '2014-01-17', + '2015-01-02', + ], utc=True), + symbols=[ + ['A.'], ['B.'], ['C.'], ['D.'], + list('ABCD'), + list('ABCDDCBA'), + list('AABBAABBACABD'), + ], + ) + def test_lookup_symbols(self, as_of, symbols): + af = self.asset_finder + expected = [ + af.lookup_symbol(symbol, as_of) for symbol in symbols + ] + result = af.lookup_symbols(symbols, 
as_of) + assert_equal(result, expected) + + def test_fuzzy(self): + af = self.asset_finder + + # FUZZ.Y shouldn't resolve unless fuzzy=True. + syms = ['A', 'B', 'FUZZ.Y'] + dt = pd.Timestamp('2014-01-15', tz='UTC') + + with self.assertRaises(SymbolNotFound): + af.lookup_symbols(syms, pd.Timestamp('2014-01-15', tz='UTC')) + + with self.assertRaises(SymbolNotFound): + af.lookup_symbols( + syms, + pd.Timestamp('2014-01-15', tz='UTC'), + fuzzy=False, + ) + + results = af.lookup_symbols(syms, dt, fuzzy=True) + assert_equal(results, af.retrieve_all([1, 3, 7])) diff --git a/zipline/assets/assets.py b/zipline/assets/assets.py index 2ffb49b2..89a5274e 100644 --- a/zipline/assets/assets.py +++ b/zipline/assets/assets.py @@ -827,6 +827,34 @@ class AssetFinder(object): return self._lookup_symbol_fuzzy(symbol, as_of_date) return self._lookup_symbol_strict(symbol, as_of_date) + def lookup_symbols(self, symbols, as_of_date, fuzzy=False): + """ + Lookup a list of equities by symbol. + + Parameters + ---------- + symbols : sequence[str] + Sequence of ticker symbols to resolve. + as_of_date : pd.Timestamp + Forwarded to ``lookup_symbol``. + fuzzy : bool, optional + Forwarded to ``lookup_symbol``. + + Returns + ------- + equities : list[Equity] + """ + memo = {} + out = [] + append_output = out.append + for sym in symbols: + if sym in memo: + append_output(memo[sym]) + else: + equity = memo[sym] = self.lookup_symbol(sym, as_of_date, fuzzy) + append_output(equity) + return out + def lookup_future_symbol(self, symbol): """Lookup a future contract by symbol. From 64029a9e7e8100d22fdfef3ffab7cd4b8eee1e75 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 28 Dec 2016 14:40:18 -0500 Subject: [PATCH 2/3] TEST: Tweaks to vectorized symbol tests. - Test against an empty list. - Don't test empty share class lookups. - Add another comprehension test for completeness. 
--- tests/test_assets.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_assets.py b/tests/test_assets.py index 557638dd..2e31b3ec 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -1513,7 +1513,8 @@ class TestVectorizedSymbolLookup(WithAssetFinder, ZiplineTestCase): '2015-01-02', ], utc=True), symbols=[ - ['A.'], ['B.'], ['C.'], ['D.'], + [], + ['A'], ['B'], ['C'], ['D'], list('ABCD'), list('ABCDDCBA'), list('AABBAABBACABD'), @@ -1546,3 +1547,7 @@ class TestVectorizedSymbolLookup(WithAssetFinder, ZiplineTestCase): results = af.lookup_symbols(syms, dt, fuzzy=True) assert_equal(results, af.retrieve_all([1, 3, 7])) + assert_equal( + results, + [af.lookup_symbol(sym, dt, fuzzy=True) for sym in syms], + ) From e9b378fdda07aabeb657001028bc4678e731ad92 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 28 Dec 2016 14:43:15 -0500 Subject: [PATCH 3/3] DOC: Add note on lookup_symbols. --- zipline/assets/assets.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/zipline/assets/assets.py b/zipline/assets/assets.py index 89a5274e..7ce6d0a5 100644 --- a/zipline/assets/assets.py +++ b/zipline/assets/assets.py @@ -831,6 +831,12 @@ class AssetFinder(object): """ Lookup a list of equities by symbol. + Equivalent to:: + + [finder.lookup_symbol(s, as_of, fuzzy) for s in symbols] + + but potentially faster because repeated lookups are memoized. + Parameters ---------- symbols : sequence[str]