mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 19:30:28 +08:00
1609 lines
56 KiB
Python
1609 lines
56 KiB
Python
#
|
|
# Copyright 2015 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Tests for the catalyst.assets package
|
|
"""
|
|
from contextlib import contextmanager
|
|
from datetime import timedelta
|
|
from functools import partial
|
|
import pickle
|
|
import sys
|
|
from types import GetSetDescriptorType
|
|
from unittest import TestCase
|
|
import uuid
|
|
import warnings
|
|
|
|
from nose_parameterized import parameterized
|
|
from numpy import full, int32, int64
|
|
import pandas as pd
|
|
from pandas.util.testing import assert_frame_equal
|
|
from six import PY2, viewkeys
|
|
import sqlalchemy as sa
|
|
|
|
from catalyst.assets import (
|
|
Asset,
|
|
Equity,
|
|
Future,
|
|
AssetDBWriter,
|
|
AssetFinder,
|
|
)
|
|
from catalyst.assets.synthetic import (
|
|
make_commodity_future_info,
|
|
make_rotating_equity_info,
|
|
make_simple_equity_info,
|
|
)
|
|
from six import itervalues, integer_types
|
|
from toolz import valmap
|
|
|
|
from catalyst.assets.asset_writer import (
|
|
check_version_info,
|
|
write_version_info,
|
|
_futures_defaults,
|
|
SQLITE_MAX_VARIABLE_NUMBER,
|
|
)
|
|
from catalyst.assets.asset_db_schema import ASSET_DB_VERSION
|
|
from catalyst.assets.asset_db_migrations import (
|
|
downgrade
|
|
)
|
|
from catalyst.errors import (
|
|
EquitiesNotFound,
|
|
FutureContractsNotFound,
|
|
MultipleSymbolsFound,
|
|
MultipleValuesFoundForField,
|
|
MultipleValuesFoundForSid,
|
|
NoValueForSid,
|
|
AssetDBVersionError,
|
|
SidsNotFound,
|
|
SymbolNotFound,
|
|
AssetDBImpossibleDowngrade,
|
|
ValueNotFoundForField,
|
|
)
|
|
from catalyst.testing import (
|
|
all_subindices,
|
|
empty_assets_db,
|
|
parameter_space,
|
|
tmp_assets_db,
|
|
)
|
|
from catalyst.testing.predicates import assert_equal
|
|
from catalyst.testing.fixtures import (
|
|
WithAssetFinder,
|
|
ZiplineTestCase,
|
|
WithTradingCalendars,
|
|
)
|
|
from catalyst.utils.range import range
|
|
|
|
|
|
@contextmanager
|
|
def build_lookup_generic_cases(asset_finder_type):
|
|
"""
|
|
Generate test cases for the type of asset finder specific by
|
|
asset_finder_type for test_lookup_generic.
|
|
"""
|
|
|
|
unique_start = pd.Timestamp('2013-01-01', tz='UTC')
|
|
unique_end = pd.Timestamp('2014-01-01', tz='UTC')
|
|
|
|
dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC')
|
|
dupe_0_end = dupe_0_start + timedelta(days=1)
|
|
|
|
dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC')
|
|
dupe_1_end = dupe_1_start + timedelta(days=1)
|
|
|
|
equities = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'symbol': 'duplicated',
|
|
'start_date': dupe_0_start.value,
|
|
'end_date': dupe_0_end.value,
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'duplicated',
|
|
'start_date': dupe_1_start.value,
|
|
'end_date': dupe_1_end.value,
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'unique',
|
|
'start_date': unique_start.value,
|
|
'end_date': unique_end.value,
|
|
'exchange': 'TEST',
|
|
},
|
|
],
|
|
index='sid'
|
|
)
|
|
|
|
fof14_sid = 10000
|
|
|
|
futures = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': fof14_sid,
|
|
'symbol': 'FOF14',
|
|
'root_symbol': 'FO',
|
|
'start_date': unique_start.value,
|
|
'end_date': unique_end.value,
|
|
'auto_close_date': unique_end.value,
|
|
'exchange': 'FUT',
|
|
},
|
|
],
|
|
index='sid'
|
|
)
|
|
|
|
root_symbols = pd.DataFrame({
|
|
'root_symbol': ['FO'],
|
|
'root_symbol_id': [1],
|
|
'exchange': ['CME'],
|
|
})
|
|
|
|
with tmp_assets_db(
|
|
equities=equities, futures=futures, root_symbols=root_symbols) \
|
|
as assets_db:
|
|
finder = asset_finder_type(assets_db)
|
|
dupe_0, dupe_1, unique = assets = [
|
|
finder.retrieve_asset(i)
|
|
for i in range(3)
|
|
]
|
|
fof14 = finder.retrieve_asset(fof14_sid)
|
|
cf = finder.create_continuous_future(
|
|
root_symbol=fof14.root_symbol,
|
|
offset=0,
|
|
roll_style='volume',
|
|
adjustment=None,
|
|
)
|
|
|
|
dupe_0_start = dupe_0.start_date
|
|
dupe_1_start = dupe_1.start_date
|
|
yield (
|
|
##
|
|
# Scalars
|
|
|
|
# Asset object
|
|
(finder, assets[0], None, assets[0]),
|
|
(finder, assets[1], None, assets[1]),
|
|
(finder, assets[2], None, assets[2]),
|
|
# int
|
|
(finder, 0, None, assets[0]),
|
|
(finder, 1, None, assets[1]),
|
|
(finder, 2, None, assets[2]),
|
|
# Duplicated symbol with resolution date
|
|
(finder, 'DUPLICATED', dupe_0_start, dupe_0),
|
|
(finder, 'DUPLICATED', dupe_1_start, dupe_1),
|
|
# Unique symbol, with or without resolution date.
|
|
(finder, 'UNIQUE', unique_start, unique),
|
|
(finder, 'UNIQUE', None, unique),
|
|
|
|
# Futures
|
|
(finder, 'FOF14', None, fof14),
|
|
# Future symbols should be unique, but including as_of date
|
|
# make sure that code path is exercised.
|
|
(finder, 'FOF14', unique_start, fof14),
|
|
|
|
# Futures int
|
|
(finder, fof14_sid, None, fof14),
|
|
# Future symbols should be unique, but including as_of date
|
|
# make sure that code path is exercised.
|
|
(finder, fof14_sid, unique_start, fof14),
|
|
|
|
# ContinuousFuture
|
|
(finder, cf, None, cf),
|
|
|
|
##
|
|
# Iterables
|
|
|
|
# Iterables of Asset objects.
|
|
(finder, assets, None, assets),
|
|
(finder, iter(assets), None, assets),
|
|
# Iterables of ints
|
|
(finder, (0, 1), None, assets[:-1]),
|
|
(finder, iter((0, 1)), None, assets[:-1]),
|
|
# Iterables of symbols.
|
|
(finder, ('DUPLICATED', 'UNIQUE'), dupe_0_start, [dupe_0, unique]),
|
|
(finder, ('DUPLICATED', 'UNIQUE'), dupe_1_start, [dupe_1, unique]),
|
|
# Mixed types
|
|
(finder,
|
|
('DUPLICATED', 2, 'UNIQUE', 1, dupe_1),
|
|
dupe_0_start,
|
|
[dupe_0, assets[2], unique, assets[1], dupe_1]),
|
|
|
|
# Futures and Equities
|
|
(finder, ['FOF14', 0], None, [fof14, assets[0]]),
|
|
|
|
# ContinuousFuture and Equity
|
|
(finder, [cf, 0], None, [cf, assets[0]]),
|
|
)
|
|
|
|
|
|
class AssetTestCase(TestCase):
|
|
|
|
# Dynamically list the Asset properties we want to test.
|
|
asset_attrs = [name for name, value in vars(Asset).items()
|
|
if isinstance(value, GetSetDescriptorType)]
|
|
|
|
# Very wow
|
|
asset = Asset(
|
|
1337,
|
|
symbol="DOGE",
|
|
asset_name="DOGECOIN",
|
|
start_date=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
|
|
end_date=pd.Timestamp('2014-06-25 11:21AM', tz='UTC'),
|
|
first_traded=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
|
|
auto_close_date=pd.Timestamp('2014-06-26 11:21AM', tz='UTC'),
|
|
exchange='THE MOON',
|
|
)
|
|
|
|
asset3 = Asset(3, exchange="test")
|
|
asset4 = Asset(4, exchange="test")
|
|
asset5 = Asset(5, exchange="still testing")
|
|
|
|
def test_asset_object(self):
|
|
the_asset = Asset(5061, exchange="bar")
|
|
|
|
self.assertEquals({5061: 'foo'}[the_asset], 'foo')
|
|
self.assertEquals(the_asset, 5061)
|
|
self.assertEquals(5061, the_asset)
|
|
|
|
self.assertEquals(the_asset, the_asset)
|
|
self.assertEquals(int(the_asset), 5061)
|
|
|
|
self.assertEquals(str(the_asset), 'Asset(5061)')
|
|
|
|
def test_to_and_from_dict(self):
|
|
asset_from_dict = Asset.from_dict(self.asset.to_dict())
|
|
for attr in self.asset_attrs:
|
|
self.assertEqual(
|
|
getattr(self.asset, attr), getattr(asset_from_dict, attr),
|
|
)
|
|
|
|
def test_asset_is_pickleable(self):
|
|
asset_unpickled = pickle.loads(pickle.dumps(self.asset))
|
|
for attr in self.asset_attrs:
|
|
self.assertEqual(
|
|
getattr(self.asset, attr), getattr(asset_unpickled, attr),
|
|
)
|
|
|
|
def test_asset_comparisons(self):
|
|
|
|
s_23 = Asset(23, exchange="test")
|
|
s_24 = Asset(24, exchange="test")
|
|
|
|
self.assertEqual(s_23, s_23)
|
|
self.assertEqual(s_23, 23)
|
|
self.assertEqual(23, s_23)
|
|
self.assertEqual(int32(23), s_23)
|
|
self.assertEqual(int64(23), s_23)
|
|
self.assertEqual(s_23, int32(23))
|
|
self.assertEqual(s_23, int64(23))
|
|
# Check all int types (includes long on py2):
|
|
for int_type in integer_types:
|
|
self.assertEqual(int_type(23), s_23)
|
|
self.assertEqual(s_23, int_type(23))
|
|
|
|
self.assertNotEqual(s_23, s_24)
|
|
self.assertNotEqual(s_23, 24)
|
|
self.assertNotEqual(s_23, "23")
|
|
self.assertNotEqual(s_23, 23.5)
|
|
self.assertNotEqual(s_23, [])
|
|
self.assertNotEqual(s_23, None)
|
|
# Compare to a value that doesn't fit into a platform int:
|
|
self.assertNotEqual(s_23, sys.maxsize + 1)
|
|
|
|
self.assertLess(s_23, s_24)
|
|
self.assertLess(s_23, 24)
|
|
self.assertGreater(24, s_23)
|
|
self.assertGreater(s_24, s_23)
|
|
|
|
def test_lt(self):
|
|
self.assertTrue(self.asset3 < self.asset4)
|
|
self.assertFalse(self.asset4 < self.asset4)
|
|
self.assertFalse(self.asset5 < self.asset4)
|
|
|
|
def test_le(self):
|
|
self.assertTrue(self.asset3 <= self.asset4)
|
|
self.assertTrue(self.asset4 <= self.asset4)
|
|
self.assertFalse(self.asset5 <= self.asset4)
|
|
|
|
def test_eq(self):
|
|
self.assertFalse(self.asset3 == self.asset4)
|
|
self.assertTrue(self.asset4 == self.asset4)
|
|
self.assertFalse(self.asset5 == self.asset4)
|
|
|
|
def test_ge(self):
|
|
self.assertFalse(self.asset3 >= self.asset4)
|
|
self.assertTrue(self.asset4 >= self.asset4)
|
|
self.assertTrue(self.asset5 >= self.asset4)
|
|
|
|
def test_gt(self):
|
|
self.assertFalse(self.asset3 > self.asset4)
|
|
self.assertFalse(self.asset4 > self.asset4)
|
|
self.assertTrue(self.asset5 > self.asset4)
|
|
|
|
def test_type_mismatch(self):
|
|
if sys.version_info.major < 3:
|
|
self.assertIsNotNone(self.asset3 < 'a')
|
|
self.assertIsNotNone('a' < self.asset3)
|
|
else:
|
|
with self.assertRaises(TypeError):
|
|
self.asset3 < 'a'
|
|
with self.assertRaises(TypeError):
|
|
'a' < self.asset3
|
|
|
|
|
|
class TestFuture(WithAssetFinder, ZiplineTestCase):
|
|
@classmethod
|
|
def make_futures_info(cls):
|
|
return pd.DataFrame.from_dict(
|
|
{
|
|
2468: {
|
|
'symbol': 'OMH15',
|
|
'root_symbol': 'OM',
|
|
'notice_date': pd.Timestamp('2014-01-20', tz='UTC'),
|
|
'expiration_date': pd.Timestamp('2014-02-20', tz='UTC'),
|
|
'auto_close_date': pd.Timestamp('2014-01-18', tz='UTC'),
|
|
'tick_size': .01,
|
|
'multiplier': 500.0,
|
|
'exchange': "TEST",
|
|
},
|
|
0: {
|
|
'symbol': 'CLG06',
|
|
'root_symbol': 'CL',
|
|
'start_date': pd.Timestamp('2005-12-01', tz='UTC'),
|
|
'notice_date': pd.Timestamp('2005-12-20', tz='UTC'),
|
|
'expiration_date': pd.Timestamp('2006-01-20', tz='UTC'),
|
|
'multiplier': 1.0,
|
|
'exchange': 'TEST',
|
|
},
|
|
},
|
|
orient='index',
|
|
)
|
|
|
|
@classmethod
|
|
def init_class_fixtures(cls):
|
|
super(TestFuture, cls).init_class_fixtures()
|
|
cls.future = cls.asset_finder.lookup_future_symbol('OMH15')
|
|
cls.future2 = cls.asset_finder.lookup_future_symbol('CLG06')
|
|
|
|
def test_str(self):
|
|
strd = str(self.future)
|
|
self.assertEqual("Future(2468 [OMH15])", strd)
|
|
|
|
def test_repr(self):
|
|
reprd = repr(self.future)
|
|
self.assertIn("Future", reprd)
|
|
self.assertIn("2468", reprd)
|
|
self.assertIn("OMH15", reprd)
|
|
self.assertIn("root_symbol=%s'OM'" % ('u' if PY2 else ''), reprd)
|
|
self.assertIn(
|
|
"notice_date=Timestamp('2014-01-20 00:00:00+0000', tz='UTC')",
|
|
reprd,
|
|
)
|
|
self.assertIn(
|
|
"expiration_date=Timestamp('2014-02-20 00:00:00+0000'",
|
|
reprd,
|
|
)
|
|
self.assertIn(
|
|
"auto_close_date=Timestamp('2014-01-18 00:00:00+0000'",
|
|
reprd,
|
|
)
|
|
self.assertIn("tick_size=0.01", reprd)
|
|
self.assertIn("multiplier=500", reprd)
|
|
|
|
def test_reduce(self):
|
|
assert_equal(
|
|
pickle.loads(pickle.dumps(self.future)).to_dict(),
|
|
self.future.to_dict(),
|
|
)
|
|
|
|
def test_to_and_from_dict(self):
|
|
dictd = self.future.to_dict()
|
|
for field in _futures_defaults.keys():
|
|
self.assertTrue(field in dictd)
|
|
|
|
from_dict = Future.from_dict(dictd)
|
|
self.assertTrue(isinstance(from_dict, Future))
|
|
self.assertEqual(self.future, from_dict)
|
|
|
|
def test_root_symbol(self):
|
|
self.assertEqual('OM', self.future.root_symbol)
|
|
|
|
def test_lookup_future_symbol(self):
|
|
"""
|
|
Test the lookup_future_symbol method.
|
|
"""
|
|
om = TestFuture.asset_finder.lookup_future_symbol('OMH15')
|
|
self.assertEqual(om.sid, 2468)
|
|
self.assertEqual(om.symbol, 'OMH15')
|
|
self.assertEqual(om.root_symbol, 'OM')
|
|
self.assertEqual(om.notice_date, pd.Timestamp('2014-01-20', tz='UTC'))
|
|
self.assertEqual(om.expiration_date,
|
|
pd.Timestamp('2014-02-20', tz='UTC'))
|
|
self.assertEqual(om.auto_close_date,
|
|
pd.Timestamp('2014-01-18', tz='UTC'))
|
|
|
|
cl = TestFuture.asset_finder.lookup_future_symbol('CLG06')
|
|
self.assertEqual(cl.sid, 0)
|
|
self.assertEqual(cl.symbol, 'CLG06')
|
|
self.assertEqual(cl.root_symbol, 'CL')
|
|
self.assertEqual(cl.start_date, pd.Timestamp('2005-12-01', tz='UTC'))
|
|
self.assertEqual(cl.notice_date, pd.Timestamp('2005-12-20', tz='UTC'))
|
|
self.assertEqual(cl.expiration_date,
|
|
pd.Timestamp('2006-01-20', tz='UTC'))
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
TestFuture.asset_finder.lookup_future_symbol('')
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
TestFuture.asset_finder.lookup_future_symbol('#&?!')
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
TestFuture.asset_finder.lookup_future_symbol('FOOBAR')
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
TestFuture.asset_finder.lookup_future_symbol('XXX99')
|
|
|
|
|
|
class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase):
|
|
asset_finder_type = AssetFinder
|
|
|
|
def write_assets(self, **kwargs):
|
|
self._asset_writer.write(**kwargs)
|
|
|
|
def init_instance_fixtures(self):
|
|
super(AssetFinderTestCase, self).init_instance_fixtures()
|
|
|
|
conn = self.enter_instance_context(empty_assets_db())
|
|
self._asset_writer = AssetDBWriter(conn)
|
|
self.asset_finder = self.asset_finder_type(conn)
|
|
|
|
def test_blocked_lookup_symbol_query(self):
|
|
# we will try to query for more variables than sqlite supports
|
|
# to make sure we are properly chunking on the client side
|
|
as_of = pd.Timestamp('2013-01-01', tz='UTC')
|
|
# we need more sids than we can query from sqlite
|
|
nsids = SQLITE_MAX_VARIABLE_NUMBER + 10
|
|
sids = range(nsids)
|
|
frame = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': sid,
|
|
'symbol': 'TEST.%d' % sid,
|
|
'start_date': as_of.value,
|
|
'end_date': as_of.value,
|
|
'exchange': uuid.uuid4().hex
|
|
}
|
|
for sid in sids
|
|
]
|
|
)
|
|
self.write_assets(equities=frame)
|
|
assets = self.asset_finder.retrieve_equities(sids)
|
|
assert_equal(viewkeys(assets), set(sids))
|
|
|
|
def test_lookup_symbol_delimited(self):
|
|
as_of = pd.Timestamp('2013-01-01', tz='UTC')
|
|
frame = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': i,
|
|
'symbol': 'TEST.%d' % i,
|
|
'company_name': "company%d" % i,
|
|
'start_date': as_of.value,
|
|
'end_date': as_of.value,
|
|
'exchange': uuid.uuid4().hex
|
|
}
|
|
for i in range(3)
|
|
]
|
|
)
|
|
self.write_assets(equities=frame)
|
|
finder = self.asset_finder
|
|
asset_0, asset_1, asset_2 = (
|
|
finder.retrieve_asset(i) for i in range(3)
|
|
)
|
|
|
|
# we do it twice to catch caching bugs
|
|
for i in range(2):
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('TEST', as_of)
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('TEST1', as_of)
|
|
# '@' is not a supported delimiter
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('TEST@1', as_of)
|
|
|
|
# Adding an unnecessary fuzzy shouldn't matter.
|
|
for fuzzy_char in ['-', '/', '_', '.']:
|
|
self.assertEqual(
|
|
asset_1,
|
|
finder.lookup_symbol('TEST%s1' % fuzzy_char, as_of)
|
|
)
|
|
|
|
def test_lookup_symbol_fuzzy(self):
|
|
metadata = pd.DataFrame.from_records([
|
|
{'symbol': 'PRTY_HRD', 'exchange': "TEST"},
|
|
{'symbol': 'BRKA', 'exchange': "TEST"},
|
|
{'symbol': 'BRK_A', 'exchange': "TEST"},
|
|
])
|
|
self.write_assets(equities=metadata)
|
|
finder = self.asset_finder
|
|
dt = pd.Timestamp('2013-01-01', tz='UTC')
|
|
|
|
# Try combos of looking up PRTYHRD with and without a time or fuzzy
|
|
# Both non-fuzzys get no result
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('PRTYHRD', None)
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('PRTYHRD', dt)
|
|
# Both fuzzys work
|
|
self.assertEqual(0, finder.lookup_symbol('PRTYHRD', None, fuzzy=True))
|
|
self.assertEqual(0, finder.lookup_symbol('PRTYHRD', dt, fuzzy=True))
|
|
|
|
# Try combos of looking up PRTY_HRD, all returning sid 0
|
|
self.assertEqual(0, finder.lookup_symbol('PRTY_HRD', None))
|
|
self.assertEqual(0, finder.lookup_symbol('PRTY_HRD', dt))
|
|
self.assertEqual(0, finder.lookup_symbol('PRTY_HRD', None, fuzzy=True))
|
|
self.assertEqual(0, finder.lookup_symbol('PRTY_HRD', dt, fuzzy=True))
|
|
|
|
# Try combos of looking up BRKA, all returning sid 1
|
|
self.assertEqual(1, finder.lookup_symbol('BRKA', None))
|
|
self.assertEqual(1, finder.lookup_symbol('BRKA', dt))
|
|
self.assertEqual(1, finder.lookup_symbol('BRKA', None, fuzzy=True))
|
|
self.assertEqual(1, finder.lookup_symbol('BRKA', dt, fuzzy=True))
|
|
|
|
# Try combos of looking up BRK_A, all returning sid 2
|
|
self.assertEqual(2, finder.lookup_symbol('BRK_A', None))
|
|
self.assertEqual(2, finder.lookup_symbol('BRK_A', dt))
|
|
self.assertEqual(2, finder.lookup_symbol('BRK_A', None, fuzzy=True))
|
|
self.assertEqual(2, finder.lookup_symbol('BRK_A', dt, fuzzy=True))
|
|
|
|
def test_lookup_symbol_change_ticker(self):
|
|
T = partial(pd.Timestamp, tz='utc')
|
|
metadata = pd.DataFrame.from_records(
|
|
[
|
|
# sid 0
|
|
{
|
|
'symbol': 'A',
|
|
'asset_name': 'Asset A',
|
|
'start_date': T('2014-01-01'),
|
|
'end_date': T('2014-01-05'),
|
|
'exchange': "TEST",
|
|
},
|
|
{
|
|
'symbol': 'B',
|
|
'asset_name': 'Asset B',
|
|
'start_date': T('2014-01-06'),
|
|
'end_date': T('2014-01-10'),
|
|
'exchange': "TEST",
|
|
},
|
|
|
|
# sid 1
|
|
{
|
|
'symbol': 'C',
|
|
'asset_name': 'Asset C',
|
|
'start_date': T('2014-01-01'),
|
|
'end_date': T('2014-01-05'),
|
|
'exchange': "TEST",
|
|
},
|
|
{
|
|
'symbol': 'A', # claiming the unused symbol 'A'
|
|
'asset_name': 'Asset A',
|
|
'start_date': T('2014-01-06'),
|
|
'end_date': T('2014-01-10'),
|
|
'exchange': "TEST",
|
|
},
|
|
],
|
|
index=[0, 0, 1, 1],
|
|
)
|
|
self.write_assets(equities=metadata)
|
|
finder = self.asset_finder
|
|
|
|
# note: these assertions walk forward in time, starting at assertions
|
|
# about ownership before the start_date and ending with assertions
|
|
# after the end_date; new assertions should be inserted in the correct
|
|
# locations
|
|
|
|
# no one held 'A' before 01
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('A', T('2013-12-31'))
|
|
|
|
# no one held 'C' before 01
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('C', T('2013-12-31'))
|
|
|
|
for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
|
|
# from 01 through 05 sid 0 held 'A'
|
|
A_result = finder.lookup_symbol('A', asof)
|
|
assert_equal(
|
|
A_result,
|
|
finder.retrieve_asset(0),
|
|
msg=str(asof),
|
|
)
|
|
# The symbol and asset_name should always be the last held values
|
|
assert_equal(A_result.symbol, 'B')
|
|
assert_equal(A_result.asset_name, 'Asset B')
|
|
|
|
# from 01 through 05 sid 1 held 'C'
|
|
C_result = finder.lookup_symbol('C', asof)
|
|
assert_equal(
|
|
C_result,
|
|
finder.retrieve_asset(1),
|
|
msg=str(asof),
|
|
)
|
|
# The symbol and asset_name should always be the last held values
|
|
assert_equal(C_result.symbol, 'A')
|
|
assert_equal(C_result.asset_name, 'Asset A')
|
|
|
|
# no one held 'B' before 06
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('B', T('2014-01-05'))
|
|
|
|
# no one held 'C' after 06, however, no one has claimed it yet
|
|
# so it still maps to sid 1
|
|
assert_equal(
|
|
finder.lookup_symbol('C', T('2014-01-07')),
|
|
finder.retrieve_asset(1),
|
|
)
|
|
|
|
for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
|
|
# from 06 through 10 sid 0 held 'B'
|
|
# we test through the 11th because sid 1 is the last to hold 'B'
|
|
# so it should ffill
|
|
B_result = finder.lookup_symbol('B', asof)
|
|
assert_equal(
|
|
B_result,
|
|
finder.retrieve_asset(0),
|
|
msg=str(asof),
|
|
)
|
|
assert_equal(B_result.symbol, 'B')
|
|
assert_equal(B_result.asset_name, 'Asset B')
|
|
|
|
# from 06 through 10 sid 1 held 'A'
|
|
# we test through the 11th because sid 1 is the last to hold 'A'
|
|
# so it should ffill
|
|
A_result = finder.lookup_symbol('A', asof)
|
|
assert_equal(
|
|
A_result,
|
|
finder.retrieve_asset(1),
|
|
msg=str(asof),
|
|
)
|
|
assert_equal(A_result.symbol, 'A')
|
|
assert_equal(A_result.asset_name, 'Asset A')
|
|
|
|
def test_lookup_symbol(self):
|
|
|
|
# Incrementing by two so that start and end dates for each
|
|
# generated Asset don't overlap (each Asset's end_date is the
|
|
# day after its start date.)
|
|
dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
|
|
df = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': i,
|
|
'symbol': 'existing',
|
|
'start_date': date.value,
|
|
'end_date': (date + timedelta(days=1)).value,
|
|
'exchange': 'NYSE',
|
|
}
|
|
for i, date in enumerate(dates)
|
|
]
|
|
)
|
|
self.write_assets(equities=df)
|
|
finder = self.asset_finder
|
|
for _ in range(2): # Run checks twice to test for caching bugs.
|
|
with self.assertRaises(SymbolNotFound):
|
|
finder.lookup_symbol('NON_EXISTING', dates[0])
|
|
|
|
with self.assertRaises(MultipleSymbolsFound):
|
|
finder.lookup_symbol('EXISTING', None)
|
|
|
|
for i, date in enumerate(dates):
|
|
# Verify that we correctly resolve multiple symbols using
|
|
# the supplied date
|
|
result = finder.lookup_symbol('EXISTING', date)
|
|
self.assertEqual(result.symbol, 'EXISTING')
|
|
self.assertEqual(result.sid, i)
|
|
|
|
def test_fail_to_write_overlapping_data(self):
|
|
df = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'multiple',
|
|
'start_date': pd.Timestamp('2010-01-01'),
|
|
'end_date': pd.Timestamp('2012-01-01'),
|
|
'exchange': 'NYSE'
|
|
},
|
|
# Same as asset 1, but with a later end date.
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'multiple',
|
|
'start_date': pd.Timestamp('2010-01-01'),
|
|
'end_date': pd.Timestamp('2013-01-01'),
|
|
'exchange': 'NYSE'
|
|
},
|
|
# Same as asset 1, but with a later start_date
|
|
{
|
|
'sid': 3,
|
|
'symbol': 'multiple',
|
|
'start_date': pd.Timestamp('2011-01-01'),
|
|
'end_date': pd.Timestamp('2012-01-01'),
|
|
'exchange': 'NYSE'
|
|
},
|
|
]
|
|
)
|
|
|
|
with self.assertRaises(ValueError) as e:
|
|
self.write_assets(equities=df)
|
|
|
|
self.assertEqual(
|
|
str(e.exception),
|
|
"Ambiguous ownership for 1 symbol, multiple assets held the"
|
|
" following symbols:\n"
|
|
"MULTIPLE:\n"
|
|
" intersections: (('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
|
|
" ('2011-01-01 00:00:00', '2012-01-01 00:00:00'))\n"
|
|
" start_date end_date\n"
|
|
" sid \n"
|
|
" 1 2010-01-01 2012-01-01\n"
|
|
" 2 2010-01-01 2013-01-01\n"
|
|
" 3 2011-01-01 2012-01-01"
|
|
)
|
|
|
|
def test_lookup_generic(self):
|
|
"""
|
|
Ensure that lookup_generic works with various permutations of inputs.
|
|
"""
|
|
with build_lookup_generic_cases(self.asset_finder_type) as cases:
|
|
for finder, symbols, reference_date, expected in cases:
|
|
results, missing = finder.lookup_generic(symbols,
|
|
reference_date)
|
|
self.assertEqual(results, expected)
|
|
self.assertEqual(missing, [])
|
|
|
|
def test_lookup_none_raises(self):
|
|
"""
|
|
If lookup_symbol is vectorized across multiple symbols, and one of them
|
|
is None, want to raise a TypeError.
|
|
"""
|
|
|
|
with self.assertRaises(TypeError):
|
|
self.asset_finder.lookup_symbol(None, pd.Timestamp('2013-01-01'))
|
|
|
|
def test_lookup_mult_are_one(self):
|
|
"""
|
|
Ensure that multiple symbols that return the same sid are collapsed to
|
|
a single returned asset.
|
|
"""
|
|
|
|
date = pd.Timestamp('2013-01-01', tz='UTC')
|
|
|
|
df = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 1,
|
|
'symbol': symbol,
|
|
'start_date': date.value,
|
|
'end_date': (date + timedelta(days=30)).value,
|
|
'exchange': 'NYSE',
|
|
}
|
|
for symbol in ('FOOB', 'FOO_B')
|
|
]
|
|
)
|
|
self.write_assets(equities=df)
|
|
finder = self.asset_finder
|
|
|
|
# If we are able to resolve this with any result, means that we did not
|
|
# raise a MultipleSymbolError.
|
|
result = finder.lookup_symbol('FOO/B', date + timedelta(1), fuzzy=True)
|
|
self.assertEqual(result.sid, 1)
|
|
|
|
def test_endless_multiple_resolves(self):
|
|
"""
|
|
Situation:
|
|
1. Asset 1 w/ symbol FOOB changes to FOO_B, and then is delisted.
|
|
2. Asset 2 is listed with symbol FOO_B.
|
|
|
|
If someone asks for FOO_B with fuzzy matching after 2 has been listed,
|
|
they should be able to correctly get 2.
|
|
"""
|
|
|
|
date = pd.Timestamp('2013-01-01', tz='UTC')
|
|
|
|
df = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'FOOB',
|
|
'start_date': date.value,
|
|
'end_date': date.max.value,
|
|
'exchange': 'NYSE',
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'FOO_B',
|
|
'start_date': (date + timedelta(days=31)).value,
|
|
'end_date': (date + timedelta(days=60)).value,
|
|
'exchange': 'NYSE',
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'FOO_B',
|
|
'start_date': (date + timedelta(days=61)).value,
|
|
'end_date': date.max.value,
|
|
'exchange': 'NYSE',
|
|
},
|
|
|
|
]
|
|
)
|
|
self.write_assets(equities=df)
|
|
finder = self.asset_finder
|
|
|
|
# If we are able to resolve this with any result, means that we did not
|
|
# raise a MultipleSymbolError.
|
|
result = finder.lookup_symbol(
|
|
'FOO/B',
|
|
date + timedelta(days=90),
|
|
fuzzy=True
|
|
)
|
|
self.assertEqual(result.sid, 2)
|
|
|
|
def test_lookup_generic_handle_missing(self):
|
|
data = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'symbol': 'real',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'also_real',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
# Sid whose end date is before our query date. We should
|
|
# still correctly find it.
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'real_but_old',
|
|
'start_date': pd.Timestamp('2002-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2003-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
# Sid whose start_date is **after** our query date. We should
|
|
# **not** find it.
|
|
{
|
|
'sid': 3,
|
|
'symbol': 'real_but_in_the_future',
|
|
'start_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2020-1-1', tz='UTC'),
|
|
'exchange': 'THE FUTURE',
|
|
},
|
|
]
|
|
)
|
|
self.write_assets(equities=data)
|
|
finder = self.asset_finder
|
|
results, missing = finder.lookup_generic(
|
|
['REAL', 1, 'FAKE', 'REAL_BUT_OLD', 'REAL_BUT_IN_THE_FUTURE'],
|
|
pd.Timestamp('2013-02-01', tz='UTC'),
|
|
)
|
|
|
|
self.assertEqual(len(results), 3)
|
|
self.assertEqual(results[0].symbol, 'REAL')
|
|
self.assertEqual(results[0].sid, 0)
|
|
self.assertEqual(results[1].symbol, 'ALSO_REAL')
|
|
self.assertEqual(results[1].sid, 1)
|
|
self.assertEqual(results[2].symbol, 'REAL_BUT_OLD')
|
|
self.assertEqual(results[2].sid, 2)
|
|
|
|
self.assertEqual(len(missing), 2)
|
|
self.assertEqual(missing[0], 'FAKE')
|
|
self.assertEqual(missing[1], 'REAL_BUT_IN_THE_FUTURE')
|
|
|
|
def test_security_dates_warning(self):
|
|
|
|
# Build an asset with an end_date
|
|
eq_end = pd.Timestamp('2012-01-01', tz='UTC')
|
|
equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end,
|
|
exchange="TEST")
|
|
|
|
# Catch all warnings
|
|
with warnings.catch_warnings(record=True) as w:
|
|
# Cause all warnings to always be triggered
|
|
warnings.simplefilter("always")
|
|
equity_asset.security_start_date
|
|
equity_asset.security_end_date
|
|
equity_asset.security_name
|
|
# Verify the warning
|
|
self.assertEqual(3, len(w))
|
|
for warning in w:
|
|
self.assertTrue(issubclass(warning.category,
|
|
DeprecationWarning))
|
|
|
|
def test_map_identifier_index_to_sids(self):
|
|
# Build an empty finder and some Assets
|
|
dt = pd.Timestamp('2014-01-01', tz='UTC')
|
|
finder = self.asset_finder
|
|
asset1 = Equity(1, symbol="AAPL", exchange="TEST")
|
|
asset2 = Equity(2, symbol="GOOG", exchange="TEST")
|
|
asset200 = Future(200, symbol="CLK15", exchange="TEST")
|
|
asset201 = Future(201, symbol="CLM15", exchange="TEST")
|
|
|
|
# Check for correct mapping and types
|
|
pre_map = [asset1, asset2, asset200, asset201]
|
|
post_map = finder.map_identifier_index_to_sids(pre_map, dt)
|
|
self.assertListEqual([1, 2, 200, 201], post_map)
|
|
for sid in post_map:
|
|
self.assertIsInstance(sid, int)
|
|
|
|
# Change order and check mapping again
|
|
pre_map = [asset201, asset2, asset200, asset1]
|
|
post_map = finder.map_identifier_index_to_sids(pre_map, dt)
|
|
self.assertListEqual([201, 2, 200, 1], post_map)
|
|
|
|
def test_compute_lifetimes(self):
|
|
num_assets = 4
|
|
trading_day = self.trading_calendar.day
|
|
first_start = pd.Timestamp('2015-04-01', tz='UTC')
|
|
|
|
frame = make_rotating_equity_info(
|
|
num_assets=num_assets,
|
|
first_start=first_start,
|
|
frequency=trading_day,
|
|
periods_between_starts=3,
|
|
asset_lifetime=5
|
|
)
|
|
self.write_assets(equities=frame)
|
|
finder = self.asset_finder
|
|
|
|
all_dates = pd.date_range(
|
|
start=first_start,
|
|
end=frame.end_date.max(),
|
|
freq=trading_day,
|
|
)
|
|
|
|
for dates in all_subindices(all_dates):
|
|
expected_with_start_raw = full(
|
|
shape=(len(dates), num_assets),
|
|
fill_value=False,
|
|
dtype=bool,
|
|
)
|
|
expected_no_start_raw = full(
|
|
shape=(len(dates), num_assets),
|
|
fill_value=False,
|
|
dtype=bool,
|
|
)
|
|
|
|
for i, date in enumerate(dates):
|
|
it = frame[['start_date', 'end_date']].itertuples()
|
|
for j, start, end in it:
|
|
# This way of doing the checks is redundant, but very
|
|
# clear.
|
|
if start <= date <= end:
|
|
expected_with_start_raw[i, j] = True
|
|
if start < date:
|
|
expected_no_start_raw[i, j] = True
|
|
|
|
expected_with_start = pd.DataFrame(
|
|
data=expected_with_start_raw,
|
|
index=dates,
|
|
columns=frame.index.values,
|
|
)
|
|
result = finder.lifetimes(dates, include_start_date=True)
|
|
assert_frame_equal(result, expected_with_start)
|
|
|
|
expected_no_start = pd.DataFrame(
|
|
data=expected_no_start_raw,
|
|
index=dates,
|
|
columns=frame.index.values,
|
|
)
|
|
result = finder.lifetimes(dates, include_start_date=False)
|
|
assert_frame_equal(result, expected_no_start)
|
|
|
|
def test_sids(self):
|
|
# Ensure that the sids property of the AssetFinder is functioning
|
|
self.write_assets(equities=make_simple_equity_info(
|
|
[0, 1, 2],
|
|
pd.Timestamp('2014-01-01'),
|
|
pd.Timestamp('2014-01-02'),
|
|
))
|
|
self.assertEqual({0, 1, 2}, set(self.asset_finder.sids))
|
|
|
|
def test_lookup_by_supplementary_field(self):
|
|
equities = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'symbol': 'A',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'B',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'C',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
]
|
|
)
|
|
|
|
equity_supplementary_mappings = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'field': 'ALT_ID',
|
|
'value': '100000000',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2013-6-28', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'field': 'ALT_ID',
|
|
'value': '100000001',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 0,
|
|
'field': 'ALT_ID',
|
|
'value': '100000002',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'field': 'ALT_ID',
|
|
'value': '100000000',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
]
|
|
)
|
|
|
|
self.write_assets(
|
|
equities=equities,
|
|
equity_supplementary_mappings=equity_supplementary_mappings,
|
|
)
|
|
|
|
af = self.asset_finder
|
|
|
|
# Before sid 0 has changed ALT_ID.
|
|
dt = pd.Timestamp('2013-6-28', tz='UTC')
|
|
|
|
asset_0 = af.lookup_by_supplementary_field('ALT_ID', '100000000', dt)
|
|
self.assertEqual(asset_0.sid, 0)
|
|
|
|
asset_1 = af.lookup_by_supplementary_field('ALT_ID', '100000001', dt)
|
|
self.assertEqual(asset_1.sid, 1)
|
|
|
|
# We don't know about this ALT_ID yet.
|
|
with self.assertRaisesRegexp(
|
|
ValueNotFoundForField,
|
|
"Value '{}' was not found for field '{}'.".format(
|
|
'100000002',
|
|
'ALT_ID',
|
|
)
|
|
):
|
|
af.lookup_by_supplementary_field('ALT_ID', '100000002', dt)
|
|
|
|
# After all assets have ended.
|
|
dt = pd.Timestamp('2014-01-02', tz='UTC')
|
|
|
|
asset_2 = af.lookup_by_supplementary_field('ALT_ID', '100000000', dt)
|
|
self.assertEqual(asset_2.sid, 2)
|
|
|
|
asset_1 = af.lookup_by_supplementary_field('ALT_ID', '100000001', dt)
|
|
self.assertEqual(asset_1.sid, 1)
|
|
|
|
asset_0 = af.lookup_by_supplementary_field('ALT_ID', '100000002', dt)
|
|
self.assertEqual(asset_0.sid, 0)
|
|
|
|
# At this point both sids 0 and 2 have held this value, so an
|
|
# as_of_date is required.
|
|
expected_in_repr = (
|
|
"Multiple occurrences of the value '{}' found for field '{}'."
|
|
).format('100000000', 'ALT_ID')
|
|
|
|
with self.assertRaisesRegexp(
|
|
MultipleValuesFoundForField,
|
|
expected_in_repr,
|
|
):
|
|
af.lookup_by_supplementary_field('ALT_ID', '100000000', None)
|
|
|
|
def test_get_supplementary_field(self):
|
|
equities = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'symbol': 'A',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'symbol': 'B',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'symbol': 'C',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
'exchange': 'TEST',
|
|
},
|
|
]
|
|
)
|
|
|
|
equity_supplementary_mappings = pd.DataFrame.from_records(
|
|
[
|
|
{
|
|
'sid': 0,
|
|
'field': 'ALT_ID',
|
|
'value': '100000000',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2013-6-28', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 1,
|
|
'field': 'ALT_ID',
|
|
'value': '100000001',
|
|
'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 0,
|
|
'field': 'ALT_ID',
|
|
'value': '100000002',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
{
|
|
'sid': 2,
|
|
'field': 'ALT_ID',
|
|
'value': '100000000',
|
|
'start_date': pd.Timestamp('2013-7-1', tz='UTC'),
|
|
'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
|
|
},
|
|
]
|
|
)
|
|
|
|
self.write_assets(
|
|
equities=equities,
|
|
equity_supplementary_mappings=equity_supplementary_mappings,
|
|
)
|
|
finder = self.asset_finder
|
|
|
|
# Before sid 0 has changed ALT_ID and sid 2 has started.
|
|
dt = pd.Timestamp('2013-6-28', tz='UTC')
|
|
|
|
for sid, expected in [(0, '100000000'), (1, '100000001')]:
|
|
self.assertEqual(
|
|
finder.get_supplementary_field(sid, 'ALT_ID', dt),
|
|
expected,
|
|
)
|
|
|
|
# Since sid 2 has not yet started, we don't know about its
|
|
# ALT_ID.
|
|
with self.assertRaisesRegexp(
|
|
NoValueForSid,
|
|
"No '{}' value found for sid '{}'.".format('ALT_ID', 2),
|
|
):
|
|
finder.get_supplementary_field(2, 'ALT_ID', dt),
|
|
|
|
# After all assets have ended.
|
|
dt = pd.Timestamp('2014-01-02', tz='UTC')
|
|
|
|
for sid, expected in [
|
|
(0, '100000002'), (1, '100000001'), (2, '100000000'),
|
|
]:
|
|
self.assertEqual(
|
|
finder.get_supplementary_field(sid, 'ALT_ID', dt),
|
|
expected,
|
|
)
|
|
|
|
# Sid 0 has historically held two values for ALT_ID by this dt.
|
|
with self.assertRaisesRegexp(
|
|
MultipleValuesFoundForSid,
|
|
"Multiple '{}' values found for sid '{}'.".format('ALT_ID', 0),
|
|
):
|
|
finder.get_supplementary_field(0, 'ALT_ID', None),
|
|
|
|
def test_group_by_type(self):
|
|
equities = make_simple_equity_info(
|
|
range(5),
|
|
start_date=pd.Timestamp('2014-01-01'),
|
|
end_date=pd.Timestamp('2015-01-01'),
|
|
)
|
|
futures = make_commodity_future_info(
|
|
first_sid=6,
|
|
root_symbols=['CL'],
|
|
years=[2014],
|
|
)
|
|
# Intersecting sid queries, to exercise loading of partially-cached
|
|
# results.
|
|
queries = [
|
|
([0, 1, 3], [6, 7]),
|
|
([0, 2, 3], [7, 10]),
|
|
(list(equities.index), list(futures.index)),
|
|
]
|
|
self.write_assets(
|
|
equities=equities,
|
|
futures=futures,
|
|
)
|
|
finder = self.asset_finder
|
|
for equity_sids, future_sids in queries:
|
|
results = finder.group_by_type(equity_sids + future_sids)
|
|
self.assertEqual(
|
|
results,
|
|
{'equity': set(equity_sids), 'future': set(future_sids)},
|
|
)
|
|
|
|
@parameterized.expand([
|
|
(Equity, 'retrieve_equities', EquitiesNotFound),
|
|
(Future, 'retrieve_futures_contracts', FutureContractsNotFound),
|
|
])
|
|
def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
|
|
equities = make_simple_equity_info(
|
|
range(5),
|
|
start_date=pd.Timestamp('2014-01-01'),
|
|
end_date=pd.Timestamp('2015-01-01'),
|
|
)
|
|
max_equity = equities.index.max()
|
|
futures = make_commodity_future_info(
|
|
first_sid=max_equity + 1,
|
|
root_symbols=['CL'],
|
|
years=[2014],
|
|
)
|
|
equity_sids = [0, 1]
|
|
future_sids = [max_equity + 1, max_equity + 2, max_equity + 3]
|
|
if type_ == Equity:
|
|
success_sids = equity_sids
|
|
fail_sids = future_sids
|
|
else:
|
|
fail_sids = equity_sids
|
|
success_sids = future_sids
|
|
|
|
self.write_assets(
|
|
equities=equities,
|
|
futures=futures,
|
|
)
|
|
finder = self.asset_finder
|
|
# Run twice to exercise caching.
|
|
lookup = getattr(finder, lookup_name)
|
|
for _ in range(2):
|
|
results = lookup(success_sids)
|
|
self.assertIsInstance(results, dict)
|
|
self.assertEqual(set(results.keys()), set(success_sids))
|
|
self.assertEqual(
|
|
valmap(int, results),
|
|
dict(zip(success_sids, success_sids)),
|
|
)
|
|
self.assertEqual(
|
|
{type_},
|
|
{type(asset) for asset in itervalues(results)},
|
|
)
|
|
with self.assertRaises(failure_type):
|
|
lookup(fail_sids)
|
|
with self.assertRaises(failure_type):
|
|
# Should fail if **any** of the assets are bad.
|
|
lookup([success_sids[0], fail_sids[0]])
|
|
|
|
def test_retrieve_all(self):
|
|
equities = make_simple_equity_info(
|
|
range(5),
|
|
start_date=pd.Timestamp('2014-01-01'),
|
|
end_date=pd.Timestamp('2015-01-01'),
|
|
)
|
|
max_equity = equities.index.max()
|
|
futures = make_commodity_future_info(
|
|
first_sid=max_equity + 1,
|
|
root_symbols=['CL'],
|
|
years=[2014],
|
|
)
|
|
self.write_assets(
|
|
equities=equities,
|
|
futures=futures,
|
|
)
|
|
finder = self.asset_finder
|
|
all_sids = finder.sids
|
|
self.assertEqual(len(all_sids), len(equities) + len(futures))
|
|
queries = [
|
|
# Empty Query.
|
|
(),
|
|
# Only Equities.
|
|
tuple(equities.index[:2]),
|
|
# Only Futures.
|
|
tuple(futures.index[:3]),
|
|
# Mixed, all cache misses.
|
|
tuple(equities.index[2:]) + tuple(futures.index[3:]),
|
|
# Mixed, all cache hits.
|
|
tuple(equities.index[2:]) + tuple(futures.index[3:]),
|
|
# Everything.
|
|
all_sids,
|
|
all_sids,
|
|
]
|
|
for sids in queries:
|
|
equity_sids = [i for i in sids if i <= max_equity]
|
|
future_sids = [i for i in sids if i > max_equity]
|
|
results = finder.retrieve_all(sids)
|
|
self.assertEqual(sids, tuple(map(int, results)))
|
|
|
|
self.assertEqual(
|
|
[Equity for _ in equity_sids] +
|
|
[Future for _ in future_sids],
|
|
list(map(type, results)),
|
|
)
|
|
self.assertEqual(
|
|
(
|
|
list(equities.symbol.loc[equity_sids]) +
|
|
list(futures.symbol.loc[future_sids])
|
|
),
|
|
list(asset.symbol for asset in results),
|
|
)
|
|
|
|
@parameterized.expand([
|
|
(EquitiesNotFound, 'equity', 'equities'),
|
|
(FutureContractsNotFound, 'future contract', 'future contracts'),
|
|
(SidsNotFound, 'asset', 'assets'),
|
|
])
|
|
def test_error_message_plurality(self,
|
|
error_type,
|
|
singular,
|
|
plural):
|
|
try:
|
|
raise error_type(sids=[1])
|
|
except error_type as e:
|
|
self.assertEqual(
|
|
str(e),
|
|
"No {singular} found for sid: 1.".format(singular=singular)
|
|
)
|
|
try:
|
|
raise error_type(sids=[1, 2])
|
|
except error_type as e:
|
|
self.assertEqual(
|
|
str(e),
|
|
"No {plural} found for sids: [1, 2].".format(plural=plural)
|
|
)
|
|
|
|
|
|
class TestAssetDBVersioning(ZiplineTestCase):
|
|
|
|
def init_instance_fixtures(self):
|
|
super(TestAssetDBVersioning, self).init_instance_fixtures()
|
|
self.engine = eng = self.enter_instance_context(empty_assets_db())
|
|
self.metadata = sa.MetaData(eng, reflect=True)
|
|
|
|
def test_check_version(self):
|
|
version_table = self.metadata.tables['version_info']
|
|
|
|
# This should not raise an error
|
|
check_version_info(self.engine, version_table, ASSET_DB_VERSION)
|
|
|
|
# This should fail because the version is too low
|
|
with self.assertRaises(AssetDBVersionError):
|
|
check_version_info(
|
|
self.engine,
|
|
version_table,
|
|
ASSET_DB_VERSION - 1,
|
|
)
|
|
|
|
# This should fail because the version is too high
|
|
with self.assertRaises(AssetDBVersionError):
|
|
check_version_info(
|
|
self.engine,
|
|
version_table,
|
|
ASSET_DB_VERSION + 1,
|
|
)
|
|
|
|
def test_write_version(self):
|
|
version_table = self.metadata.tables['version_info']
|
|
version_table.delete().execute()
|
|
|
|
# Assert that the version is not present in the table
|
|
self.assertIsNone(sa.select((version_table.c.version,)).scalar())
|
|
|
|
# This should fail because the table has no version info and is,
|
|
# therefore, consdered v0
|
|
with self.assertRaises(AssetDBVersionError):
|
|
check_version_info(self.engine, version_table, -2)
|
|
|
|
# This should not raise an error because the version has been written
|
|
write_version_info(self.engine, version_table, -2)
|
|
check_version_info(self.engine, version_table, -2)
|
|
|
|
# Assert that the version is in the table and correct
|
|
self.assertEqual(sa.select((version_table.c.version,)).scalar(), -2)
|
|
|
|
# Assert that trying to overwrite the version fails
|
|
with self.assertRaises(sa.exc.IntegrityError):
|
|
write_version_info(self.engine, version_table, -3)
|
|
|
|
def test_finder_checks_version(self):
|
|
version_table = self.metadata.tables['version_info']
|
|
version_table.delete().execute()
|
|
write_version_info(self.engine, version_table, -2)
|
|
check_version_info(self.engine, version_table, -2)
|
|
|
|
# Assert that trying to build a finder with a bad db raises an error
|
|
with self.assertRaises(AssetDBVersionError):
|
|
AssetFinder(engine=self.engine)
|
|
|
|
# Change the version number of the db to the correct version
|
|
version_table.delete().execute()
|
|
write_version_info(self.engine, version_table, ASSET_DB_VERSION)
|
|
check_version_info(self.engine, version_table, ASSET_DB_VERSION)
|
|
|
|
# Now that the versions match, this Finder should succeed
|
|
AssetFinder(engine=self.engine)
|
|
|
|
def test_downgrade(self):
|
|
# Attempt to downgrade a current assets db all the way down to v0
|
|
conn = self.engine.connect()
|
|
|
|
# first downgrade to v3
|
|
downgrade(self.engine, 3)
|
|
metadata = sa.MetaData(conn)
|
|
metadata.reflect()
|
|
check_version_info(conn, metadata.tables['version_info'], 3)
|
|
self.assertFalse('exchange_full' in metadata.tables)
|
|
|
|
# now go all the way to v0
|
|
downgrade(self.engine, 0)
|
|
|
|
# Verify that the db version is now 0
|
|
metadata = sa.MetaData(conn)
|
|
metadata.reflect()
|
|
version_table = metadata.tables['version_info']
|
|
check_version_info(conn, version_table, 0)
|
|
|
|
# Check some of the v1-to-v0 downgrades
|
|
self.assertTrue('futures_contracts' in metadata.tables)
|
|
self.assertTrue('version_info' in metadata.tables)
|
|
self.assertFalse('tick_size' in
|
|
metadata.tables['futures_contracts'].columns)
|
|
self.assertTrue('contract_multiplier' in
|
|
metadata.tables['futures_contracts'].columns)
|
|
|
|
def test_impossible_downgrade(self):
|
|
# Attempt to downgrade a current assets db to a
|
|
# higher-than-current version
|
|
with self.assertRaises(AssetDBImpossibleDowngrade):
|
|
downgrade(self.engine, ASSET_DB_VERSION + 5)
|
|
|
|
def test_v5_to_v4_selects_most_recent_ticker(self):
|
|
T = pd.Timestamp
|
|
AssetDBWriter(self.engine).write(
|
|
equities=pd.DataFrame(
|
|
[['A', 'A', T('2014-01-01'), T('2014-01-02')],
|
|
['B', 'B', T('2014-01-01'), T('2014-01-02')],
|
|
# these two are both ticker sid 2
|
|
['B', 'C', T('2014-01-03'), T('2014-01-04')],
|
|
['C', 'C', T('2014-01-01'), T('2014-01-02')]],
|
|
index=[0, 1, 2, 2],
|
|
columns=['symbol', 'asset_name', 'start_date', 'end_date'],
|
|
),
|
|
)
|
|
|
|
downgrade(self.engine, 4)
|
|
metadata = sa.MetaData(self.engine)
|
|
metadata.reflect()
|
|
|
|
def select_fields(r):
|
|
return r.sid, r.symbol, r.asset_name, r.start_date, r.end_date
|
|
|
|
expected_data = {
|
|
(0, 'A', 'A', T('2014-01-01').value, T('2014-01-02').value),
|
|
(1, 'B', 'B', T('2014-01-01').value, T('2014-01-02').value),
|
|
(2, 'B', 'C', T('2014-01-01').value, T('2014-01-04').value),
|
|
}
|
|
actual_data = set(map(
|
|
select_fields,
|
|
sa.select(metadata.tables['equities'].c).execute(),
|
|
))
|
|
|
|
assert_equal(expected_data, actual_data)
|
|
|
|
|
|
class TestVectorizedSymbolLookup(WithAssetFinder, ZiplineTestCase):
|
|
|
|
@classmethod
|
|
def make_equity_info(cls):
|
|
T = partial(pd.Timestamp, tz='UTC')
|
|
|
|
def asset(sid, symbol, start_date, end_date):
|
|
return dict(
|
|
sid=sid,
|
|
symbol=symbol,
|
|
start_date=T(start_date),
|
|
end_date=T(end_date),
|
|
exchange='NYSE',
|
|
exchange_full='NYSE',
|
|
)
|
|
|
|
records = [
|
|
asset(1, 'A', '2014-01-02', '2014-01-31'),
|
|
asset(2, 'A', '2014-02-03', '2015-01-02'),
|
|
asset(3, 'B', '2014-01-02', '2014-01-15'),
|
|
asset(4, 'B', '2014-01-17', '2015-01-02'),
|
|
asset(5, 'C', '2001-01-02', '2015-01-02'),
|
|
asset(6, 'D', '2001-01-02', '2015-01-02'),
|
|
asset(7, 'FUZZY', '2001-01-02', '2015-01-02'),
|
|
]
|
|
return pd.DataFrame.from_records(records)
|
|
|
|
@parameter_space(
|
|
as_of=pd.to_datetime([
|
|
'2014-01-02',
|
|
'2014-01-15',
|
|
'2014-01-17',
|
|
'2015-01-02',
|
|
], utc=True),
|
|
symbols=[
|
|
[],
|
|
['A'], ['B'], ['C'], ['D'],
|
|
list('ABCD'),
|
|
list('ABCDDCBA'),
|
|
list('AABBAABBACABD'),
|
|
],
|
|
)
|
|
def test_lookup_symbols(self, as_of, symbols):
|
|
af = self.asset_finder
|
|
expected = [
|
|
af.lookup_symbol(symbol, as_of) for symbol in symbols
|
|
]
|
|
result = af.lookup_symbols(symbols, as_of)
|
|
assert_equal(result, expected)
|
|
|
|
def test_fuzzy(self):
|
|
af = self.asset_finder
|
|
|
|
# FUZZ.Y shouldn't resolve unless fuzzy=True.
|
|
syms = ['A', 'B', 'FUZZ.Y']
|
|
dt = pd.Timestamp('2014-01-15', tz='UTC')
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
af.lookup_symbols(syms, pd.Timestamp('2014-01-15', tz='UTC'))
|
|
|
|
with self.assertRaises(SymbolNotFound):
|
|
af.lookup_symbols(
|
|
syms,
|
|
pd.Timestamp('2014-01-15', tz='UTC'),
|
|
fuzzy=False,
|
|
)
|
|
|
|
results = af.lookup_symbols(syms, dt, fuzzy=True)
|
|
assert_equal(results, af.retrieve_all([1, 3, 7]))
|
|
assert_equal(
|
|
results,
|
|
[af.lookup_symbol(sym, dt, fuzzy=True) for sym in syms],
|
|
)
|