Files
catalyst/tests/test_assets.py
T

613 lines
21 KiB
Python

#
# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tests for the zipline.assets package
"""
import sys
from unittest import TestCase
from datetime import (
datetime,
timedelta,
)
import pickle
import pprint
import pytz
import uuid
import pandas as pd
from nose_parameterized import parameterized
from zipline.finance.trading import with_environment
from zipline.assets import Asset, Equity, Future, AssetFinder
from zipline.errors import (
SymbolNotFound,
MultipleSymbolsFound,
SidAssignmentError,
)
class FakeTable(object):
def __init__(self, name, count, dt, fuzzy_str):
self.name = name
self.count = count
self.dt = dt
self.fuzzy_str = fuzzy_str
self.df = pd.DataFrame.from_records(
[
{
'sid': i,
'file_name': 'TEST%s%s' % (self.fuzzy_str, i),
'company_name': self.name + str(i),
'start_date_nano': pd.Timestamp(dt, tz='UTC').value,
'end_date_nano': pd.Timestamp(dt, tz='UTC').value,
'exchange': self.name,
}
for i in range(1, self.count + 1)
]
)
def read(self, *args, **kwargs):
return self.df.to_records()
class FakeTableIdenticalSymbols(object):
def __init__(self, name, as_of_dates):
self.name = name
self.as_of_dates = as_of_dates
self.df = pd.DataFrame.from_records(
[
{
'sid': i,
'file_name': self.name,
'company_name': self.name,
'start_date_nano': date.value,
'end_date_nano': (date + timedelta(days=1)).value,
'exchange': self.name,
}
for i, date in enumerate(self.as_of_dates)
]
)
def read(self, *args, **kwargs):
return self.df.to_records()
class FakeTableFromRecords(object):
def __init__(self, records):
self.records = records
self.df = pd.DataFrame.from_records(self.records)
def read(self, *args, **kwargs):
return self.df.to_records()
@with_environment()
def build_lookup_generic_cases(env=None):
"""
Generate test cases for AssetFinder test_lookup_generic.
"""
unique_start = pd.Timestamp('2013-01-01', tz='UTC')
unique_end = pd.Timestamp('2014-01-01', tz='UTC')
dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC')
dupe_0_end = dupe_0_start + timedelta(days=1)
dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC')
dupe_1_end = dupe_1_start + timedelta(days=1)
table = FakeTableFromRecords(
[
{
'sid': 0,
'file_name': 'duplicated',
'company_name': 'duplicated_0',
'start_date_nano': dupe_0_start.value,
'end_date_nano': dupe_0_end.value,
'exchange': '',
},
{
'sid': 1,
'file_name': 'duplicated',
'company_name': 'duplicated_1',
'start_date_nano': dupe_1_start.value,
'end_date_nano': dupe_1_end.value,
'exchange': '',
},
{
'sid': 2,
'file_name': 'unique',
'company_name': 'unique',
'start_date_nano': unique_start.value,
'end_date_nano': unique_end.value,
'exchange': '',
},
],
)
env.update_asset_finder(asset_metadata=table.df)
dupe_0, dupe_1, unique = assets = [
env.asset_finder.retrieve_asset(i)
for i in range(3)
]
# This expansion code is run at module import time, which means we have to
# clear the AssetFinder here or else it will interfere with the cache
# for other tests.
env.update_asset_finder(clear_metadata=True)
dupe_0_start = dupe_0.start_date
dupe_1_start = dupe_1.start_date
cases = [
##
# Scalars
# Asset object
(table, assets[0], None, assets[0]),
(table, assets[1], None, assets[1]),
(table, assets[2], None, assets[2]),
# int
(table, 0, None, assets[0]),
(table, 1, None, assets[1]),
(table, 2, None, assets[2]),
# Duplicated symbol with resolution date
(table, 'duplicated', dupe_0_start, dupe_0),
(table, 'duplicated', dupe_1_start, dupe_1),
# Unique symbol, with or without resolution date.
(table, 'unique', unique_start, unique),
(table, 'unique', None, unique),
##
# Iterables
# Iterables of Asset objects.
(table, assets, None, assets),
(table, iter(assets), None, assets),
# Iterables of ints
(table, (0, 1), None, assets[:-1]),
(table, iter((0, 1)), None, assets[:-1]),
# Iterables of symbols.
(table, ('duplicated', 'unique'), dupe_0_start, [dupe_0, unique]),
(table, ('duplicated', 'unique'), dupe_1_start, [dupe_1, unique]),
# Mixed types
(table,
('duplicated', 2, 'unique', 1, dupe_1),
dupe_0_start,
[dupe_0, assets[2], unique, assets[1], dupe_1]),
]
return cases
class AssetTestCase(TestCase):
def test_asset_object(self):
self.assertEquals({5061: 'foo'}[Asset(5061)], 'foo')
self.assertEquals(Asset(5061), 5061)
self.assertEquals(5061, Asset(5061))
self.assertEquals(Asset(5061), Asset(5061))
self.assertEquals(int(Asset(5061)), 5061)
self.assertEquals(str(Asset(5061)), 'Asset(5061)')
def test_asset_is_pickleable(self):
# Very wow
s = Asset(
1337,
symbol="DOGE",
asset_name="DOGECOIN",
start_date=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
end_date=pd.Timestamp('2014-06-25 11:21AM', tz='UTC'),
first_traded=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
exchange='THE MOON',
)
s_unpickled = pickle.loads(pickle.dumps(s))
attrs_to_check = ['end_date',
'exchange',
'first_traded',
'asset_end_date',
'asset_name',
'asset_start_date',
'sid',
'start_date',
'symbol']
for attr in attrs_to_check:
self.assertEqual(getattr(s, attr), getattr(s_unpickled, attr))
def test_asset_comparisons(self):
s_23 = Asset(23)
s_24 = Asset(24)
self.assertEqual(s_23, s_23)
self.assertEqual(s_23, 23)
self.assertEqual(23, s_23)
self.assertNotEqual(s_23, s_24)
self.assertNotEqual(s_23, 24)
self.assertNotEqual(s_23, "23")
self.assertNotEqual(s_23, 23.5)
self.assertNotEqual(s_23, [])
self.assertNotEqual(s_23, None)
self.assertLess(s_23, s_24)
self.assertLess(s_23, 24)
self.assertGreater(24, s_23)
self.assertGreater(s_24, s_23)
def test_lt(self):
self.assertTrue(Asset(3) < Asset(4))
self.assertFalse(Asset(4) < Asset(4))
self.assertFalse(Asset(5) < Asset(4))
def test_le(self):
self.assertTrue(Asset(3) <= Asset(4))
self.assertTrue(Asset(4) <= Asset(4))
self.assertFalse(Asset(5) <= Asset(4))
def test_eq(self):
self.assertFalse(Asset(3) == Asset(4))
self.assertTrue(Asset(4) == Asset(4))
self.assertFalse(Asset(5) == Asset(4))
def test_ge(self):
self.assertFalse(Asset(3) >= Asset(4))
self.assertTrue(Asset(4) >= Asset(4))
self.assertTrue(Asset(5) >= Asset(4))
def test_gt(self):
self.assertFalse(Asset(3) > Asset(4))
self.assertFalse(Asset(4) > Asset(4))
self.assertTrue(Asset(5) > Asset(4))
def test_type_mismatch(self):
if sys.version_info.major < 3:
self.assertIsNotNone(Asset(3) < 'a')
self.assertIsNotNone('a' < Asset(3))
else:
with self.assertRaises(TypeError):
Asset(3) < 'a'
with self.assertRaises(TypeError):
'a' < Asset(3)
class TestFuture(TestCase):
future = Future(2468,
symbol='OMK15',
notice_date='2014-01-20',
expiration_date='2014-02-20',
contract_multiplier=500)
def test_str(self):
strd = self.future.__str__()
self.assertEqual("Future(2468 [OMK15])", strd)
def test_repr(self):
reprd = self.future.__repr__()
self.assertTrue("Future" in reprd)
self.assertTrue("2468" in reprd)
self.assertTrue("OMK15" in reprd)
self.assertTrue("notice_date='2014-01-20'" in reprd)
self.assertTrue("expiration_date='2014-02-20'" in reprd)
self.assertTrue("contract_multiplier=500" in reprd)
def test_reduce(self):
reduced = self.future.__reduce__()
self.assertEqual(Future, reduced[0])
def test_to_and_from_dict(self):
dictd = self.future.to_dict()
self.assertTrue('notice_date' in dictd)
self.assertTrue('expiration_date' in dictd)
self.assertTrue('contract_multiplier' in dictd)
from_dict = Future.from_dict(dictd)
self.assertTrue(isinstance(from_dict, Future))
self.assertEqual(self.future, from_dict)
class AssetFinderTestCase(TestCase):
@with_environment()
def test_lookup_symbol_fuzzy(self, env=None):
fuzzy_str = '@'
as_of_date = datetime(2013, 1, 1, tzinfo=pytz.utc)
table = FakeTable(uuid.uuid4().hex, 2, as_of_date,
fuzzy_str)
env.update_asset_finder(asset_metadata=table.df)
sf = env.asset_finder
try:
for i in range(2): # we do it twice to test for caching bugs
self.assertIsNone(sf.lookup_symbol('test', as_of_date))
self.assertIsNotNone(sf.lookup_symbol(
'test%s%s' % (fuzzy_str, 1), as_of_date))
self.assertIsNone(sf.lookup_symbol('test%s' % 1, as_of_date))
self.assertIsNone(sf.lookup_symbol(table.name, as_of_date,
fuzzy=fuzzy_str))
self.assertIsNotNone(sf.lookup_symbol(
'test%s%s' % (fuzzy_str, 1), as_of_date, fuzzy=fuzzy_str))
self.assertIsNotNone(sf.lookup_symbol(
'test%s' % 1, as_of_date, fuzzy=fuzzy_str))
finally:
env.update_asset_finder(clear_metadata=True)
@with_environment()
def test_lookup_symbol_resolve_multiple(self, env=None):
as_of_dates = [
pd.Timestamp('2013-01-01', tz='UTC') + timedelta(days=i)
# Incrementing by two so that start and end dates for each
# generated Asset don't overlap (each Asset's end_date is the
# day after its start date.)
for i in range(0, 10, 2)
]
table = FakeTableIdenticalSymbols(
name='existing',
as_of_dates=as_of_dates,
)
env.update_asset_finder(asset_metadata=table.df)
sf = env.asset_finder
try:
for _ in range(2): # we do it twice to test for caching bugs
with self.assertRaises(SymbolNotFound):
sf.lookup_symbol_resolve_multiple('non_existing',
as_of_dates[0])
with self.assertRaises(MultipleSymbolsFound):
sf.lookup_symbol_resolve_multiple('existing',
None)
for i, date in enumerate(as_of_dates):
# Verify that we correctly resolve multiple symbols using
# the supplied date
result = sf.lookup_symbol_resolve_multiple(
'existing',
date,
)
self.assertEqual(result.symbol, 'existing')
self.assertEqual(result.sid, i)
finally:
env.update_asset_finder(clear_metadata=True)
@with_environment()
def test_lookup_symbol_nasdaq_underscore_collisions(self, env=None):
"""
Ensure that each NASDAQ symbol without underscores maps back to the
original symbol when using fuzzy matching.
"""
sf = env.asset_finder
fuzzy_str = '_'
collisions = []
try:
for sid in sf.sids:
sec = sf.retrieve_asset(sid)
if sec.exchange.startswith('NASDAQ'):
found = sf.lookup_symbol(sec.symbol.replace(fuzzy_str, ''),
sec.end_date, fuzzy=fuzzy_str)
if found != sec:
collisions.append((found, sec))
# KNOWN BUG: Filter out assets that have intersections in their
# start and end dates. We can't correctly resolve these.
unexpected_errors = []
for first, second in collisions:
overlapping_dates = (
first.end_date >= second.start_date or
second.end_date >= first.end_date
)
if not overlapping_dates:
unexpected_errors.append((first, second))
self.assertFalse(
unexpected_errors,
pprint.pformat(unexpected_errors),
)
finally:
env.update_asset_finder(clear_metadata=True)
@parameterized.expand(
build_lookup_generic_cases()
)
@with_environment()
def test_lookup_generic(self, table, symbols, reference_date, expected,
env=None):
"""
Ensure that lookup_generic works with various permutations of inputs.
"""
try:
env.update_asset_finder(asset_metadata=table.df)
finder = env.asset_finder
results, missing = finder.lookup_generic(symbols, reference_date)
self.assertEqual(results, expected)
self.assertEqual(missing, [])
finally:
env.update_asset_finder(clear_metadata=True)
@with_environment()
def test_lookup_generic_handle_missing(self, env=None):
try:
table = FakeTableFromRecords(
[
# Sids that will be found when we do lookups.
{
'sid': 0,
'file_name': 'real',
'company_name': 'real',
'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
'exchange': '',
},
{
'sid': 1,
'file_name': 'also_real',
'company_name': 'also_real',
'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
'exchange': '',
},
# Sid whose end date is before our query date. We should
# still correctly find it.
{
'sid': 2,
'file_name': 'real_but_old',
'company_name': 'real_but_old',
'start_date_nano': pd.Timestamp('2002-1-1', tz='UTC'),
'end_date_nano': pd.Timestamp('2003-1-1', tz='UTC'),
'exchange': '',
},
# Sid whose end date is before our query date. We should
# still correctly find it.
{
'sid': 3,
'file_name': 'real_but_in_the_future',
'company_name': 'real_but_in_the_future',
'start_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
'end_date_nano': pd.Timestamp('2020-1-1', tz='UTC'),
'exchange': 'THE FUTURE',
},
]
)
env.update_asset_finder(asset_metadata=table.df)
symbols = [
'real', 1, 'fake', 'real_but_old', 'real_but_in_the_future',
]
results, missing = env.asset_finder.lookup_generic(
symbols,
pd.Timestamp('2013-02-01', tz='UTC'),
)
self.assertEqual(len(results), 3)
self.assertEqual(results[0].symbol, 'real')
self.assertEqual(results[0].sid, 0)
self.assertEqual(results[1].symbol, 'also_real')
self.assertEqual(results[1].sid, 1)
self.assertEqual(len(missing), 2)
self.assertEqual(missing[0], 'fake')
self.assertEqual(missing[1], 'real_but_in_the_future')
finally:
env.update_asset_finder(clear_metadata=True)
def test_insert_metadata(self):
finder = AssetFinder()
finder.insert_metadata(0,
asset_type='equity',
start_date='2014-01-01',
end_date='2015-01-01',
symbol="PLAY",
foo_data="FOO",)
# Test proper insertion
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
self.assertEqual('2015-01-01', finder.metadata_cache[0]['end_date'])
# Test invalid field
self.assertFalse('foo_data' in finder.metadata_cache[0])
# Test updating fields
finder.insert_metadata(0,
asset_type='equity',
start_date='2014-01-01',
end_date='2015-02-01',
symbol="PLAY",
exchange="NYSE",)
self.assertEqual('2015-02-01', finder.metadata_cache[0]['end_date'])
self.assertEqual('NYSE', finder.metadata_cache[0]['exchange'])
# Check that old data survived
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
def test_consume_metadata(self):
# Test dict consumption
finder = AssetFinder({0: {'asset_type': 'equity'}})
dict_to_consume = {0: {'symbol': 'PLAY'},
1: {'symbol': 'MSFT'}}
finder.consume_metadata(dict_to_consume)
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
# Test dataframe consumption
df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
df['asset_name'][0] = "Dave'N'Busters"
df['exchange'][0] = "NASDAQ"
df['asset_name'][1] = "Microsoft"
df['exchange'][1] = "NYSE"
finder.consume_metadata(df)
self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
# Check that old data survived
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
def test_consume_asset_as_identifier(self):
# Build some end dates
eq_end = pd.Timestamp('2012-01-01', tz='UTC')
fut_end = pd.Timestamp('2008-01-01', tz='UTC')
# Build some simple Assets
equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end)
future_asset = Future(200, symbol="TESTFUT", end_date=fut_end)
# Consume the Assets
finder = AssetFinder()
finder.consume_identifiers([equity_asset, future_asset])
finder.populate_cache()
# Test equality with newly built Assets
self.assertEqual(equity_asset, finder.retrieve_asset(1))
self.assertEqual(future_asset, finder.retrieve_asset(200))
self.assertEqual(eq_end, finder.retrieve_asset(1).end_date)
self.assertEqual(fut_end, finder.retrieve_asset(200).end_date)
def test_sid_assignment(self):
# This metadata does not contain SIDs
metadata = {'PLAY': {'symbol': 'PLAY'},
'MSFT': {'symbol': 'MSFT'}}
# Build a finder that is allowed to assign sids
finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)
# Verify that Assets were built
play = finder.retrieve_asset_by_identifier('PLAY')
self.assertEqual('PLAY', play.symbol)
def test_sid_assignment_failure(self):
# This metadata does not contain SIDs
metadata = {'PLAY': {'symbol': 'PLAY'},
'MSFT': {'symbol': 'MSFT'}}
# Build a finder that is not allowed to assign sids, asserting failure
with self.assertRaises(SidAssignmentError):
AssetFinder(metadata=metadata, allow_sid_assignment=False)