mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 17:05:35 +08:00
ENH: Adds AssetFinder and asset metadata management
This commit is contained in:
+489
-11
@@ -20,7 +20,177 @@ Tests for the zipline.assets package
|
||||
import sys
|
||||
from unittest import TestCase
|
||||
|
||||
from zipline.assets._assets import Asset, Future
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import pickle
|
||||
import pprint
|
||||
import pytz
|
||||
import uuid
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from nose_parameterized import parameterized
|
||||
|
||||
from zipline.finance.trading import with_environment
|
||||
from zipline.assets import Asset, Future, AssetFinder
|
||||
from zipline.errors import (
|
||||
SymbolNotFound,
|
||||
MultipleSymbolsFound,
|
||||
)
|
||||
|
||||
|
||||
class FakeTable(object):
|
||||
def __init__(self, name, count, dt, fuzzy_str):
|
||||
self.name = name
|
||||
self.count = count
|
||||
self.dt = dt
|
||||
self.fuzzy_str = fuzzy_str
|
||||
self.df = pd.DataFrame.from_records(
|
||||
[
|
||||
{
|
||||
'sid': i,
|
||||
'file_name': 'TEST%s%s' % (self.fuzzy_str, i),
|
||||
'company_name': self.name + str(i),
|
||||
'start_date_nano': pd.Timestamp(dt, tz='UTC').value,
|
||||
'end_date_nano': pd.Timestamp(dt, tz='UTC').value,
|
||||
'exchange': self.name,
|
||||
}
|
||||
for i in range(1, self.count + 1)
|
||||
]
|
||||
)
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
return self.df.to_records()
|
||||
|
||||
|
||||
class FakeTableIdenticalSymbols(object):
|
||||
def __init__(self, name, as_of_dates):
|
||||
self.name = name
|
||||
self.as_of_dates = as_of_dates
|
||||
self.df = pd.DataFrame.from_records(
|
||||
[
|
||||
{
|
||||
'sid': i,
|
||||
'file_name': self.name,
|
||||
'company_name': self.name,
|
||||
'start_date_nano': date.value,
|
||||
'end_date_nano': (date + timedelta(days=1)).value,
|
||||
'exchange': self.name,
|
||||
}
|
||||
for i, date in enumerate(self.as_of_dates)
|
||||
]
|
||||
)
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
return self.df.to_records()
|
||||
|
||||
|
||||
class FakeTableFromRecords(object):
|
||||
|
||||
def __init__(self, records):
|
||||
self.records = records
|
||||
self.df = pd.DataFrame.from_records(self.records)
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
return self.df.to_records()
|
||||
|
||||
|
||||
@with_environment()
|
||||
def build_lookup_generic_cases(env=None):
|
||||
"""
|
||||
Generate test cases for AssetFinder test_lookup_generic.
|
||||
"""
|
||||
|
||||
unique_start = pd.Timestamp('2013-01-01', tz='UTC')
|
||||
unique_end = pd.Timestamp('2014-01-01', tz='UTC')
|
||||
|
||||
dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC')
|
||||
dupe_0_end = dupe_0_start + timedelta(days=1)
|
||||
|
||||
dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC')
|
||||
dupe_1_end = dupe_1_start + timedelta(days=1)
|
||||
|
||||
table = FakeTableFromRecords(
|
||||
[
|
||||
{
|
||||
'sid': 0,
|
||||
'file_name': 'duplicated',
|
||||
'company_name': 'duplicated_0',
|
||||
'start_date_nano': dupe_0_start.value,
|
||||
'end_date_nano': dupe_0_end.value,
|
||||
'exchange': '',
|
||||
},
|
||||
{
|
||||
'sid': 1,
|
||||
'file_name': 'duplicated',
|
||||
'company_name': 'duplicated_1',
|
||||
'start_date_nano': dupe_1_start.value,
|
||||
'end_date_nano': dupe_1_end.value,
|
||||
'exchange': '',
|
||||
},
|
||||
{
|
||||
'sid': 2,
|
||||
'file_name': 'unique',
|
||||
'company_name': 'unique',
|
||||
'start_date_nano': unique_start.value,
|
||||
'end_date_nano': unique_end.value,
|
||||
'exchange': '',
|
||||
},
|
||||
],
|
||||
)
|
||||
env.update_asset_finder(asset_metadata=table.df)
|
||||
dupe_0, dupe_1, unique = assets = [
|
||||
env.asset_finder.retrieve_asset(i)
|
||||
for i in range(3)
|
||||
]
|
||||
|
||||
# This expansion code is run at module import time, which means we have to
|
||||
# clear the AssetFinder here or else it will interfere with the cache
|
||||
# for other tests.
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
dupe_0_start = dupe_0.start_date
|
||||
dupe_1_start = dupe_1.start_date
|
||||
cases = [
|
||||
##
|
||||
# Scalars
|
||||
|
||||
# Asset object
|
||||
(table, assets[0], None, assets[0]),
|
||||
(table, assets[1], None, assets[1]),
|
||||
(table, assets[2], None, assets[2]),
|
||||
# int
|
||||
(table, 0, None, assets[0]),
|
||||
(table, 1, None, assets[1]),
|
||||
(table, 2, None, assets[2]),
|
||||
# Duplicated symbol with resolution date
|
||||
(table, 'duplicated', dupe_0_start, dupe_0),
|
||||
(table, 'duplicated', dupe_1_start, dupe_1),
|
||||
# Unique symbol, with or without resolution date.
|
||||
(table, 'unique', unique_start, unique),
|
||||
(table, 'unique', None, unique),
|
||||
|
||||
##
|
||||
# Iterables
|
||||
|
||||
# Iterables of Asset objects.
|
||||
(table, assets, None, assets),
|
||||
(table, iter(assets), None, assets),
|
||||
# Iterables of ints
|
||||
(table, (0, 1), None, assets[:-1]),
|
||||
(table, iter((0, 1)), None, assets[:-1]),
|
||||
# Iterables of symbols.
|
||||
(table, ('duplicated', 'unique'), dupe_0_start, [dupe_0, unique]),
|
||||
(table, ('duplicated', 'unique'), dupe_1_start, [dupe_1, unique]),
|
||||
# Mixed types
|
||||
(table,
|
||||
('duplicated', 2, 'unique', 1, dupe_1),
|
||||
dupe_0_start,
|
||||
[dupe_0, assets[2], unique, assets[1], dupe_1]),
|
||||
]
|
||||
return cases
|
||||
|
||||
|
||||
class AssetTestCase(TestCase):
|
||||
@@ -35,8 +205,53 @@ class AssetTestCase(TestCase):
|
||||
|
||||
self.assertEquals(str(Asset(5061)), 'Asset(5061)')
|
||||
|
||||
def test_asset_is_pickleable(self):
|
||||
|
||||
class TestAssetRichCmp(TestCase):
|
||||
# Very wow
|
||||
s = Asset(
|
||||
1337,
|
||||
symbol="DOGE",
|
||||
asset_name="DOGECOIN",
|
||||
start_date=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
|
||||
end_date=pd.Timestamp('2014-06-25 11:21AM', tz='UTC'),
|
||||
first_traded=pd.Timestamp('2013-12-08 9:31AM', tz='UTC'),
|
||||
exchange='THE MOON',
|
||||
)
|
||||
s_unpickled = pickle.loads(pickle.dumps(s))
|
||||
|
||||
attrs_to_check = ['end_date',
|
||||
'exchange',
|
||||
'first_traded',
|
||||
'asset_end_date',
|
||||
'asset_name',
|
||||
'asset_start_date',
|
||||
'sid',
|
||||
'start_date',
|
||||
'symbol']
|
||||
|
||||
for attr in attrs_to_check:
|
||||
self.assertEqual(getattr(s, attr), getattr(s_unpickled, attr))
|
||||
|
||||
def test_asset_comparisons(self):
|
||||
|
||||
s_23 = Asset(23)
|
||||
s_24 = Asset(24)
|
||||
|
||||
self.assertEqual(s_23, s_23)
|
||||
self.assertEqual(s_23, 23)
|
||||
self.assertEqual(23, s_23)
|
||||
|
||||
self.assertNotEqual(s_23, s_24)
|
||||
self.assertNotEqual(s_23, 24)
|
||||
self.assertNotEqual(s_23, "23")
|
||||
self.assertNotEqual(s_23, 23.5)
|
||||
self.assertNotEqual(s_23, [])
|
||||
self.assertNotEqual(s_23, None)
|
||||
|
||||
self.assertLess(s_23, s_24)
|
||||
self.assertLess(s_23, 24)
|
||||
self.assertGreater(24, s_23)
|
||||
self.assertGreater(s_24, s_23)
|
||||
|
||||
def test_lt(self):
|
||||
self.assertTrue(Asset(3) < Asset(4))
|
||||
@@ -74,16 +289,279 @@ class TestAssetRichCmp(TestCase):
|
||||
'a' < Asset(3)
|
||||
|
||||
|
||||
class testFuture(TestCase):
|
||||
class TestFuture(TestCase):
|
||||
|
||||
future = Future(2468,
|
||||
symbol='OMK15',
|
||||
notice_date='2014-01-20',
|
||||
expiration_date='2014-02-20',
|
||||
contract_multiplier=500)
|
||||
|
||||
def test_str(self):
|
||||
strd = self.future.__str__()
|
||||
self.assertEqual("Future(2468 [OMK15])", strd)
|
||||
|
||||
def test_repr(self):
|
||||
reprd = self.future.__repr__()
|
||||
self.assertTrue("Future" in reprd)
|
||||
self.assertTrue("2468" in reprd)
|
||||
self.assertTrue("OMK15" in reprd)
|
||||
self.assertTrue("notice_date='2014-01-20'" in reprd)
|
||||
self.assertTrue("expiration_date='2014-02-20'" in reprd)
|
||||
self.assertTrue("contract_multiplier=500" in reprd)
|
||||
|
||||
future = Future(2468,
|
||||
notice_date='2014-01-20',
|
||||
expiration_date='2014-02-20')
|
||||
rep = future.__repr__()
|
||||
def test_reduce(self):
|
||||
reduced = self.future.__reduce__()
|
||||
self.assertEqual(Future, reduced[0])
|
||||
|
||||
self.assertTrue("Future" in rep)
|
||||
self.assertTrue("2468" in rep)
|
||||
self.assertTrue("notice_date='2014-01-20'" in rep)
|
||||
self.assertTrue("expiration_date='2014-02-20'" in rep)
|
||||
def test_to_and_from_dict(self):
|
||||
dictd = self.future.to_dict()
|
||||
self.assertTrue('notice_date' in dictd)
|
||||
self.assertTrue('expiration_date' in dictd)
|
||||
self.assertTrue('contract_multiplier' in dictd)
|
||||
|
||||
from_dict = Future.from_dict(dictd)
|
||||
self.assertTrue(isinstance(from_dict, Future))
|
||||
self.assertEqual(self.future, from_dict)
|
||||
|
||||
|
||||
class AssetFinderTestCase(TestCase):
|
||||
|
||||
@with_environment()
|
||||
def test_lookup_symbol_fuzzy(self, env=None):
|
||||
fuzzy_str = '@'
|
||||
as_of_date = datetime(2013, 1, 1, tzinfo=pytz.utc)
|
||||
table = FakeTable(uuid.uuid4().hex, 2, as_of_date,
|
||||
fuzzy_str)
|
||||
env.update_asset_finder(asset_metadata=table.df)
|
||||
sf = env.asset_finder
|
||||
|
||||
try:
|
||||
for i in range(2): # we do it twice to test for caching bugs
|
||||
self.assertIsNone(sf.lookup_symbol('test', as_of_date))
|
||||
self.assertIsNotNone(sf.lookup_symbol(
|
||||
'test%s%s' % (fuzzy_str, 1), as_of_date))
|
||||
self.assertIsNone(sf.lookup_symbol('test%s' % 1, as_of_date))
|
||||
|
||||
self.assertIsNone(sf.lookup_symbol(table.name, as_of_date,
|
||||
fuzzy=fuzzy_str))
|
||||
self.assertIsNotNone(sf.lookup_symbol(
|
||||
'test%s%s' % (fuzzy_str, 1), as_of_date, fuzzy=fuzzy_str))
|
||||
self.assertIsNotNone(sf.lookup_symbol(
|
||||
'test%s' % 1, as_of_date, fuzzy=fuzzy_str))
|
||||
finally:
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
@with_environment()
|
||||
def test_lookup_symbol_resolve_multiple(self, env=None):
|
||||
|
||||
as_of_dates = [
|
||||
pd.Timestamp('2013-01-01', tz='UTC') + timedelta(days=i)
|
||||
# Incrementing by two so that start and end dates for each
|
||||
# generated Asset don't overlap (each Asset's end_date is the
|
||||
# day after its start date.)
|
||||
for i in range(0, 10, 2)
|
||||
]
|
||||
|
||||
table = FakeTableIdenticalSymbols(
|
||||
name='existing',
|
||||
as_of_dates=as_of_dates,
|
||||
)
|
||||
env.update_asset_finder(asset_metadata=table.df)
|
||||
sf = env.asset_finder
|
||||
|
||||
try:
|
||||
for _ in range(2): # we do it twice to test for caching bugs
|
||||
with self.assertRaises(SymbolNotFound):
|
||||
sf.lookup_symbol_resolve_multiple('non_existing',
|
||||
as_of_dates[0])
|
||||
with self.assertRaises(MultipleSymbolsFound):
|
||||
sf.lookup_symbol_resolve_multiple('existing',
|
||||
None)
|
||||
|
||||
for i, date in enumerate(as_of_dates):
|
||||
# Verify that we correctly resolve multiple symbols using
|
||||
# the supplied date
|
||||
result = sf.lookup_symbol_resolve_multiple(
|
||||
'existing',
|
||||
date,
|
||||
)
|
||||
self.assertEqual(result.symbol, 'existing')
|
||||
self.assertEqual(result.sid, i)
|
||||
|
||||
finally:
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
@with_environment()
|
||||
def test_lookup_symbol_nasdaq_underscore_collisions(self, env=None):
|
||||
"""
|
||||
Ensure that each NASDAQ symbol without underscores maps back to the
|
||||
original symbol when using fuzzy matching.
|
||||
"""
|
||||
sf = env.asset_finder
|
||||
fuzzy_str = '_'
|
||||
collisions = []
|
||||
|
||||
try:
|
||||
for sid in sf.sids:
|
||||
sec = sf.retrieve_asset(sid)
|
||||
if sec.exchange.startswith('NASDAQ'):
|
||||
found = sf.lookup_symbol(sec.symbol.replace(fuzzy_str, ''),
|
||||
sec.end_date, fuzzy=fuzzy_str)
|
||||
if found != sec:
|
||||
collisions.append((found, sec))
|
||||
|
||||
# KNOWN BUG: Filter out assets that have intersections in their
|
||||
# start and end dates. We can't correctly resolve these.
|
||||
unexpected_errors = []
|
||||
for first, second in collisions:
|
||||
overlapping_dates = (
|
||||
first.end_date >= second.start_date or
|
||||
second.end_date >= first.end_date
|
||||
)
|
||||
if not overlapping_dates:
|
||||
unexpected_errors.append((first, second))
|
||||
|
||||
self.assertFalse(
|
||||
unexpected_errors,
|
||||
pprint.pformat(unexpected_errors),
|
||||
)
|
||||
finally:
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
@parameterized.expand(
|
||||
build_lookup_generic_cases()
|
||||
)
|
||||
@with_environment()
|
||||
def test_lookup_generic(self, table, symbols, reference_date, expected,
|
||||
env=None):
|
||||
"""
|
||||
Ensure that lookup_generic works with various permutations of inputs.
|
||||
"""
|
||||
try:
|
||||
env.update_asset_finder(asset_metadata=table.df)
|
||||
finder = env.asset_finder
|
||||
results, missing = finder.lookup_generic(symbols, reference_date)
|
||||
self.assertEqual(results, expected)
|
||||
self.assertEqual(missing, [])
|
||||
finally:
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
@with_environment()
|
||||
def test_lookup_generic_handle_missing(self, env=None):
|
||||
try:
|
||||
table = FakeTableFromRecords(
|
||||
[
|
||||
# Sids that will be found when we do lookups.
|
||||
{
|
||||
'sid': 0,
|
||||
'file_name': 'real',
|
||||
'company_name': 'real',
|
||||
'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
|
||||
'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
|
||||
'exchange': '',
|
||||
},
|
||||
{
|
||||
'sid': 1,
|
||||
'file_name': 'also_real',
|
||||
'company_name': 'also_real',
|
||||
'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
|
||||
'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
|
||||
'exchange': '',
|
||||
},
|
||||
# Sid whose end date is before our query date. We should
|
||||
# still correctly find it.
|
||||
{
|
||||
'sid': 2,
|
||||
'file_name': 'real_but_old',
|
||||
'company_name': 'real_but_old',
|
||||
'start_date_nano': pd.Timestamp('2002-1-1', tz='UTC'),
|
||||
'end_date_nano': pd.Timestamp('2003-1-1', tz='UTC'),
|
||||
'exchange': '',
|
||||
},
|
||||
# Sid whose end date is before our query date. We should
|
||||
# still correctly find it.
|
||||
{
|
||||
'sid': 3,
|
||||
'file_name': 'real_but_in_the_future',
|
||||
'company_name': 'real_but_in_the_future',
|
||||
'start_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
|
||||
'end_date_nano': pd.Timestamp('2020-1-1', tz='UTC'),
|
||||
'exchange': 'THE FUTURE',
|
||||
},
|
||||
]
|
||||
)
|
||||
env.update_asset_finder(asset_metadata=table.df)
|
||||
symbols = [
|
||||
'real', 1, 'fake', 'real_but_old', 'real_but_in_the_future',
|
||||
]
|
||||
|
||||
results, missing = env.asset_finder.lookup_generic(
|
||||
symbols,
|
||||
pd.Timestamp('2013-02-01', tz='UTC'),
|
||||
)
|
||||
|
||||
self.assertEqual(len(results), 3)
|
||||
self.assertEqual(results[0].symbol, 'real')
|
||||
self.assertEqual(results[0].sid, 0)
|
||||
self.assertEqual(results[1].symbol, 'also_real')
|
||||
self.assertEqual(results[1].sid, 1)
|
||||
|
||||
self.assertEqual(len(missing), 2)
|
||||
self.assertEqual(missing[0], 'fake')
|
||||
self.assertEqual(missing[1], 'real_but_in_the_future')
|
||||
|
||||
finally:
|
||||
env.update_asset_finder(clear_metadata=True)
|
||||
|
||||
def test_insert_metadata(self):
|
||||
finder = AssetFinder()
|
||||
finder.insert_metadata(0,
|
||||
asset_type='equity',
|
||||
start_date='2014-01-01',
|
||||
end_date='2015-01-01',
|
||||
symbol="PLAY",
|
||||
foo_data="FOO",)
|
||||
|
||||
# Test proper insertion
|
||||
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
|
||||
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
|
||||
self.assertEqual('2015-01-01', finder.metadata_cache[0]['end_date'])
|
||||
|
||||
# Test invalid field
|
||||
self.assertFalse('foo_data' in finder.metadata_cache[0])
|
||||
|
||||
# Test updating fields
|
||||
finder.insert_metadata(0,
|
||||
asset_type='equity',
|
||||
start_date='2014-01-01',
|
||||
end_date='2015-02-01',
|
||||
symbol="PLAY",
|
||||
exchange="NYSE",)
|
||||
self.assertEqual('2015-02-01', finder.metadata_cache[0]['end_date'])
|
||||
self.assertEqual('NYSE', finder.metadata_cache[0]['exchange'])
|
||||
|
||||
# Check that old data survived
|
||||
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
|
||||
|
||||
def test_consume_metadata(self):
|
||||
|
||||
# Test dict consumption
|
||||
finder = AssetFinder({0: {'asset_type': 'equity'}})
|
||||
dict_to_consume = {0: {'symbol': 'PLAY'},
|
||||
1: {'symbol': 'MSFT'}}
|
||||
finder.consume_metadata(dict_to_consume)
|
||||
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
|
||||
self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
|
||||
|
||||
# Test dataframe consumption
|
||||
df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
|
||||
df['asset_name'][0] = "Dave'N'Busters"
|
||||
df['exchange'][0] = "NASDAQ"
|
||||
df['asset_name'][1] = "Microsoft"
|
||||
df['exchange'][1] = "NYSE"
|
||||
finder.consume_metadata(df)
|
||||
self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
|
||||
self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
|
||||
# Check that old data survived
|
||||
self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
|
||||
|
||||
@@ -12,3 +12,25 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ._assets import (
|
||||
Asset,
|
||||
Equity,
|
||||
Future,
|
||||
make_asset_array,
|
||||
CACHE_FILE_TEMPLATE
|
||||
)
|
||||
from .assets import (
|
||||
AssetFinder,
|
||||
AssetConvertible
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'Asset',
|
||||
'Equity',
|
||||
'Future',
|
||||
'AssetFinder',
|
||||
'AssetConvertible',
|
||||
'make_asset_array',
|
||||
'CACHE_FILE_TEMPLATE'
|
||||
]
|
||||
|
||||
+63
-23
@@ -21,9 +21,10 @@ cimport cython
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
cdef enum AssetType:
|
||||
EQUITY = 1
|
||||
FUTURE = 2
|
||||
# IMPORTANT NOTE: You must change this template if you change
|
||||
# Asset.__reduce__, or else we'll attempt to unpickle an old version of this
|
||||
# class
|
||||
CACHE_FILE_TEMPLATE = '/tmp/.%s-%s.v4.cache'
|
||||
|
||||
cdef class Asset:
|
||||
|
||||
@@ -33,9 +34,7 @@ cdef class Asset:
|
||||
|
||||
cdef readonly object symbol
|
||||
cdef readonly object asset_name
|
||||
cdef readonly AssetType asset_type
|
||||
|
||||
# TODO: Maybe declare as pandas Timestamp?
|
||||
cdef readonly object start_date
|
||||
cdef readonly object end_date
|
||||
cdef public object first_traded
|
||||
@@ -179,26 +178,21 @@ cdef class Asset:
|
||||
'exchange': self.exchange,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def from_dict(dict_):
|
||||
@classmethod
|
||||
def from_dict(cls, dict_):
|
||||
"""
|
||||
Build an Asset instance from a dict.
|
||||
"""
|
||||
return Asset(**dict_)
|
||||
return cls(**dict_)
|
||||
|
||||
|
||||
cdef class Equity(Asset):
|
||||
|
||||
def __cinit__(self,
|
||||
int sid, # sid is required
|
||||
object symbol="",
|
||||
object asset_name="",
|
||||
object start_date=None,
|
||||
object end_date=None,
|
||||
object first_traded=None,
|
||||
object exchange=""):
|
||||
|
||||
self.asset_type = EQUITY
|
||||
def __str__(self):
|
||||
if self.symbol:
|
||||
return 'Equity(%d [%s])' % (self.sid, self.symbol)
|
||||
else:
|
||||
return 'Equity(%d)' % self.sid
|
||||
|
||||
def __repr__(self):
|
||||
attrs = ('symbol', 'asset_name', 'exchange',
|
||||
@@ -214,6 +208,7 @@ cdef class Future(Asset):
|
||||
|
||||
cdef readonly object notice_date
|
||||
cdef readonly object expiration_date
|
||||
cdef readonly int contract_multiplier
|
||||
|
||||
def __cinit__(self,
|
||||
int sid, # sid is required
|
||||
@@ -224,18 +219,63 @@ cdef class Future(Asset):
|
||||
object notice_date=None,
|
||||
object expiration_date=None,
|
||||
object first_traded=None,
|
||||
object exchange=""):
|
||||
object exchange="",
|
||||
int contract_multiplier=1):
|
||||
|
||||
self.asset_type = FUTURE
|
||||
self.notice_date = notice_date
|
||||
self.expiration_date = expiration_date
|
||||
self.notice_date = notice_date
|
||||
self.expiration_date = expiration_date
|
||||
self.contract_multiplier = contract_multiplier
|
||||
|
||||
# Assign the expiration as the end_date if end_date is not explicit
|
||||
if self.end_date is None:
|
||||
self.end_date = expiration_date
|
||||
|
||||
def __str__(self):
|
||||
if self.symbol:
|
||||
return 'Future(%d [%s])' % (self.sid, self.symbol)
|
||||
else:
|
||||
return 'Future(%d)' % self.sid
|
||||
|
||||
def __repr__(self):
|
||||
attrs = ('symbol', 'asset_name', 'exchange',
|
||||
'start_date', 'end_date', 'first_traded', 'notice_date',
|
||||
'expiration_date')
|
||||
'expiration_date', 'contract_multiplier')
|
||||
tuples = ((attr, repr(getattr(self, attr, None)))
|
||||
for attr in attrs)
|
||||
strings = ('%s=%s' % (t[0], t[1]) for t in tuples)
|
||||
params = ', '.join(strings)
|
||||
return 'Future(%d, %s)' % (self.sid, params)
|
||||
|
||||
cpdef __reduce__(self):
|
||||
"""
|
||||
Function used by pickle to determine how to serialize/deserialize this
|
||||
class. Should return a tuple whose first element is self.__class__,
|
||||
and whose second element is a tuple of all the attributes that should
|
||||
be serialized/deserialized during pickling.
|
||||
"""
|
||||
return (self.__class__, (self.sid,
|
||||
self.symbol,
|
||||
self.asset_name,
|
||||
self.start_date,
|
||||
self.end_date,
|
||||
self.notice_date,
|
||||
self.expiration_date,
|
||||
self.first_traded,
|
||||
self.exchange,
|
||||
self.contract_multiplier,))
|
||||
|
||||
cpdef to_dict(self):
|
||||
"""
|
||||
Convert to a python dict.
|
||||
"""
|
||||
super_dict = super(Future, self).to_dict()
|
||||
super_dict['notice_date'] = self.notice_date
|
||||
super_dict['expiration_date'] = self.expiration_date
|
||||
super_dict['contract_multiplier'] = self.contract_multiplier
|
||||
return super_dict
|
||||
|
||||
|
||||
def make_asset_array(int size, Asset asset):
|
||||
cdef np.ndarray out = np.empty([size], dtype=object)
|
||||
out.fill(asset)
|
||||
return out
|
||||
|
||||
@@ -0,0 +1,541 @@
|
||||
#
|
||||
# Copyright 2015 Quantopian, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from abc import ABCMeta
|
||||
from itertools import chain
|
||||
from numbers import Integral
|
||||
import numpy as np
|
||||
import operator
|
||||
|
||||
from logbook import Logger
|
||||
import pandas as pd
|
||||
from pandas.tseries.tools import normalize_date
|
||||
from six import with_metaclass, string_types
|
||||
|
||||
from zipline.errors import (
|
||||
SymbolNotFound,
|
||||
MultipleSymbolsFound,
|
||||
SidNotFound,
|
||||
IdentifierNotFound,
|
||||
ConsumeAssetMetaDataError,
|
||||
InvalidAssetType,
|
||||
)
|
||||
from zipline.utils import tradingcalendar
|
||||
from zipline.assets._assets import (
|
||||
Asset, Equity, Future
|
||||
)
|
||||
|
||||
log = Logger('assets.py')
|
||||
|
||||
# Expected fields for an Asset's metadata
|
||||
ASSET_FIELDS = [
|
||||
'sid',
|
||||
'asset_type',
|
||||
'symbol',
|
||||
'asset_name',
|
||||
'start_date',
|
||||
'end_date',
|
||||
'first_traded',
|
||||
'exchange',
|
||||
'notice_date',
|
||||
'expiration_date',
|
||||
'contract_multiplier',
|
||||
# The following fields are for compatibility with other systems
|
||||
'file_name', # Used as symbol
|
||||
'company_name', # Used as asset_name
|
||||
'start_date_nano', # Used as start_date
|
||||
'end_date_nano', # Used as end_date
|
||||
]
|
||||
|
||||
|
||||
class AssetFinder(object):
|
||||
|
||||
def __init__(self,
|
||||
metadata=None,
|
||||
trading_calendar=tradingcalendar):
|
||||
|
||||
# Any particular instance of AssetFinder should be
|
||||
# consistent throughout its lifetime, so we grab a reference
|
||||
# to our cache now. That way, if the cache is refreshed later,
|
||||
# our instance will continue to use the old one.
|
||||
self.cache = {}
|
||||
self.sym_cache = {}
|
||||
self.identifier_cache = {}
|
||||
self.fuzzy_match = {}
|
||||
|
||||
# The AssetFinder also holds a nested-dict of all metadata for
|
||||
# reference when building Assets
|
||||
self.metadata_cache = {}
|
||||
if metadata:
|
||||
self.consume_metadata(metadata)
|
||||
|
||||
self.trading_calendar = trading_calendar
|
||||
self.populate_cache()
|
||||
|
||||
def _next_free_sid(self):
|
||||
if len(self.cache) > 0:
|
||||
return max(self.cache.keys()) + 1
|
||||
return 0
|
||||
|
||||
def _assign_sid(self, identifier):
|
||||
if hasattr(identifier, '__int__'):
|
||||
return identifier.__int__()
|
||||
if isinstance(identifier, string_types):
|
||||
return self._next_free_sid()
|
||||
|
||||
def retrieve_asset(self, sid, default_none=False):
|
||||
if isinstance(sid, Asset):
|
||||
return sid
|
||||
asset = self.cache.get(sid)
|
||||
if asset is not None:
|
||||
return asset
|
||||
elif default_none:
|
||||
return None
|
||||
else:
|
||||
raise SidNotFound(sid=sid)
|
||||
|
||||
def retrieve_asset_by_identifier(self, identifier):
|
||||
if isinstance(identifier, Asset):
|
||||
return identifier
|
||||
asset = self.identifier_cache.get(identifier)
|
||||
if asset is not None:
|
||||
return asset
|
||||
else:
|
||||
raise IdentifierNotFound(identifier=identifier)
|
||||
|
||||
@staticmethod
|
||||
def _lookup_symbol_in_infos(infos, as_of_date):
|
||||
"""
|
||||
Search a list of symbols matching a given asset for the most recent
|
||||
known symbol as of as_of_date.
|
||||
|
||||
Returns a pair of (Asset, bool), representing the best match we
|
||||
found for as_of_date, and whether or not that match was actually
|
||||
trading at as_of_date.
|
||||
|
||||
If no entry in infos started before as_of_date, return (None, False).
|
||||
"""
|
||||
# Sort entries by end_date before iterating. If asset start and end
|
||||
# dates were always disjoint, then we could sort by either start or
|
||||
# end_date and get the same sorting.
|
||||
infos = sorted(infos, key=operator.attrgetter('end_date'))
|
||||
|
||||
# Find the newest asset that started before as_of_date.
|
||||
candidates = [i for i in infos
|
||||
if (i.start_date is None or i.start_date <= as_of_date)
|
||||
and (i.end_date is None or as_of_date <= i.end_date)]
|
||||
|
||||
# If one SID exists for symbol, return that symbol
|
||||
if len(candidates) == 1:
|
||||
return candidates[0], True
|
||||
|
||||
# If no SID exists for symbol, return SID with the
|
||||
# highest-but-not-over end_date
|
||||
if len(candidates) == 0:
|
||||
candidates = [i for i in infos
|
||||
if i.end_date < as_of_date]
|
||||
return (candidates[-1], False) if candidates else (None, False)
|
||||
|
||||
# If multiple SIDs exist for symbol, return latest start_date with
|
||||
# end_date as a tie-breaker
|
||||
if len(candidates) > 1:
|
||||
best_candidate = sorted(
|
||||
candidates,
|
||||
key=lambda x: (x.start_date, x.end_date)
|
||||
)[-1]
|
||||
return best_candidate, True
|
||||
|
||||
def lookup_symbol_resolve_multiple(self, symbol, as_of_date=None):
|
||||
"""
|
||||
Return matching Asset of name symbol in database.
|
||||
|
||||
If multiple Assets are found and as_of_date is not set,
|
||||
raises MultipleSymbolsFound.
|
||||
|
||||
If no Asset was active at as_of_date, and allow_expired is False
|
||||
raises SymbolNotFound.
|
||||
"""
|
||||
if as_of_date is not None:
|
||||
as_of_date = normalize_date(as_of_date)
|
||||
|
||||
if symbol not in self.sym_cache:
|
||||
raise SymbolNotFound(symbol=symbol)
|
||||
|
||||
infos = self.sym_cache[symbol]
|
||||
if as_of_date is None:
|
||||
if len(infos) == 1:
|
||||
return infos[0]
|
||||
else:
|
||||
raise MultipleSymbolsFound(symbol=symbol,
|
||||
options=str(infos))
|
||||
|
||||
# Try to find symbol matching as_of_date
|
||||
asset, _ = self._lookup_symbol_in_infos(infos, as_of_date)
|
||||
if asset is None:
|
||||
raise SymbolNotFound(symbol=symbol)
|
||||
return asset
|
||||
|
||||
def lookup_symbol(self, symbol, as_of_date, fuzzy=None):
|
||||
"""
|
||||
If a fuzzy string is provided, then we try various symbols based on
|
||||
the provided symbol. This is to facilitate mapping from a broker's
|
||||
symbol to ours in cases where mapping to the broker's symbol loses
|
||||
information. For example, if we have CMCS_A, but a broker has CMCSA,
|
||||
when the broker provides CMCSA, it can also provide fuzzy='_',
|
||||
so we can find a match by inserting an underscore.
|
||||
"""
|
||||
symbol = symbol.upper()
|
||||
as_of_date = normalize_date(as_of_date)
|
||||
|
||||
if not fuzzy:
|
||||
try:
|
||||
return self.lookup_symbol_resolve_multiple(symbol, as_of_date)
|
||||
except SymbolNotFound:
|
||||
return None
|
||||
else:
|
||||
try:
|
||||
return self.fuzzy_match[(symbol, fuzzy, as_of_date)]
|
||||
except KeyError:
|
||||
# if symbol is CMCSA and fuzzy is '_', then
|
||||
# try CMCSA, then CMCS_A, then CMC_SA, etc.
|
||||
for fuzzy_symbol in chain(
|
||||
(symbol,),
|
||||
(symbol[:i] + fuzzy + symbol[i:]
|
||||
for i in range(len(symbol) - 1, 0, -1))):
|
||||
|
||||
infos = self.sym_cache.get(fuzzy_symbol)
|
||||
if infos:
|
||||
info, date_match = self._lookup_symbol_in_infos(
|
||||
infos,
|
||||
as_of_date,
|
||||
)
|
||||
|
||||
if info is not None and date_match:
|
||||
self.fuzzy_match[(symbol, fuzzy, as_of_date)] = \
|
||||
info
|
||||
return info
|
||||
else:
|
||||
self.fuzzy_match[(symbol, fuzzy, as_of_date)] = None
|
||||
|
||||
def populate_cache(self):
|
||||
"""
|
||||
Populates the asset cache with all values in the assets
|
||||
collection.
|
||||
"""
|
||||
|
||||
# Wipe caches before repopulating
|
||||
self.cache = {}
|
||||
self.sym_cache = {}
|
||||
self.identifier_cache = {}
|
||||
self.fuzzy_match = {}
|
||||
|
||||
counter = 0
|
||||
for identifier, row in self.metadata_cache.items():
|
||||
self.spawn_asset(identifier=identifier, **row)
|
||||
counter += 1
|
||||
|
||||
def spawn_asset(self, identifier, **kwargs):
|
||||
|
||||
# Check if the sid is declared
|
||||
try:
|
||||
kwargs['sid']
|
||||
pass
|
||||
except KeyError:
|
||||
# Assign the identifier as the sid, if applicable
|
||||
if isinstance(identifier, int):
|
||||
kwargs['sid'] = identifier
|
||||
# If the identifier is not a sid, assign one
|
||||
else:
|
||||
kwargs['sid'] = self._assign_sid(identifier)
|
||||
# Update the metadata object with the new sid
|
||||
self.insert_metadata(identifier=identifier, sid=kwargs['sid'])
|
||||
|
||||
# If the file_name is in the kwargs, it may be the symbol
|
||||
try:
|
||||
file_name = kwargs.pop('file_name')
|
||||
try:
|
||||
kwargs['symbol']
|
||||
except KeyError:
|
||||
kwargs['symbol'] = file_name
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# If the identifier coming in was a string and there is no defined
|
||||
# symbol yet, set the symbol to the incoming identifier
|
||||
try:
|
||||
kwargs['symbol']
|
||||
pass
|
||||
except KeyError:
|
||||
if isinstance(identifier, string_types):
|
||||
kwargs['symbol'] = identifier
|
||||
|
||||
# If the company_name is in the kwargs, it may be the asset_name
|
||||
try:
|
||||
company_name = kwargs.pop('company_name')
|
||||
try:
|
||||
kwargs['asset_name']
|
||||
except KeyError:
|
||||
kwargs['asset_name'] = company_name
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# If dates are given as nanos, pop them
|
||||
try:
|
||||
kwargs['start_date'] = kwargs.pop('start_date_nano')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['end_date'] = kwargs.pop('end_date_nano')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['notice_date'] = kwargs.pop('notice_date_nano')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['expiration_date'] = kwargs.pop('expiration_date_nano')
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# Process dates to Timestamps
|
||||
try:
|
||||
kwargs['start_date'] = pd.Timestamp(kwargs['start_date'], tz='UTC')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['end_date'] = pd.Timestamp(kwargs['end_date'], tz='UTC')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['notice_date'] = pd.Timestamp(kwargs['notice_date'],
|
||||
tz='UTC')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
kwargs['expiration_date'] = pd.Timestamp(kwargs['expiration_date'],
|
||||
tz='UTC')
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# Build an Asset of the appropriate type
|
||||
asset_type = kwargs.pop('asset_type', 'equity')
|
||||
if asset_type.lower() == 'equity':
|
||||
asset = Equity(**kwargs)
|
||||
elif asset_type.lower() == 'future':
|
||||
asset = Future(**kwargs)
|
||||
else:
|
||||
raise InvalidAssetType(asset_type=asset_type)
|
||||
|
||||
self.cache[asset.sid] = asset
|
||||
self.identifier_cache[identifier] = asset
|
||||
if asset.symbol is not "":
|
||||
self.sym_cache.setdefault(asset.symbol, []).append(asset)
|
||||
|
||||
return asset
|
||||
|
||||
@property
|
||||
def sids(self):
|
||||
return self.cache.keys()
|
||||
|
||||
@property
|
||||
def assets(self):
|
||||
return self.cache.values()
|
||||
|
||||
def _lookup_generic_scalar(self,
|
||||
asset_convertible,
|
||||
as_of_date,
|
||||
matches,
|
||||
missing):
|
||||
"""
|
||||
Convert asset_convertible to an asset.
|
||||
|
||||
On success, append to matches.
|
||||
On failure, append to missing.
|
||||
"""
|
||||
try:
|
||||
if isinstance(asset_convertible, Asset):
|
||||
matches.append(asset_convertible)
|
||||
|
||||
elif isinstance(asset_convertible, Integral):
|
||||
result = self.retrieve_asset(int(asset_convertible))
|
||||
if result is None:
|
||||
raise SymbolNotFound(symbol=asset_convertible)
|
||||
matches.append(result)
|
||||
|
||||
elif isinstance(asset_convertible, string_types):
|
||||
# Throws SymbolNotFound on failure to match.
|
||||
matches.append(
|
||||
self.lookup_symbol_resolve_multiple(
|
||||
asset_convertible,
|
||||
as_of_date,
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise NotAssetConvertible(
|
||||
"Input was %s, not AssetConvertible."
|
||||
% asset_convertible
|
||||
)
|
||||
|
||||
except SymbolNotFound:
|
||||
missing.append(asset_convertible)
|
||||
return None
|
||||
|
||||
def lookup_generic(self,
|
||||
asset_convertible_or_iterable,
|
||||
as_of_date):
|
||||
"""
|
||||
Convert a AssetConvertible or iterable of AssetConvertibles into
|
||||
a list of Asset objects.
|
||||
|
||||
This method exists primarily as a convenience for implementing
|
||||
user-facing APIs that can handle multiple kinds of input. It should
|
||||
not be used for internal code where we already know the expected types
|
||||
of our inputs.
|
||||
|
||||
Returns a pair of objects, the first of which is the result of the
|
||||
conversion, and the second of which is a list containing any values
|
||||
that couldn't be resolved.
|
||||
"""
|
||||
matches = []
|
||||
missing = []
|
||||
|
||||
# Interpret input as scalar.
|
||||
if isinstance(asset_convertible_or_iterable, AssetConvertible):
|
||||
self._lookup_generic_scalar(
|
||||
asset_convertible=asset_convertible_or_iterable,
|
||||
as_of_date=as_of_date,
|
||||
matches=matches,
|
||||
missing=missing,
|
||||
)
|
||||
try:
|
||||
return matches[0], missing
|
||||
except IndexError:
|
||||
if hasattr(asset_convertible_or_iterable, '__int__'):
|
||||
raise SidNotFound(sid=asset_convertible_or_iterable)
|
||||
else:
|
||||
raise SymbolNotFound(symbol=asset_convertible_or_iterable)
|
||||
|
||||
# Interpret input as iterable.
|
||||
try:
|
||||
iterator = iter(asset_convertible_or_iterable)
|
||||
except TypeError:
|
||||
raise NotAssetConvertible(
|
||||
"Input was not a AssetConvertible "
|
||||
"or iterable of AssetConvertible."
|
||||
)
|
||||
|
||||
for obj in iterator:
|
||||
self._lookup_generic_scalar(obj, as_of_date, matches, missing)
|
||||
return matches, missing
|
||||
|
||||
def insert_metadata(self, identifier, **kwargs):
|
||||
"""
|
||||
Inserts the given metadata kwargs to the entry for the given
|
||||
identifier. Matching fields in the existing entry will be overwritten.
|
||||
:param identifier: The identifier for which to insert metadata
|
||||
:param kwargs: The keyed metadata to insert
|
||||
"""
|
||||
entry = self.metadata_cache.get(identifier, {})
|
||||
|
||||
for key, value in kwargs.items():
|
||||
# Do not accept invalid fields
|
||||
if key not in ASSET_FIELDS:
|
||||
continue
|
||||
# Do not accept Nones
|
||||
if value is None:
|
||||
continue
|
||||
# Do not accept nans from dataframes
|
||||
if isinstance(value, float) and np.isnan(value):
|
||||
continue
|
||||
entry[key] = value
|
||||
|
||||
self.metadata_cache[identifier] = entry
|
||||
|
||||
def consume_identifiers(self, identifiers):
|
||||
for identifier in identifiers:
|
||||
self.insert_metadata(identifier)
|
||||
|
||||
def consume_metadata(self, metadata):
|
||||
"""
|
||||
Consumes the provided metadata in to the metadata cache. The
|
||||
existing values in the cache will be overwritten when there
|
||||
is a conflict.
|
||||
:param metadata: The metadata to be consumed
|
||||
"""
|
||||
# Handle dicts
|
||||
if isinstance(metadata, dict):
|
||||
self._insert_metadata_dict(metadata)
|
||||
# Handle DataFrames
|
||||
elif isinstance(metadata, pd.DataFrame):
|
||||
self._insert_metadata_dataframe(metadata)
|
||||
# Handle readables
|
||||
elif hasattr(metadata, 'read'):
|
||||
self._insert_metadata_readable(metadata)
|
||||
else:
|
||||
raise ConsumeAssetMetaDataError(obj=metadata)
|
||||
|
||||
def clear_metadata(self):
|
||||
self.metadata_cache = {}
|
||||
|
||||
def _insert_metadata_dataframe(self, dataframe):
|
||||
for identifier, row in dataframe.iterrows():
|
||||
self.insert_metadata(identifier, **row)
|
||||
|
||||
def _insert_metadata_dict(self, dict):
|
||||
for identifier, entry in dict.items():
|
||||
self.insert_metadata(identifier, **entry)
|
||||
|
||||
def _insert_metadata_readable(self, readable):
|
||||
for row in readable.read():
|
||||
# Parse out the row of the readable object
|
||||
metadata_dict = {}
|
||||
for field in ASSET_FIELDS:
|
||||
try:
|
||||
row_value = row[field]
|
||||
# Avoid passing placeholders
|
||||
if row_value and (row_value is not 'None'):
|
||||
metadata_dict[field] = row[field]
|
||||
except KeyError:
|
||||
continue
|
||||
except IndexError:
|
||||
continue
|
||||
# Locate the identifier, fail if not found
|
||||
if 'sid' in metadata_dict:
|
||||
identifier = metadata_dict['sid']
|
||||
elif 'symbol' in metadata_dict:
|
||||
identifier = metadata_dict['symbol']
|
||||
else:
|
||||
raise ConsumeAssetMetaDataError(obj=row)
|
||||
self.insert_metadata(identifier, **metadata_dict)
|
||||
|
||||
|
||||
class AssetConvertible(with_metaclass(ABCMeta)):
|
||||
"""
|
||||
ABC for types that are convertible to integer-representations of
|
||||
Assets.
|
||||
|
||||
Includes Asset, six.string_types, and Integral
|
||||
"""
|
||||
pass
|
||||
|
||||
AssetConvertible.register(Integral)
|
||||
AssetConvertible.register(Asset)
|
||||
# Use six.string_types for Python2/3 compatibility
|
||||
for type in string_types:
|
||||
AssetConvertible.register(type)
|
||||
|
||||
|
||||
class NotAssetConvertible(ValueError):
|
||||
pass
|
||||
Reference in New Issue
Block a user