From 9ecd7e18d0ef1b2e0fb0cb893e07748a22f8022c Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Wed, 7 Oct 2015 16:08:43 -0400 Subject: [PATCH] MAINT: Improves handling for contract date info in lookup_future_chain Improves the query for futures contract to use the date that comes first in time (between notice_date and expiration_date) to determine contract validity. If one of these is missing, we'll use the other. Also modifies the query to order the resulting contracts by their expiration_date if available, and to use their notice_date if not. --- tests/test_assets.py | 45 +++++++++++++++++++++++++----------- zipline/assets/assets.py | 49 ++++++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/tests/test_assets.py b/tests/test_assets.py index 6038968e..c7fde844 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -626,12 +626,13 @@ class AssetFinderTestCase(TestCase): def test_lookup_future_chain(self): metadata = { - # Notice day is today, so not valid + # Notice day is today, so should be valid. 2: { 'symbol': 'ADN15', 'root_symbol': 'AD', 'asset_type': 'future', 'notice_date': pd.Timestamp('2015-05-14', tz='UTC'), + 'expiration_date': pd.Timestamp('2015-06-14', tz='UTC'), 'start_date': pd.Timestamp('2015-01-01', tz='UTC') }, 1: { @@ -639,6 +640,7 @@ class AssetFinderTestCase(TestCase): 'root_symbol': 'AD', 'asset_type': 'future', 'notice_date': pd.Timestamp('2015-08-14', tz='UTC'), + 'expiration_date': pd.Timestamp('2015-09-14', tz='UTC'), 'start_date': pd.Timestamp('2015-01-01', tz='UTC') }, # Starts trading today, so should be valid. @@ -647,6 +649,7 @@ class AssetFinderTestCase(TestCase): 'root_symbol': 'AD', 'asset_type': 'future', 'notice_date': pd.Timestamp('2015-11-16', tz='UTC'), + 'expiration_date': pd.Timestamp('2015-12-16', tz='UTC'), 'start_date': pd.Timestamp('2015-05-14', tz='UTC') }, # Starts trading in August, so not valid. 
@@ -655,6 +658,16 @@ class AssetFinderTestCase(TestCase): 'root_symbol': 'AD', 'asset_type': 'future', 'notice_date': pd.Timestamp('2015-11-16', tz='UTC'), + 'expiration_date': pd.Timestamp('2015-12-16', tz='UTC'), + 'start_date': pd.Timestamp('2015-08-01', tz='UTC') + }, + # Notice date comes after expiration + 4: { + 'symbol': 'ADZ16', + 'root_symbol': 'AD', + 'asset_type': 'future', + 'notice_date': pd.Timestamp('2015-11-25', tz='UTC'), + 'expiration_date': pd.Timestamp('2016-11-16', tz='UTC'), 'start_date': pd.Timestamp('2015-08-01', tz='UTC') }, } @@ -663,17 +676,18 @@ class AssetFinderTestCase(TestCase): dt = pd.Timestamp('2015-05-14', tz='UTC') last_year = pd.Timestamp('2014-01-01', tz='UTC') first_day = pd.Timestamp('2015-01-01', tz='UTC') + dt_2 = pd.Timestamp('2016-11-17', tz='UTC') # Check that we get the expected number of contracts, in the # right order ad_contracts = finder.lookup_future_chain('AD', dt, dt) - self.assertEqual(len(ad_contracts), 2) - self.assertEqual(ad_contracts[0].sid, 1) - self.assertEqual(ad_contracts[1].sid, 0) + self.assertEqual(len(ad_contracts), 3) + self.assertEqual(ad_contracts[0].sid, 2) + self.assertEqual(ad_contracts[1].sid, 1) # Check that pd.NaT for knowledge_date uses the value of as_of_date ad_contracts = finder.lookup_future_chain('AD', dt, pd.NaT) - self.assertEqual(len(ad_contracts), 2) + self.assertEqual(len(ad_contracts), 3) # Check that we get nothing if our knowledge date is last year ad_contracts = finder.lookup_future_chain('AD', dt, last_year) @@ -681,11 +695,16 @@ class AssetFinderTestCase(TestCase): # Check that we get things that start on the knowledge date ad_contracts = finder.lookup_future_chain('AD', dt, first_day) - self.assertEqual(len(ad_contracts), 1) + self.assertEqual(len(ad_contracts), 2) # Check that pd.NaT for as_of_date gives the whole chain ad_contracts = finder.lookup_future_chain('AD', pd.NaT, first_day) - self.assertEqual(len(ad_contracts), 4) + self.assertEqual(len(ad_contracts), 5) + + 
# Check that when the expiration_date has passed but the + # notice_date hasn't, contract is still considered invalid. + ad_contracts = finder.lookup_future_chain('AD', dt_2, dt_2) + self.assertEqual(len(ad_contracts), 0) def test_map_identifier_index_to_sids(self): # Build an empty finder and some Assets @@ -834,12 +853,12 @@ class TestFutureChain(TestCase): cl = FutureChain(self.asset_finder, lambda: '2005-12-01', 'CL') self.assertEqual(len(cl), 3) - # Sid 0 is still valid the day before its notice date. - cl = FutureChain(self.asset_finder, lambda: '2005-12-19', 'CL') + # Sid 0 is still valid on its notice date. + cl = FutureChain(self.asset_finder, lambda: '2005-12-20', 'CL') self.assertEqual(len(cl), 3) # Sid 0 is now invalid, leaving only Sids 1 & 2 valid. - cl = FutureChain(self.asset_finder, lambda: '2005-12-20', 'CL') + cl = FutureChain(self.asset_finder, lambda: '2005-12-21', 'CL') self.assertEqual(len(cl), 2) # Sid 3 has started, so 1, 2, & 3 are now valid. @@ -847,7 +866,7 @@ class TestFutureChain(TestCase): self.assertEqual(len(cl), 3) # All contracts are no longer valid. 
- cl = FutureChain(self.asset_finder, lambda: '2006-09-20', 'CL') + cl = FutureChain(self.asset_finder, lambda: '2006-09-21', 'CL') self.assertEqual(len(cl), 0) def test_getitem(self): @@ -860,10 +879,10 @@ class TestFutureChain(TestCase): with self.assertRaises(IndexError): cl[3] - cl = FutureChain(self.asset_finder, lambda: '2005-12-19', 'CL') + cl = FutureChain(self.asset_finder, lambda: '2005-12-20', 'CL') self.assertEqual(cl[0], 0) - cl = FutureChain(self.asset_finder, lambda: '2005-12-20', 'CL') + cl = FutureChain(self.asset_finder, lambda: '2005-12-21', 'CL') self.assertEqual(cl[0], 1) cl = FutureChain(self.asset_finder, lambda: '2006-02-01', 'CL') diff --git a/zipline/assets/assets.py b/zipline/assets/assets.py index 108dc5e3..5b19ccea 100644 --- a/zipline/assets/assets.py +++ b/zipline/assets/assets.py @@ -352,11 +352,12 @@ class AssetFinder(object): root_symbol : str Root symbol of the desired future. as_of_date : pd.Timestamp or pd.NaT + Date at which the chain determination is rooted. I.e. the - existing contract whose notice date is first after this - date is the primary contract, etc. If NaT is given, the - chain is unbounded, and all contracts for this root symbol - are returned. + existing contract whose notice date/expiration date is first + after this date is the primary contract, etc. If NaT is + given, the chain is unbounded, and all contracts for this + root symbol are returned. knowledge_date : pd.Timestamp or pd.NaT Date for determining which contracts exist for inclusion in this chain. Contracts exist only if they have a start_date @@ -396,14 +397,48 @@ class AssetFinder(object): knowledge_date = as_of_date else: knowledge_date = knowledge_date.value + sids = list(map( itemgetter('sid'), sa.select((fc_cols.sid,)).where( (fc_cols.root_symbol == root_symbol) & - (fc_cols.notice_date > as_of_date) & - (fc_cols.start_date <= knowledge_date), + (fc_cols.start_date <= knowledge_date) & + + # Filter to contracts that are still valid. 
If both + # exist, use the one that comes first in time (i.e. + # the lower value). If either notice_date or + # expiration_date is NaT, use the other. If both are + # NaT, the contract cannot be included in any chain. + sa.case( + [ + ( + fc_cols.notice_date == pd.NaT.value, + fc_cols.expiration_date >= as_of_date + ), + ( + fc_cols.expiration_date == pd.NaT.value, + fc_cols.notice_date >= as_of_date + ) + ], + else_=( + sa.func.min( + fc_cols.notice_date, + fc_cols.expiration_date + ) >= as_of_date + ) + ) ).order_by( - fc_cols.notice_date.asc(), + # Sort using expiration_date if valid. If it's NaT, + # use notice_date instead. + sa.case( + [ + ( + fc_cols.expiration_date == pd.NaT.value, + fc_cols.notice_date + ) + ], + else_=fc_cols.expiration_date + ).asc() ).execute().fetchall() ))