From 3136ef3f93df44e0ea90f7e4613a5f12e9861498 Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Mon, 15 Aug 2016 20:38:02 -0400 Subject: [PATCH] BUG: Fixes asset writer to select the latest asset to hold a sid (#1392) * BUG: Fixes asset writer to select the latest asset to hold a sid When constructing the asset_info dataframe, we were previously taking the first symbol/sid pair to include, when we should be taking the most recent. * Ensure groups are sorted by increasing end_date * Updates test_lookup_symbol_change_ticker to also cover asset_name --- tests/test_assets.py | 12 ++++++++++-- zipline/assets/asset_writer.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_assets.py b/tests/test_assets.py index a500687b..3668a0cc 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -523,12 +523,14 @@ class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): # sid 0 { 'symbol': 'A', + 'asset_name': 'Asset A', 'start_date': T('2014-01-01'), 'end_date': T('2014-01-05'), 'exchange': "TEST", }, { 'symbol': 'B', + 'asset_name': 'Asset B', 'start_date': T('2014-01-06'), 'end_date': T('2014-01-10'), 'exchange': "TEST", @@ -537,12 +539,14 @@ class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): # sid 1 { 'symbol': 'C', + 'asset_name': 'Asset C', 'start_date': T('2014-01-01'), 'end_date': T('2014-01-05'), 'exchange': "TEST", }, { 'symbol': 'A', # claiming the unused symbol 'A' + 'asset_name': 'Asset A', 'start_date': T('2014-01-06'), 'end_date': T('2014-01-10'), 'exchange': "TEST", @@ -574,8 +578,9 @@ class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): finder.retrieve_asset(0), msg=str(asof), ) - # the symbol should always be the last symbol + # The symbol and asset_name should always be the last held values assert_equal(A_result.symbol, 'B') + assert_equal(A_result.asset_name, 'Asset B') # from 01 through 05 sid 1 held 'C' C_result = finder.lookup_symbol('C', asof) @@ -584,8 +589,9 @@ class 
AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): finder.retrieve_asset(1), msg=str(asof), ) - # the symbol should always be the last symbol + # The symbol and asset_name should always be the last held values assert_equal(C_result.symbol, 'A') + assert_equal(C_result.asset_name, 'Asset A') # no one held 'B' before 06 with self.assertRaises(SymbolNotFound): @@ -609,6 +615,7 @@ class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): msg=str(asof), ) assert_equal(B_result.symbol, 'B') + assert_equal(B_result.asset_name, 'Asset B') # from 06 through 10 sid 1 held 'A' # we test through the 11th because sid 1 is the last to hold 'A' @@ -620,6 +627,7 @@ class AssetFinderTestCase(WithTradingCalendars, ZiplineTestCase): msg=str(asof), ) assert_equal(A_result.symbol, 'A') + assert_equal(A_result.asset_name, 'Asset A') def test_lookup_symbol(self): diff --git a/zipline/assets/asset_writer.py b/zipline/assets/asset_writer.py index 0fa6dc98..17d61b2f 100644 --- a/zipline/assets/asset_writer.py +++ b/zipline/assets/asset_writer.py @@ -187,7 +187,7 @@ def _generate_output_dataframe(data_subset, defaults): def _check_asset_group(group): - row = group.iloc[0] + row = group.sort('end_date').iloc[-1] row.start_date = group.start_date.min() row.end_date = group.end_date.max() row.drop(list(symbol_columns), inplace=True)