From 560ff3cacfa3409299bda27b6cd46ee4bca4153e Mon Sep 17 00:00:00 2001 From: Andrew Daniels Date: Thu, 4 May 2017 10:19:09 -0400 Subject: [PATCH] MAINT: Display diff if input to daily bar writer has gaps/extra bars --- tests/data/test_us_equity_pricing.py | 10 +++++++--- zipline/data/us_equity_pricing.py | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/data/test_us_equity_pricing.py b/tests/data/test_us_equity_pricing.py index 12c4ce80..c56d8df7 100644 --- a/tests/data/test_us_equity_pricing.py +++ b/tests/data/test_us_equity_pricing.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from sys import maxsize +import re from nose_parameterized import parameterized from numpy import ( @@ -399,9 +400,12 @@ class BcolzDailyBarWriterMissingDataTestCase(WithAssetFinder, # There are 21 sessions between the start and end date for this # asset, and we excluded one. - expected_msg = ( - 'Got 20 rows for daily bars table with first day=2015-06-02, last ' - 'day=2015-06-30, expected 21 rows.' + expected_msg = re.escape( + "Got 20 rows for daily bars table with first day=2015-06-02, last " + "day=2015-06-30, expected 21 rows.\n" + "Missing sessions: " + "[Timestamp('2015-06-15 00:00:00+0000', tz='UTC')]\n" + "Extra sessions: []" ) with self.assertRaisesRegexp(AssertionError, expected_msg): writer.write(bar_data) diff --git a/zipline/data/us_equity_pricing.py b/zipline/data/us_equity_pricing.py index a599b778..ce2f7427 100644 --- a/zipline/data/us_equity_pricing.py +++ b/zipline/data/us_equity_pricing.py @@ -42,6 +42,7 @@ from pandas import ( NaT, read_csv, read_sql, + to_datetime, Timestamp, ) from pandas.tslib import iNaT @@ -356,11 +357,25 @@ class BcolzDailyBarWriter(object): ] assert len(table) == len(asset_sessions), ( 'Got {} rows for daily bars table with first day={}, last ' - 'day={}, expected {} rows.'.format( + 'day={}, expected {} rows.\n' + 'Missing sessions: {}\n' + 'Extra sessions: {}'.format( len(table), asset_first_day.date(), asset_last_day.date(), len(asset_sessions), + asset_sessions.difference( + to_datetime( + np.array(table['day']), + unit='s', + utc=True, + ) + ).tolist(), + to_datetime( + np.array(table['day']), + unit='s', + utc=True, + ).difference(asset_sessions).tolist(), ) )