diff --git a/tests/pipeline/test_events.py b/tests/pipeline/test_events.py index fbcc1105..81394962 100644 --- a/tests/pipeline/test_events.py +++ b/tests/pipeline/test_events.py @@ -6,6 +6,7 @@ import itertools from itertools import product import blaze as bz +from nose_parameterized import parameterized import numpy as np import pandas as pd from pandas.util.testing import assert_series_equal @@ -462,31 +463,44 @@ class BlazeEventsLoaderTestCase(EventsLoaderTestCase): class EventLoaderUtilsTestCase(ZiplineTestCase): - dates = [pd.Timestamp('2013-01-04 3:00:00'), pd.Timestamp('2013-01-24'), - pd.Timestamp('2013-04-04'), pd.Timestamp('2013-04-21')] - combos = list(itertools.permutations(dates)) - - def test_normalize_to_query_time(self): - # Order matters in pandas 18.0.2. Prior to that, using tz_convert on - # a DatetimeIndex with DST/EST timestamps mixed resulted in some of - # them being an hour off (1 hour past midnight). - expected = pd.Series( - [pd.Timestamp('2013-01-04'), + dates = [pd.Timestamp('2013-01-04 3:00:00'), pd.Timestamp('2013-01-24'), + pd.Timestamp('2013-01-31 20:00:00'), pd.Timestamp('2013-04-04'), pd.Timestamp('2013-04-21')] - ).reset_index(drop=True) + combos = list(itertools.permutations(dates)) + + expected_us = pd.Series( + [pd.Timestamp('2013-01-04'), + pd.Timestamp('2013-01-24'), + pd.Timestamp('2013-02-01'), + pd.Timestamp('2013-04-04'), + pd.Timestamp('2013-04-21')] + ) + + # Europe/Moscow was UTC+4 throughout 2013 (UTC+3 only since Oct 2014) + expected_russia = pd.Series( + [pd.Timestamp('2013-01-04'), + pd.Timestamp('2013-01-24'), + pd.Timestamp('2013-01-31'), + pd.Timestamp('2013-04-04'), + pd.Timestamp('2013-04-21')] + ) + + # Test with timezones on either side of the meridian + @parameterized.expand([(expected_us, 'US/Eastern', time(8, 45)), + (expected_russia, 'Europe/Moscow', time(16, 45))]) + def test_normalize_to_query_time(self, expected, tz, query_time): + # Order matters in pandas 0.18.2. 
Prior to that, using tz_convert on + # a DatetimeIndex with DST/EST timestamps mixed resulted in some of + # them being an hour off (1 hour past midnight). for combo in self.combos: df = pd.DataFrame({"timestamp": combo}) - tz = 'US/Eastern' - query_time = time(8, 45) result = normalize_timestamp_to_query_time(df, query_time, tz, inplace=False, ts_field='timestamp') result = result.sort("timestamp").reset_index(drop=True) - assert_series_equal(result['timestamp'], expected) - - - + assert_series_equal(result['timestamp'], expected, + check_names=False) diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index 50781d4e..77a9f447 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -237,12 +237,15 @@ def normalize_timestamp_to_query_time(df, _midnight, include_end=False, ) - # for all of the times that are greater than our query time add 1 - # day and truncate to the date + # For all of the times that are greater than our query time add 1 + # day and truncate to the date. + # We normalize twice here because of a bug in pandas 0.16.1 that causes + # tz_localize() to shift some timestamps by an hour if they are not grouped + # together by DST/EST. df.loc[to_roll_forward, ts_field] = ( dtidx_local_time[to_roll_forward] + datetime.timedelta(days=1) - ).normalize().tz_localize(None).tz_localize('utc').normalize() # cast - # back to utc + ).normalize().tz_localize(None).tz_localize('utc').normalize() + df.loc[~to_roll_forward, ts_field] = dtidx[~to_roll_forward].normalize() return df