mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-02 19:03:42 +08:00
Merge pull request #1288 from quantopian/pandas-tz-normalize-bug-workaround
Pandas tz normalize bug workaround
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
"""
|
||||
Tests for setting up an EventsLoader and a BlazeEventsLoader.
|
||||
"""
|
||||
from datetime import time
|
||||
import itertools
|
||||
from itertools import product
|
||||
|
||||
import blaze as bz
|
||||
from nose_parameterized import parameterized
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@@ -17,10 +20,11 @@ from zipline.pipeline.data import DataSet, Column
|
||||
from zipline.pipeline.loaders.events import EventsLoader
|
||||
from zipline.pipeline.loaders.blaze.events import BlazeEventsLoader
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
previous_event_indexer,
|
||||
next_event_indexer,
|
||||
normalize_timestamp_to_query_time,
|
||||
previous_event_indexer,
|
||||
)
|
||||
from zipline.testing import ZiplineTestCase
|
||||
from zipline.testing import check_arrays, ZiplineTestCase
|
||||
from zipline.testing.fixtures import (
|
||||
WithAssetFinder,
|
||||
WithNYSETradingDays,
|
||||
@@ -456,3 +460,70 @@ class BlazeEventsLoaderTestCase(EventsLoaderTestCase):
|
||||
next_value_columns,
|
||||
previous_value_columns,
|
||||
)
|
||||
|
||||
|
||||
class EventLoaderUtilsTestCase(ZiplineTestCase):
    """Exercise ``normalize_timestamp_to_query_time`` on fixed fixtures.

    The fixtures cover two things:

    1. Timestamps that fall in both the DST and the standard-time parts of
       the year, fed to the function in every possible order, must all
       normalize to the same dates.
    2. Timestamps just before, exactly at, and just after the query time
       must land on the expected side of the boundary.
    """

    # Local wall-clock times straddling the 8:45 query time used by the test.
    boundary_dates = [
        pd.Timestamp('2013-01-04 8:44:59'),
        pd.Timestamp('2013-01-04 8:45:00'),
        pd.Timestamp('2013-01-04 8:46:00'),
    ]
    us_boundary_dates = [
        date.tz_localize('US/Eastern') for date in boundary_dates
    ]
    moscow_boundary_dates = [
        date.tz_localize('Europe/Moscow') for date in boundary_dates
    ]

    # Naive timestamps from January and April, so that both sides of the
    # US daylight-saving switch are present in the same index.
    mixed_tz_dates = [
        pd.Timestamp('2013-01-24'),
        pd.Timestamp('2013-01-31 20:00:00'),
        pd.Timestamp('2013-04-04'),
        pd.Timestamp('2013-04-21'),
    ]

    # Convert everything to UTC, then drop the tz so the inputs are naive.
    us_dates = pd.to_datetime(
        us_boundary_dates + mixed_tz_dates,
        utc=True,
    ).tz_localize(None)
    moscow_dates = pd.to_datetime(
        moscow_boundary_dates + mixed_tz_dates,
        utc=True,
    ).tz_localize(None)

    # Every ordering of the fixture positions, each as an index array that
    # can be used to shuffle both the inputs and the expected outputs.
    combos = list(
        map(
            np.array,
            itertools.permutations(
                np.arange(len(boundary_dates + mixed_tz_dates)),
            ),
        )
    )

    # Expected normalized dates, in the same order as ``us_dates``.
    expected_us = pd.Series([
        pd.Timestamp('2013-01-04'),
        pd.Timestamp('2013-01-05'),
        pd.Timestamp('2013-01-05'),
        pd.Timestamp('2013-01-24'),
        pd.Timestamp('2013-02-01'),
        pd.Timestamp('2013-04-04'),
        pd.Timestamp('2013-04-21'),
    ]).values

    # Russia's TZ offset is +4
    # Expected normalized dates, in the same order as ``moscow_dates``.
    expected_russia = pd.Series([
        pd.Timestamp('2013-01-04'),
        pd.Timestamp('2013-01-05'),
        pd.Timestamp('2013-01-05'),
        pd.Timestamp('2013-01-24'),
        pd.Timestamp('2013-01-31'),
        pd.Timestamp('2013-04-04'),
        pd.Timestamp('2013-04-21'),
    ]).values

    # Test with timezones on either side of the meridian
    @parameterized.expand([
        (expected_us, 'US/Eastern', us_dates),
        (expected_russia, 'Europe/Moscow', moscow_dates),
    ])
    def test_normalize_to_query_time(self, expected, tz, dates):
        """Normalize every permutation of ``dates`` and compare to ``expected``.

        Order matters in pandas 0.18.2. Prior to that, using tz_convert on
        a DatetimeIndex with DST/EST timestamps mixed resulted in some of
        them being an hour off (1 hour past midnight). Running every
        ordering guards against that order sensitivity.
        """
        query_time = time(8, 45)
        for order in self.combos:
            shuffled = pd.DataFrame({"timestamp": dates[order]})
            normalized = normalize_timestamp_to_query_time(
                shuffled,
                query_time,
                tz,
                inplace=False,
                ts_field='timestamp',
            )
            # The expected values are shuffled with the same index array so
            # each row is compared with its own expectation.
            check_arrays(normalized['timestamp'].values, expected[order])
|
||||
|
||||
@@ -237,11 +237,15 @@ def normalize_timestamp_to_query_time(df,
|
||||
_midnight,
|
||||
include_end=False,
|
||||
)
|
||||
# for all of the times that are greater than our query time add 1
|
||||
# day and truncate to the date
|
||||
# For all of the times that are greater than our query time add 1
|
||||
# day and truncate to the date.
|
||||
# We normalize twice here because of a bug in pandas 0.16.1 that causes
|
||||
# tz_localize() to shift some timestamps by an hour if they are not grouped
|
||||
# together by DST/EST.
|
||||
df.loc[to_roll_forward, ts_field] = (
|
||||
dtidx_local_time[to_roll_forward] + datetime.timedelta(days=1)
|
||||
).normalize().tz_localize(None).tz_localize('utc') # cast back to utc
|
||||
).normalize().tz_localize(None).tz_localize('utc').normalize()
|
||||
|
||||
df.loc[~to_roll_forward, ts_field] = dtidx[~to_roll_forward].normalize()
|
||||
return df
|
||||
|
||||
|
||||
Reference in New Issue
Block a user