From becf897da942079219e475ddbdb77de885ff8be8 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 15 Jul 2016 12:16:19 -0400 Subject: [PATCH] MAINT: Fix failures/warnings in test_api_shim.py - Fixes a warning on indexing with a float that ultimately came from pd.Timedelta.total_seconds(). Adds ``timedelta_to_integral_seconds()`` and ``timedelta_to_integral_minutes()`` functions and replaces various usages of ``int(delta.total_seconds())`` with them. - Fixes a warning triggered in ``_create_daily_stats`` from passing tz-aware datetimes to np.datetime64. --- zipline/algorithm.py | 5 ++- zipline/data/_adjustments.pyx | 6 ++- zipline/data/data_portal.py | 78 ++++++++++++++++++++++------------- zipline/testing/core.py | 3 +- zipline/utils/numpy_utils.py | 1 + zipline/utils/pandas_utils.py | 14 +++++++ 6 files changed, 73 insertions(+), 34 deletions(-) diff --git a/zipline/algorithm.py b/zipline/algorithm.py index 3b94ca3c..0687808f 100644 --- a/zipline/algorithm.py +++ b/zipline/algorithm.py @@ -819,8 +819,9 @@ class TradingAlgorithm(object): else: self.risk_report = perf - daily_dts = [np.datetime64(perf['period_close'], utc=True) - for perf in daily_perfs] + daily_dts = pd.DatetimeIndex( + [p['period_close'] for p in daily_perfs], tz='UTC' + ) daily_stats = pd.DataFrame(daily_perfs, index=daily_dts) return daily_stats diff --git a/zipline/data/_adjustments.pyx b/zipline/data/_adjustments.pyx index c7900385..151a518d 100644 --- a/zipline/data/_adjustments.pyx +++ b/zipline/data/_adjustments.pyx @@ -33,6 +33,8 @@ from zipline.lib.adjustment import Float64Multiply from zipline.assets.asset_writer import ( SQLITE_MAX_VARIABLE_NUMBER as SQLITE_MAX_IN_STATEMENT, ) +from zipline.utils.pandas_utils import timedelta_to_integral_seconds + _SID_QUERY_TEMPLATE = """ SELECT DISTINCT sid FROM {0} @@ -170,8 +172,8 @@ cpdef load_adjustments_from_sqlite(object adjustments_db, # sqlite3.Connection index. 
""" - cdef int start_date = int((dates[0] - EPOCH).total_seconds()) - cdef int end_date = int((dates[-1] - EPOCH).total_seconds()) + cdef int start_date = timedelta_to_integral_seconds(dates[0] - EPOCH) + cdef int end_date = timedelta_to_integral_seconds(dates[-1] - EPOCH) cdef set split_sids = _get_split_sids( adjustments_db, diff --git a/zipline/data/data_portal.py b/zipline/data/data_portal.py index a6dcd093..5ebccf87 100644 --- a/zipline/data/data_portal.py +++ b/zipline/data/data_portal.py @@ -44,6 +44,7 @@ from zipline.utils.math_utils import ( nanstd ) from zipline.utils.memoize import remember_last, weak_lru_cache +from zipline.utils.pandas_utils import timedelta_to_integral_minutes from zipline.errors import ( NoTradeDataAvailableTooEarly, NoTradeDataAvailableTooLate, @@ -1106,44 +1107,63 @@ class DataPortal(object): else: return [assets] if isinstance(assets, Asset) else [] + # cache size picked somewhat loosely. this code exists purely to + # handle deprecated API. @weak_lru_cache(20) def _get_minute_count_for_transform(self, ending_minute, days_count): - # cache size picked somewhat loosely. this code exists purely to - # handle deprecated API. + # This function works in three steps. + # Step 1. Count the minutes from ``ending_minute`` to the start of its + # session. + # Step 2. Count the minutes from the prior ``days_count - 1`` sessions. + # Step 3. Return the sum of the results from steps (1) and (2). - # bars is the number of days desired. we have to translate that - # into the number of minutes we want. - # we get all the minutes for the last (bars - 1) days, then add - # all the minutes so far today. the +2 is to account for ignoring - # today, and the previous day, in doing the math. 
- session_for_minute = self.trading_calendar.minute_to_session_label( - ending_minute - ) - previous_session = self.trading_calendar.previous_session_label( - session_for_minute + # Example (NYSE Calendar) + # ending_minute = 2015-12-28 9:40 AM US/Eastern + # days_count = 3 + # Step 1. Calculate that there are 10 minutes in the ending session. + # Step 2. Calculate that there are 390 + 210 = 600 minutes in the prior + # two sessions. (Prior sessions are 2015-12-23 and 2015-12-24.) + # 2015-12-24 is a half day. + # Step 3. Return 600 + 10 = 610. + + cal = self.trading_calendar + + ending_session = cal.minute_to_session_label( + ending_minute, + direction="none", # It's an error to pass a non-trading minute. ) - sessions = self.trading_calendar.sessions_in_range( - self.trading_calendar.sessions_window(previous_session, - -days_count + 2)[0], - previous_session, + # Assume that calendar days are always full of contiguous minutes, + # which means we can just take 1 + (number of minutes between the last + # minute and the start of the session). We add one so that we include + # the ending minute in the total. + ending_session_minute_count = timedelta_to_integral_minutes( + ending_minute - cal.open_and_close_for_session(ending_session)[0] + ) + 1 + + if days_count == 1: + # We just need sessions for the active day. + return ending_session_minute_count + + # XXX: We're subtracting 2 here to account for two offsets: + # 1. We only want ``days_count - 1`` sessions, since we've already + # accounted for the ending session above. + # 2. The API of ``sessions_window`` is to return one more session than + # the requested number. I don't think any consumers actually want + # that behavior, but it's the tested and documented behavior right + # now, so we have to request one less session than we actually want. 
+ completed_sessions = cal.sessions_window( + cal.previous_session_label(ending_session), + 2 - days_count, ) - minutes_count = \ + completed_sessions_minute_count = ( self.trading_calendar.minutes_count_for_sessions_in_range( - sessions[0], - sessions[-1] + completed_sessions[0], + completed_sessions[-1] ) - - # add the minutes for today - today_open = self.trading_calendar.open_and_close_for_session( - session_for_minute - )[0] - - minutes_count += \ - ((ending_minute - today_open).total_seconds() // 60) + 1 - - return minutes_count + ) + return ending_session_minute_count + completed_sessions_minute_count def get_simple_transform(self, asset, transform_name, dt, data_frequency, bars=None): diff --git a/zipline/testing/core.py b/zipline/testing/core.py index 7beae458..6a23d4e9 100644 --- a/zipline/testing/core.py +++ b/zipline/testing/core.py @@ -50,6 +50,7 @@ from zipline.utils import security_list from zipline.utils.calendars import get_calendar from zipline.utils.input_validation import expect_dimensions from zipline.utils.numpy_utils import as_column +from zipline.utils.pandas_utils import timedelta_to_integral_seconds from zipline.utils.sentinel import sentinel import numpy as np @@ -78,7 +79,7 @@ def str_to_seconds(s): >>> str_to_seconds('2014-01-01') 1388534400 """ - return int((pd.Timestamp(s, tz='UTC') - EPOCH).total_seconds()) + return timedelta_to_integral_seconds(pd.Timestamp(s, tz='UTC') - EPOCH) def drain_zipline(test, zipline): diff --git a/zipline/utils/numpy_utils.py b/zipline/utils/numpy_utils.py index 26d5ab15..e8e2be52 100644 --- a/zipline/utils/numpy_utils.py +++ b/zipline/utils/numpy_utils.py @@ -18,6 +18,7 @@ from numpy import ( flatnonzero, hstack, nan, + timedelta64, vectorize, where ) diff --git a/zipline/utils/pandas_utils.py b/zipline/utils/pandas_utils.py index 1f0a9db2..3e58230a 100644 --- a/zipline/utils/pandas_utils.py +++ b/zipline/utils/pandas_utils.py @@ -146,3 +146,17 @@ def nearest_unequal_elements(dts, dt): upper_value = 
dts[upper_ix] if upper_ix < len(dts) else None return lower_value, upper_value + + +def timedelta_to_integral_seconds(delta): + """ + Convert a pd.Timedelta to a number of seconds as an int. + """ + return int(delta.total_seconds()) + + +def timedelta_to_integral_minutes(delta): + """ + Convert a pd.Timedelta to a number of minutes as an int. + """ + return timedelta_to_integral_seconds(delta) // 60