TST: Use sum for volume on daily data resample.

Change the mock minute data to no longer use an increasing arange, so
that a day's worth of minute data can be summed and still fit inside a
uint32.

This change was required while working on new test data that looked
like [0, 100, 200, 0], where taking the last minute's volume resulted in
a daily rollup of 0 when the coverage needed a non-zero value.

Also, factor out the resampling function, with an eye toward making it
easier to convert from minute bars to daily bars during ingest/load
processes.
This commit is contained in:
Eddie Hebert
2016-08-03 15:14:25 -04:00
parent d2cab0668f
commit dd2c7db22d
4 changed files with 56 additions and 31 deletions
+4 -4
View File
@@ -1208,8 +1208,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
self.assertEqual(window[0], 391)
self.assertEqual(window[1], 781)
elif field == 'volume':
self.assertEqual(window[0], 39100)
self.assertEqual(window[1], 78100)
self.assertEqual(window[0], 7663500)
self.assertEqual(window[1], 22873500)
last_val = -1
@@ -1274,8 +1274,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
self.assertEqual(window[0], 781)
self.assertEqual(window[1], 1171)
elif field == 'volume':
self.assertEqual(window[0], 78100)
self.assertEqual(window[1], 117100)
self.assertEqual(window[0], 22873500)
self.assertEqual(window[1], 38083500)
last_val = -1
+48
View File
@@ -0,0 +1,48 @@
# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Aggregation applied to each OHLCV column when minute bars are rolled up
# into one bar per session.
_MINUTE_TO_SESSION_OHCLV_HOW = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
}


def minute_to_session(minute_frame, calendar):
    """
    Resample a DataFrame with minute data into the frame expected by a
    BcolzDailyBarWriter.

    Parameters
    ----------
    minute_frame : pd.DataFrame
        A DataFrame with the columns `open`, `high`, `low`, `close` and
        `volume`, indexed by minute dts (the resample bins on the index).
    calendar : zipline.utils.calendars.trading_calendar.TradingCalendar
        A TradingCalendar whose `day` offset is used as the resampling
        frequency, i.e. it defines the session bins.

    Returns
    -------
    session_frame : pd.DataFrame
        A DataFrame with the columns `open`, `high`, `low`, `close` and
        `volume`, indexed by the label of each `calendar.day` bin.
    """
    # Group minutes into their respective days. Note that this will
    # create groups for all `calendar.day` periods in the range spanned
    # by the index, including days with no minute data.
    #
    # `resample(freq, how=...)` was deprecated in pandas 0.18 and later
    # removed, so aggregate through the Resampler object instead.
    return minute_frame.resample(calendar.day).agg(
        _MINUTE_TO_SESSION_OHCLV_HOW
    )
+1 -1
View File
@@ -438,7 +438,7 @@ def create_minute_bar_data(minutes, sids):
'high': np.arange(length) + 15 + sid_idx,
'low': np.arange(length) + 8 + sid_idx,
'close': np.arange(length) + 10 + sid_idx,
'volume': np.arange(length) + 100 + sid_idx,
'volume': 100 + sid_idx,
},
index=minutes,
)
+3 -26
View File
@@ -14,6 +14,7 @@ from .core import (
tmp_dir,
)
from ..data.data_portal import DataPortal
from ..data.resample import minute_to_session
from ..data.us_equity_pricing import (
SQLiteAdjustmentReader,
SQLiteAdjustmentWriter,
@@ -659,34 +660,10 @@ class WithEquityDailyBarData(WithTradingEnvironment):
@classmethod
def _make_equity_daily_bar_from_minute(cls):
assets = cls.asset_finder.retrieve_all(cls.asset_finder.sids)
ohclv_how = {
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
# TODO: Change test data so that large minute volumes are not used,
# so that 'sum' can be used without going over the uint limit.
# When that data is changed, this function can and should be moved
# to the `data` module so that loaders and tests can use the same
# source from minute logic.
'volume': 'last'
}
mm = cls.trading_calendar.all_minutes
m_opens = cls.trading_calendar.schedule.market_open
m_closes = cls.trading_calendar.schedule.market_close
minute_data = dict(cls.make_equity_minute_bar_data())
for asset in assets:
first_minute = m_opens.loc[asset.start_date]
last_minute = m_closes.loc[asset.end_date]
asset_df = minute_data[asset]
slicer = asset_df.index.slice_indexer(first_minute, last_minute)
asset_df = asset_df[slicer]
minutes = mm[mm.slice_indexer(start=first_minute,
end=last_minute)]
asset_df = asset_df.reindex(minutes)
yield asset.sid, asset_df.resample('1d', how=ohclv_how).dropna()
yield asset.sid, minute_to_session(minute_data[asset.sid],
cls.trading_calendar)
@classmethod
def make_equity_daily_bar_data(cls):