diff --git a/tests/test_history.py b/tests/test_history.py
index 5714b1c7..d5307cc4 100644
--- a/tests/test_history.py
+++ b/tests/test_history.py
@@ -1208,8 +1208,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
                     self.assertEqual(window[0], 391)
                     self.assertEqual(window[1], 781)
                 elif field == 'volume':
-                    self.assertEqual(window[0], 39100)
-                    self.assertEqual(window[1], 78100)
+                    self.assertEqual(window[0], 7663500)
+                    self.assertEqual(window[1], 22873500)
 
                 last_val = -1
 
@@ -1274,8 +1274,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
                     self.assertEqual(window[0], 781)
                     self.assertEqual(window[1], 1171)
                 elif field == 'volume':
-                    self.assertEqual(window[0], 78100)
-                    self.assertEqual(window[1], 117100)
+                    self.assertEqual(window[0], 22873500)
+                    self.assertEqual(window[1], 38083500)
 
                 last_val = -1
 
diff --git a/zipline/data/resample.py b/zipline/data/resample.py
new file mode 100644
index 00000000..fc8979d3
--- /dev/null
+++ b/zipline/data/resample.py
@@ -0,0 +1,48 @@
+# Copyright 2016 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_MINUTE_TO_SESSION_OHCLV_HOW = {
+    'open': 'first',
+    'high': 'max',
+    'low': 'min',
+    'close': 'last',
+    'volume': 'sum'
+}
+
+
+def minute_to_session(minute_frame, calendar):
+    """
+    Resample a DataFrame with minute data into the frame expected by a
+    BcolzDailyBarWriter.
+
+    Parameters
+    ----------
+    minute_frame : pd.DataFrame
+        A DataFrame with the columns `open`, `high`, `low`, `close`, `volume`,
+        indexed by minute dts (resampling bins on the frame's index).
+    calendar : zipline.utils.calendars.trading_calendar.TradingCalendar
+        The TradingCalendar whose `day` offset defines the sessions that the
+        minutes are resampled into.
+
+    Returns
+    -------
+    session_frame : pd.DataFrame
+        A DataFrame with the columns `open`, `high`, `low`, `close`, `volume`,
+        indexed by session label (datetime-like).
+    """
+    # Group minutes into their respective days. Note that this will
+    # create groups for all trading days in the desired range,
+    # including days with no minute data.
+    return minute_frame.resample(calendar.day,
+                                 how=_MINUTE_TO_SESSION_OHCLV_HOW)
diff --git a/zipline/testing/core.py b/zipline/testing/core.py
index c661d7f0..9701fc30 100644
--- a/zipline/testing/core.py
+++ b/zipline/testing/core.py
@@ -438,7 +438,7 @@ def create_minute_bar_data(minutes, sids):
                 'high': np.arange(length) + 15 + sid_idx,
                 'low': np.arange(length) + 8 + sid_idx,
                 'close': np.arange(length) + 10 + sid_idx,
-                'volume': np.arange(length) + 100 + sid_idx,
+                'volume': 100 + sid_idx,
             },
             index=minutes,
         )
diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py
index 2a8b3294..d4b0bc0e 100644
--- a/zipline/testing/fixtures.py
+++ b/zipline/testing/fixtures.py
@@ -14,6 +14,7 @@ from .core import (
     tmp_dir,
 )
 from ..data.data_portal import DataPortal
+from ..data.resample import minute_to_session
 from ..data.us_equity_pricing import (
     SQLiteAdjustmentReader,
     SQLiteAdjustmentWriter,
@@ -659,34 +660,10 @@ class WithEquityDailyBarData(WithTradingEnvironment):
     @classmethod
     def _make_equity_daily_bar_from_minute(cls):
         assets = cls.asset_finder.retrieve_all(cls.asset_finder.sids)
-        ohclv_how = {
-            'open': 'first',
-            'high': 'max',
-            'low': 'min',
-            'close': 'last',
-            # TODO: Change test data so that large minute volumes are not used,
-            # so that 'sum' can be used without going over the uint limit.
-            # When that data is changed, this function can and should be moved
-            # to the `data` module so that loaders and tests can use the same
-            # source from minute logic.
-            'volume': 'last'
-        }
-        mm = cls.trading_calendar.all_minutes
-        m_opens = cls.trading_calendar.schedule.market_open
-        m_closes = cls.trading_calendar.schedule.market_close
-
         minute_data = dict(cls.make_equity_minute_bar_data())
-
         for asset in assets:
-            first_minute = m_opens.loc[asset.start_date]
-            last_minute = m_closes.loc[asset.end_date]
-            asset_df = minute_data[asset]
-            slicer = asset_df.index.slice_indexer(first_minute, last_minute)
-            asset_df = asset_df[slicer]
-            minutes = mm[mm.slice_indexer(start=first_minute,
-                                          end=last_minute)]
-            asset_df = asset_df.reindex(minutes)
-            yield asset.sid, asset_df.resample('1d', how=ohclv_how).dropna()
+            yield asset.sid, minute_to_session(minute_data[asset.sid],
+                                               cls.trading_calendar)
 
     @classmethod
     def make_equity_daily_bar_data(cls):