mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 19:46:52 +08:00
TST: Use sum for volume on daily data resample.
Change the mock minute data to no longer use an increasing arange, so that a day's worth of minute data can be summed and still fit inside a uint32. This change was required while working on new test data that looked like [0, 100, 200, 0], which resulted in a daily rollup of 0 when the coverage needed a non-zero value. Also, factor out the resampling function, with an eye on making it easier to convert from minute bars to daily bars during ingest/load processes.
This commit is contained in:
@@ -1208,8 +1208,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
|
||||
self.assertEqual(window[0], 391)
|
||||
self.assertEqual(window[1], 781)
|
||||
elif field == 'volume':
|
||||
self.assertEqual(window[0], 39100)
|
||||
self.assertEqual(window[1], 78100)
|
||||
self.assertEqual(window[0], 7663500)
|
||||
self.assertEqual(window[1], 22873500)
|
||||
|
||||
last_val = -1
|
||||
|
||||
@@ -1274,8 +1274,8 @@ class MinuteEquityHistoryTestCase(WithHistory, ZiplineTestCase):
|
||||
self.assertEqual(window[0], 781)
|
||||
self.assertEqual(window[1], 1171)
|
||||
elif field == 'volume':
|
||||
self.assertEqual(window[0], 78100)
|
||||
self.assertEqual(window[1], 117100)
|
||||
self.assertEqual(window[0], 22873500)
|
||||
self.assertEqual(window[1], 38083500)
|
||||
|
||||
last_val = -1
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
# Copyright 2016 Quantopian, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
_MINUTE_TO_SESSION_OHCLV_HOW = {
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
'volume': 'sum'
|
||||
}
|
||||
|
||||
|
||||
def minute_to_session(minute_frame, calendar):
|
||||
"""
|
||||
Resample a DataFrame with minute data into the frame expected by a
|
||||
BcolzDailyBarWriter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
minute_frame : pd.DataFrame
|
||||
A DataFrame with the columns `open`, `high`, `low`, `close`, `volume`,
|
||||
and `dt` (minute dts)
|
||||
calendar : zipline.utils.calendars.trading_calendar.TradingCalendar
|
||||
A TradingCalendar on which session labels to resample from minute
|
||||
to session.
|
||||
|
||||
Return
|
||||
------
|
||||
session_frame : pd.DataFrame
|
||||
A DataFrame with the columns `open`, `high`, `low`, `close`, `volume`,
|
||||
and `day` (datetime-like).
|
||||
"""
|
||||
# Group minutes into their respective days. Note that this will
|
||||
# create groups for all trading days in the desired range,
|
||||
# including days with no minute data.
|
||||
return minute_frame.resample(calendar.day,
|
||||
how=_MINUTE_TO_SESSION_OHCLV_HOW)
|
||||
@@ -438,7 +438,7 @@ def create_minute_bar_data(minutes, sids):
|
||||
'high': np.arange(length) + 15 + sid_idx,
|
||||
'low': np.arange(length) + 8 + sid_idx,
|
||||
'close': np.arange(length) + 10 + sid_idx,
|
||||
'volume': np.arange(length) + 100 + sid_idx,
|
||||
'volume': 100 + sid_idx,
|
||||
},
|
||||
index=minutes,
|
||||
)
|
||||
|
||||
@@ -14,6 +14,7 @@ from .core import (
|
||||
tmp_dir,
|
||||
)
|
||||
from ..data.data_portal import DataPortal
|
||||
from ..data.resample import minute_to_session
|
||||
from ..data.us_equity_pricing import (
|
||||
SQLiteAdjustmentReader,
|
||||
SQLiteAdjustmentWriter,
|
||||
@@ -659,34 +660,10 @@ class WithEquityDailyBarData(WithTradingEnvironment):
|
||||
@classmethod
|
||||
def _make_equity_daily_bar_from_minute(cls):
|
||||
assets = cls.asset_finder.retrieve_all(cls.asset_finder.sids)
|
||||
ohclv_how = {
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
# TODO: Change test data so that large minute volumes are not used,
|
||||
# so that 'sum' can be used without going over the uint limit.
|
||||
# When that data is changed, this function can and should be moved
|
||||
# to the `data` module so that loaders and tests can use the same
|
||||
# source from minute logic.
|
||||
'volume': 'last'
|
||||
}
|
||||
mm = cls.trading_calendar.all_minutes
|
||||
m_opens = cls.trading_calendar.schedule.market_open
|
||||
m_closes = cls.trading_calendar.schedule.market_close
|
||||
|
||||
minute_data = dict(cls.make_equity_minute_bar_data())
|
||||
|
||||
for asset in assets:
|
||||
first_minute = m_opens.loc[asset.start_date]
|
||||
last_minute = m_closes.loc[asset.end_date]
|
||||
asset_df = minute_data[asset]
|
||||
slicer = asset_df.index.slice_indexer(first_minute, last_minute)
|
||||
asset_df = asset_df[slicer]
|
||||
minutes = mm[mm.slice_indexer(start=first_minute,
|
||||
end=last_minute)]
|
||||
asset_df = asset_df.reindex(minutes)
|
||||
yield asset.sid, asset_df.resample('1d', how=ohclv_how).dropna()
|
||||
yield asset.sid, minute_to_session(minute_data[asset.sid],
|
||||
cls.trading_calendar)
|
||||
|
||||
@classmethod
|
||||
def make_equity_daily_bar_data(cls):
|
||||
|
||||
Reference in New Issue
Block a user