mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-03 10:51:22 +08:00
MAINT: Modifies minute bars to use a dict of OHLC ratios (#1428)
For scaling up pricing data before writing to bcolz, the writer now accepts a dict mapping each sid to the ratio to use. It still accepts a single ratio as default_ohlc_ratio, which is used as a fallback if no mapping exists for a given sid. The default is OHLC_RATIO (1000). This allows better handling of futures pricing data, where the required precision across root symbols is not consistent.
This commit is contained in:
+80
-23
@@ -24,6 +24,7 @@ from intervaltree import IntervalTree
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from six import with_metaclass
|
||||
from toolz import keymap, valmap
|
||||
|
||||
from zipline.data._minute_bar_internal import (
|
||||
minute_value,
|
||||
@@ -208,7 +209,7 @@ class BcolzMinuteBarMetadata(object):
|
||||
minutes_per_day : int
|
||||
The number of minutes per each period.
|
||||
"""
|
||||
FORMAT_VERSION = 2
|
||||
FORMAT_VERSION = 3
|
||||
|
||||
METADATA_FILENAME = 'metadata.json'
|
||||
|
||||
@@ -229,7 +230,7 @@ class BcolzMinuteBarMetadata(object):
|
||||
# if version does not match.
|
||||
version = 0
|
||||
|
||||
ohlc_ratio = raw_data['ohlc_ratio']
|
||||
default_ohlc_ratio = raw_data['ohlc_ratio']
|
||||
|
||||
if version >= 1:
|
||||
minutes_per_day = raw_data['minutes_per_day']
|
||||
@@ -254,8 +255,16 @@ class BcolzMinuteBarMetadata(object):
|
||||
raw_data['market_closes'][-1], unit='m', tz='UTC')
|
||||
)
|
||||
|
||||
if version >= 3:
|
||||
ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid']
|
||||
if ohlc_ratios_per_sid is not None:
|
||||
ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
|
||||
else:
|
||||
ohlc_ratios_per_sid = None
|
||||
|
||||
return cls(
|
||||
ohlc_ratio,
|
||||
default_ohlc_ratio,
|
||||
ohlc_ratios_per_sid,
|
||||
calendar,
|
||||
start_session,
|
||||
end_session,
|
||||
@@ -265,7 +274,8 @@ class BcolzMinuteBarMetadata(object):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ohlc_ratio,
|
||||
default_ohlc_ratio,
|
||||
ohlc_ratios_per_sid,
|
||||
calendar,
|
||||
start_session,
|
||||
end_session,
|
||||
@@ -275,7 +285,8 @@ class BcolzMinuteBarMetadata(object):
|
||||
self.calendar = calendar
|
||||
self.start_session = start_session
|
||||
self.end_session = end_session
|
||||
self.ohlc_ratio = ohlc_ratio
|
||||
self.default_ohlc_ratio = default_ohlc_ratio
|
||||
self.ohlc_ratios_per_sid = ohlc_ratios_per_sid
|
||||
self.minutes_per_day = minutes_per_day
|
||||
self.version = version
|
||||
|
||||
@@ -288,8 +299,14 @@ class BcolzMinuteBarMetadata(object):
|
||||
version : int
|
||||
The value of FORMAT_VERSION of this class.
|
||||
ohlc_ratio : int
|
||||
The factor by which the pricing data is multiplied so that the
|
||||
float data can be stored as an integer.
|
||||
The default ratio by which to multiply the pricing data to
|
||||
convert the floats from floats to an integer to fit within
|
||||
the np.uint32. If ohlc_ratios_per_sid is None or does not
|
||||
contain a mapping for a given sid, this ratio is used.
|
||||
ohlc_ratios_per_sid : dict
|
||||
A dict mapping each sid in the output to the factor by
|
||||
which the pricing data is multiplied so that the float data
|
||||
can be stored as an integer.
|
||||
minutes_per_day : int
|
||||
The number of minutes per each period.
|
||||
calendar_name : str
|
||||
@@ -326,7 +343,8 @@ class BcolzMinuteBarMetadata(object):
|
||||
|
||||
metadata = {
|
||||
'version': self.version,
|
||||
'ohlc_ratio': self.ohlc_ratio,
|
||||
'ohlc_ratio': self.default_ohlc_ratio,
|
||||
'ohlc_ratios_per_sid': self.ohlc_ratios_per_sid,
|
||||
'minutes_per_day': self.minutes_per_day,
|
||||
'calendar_name': self.calendar.name,
|
||||
'start_session': str(self.start_session.date()),
|
||||
@@ -365,12 +383,15 @@ class BcolzMinuteBarWriter(object):
|
||||
The first trading session in the data set.
|
||||
end_session : datetime
|
||||
The last trading session in the data set.
|
||||
ohlc_ratio : int, optional
|
||||
The ratio by which to multiply the pricing data to convert the
|
||||
floats from floats to an integer to fit within the np.uint32.
|
||||
|
||||
The default is 1000 to support pricing data which comes in to the
|
||||
thousands place.
|
||||
default_ohlc_ratio : int, optional
|
||||
The default ratio by which to multiply the pricing data to
|
||||
convert from floats to integers that fit within np.uint32. If
|
||||
ohlc_ratios_per_sid is None or does not contain a mapping for a
|
||||
given sid, this ratio is used. Default is OHLC_RATIO (1000).
|
||||
ohlc_ratios_per_sid : dict, optional
|
||||
A dict mapping each sid in the output to the ratio by which to
|
||||
multiply the pricing data to convert the floats from floats to
|
||||
an integer to fit within the np.uint32.
|
||||
expectedlen : int, optional
|
||||
The expected length of the dataset, used when creating the initial
|
||||
bcolz ctable.
|
||||
@@ -434,7 +455,8 @@ class BcolzMinuteBarWriter(object):
|
||||
start_session,
|
||||
end_session,
|
||||
minutes_per_day,
|
||||
ohlc_ratio=OHLC_RATIO,
|
||||
default_ohlc_ratio=OHLC_RATIO,
|
||||
ohlc_ratios_per_sid=None,
|
||||
expectedlen=DEFAULT_EXPECTEDLEN):
|
||||
|
||||
self._rootdir = rootdir
|
||||
@@ -447,13 +469,15 @@ class BcolzMinuteBarWriter(object):
|
||||
self._session_labels = self._schedule.index
|
||||
self._minutes_per_day = minutes_per_day
|
||||
self._expectedlen = expectedlen
|
||||
self._ohlc_ratio = ohlc_ratio
|
||||
self._default_ohlc_ratio = default_ohlc_ratio
|
||||
self._ohlc_ratios_per_sid = ohlc_ratios_per_sid
|
||||
|
||||
self._minute_index = _calc_minute_index(
|
||||
self._schedule.market_open, self._minutes_per_day)
|
||||
|
||||
metadata = BcolzMinuteBarMetadata(
|
||||
self._ohlc_ratio,
|
||||
self._default_ohlc_ratio,
|
||||
self._ohlc_ratios_per_sid,
|
||||
self._calendar,
|
||||
self._start_session,
|
||||
self._end_session,
|
||||
@@ -465,6 +489,17 @@ class BcolzMinuteBarWriter(object):
|
||||
def first_trading_day(self):
|
||||
return self._start_session
|
||||
|
||||
def ohlc_ratio_for_sid(self, sid):
|
||||
if self._ohlc_ratios_per_sid is not None:
|
||||
try:
|
||||
return self._ohlc_ratios_per_sid[sid]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# If no ohlc_ratios_per_sid dict is passed, or if the specified
|
||||
# sid is not in the dict, fallback to the general ohlc_ratio.
|
||||
return self._default_ohlc_ratio
|
||||
|
||||
def sidpath(self, sid):
|
||||
"""
|
||||
Parameters:
|
||||
@@ -772,7 +807,7 @@ class BcolzMinuteBarWriter(object):
|
||||
dt_ixs = np.searchsorted(all_minutes_in_window.values,
|
||||
dts.astype('datetime64[ns]'))
|
||||
|
||||
ohlc_ratio = self._ohlc_ratio
|
||||
ohlc_ratio = self.ohlc_ratio_for_sid(sid)
|
||||
|
||||
def convert_col(col):
|
||||
"""Adapt float column into a uint32 column.
|
||||
@@ -832,7 +867,13 @@ class BcolzMinuteBarReader(MinuteBarReader):
|
||||
self._market_close_values = self._market_closes.values.\
|
||||
astype('datetime64[m]').astype(np.int64)
|
||||
|
||||
self._ohlc_inverse = 1.0 / metadata.ohlc_ratio
|
||||
self._default_ohlc_inverse = 1.0 / metadata.default_ohlc_ratio
|
||||
ohlc_ratios = metadata.ohlc_ratios_per_sid
|
||||
if ohlc_ratios:
|
||||
self._ohlc_inverses_per_sid = (
|
||||
valmap(lambda x: 1.0 / x, ohlc_ratios))
|
||||
else:
|
||||
self._ohlc_inverses_per_sid = None
|
||||
|
||||
self._minutes_per_day = metadata.minutes_per_day
|
||||
|
||||
@@ -856,6 +897,17 @@ class BcolzMinuteBarReader(MinuteBarReader):
|
||||
def first_trading_day(self):
|
||||
return self._start_session
|
||||
|
||||
def _ohlc_ratio_inverse_for_sid(self, sid):
|
||||
if self._ohlc_inverses_per_sid is not None:
|
||||
try:
|
||||
return self._ohlc_inverses_per_sid[sid]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# If we can not get a sid-specific OHLC inverse for this sid,
|
||||
# fallback to the default.
|
||||
return self._default_ohlc_inverse
|
||||
|
||||
def _minutes_to_exclude(self):
|
||||
"""
|
||||
Calculate the minutes which should be excluded when a window
|
||||
@@ -999,8 +1051,9 @@ class BcolzMinuteBarReader(MinuteBarReader):
|
||||
return 0
|
||||
else:
|
||||
return np.nan
|
||||
|
||||
if field != 'volume':
|
||||
value *= self._ohlc_inverse
|
||||
value *= self._ohlc_ratio_inverse_for_sid(sid)
|
||||
return value
|
||||
|
||||
def get_last_traded_dt(self, asset, dt):
|
||||
@@ -1111,11 +1164,15 @@ class BcolzMinuteBarReader(MinuteBarReader):
|
||||
excl_slice = np.s_[
|
||||
excl_start - start_idx:excl_stop - start_idx + 1]
|
||||
values = np.delete(values, excl_slice)
|
||||
|
||||
where = values != 0
|
||||
# first slice down to len(where) because we might not have
|
||||
# written data for all the minutes requested
|
||||
out[:len(where), i][where] = values[where]
|
||||
if field != 'volume':
|
||||
out *= self._ohlc_inverse
|
||||
if field != 'volume':
|
||||
out[:len(where), i][where] = (
|
||||
values[where] * self._ohlc_ratio_inverse_for_sid(sid))
|
||||
else:
|
||||
out[:len(where), i][where] = values[where]
|
||||
|
||||
results.append(out)
|
||||
return results
|
||||
|
||||
Reference in New Issue
Block a user