mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-01 19:30:05 +08:00
6486e64530
This reverts commit86c7635b45, reversing changes made toc77f2b92df. Some real world cases hit errors with this change, due to the new offset logic attempting to create Adjustments with invalid parameters. Will identify exact conditions that cause this error and add as a test case before remerging.
420 lines
16 KiB
Python
420 lines
16 KiB
Python
# Copyright 2016 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from abc import (
|
|
ABCMeta,
|
|
abstractmethod,
|
|
abstractproperty,
|
|
)
|
|
from lru import LRU
|
|
|
|
from numpy import around, hstack
|
|
from pandas.tslib import normalize_date
|
|
|
|
from six import with_metaclass
|
|
|
|
from zipline.lib._float64window import AdjustedArrayWindow as Float64Window
|
|
from zipline.lib.adjustment import Float64Multiply
|
|
from zipline.utils.cache import ExpiringCache
|
|
from zipline.utils.memoize import lazyval
|
|
from zipline.utils.numpy_utils import float64_dtype
|
|
|
|
|
|
class SlidingWindow(object):
|
|
"""
|
|
Wrapper around an AdjustedArrayWindow which supports monotonically
|
|
increasing (by datetime) requests for a sized window of data.
|
|
|
|
Parameters
|
|
----------
|
|
window : AdjustedArrayWindow
|
|
Window of pricing data with prefetched values beyond the current
|
|
simulation dt.
|
|
cal_start : int
|
|
Index in the overall calendar at which the window starts.
|
|
"""
|
|
|
|
def __init__(self, window, size, cal_start, offset):
|
|
self.window = window
|
|
self.cal_start = cal_start
|
|
self.current = around(next(window), 3)
|
|
self.offset = offset
|
|
self.most_recent_ix = self.cal_start + size
|
|
|
|
def get(self, end_ix):
|
|
"""
|
|
Returns
|
|
-------
|
|
out : A np.ndarray of the equity pricing up to end_ix after adjustments
|
|
and rounding have been applied.
|
|
"""
|
|
if self.most_recent_ix == end_ix:
|
|
return self.current
|
|
|
|
target = end_ix - self.cal_start - self.offset + 1
|
|
self.current = around(self.window.seek(target), 3)
|
|
|
|
self.most_recent_ix = end_ix
|
|
return self.current
|
|
|
|
|
|
class HistoryLoader(with_metaclass(ABCMeta)):
|
|
"""
|
|
Loader for sliding history windows, with support for adjustments.
|
|
|
|
Parameters
|
|
----------
|
|
trading_calendar: TradingCalendar
|
|
Contains the grouping logic needed to assign minutes to periods.
|
|
reader : DailyBarReader, MinuteBarReader
|
|
Reader for pricing bars.
|
|
adjustment_reader : SQLiteAdjustmentReader
|
|
Reader for adjustment data.
|
|
"""
|
|
FIELDS = ('open', 'high', 'low', 'close', 'volume')
|
|
|
|
def __init__(self, trading_calendar, reader, adjustment_reader,
|
|
sid_cache_size=1000):
|
|
self.trading_calendar = trading_calendar
|
|
self._reader = reader
|
|
self._adjustments_reader = adjustment_reader
|
|
self._window_blocks = {
|
|
field: ExpiringCache(LRU(sid_cache_size))
|
|
for field in self.FIELDS
|
|
}
|
|
|
|
@abstractproperty
|
|
def _prefetch_length(self):
|
|
pass
|
|
|
|
@abstractproperty
|
|
def _calendar(self):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def _array(self, start, end, assets, field):
|
|
pass
|
|
|
|
def _get_adjustments_in_range(self, asset, dts, field,
|
|
is_perspective_after):
|
|
"""
|
|
Get the Float64Multiply objects to pass to an AdjustedArrayWindow.
|
|
|
|
For the use of AdjustedArrayWindow in the loader, which looks back
|
|
from current simulation time back to a window of data the dictionary is
|
|
structured with:
|
|
- the key into the dictionary for adjustments is the location of the
|
|
day from which the window is being viewed.
|
|
- the start of all multiply objects is always 0 (in each window all
|
|
adjustments are overlapping)
|
|
- the end of the multiply object is the location before the calendar
|
|
location of the adjustment action, making all days before the event
|
|
adjusted.
|
|
|
|
Parameters
|
|
----------
|
|
asset : Asset
|
|
The assets for which to get adjustments.
|
|
days : iterable of datetime64-like
|
|
The days for which adjustment data is needed.
|
|
field : str
|
|
OHLCV field for which to get the adjustments.
|
|
is_perspective_after : bool
|
|
see: `PricingHistoryLoader.history`
|
|
If True, the index at which the Multiply object is registered to
|
|
be popped is calculated so that it applies to the last slot in the
|
|
sliding window when the adjustment occurs immediately after the dt
|
|
that slot represents.
|
|
|
|
Returns
|
|
-------
|
|
out : The adjustments as a dict of loc -> Float64Multiply
|
|
"""
|
|
sid = int(asset)
|
|
start = normalize_date(dts[0])
|
|
end = normalize_date(dts[-1])
|
|
adjs = {}
|
|
if field != 'volume':
|
|
mergers = self._adjustments_reader.get_adjustments_for_sid(
|
|
'mergers', sid)
|
|
for m in mergers:
|
|
dt = m[0]
|
|
if start < dt <= end:
|
|
end_loc = dts.searchsorted(dt)
|
|
adj_loc = end_loc
|
|
if is_perspective_after:
|
|
# Set adjustment pop location so that it applies
|
|
# to last value if adjustment occurs immediately after
|
|
# the last slot.
|
|
adj_loc -= 1
|
|
mult = Float64Multiply(0,
|
|
end_loc - 1,
|
|
0,
|
|
0,
|
|
m[1])
|
|
try:
|
|
adjs[adj_loc].append(mult)
|
|
except KeyError:
|
|
adjs[adj_loc] = [mult]
|
|
divs = self._adjustments_reader.get_adjustments_for_sid(
|
|
'dividends', sid)
|
|
for d in divs:
|
|
dt = d[0]
|
|
if start < dt <= end:
|
|
end_loc = dts.searchsorted(dt)
|
|
adj_loc = end_loc
|
|
if is_perspective_after:
|
|
# Set adjustment pop location so that it applies
|
|
# to last value if adjustment occurs immediately after
|
|
# the last slot.
|
|
adj_loc -= 1
|
|
mult = Float64Multiply(0,
|
|
end_loc - 1,
|
|
0,
|
|
0,
|
|
d[1])
|
|
try:
|
|
adjs[adj_loc].append(mult)
|
|
except KeyError:
|
|
adjs[adj_loc] = [mult]
|
|
splits = self._adjustments_reader.get_adjustments_for_sid(
|
|
'splits', sid)
|
|
for s in splits:
|
|
dt = s[0]
|
|
if start < dt <= end:
|
|
if field == 'volume':
|
|
ratio = 1.0 / s[1]
|
|
else:
|
|
ratio = s[1]
|
|
end_loc = dts.searchsorted(dt)
|
|
adj_loc = end_loc
|
|
if is_perspective_after:
|
|
# Set adjustment pop location so that it applies
|
|
# to last value if adjustment occurs immediately after
|
|
# the last slot.
|
|
adj_loc -= 1
|
|
mult = Float64Multiply(0,
|
|
end_loc - 1,
|
|
0,
|
|
0,
|
|
ratio)
|
|
try:
|
|
adjs[adj_loc].append(mult)
|
|
except KeyError:
|
|
adjs[adj_loc] = [mult]
|
|
return adjs
|
|
|
|
def _ensure_sliding_windows(self, assets, dts, field,
|
|
is_perspective_after):
|
|
"""
|
|
Ensure that there is a Float64Multiply window for each asset that can
|
|
provide data for the given parameters.
|
|
If the corresponding window for the (assets, len(dts), field) does not
|
|
exist, then create a new one.
|
|
If a corresponding window does exist for (assets, len(dts), field), but
|
|
can not provide data for the current dts range, then create a new
|
|
one and replace the expired window.
|
|
|
|
Parameters
|
|
----------
|
|
assets : iterable of Assets
|
|
The assets in the window
|
|
dts : iterable of datetime64-like
|
|
The datetimes for which to fetch data.
|
|
Makes an assumption that all dts are present and contiguous,
|
|
in the calendar.
|
|
field : str
|
|
The OHLCV field for which to retrieve data.
|
|
is_perspective_after : bool
|
|
see: `PricingHistoryLoader.history`
|
|
|
|
Returns
|
|
-------
|
|
out : list of Float64Window with sufficient data so that each asset's
|
|
window can provide `get` for the index corresponding with the last
|
|
value in `dts`
|
|
"""
|
|
end = dts[-1]
|
|
size = len(dts)
|
|
asset_windows = {}
|
|
needed_assets = []
|
|
|
|
for asset in assets:
|
|
try:
|
|
asset_windows[asset] = self._window_blocks[field].get(
|
|
(asset, size, is_perspective_after), end)
|
|
except KeyError:
|
|
needed_assets.append(asset)
|
|
|
|
if needed_assets:
|
|
start = dts[0]
|
|
|
|
offset = 0
|
|
start_ix = self._calendar.get_loc(start)
|
|
end_ix = self._calendar.get_loc(end)
|
|
|
|
cal = self._calendar
|
|
prefetch_end_ix = min(end_ix + self._prefetch_length, len(cal) - 1)
|
|
prefetch_end = cal[prefetch_end_ix]
|
|
prefetch_dts = cal[start_ix:prefetch_end_ix + 1]
|
|
prefetch_len = len(prefetch_dts)
|
|
array = self._array(prefetch_dts, needed_assets, field)
|
|
view_kwargs = {}
|
|
if field == 'volume':
|
|
array = array.astype(float64_dtype)
|
|
|
|
for i, asset in enumerate(needed_assets):
|
|
if self._adjustments_reader:
|
|
adjs = self._get_adjustments_in_range(
|
|
asset, prefetch_dts, field, is_perspective_after)
|
|
else:
|
|
adjs = {}
|
|
window = Float64Window(
|
|
array[:, i].reshape(prefetch_len, 1),
|
|
view_kwargs,
|
|
adjs,
|
|
offset,
|
|
size
|
|
)
|
|
sliding_window = SlidingWindow(window, size, start_ix, offset)
|
|
asset_windows[asset] = sliding_window
|
|
self._window_blocks[field].set(
|
|
(asset, size, is_perspective_after),
|
|
sliding_window,
|
|
prefetch_end)
|
|
|
|
return [asset_windows[asset] for asset in assets]
|
|
|
|
def history(self, assets, dts, field, is_perspective_after):
|
|
"""
|
|
A window of pricing data with adjustments applied assuming that the
|
|
end of the window is the day before the current simulation time.
|
|
|
|
Parameters
|
|
----------
|
|
assets : iterable of Assets
|
|
The assets in the window.
|
|
dts : iterable of datetime64-like
|
|
The datetimes for which to fetch data.
|
|
Makes an assumption that all dts are present and contiguous,
|
|
in the calendar.
|
|
field : str
|
|
The OHLCV field for which to retrieve data.
|
|
is_perspective_after : bool
|
|
True, if the window is being viewed immediately after the last dt
|
|
in the sliding window.
|
|
False, if the window is viewed on the last dt.
|
|
|
|
This flag is used for handling the case where the last dt in the
|
|
requested window immediately precedes a corporate action, e.g.:
|
|
|
|
- is_perspective_after is True
|
|
|
|
When the viewpoint is after the last dt in the window, as when a
|
|
daily history window is accessed from a simulation that uses a
|
|
minute data frequency, the history call to this loader will not
|
|
include the current simulation dt. At that point in time, the raw
|
|
data for the last day in the window will require adjustment, so the
|
|
most recent adjustment with respect to the simulation time is
|
|
applied to the last dt in the requested window.
|
|
|
|
An example equity which has a 0.5 split ratio dated for 05-27,
|
|
with the dts for a history call of 5 bars with a '1d' frequency at
|
|
05-27 9:31. Simulation frequency is 'minute'.
|
|
|
|
(In this case this function is called with 4 daily dts, and the
|
|
calling function is responsible for stitching back on the
|
|
'current' dt)
|
|
|
|
| | | | | last dt | <-- viewer is here |
|
|
| | 05-23 | 05-24 | 05-25 | 05-26 | 05-27 9:31 |
|
|
| raw | 10.10 | 10.20 | 10.30 | 10.40 | |
|
|
| adj | 5.05 | 5.10 | 5.15 | 5.25 | |
|
|
|
|
The adjustment is applied to the last dt, 05-26, and all previous
|
|
dts.
|
|
|
|
- is_perspective_after is False, daily
|
|
|
|
When the viewpoint is the same point in time as the last dt in the
|
|
window, as when a daily history window is accessed from a
|
|
simulation that uses a daily data frequency, the history call will
|
|
include the current dt. At that point in time, the raw data for the
|
|
last day in the window will be post-adjustment, so no adjustment
|
|
is applied to the last dt.
|
|
|
|
An example equity which has a 0.5 split ratio dated for 05-27,
|
|
with the dts for a history call of 5 bars with a '1d' frequency at
|
|
05-27 0:00. Simulation frequency is 'daily'.
|
|
|
|
| | | | | | <-- viewer is here |
|
|
| | | | | | last dt |
|
|
| | 05-23 | 05-24 | 05-25 | 05-26 | 05-27 |
|
|
| raw | 10.10 | 10.20 | 10.30 | 10.40 | 5.25 |
|
|
| adj | 5.05 | 5.10 | 5.15 | 5.20 | 5.25 |
|
|
|
|
Adjustments are applied 05-23 through 05-26 but not to the last dt,
|
|
05-27
|
|
|
|
Returns
|
|
-------
|
|
out : np.ndarray with shape(len(days between start, end), len(assets))
|
|
"""
|
|
block = self._ensure_sliding_windows(assets,
|
|
dts,
|
|
field,
|
|
is_perspective_after)
|
|
end_ix = self._calendar.get_loc(dts[-1])
|
|
return hstack([window.get(end_ix) for window in block])
|
|
|
|
|
|
class DailyHistoryLoader(HistoryLoader):
|
|
|
|
@property
|
|
def _prefetch_length(self):
|
|
return 40
|
|
|
|
@property
|
|
def _calendar(self):
|
|
return self._reader.sessions
|
|
|
|
def _array(self, dts, assets, field):
|
|
return self._reader.load_raw_arrays(
|
|
[field],
|
|
dts[0],
|
|
dts[-1],
|
|
assets,
|
|
)[0]
|
|
|
|
|
|
class MinuteHistoryLoader(HistoryLoader):
|
|
|
|
@property
|
|
def _prefetch_length(self):
|
|
return 1560
|
|
|
|
@lazyval
|
|
def _calendar(self):
|
|
mm = self.trading_calendar.all_minutes
|
|
return mm[mm.slice_indexer(start=self._reader.first_trading_day,
|
|
end=self._reader.last_available_dt)]
|
|
|
|
def _array(self, dts, assets, field):
|
|
return self._reader.load_raw_arrays(
|
|
[field],
|
|
dts[0],
|
|
dts[-1],
|
|
assets,
|
|
)[0]
|