Files
catalyst/zipline/data/minute_bars.py
T
Joe Jevnik bc0b117dc9 MAINT: make the data loading apis more consistent.
Changes BcolzDailyBarWriter to not be an abc, data is passed as an
iterator of (sid, dataframe) pairs to the write method.

Changes the AssetsDBWriter to be a single class which accepts an engine
at construction time and has a `write` method for writing dataframes for
the various tables. We no longer support writing the various other data
types, callers should coerce their data into a dataframe themselves. See
zipline.assets.synthetic for some helpers to do this.

Adds many new fixtures and updates some existing fixtures to use the new
ones:

WithDefaultDateBounds
  A fixture that provides the suite a START_DATE and END_DATE. This is
  meant to make it easy for other fixtures to synchronize their date
  ranges without depending on each other in strange ways. For example,
  WithBcolzMinuteBarReader and WithBcolzDailyBarReader by default should
  both have data for the same dates, so they may both depend on
  WithDefaultDateBounds without forcing a dependency between them.

WithTmpDir, WithInstanceTmpDir
  Provides the suite or individual test case a temporary directory.

WithBcolzDailyBarReader
  Provides the suite a BcolzDailyBarReader which reads from bcolz data
  written to a temporary directory. The data will be read from
  dataframes and then converted to bcolz files with
  BcolzDailyBarWriter.write

WithBcolzDailyBarReaderFromCSVs
  Provides the suite a BcolzDailyBarReader which reads from bcolz data
  written to a temporary directory. The data will be read from a
  collection of CSV files and then converted into the bcolz data through
  BcolzDailyBarWriter.write_csvs

WithBcolzMinuteBarReader
  Provides the suite a BcolzMinuteBarReader which reads from bcolz data
  written to a temporary directory. The data will be read from
  dataframes and then converted to bcolz files with
  BcolzMinuteBarWriter.write

WithAdjustmentReader
  Provides the suite a SQLiteAdjustmentReader which reads from an in
  memory sqlite database. The data will be read from dataframes and then
  converted into sqlite with SQLiteAdjustmentWriter.write

WithDataPortal
  Provides each test case a DataPortal object with data from temporary
  resources.
2016-04-15 23:46:10 -04:00

885 lines
30 KiB
Python

# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from os.path import join
from textwrap import dedent
import bcolz
from bcolz import ctable
from intervaltree import IntervalTree
import numpy as np
import pandas as pd
from zipline.data._minute_bar_internal import (
minute_value,
find_position_of_minute,
find_last_traded_position_internal
)
from zipline.gens.sim_engine import NANOS_IN_MINUTE
from zipline.utils.memoize import lazyval
# Number of minute slots stored per trading day. The writer's documentation
# describes 390 as the mode of minutes in NYSE trading days.
US_EQUITIES_MINUTES_PER_DAY = 390
# Default `expectedlen` hint handed to bcolz when a ctable is created:
# 390 minutes x 252 x 15, which the writer documents as supporting 15 years of
# NYSE equity market data (252 is presumably trading days per year).
DEFAULT_EXPECTEDLEN = US_EQUITIES_MINUTES_PER_DAY * 252 * 15
# Default factor by which float prices are multiplied before being stored as
# unsigned integers; 1000 keeps prices quoted to the thousandths place.
OHLC_RATIO = 1000
class BcolzMinuteOverlappingData(Exception):
    """
    Raised by the minute bar writer when the data being written starts on or
    before the last date already present in the output for that sid.
    """
class BcolzMinuteWriterColumnMismatch(Exception):
    """
    Raised by the minute bar writer when the length of the supplied dts does
    not match the length of every OHLCV column.
    """
def _calc_minute_index(market_opens, minutes_per_day):
    """
    Build the index of every minute slot, `minutes_per_day` per market open.

    Each market open contributes `minutes_per_day` consecutive minutes,
    beginning with the open itself.

    Parameters:
    -----------
    market_opens : sized iterable of pd.Timestamp
        The market open of each day; each element must expose `asm8`.
    minutes_per_day : int
        The number of minute slots generated for each market open.

    Returns:
    --------
    out : pd.DatetimeIndex
        UTC minutes, `len(market_opens) * minutes_per_day` long.
    """
    minutes = np.zeros(len(market_opens) * minutes_per_day,
                       dtype='datetime64[ns]')
    deltas = np.arange(0, minutes_per_day, dtype='timedelta64[m]')
    for i, market_open in enumerate(market_opens):
        start = market_open.asm8
        minute_values = start + deltas
        start_ix = minutes_per_day * i
        end_ix = start_ix + minutes_per_day
        minutes[start_ix:end_ix] = minute_values
    # `box=True` used to be passed here. It was the default (return an Index)
    # and the keyword no longer exists in current pandas, so omitting it keeps
    # the old result while avoiding a TypeError on newer versions.
    return pd.to_datetime(minutes, utc=True)
def _sid_subdir_path(sid):
    """
    Build the relative bcolz path for a sid, nested two directories deep.

    The sid is zero-padded to six digits; the first two digits name the
    top-level directory and the next two name the directory below it, which
    limits the number of entries in any given directory to 100 while
    supporting at least 100000 equities.

    Parameters:
    -----------
    sid : int
        Asset identifier.

    Returns:
    --------
    out : string
        A path for the bcolz rootdir, including subdirectory prefixes based on
        the padded string representation of the given sid.
        e.g. 1 is formatted as 00/00/000001.bcolz
    """
    sid_digits = format(sid, '06')
    top_level = sid_digits[:2]       # subdir 1: 00/XX
    second_level = sid_digits[2:4]   # subdir 2: XX/00
    leaf = "{0}.bcolz".format(sid_digits)
    return os.path.join(top_level, second_level, leaf)
class BcolzMinuteBarMetadata(object):
    """
    Dataset-wide metadata for a directory of bcolz minute bars.

    The metadata is persisted as JSON in a file named `METADATA_FILENAME`
    directly under the dataset's root directory.
    """
    METADATA_FILENAME = 'metadata.json'

    @classmethod
    def metadata_path(cls, rootdir):
        """Return the path of the metadata file inside `rootdir`."""
        return os.path.join(rootdir, cls.METADATA_FILENAME)

    @classmethod
    def read(cls, rootdir):
        """
        Load the metadata previously written under `rootdir`.

        Parameters:
        -----------
        rootdir : string
            The root directory containing the metadata file.

        Returns:
        --------
        out : BcolzMinuteBarMetadata
            Metadata whose market opens and closes are UTC datetimes rebuilt
            from the stored integer minutes since the epoch.
        """
        with open(cls.metadata_path(rootdir)) as fp:
            raw_data = json.load(fp)

        def from_epoch_minutes(epoch_minutes):
            # Stored values are integer minutes since the epoch.
            return pd.to_datetime(epoch_minutes, unit='m', utc=True)

        return cls(
            pd.Timestamp(raw_data['first_trading_day'], tz='UTC'),
            from_epoch_minutes(raw_data['market_opens']),
            from_epoch_minutes(raw_data['market_closes']),
            raw_data['ohlc_ratio'],
        )

    def __init__(self, first_trading_day,
                 market_opens,
                 market_closes,
                 ohlc_ratio):
        """
        Parameters:
        -----------
        first_trading_day : datetime-like
            UTC midnight of the first day available in the dataset.
        market_opens : pd.DatetimeIndex
            The market opens for each day in the data set.
        market_closes : pd.DatetimeIndex
            The market closes for each day in the data set.
        ohlc_ratio : int
            The factor by which the pricing data is multiplied so that the
            float data can be stored as an integer.
        """
        self.first_trading_day = first_trading_day
        self.market_opens = market_opens
        self.market_closes = market_closes
        self.ohlc_ratio = ohlc_ratio

    def write(self, rootdir):
        """
        Write the metadata to a JSON file in the rootdir.

        Values contained in the metadata are:
        first_trading_day : string
            'YYYY-MM-DD' formatted representation of the first trading day
             available in the dataset.
        market_opens : list of integers
            Each market open as integer minutes since the epoch.
        market_closes : list of integers
            Each market close as integer minutes since the epoch.
        ohlc_ratio : int
            The factor by which the pricing data is multiplied so that the
            float data can be stored as an integer.
        """
        def to_epoch_minutes(dts):
            # Truncate to minute resolution, then expose the raw integers so
            # that the values are JSON serializable.
            as_minutes = dts.values.astype('datetime64[m]')
            return as_minutes.astype(np.int64).tolist()

        metadata = {
            'first_trading_day': str(self.first_trading_day.date()),
            'market_opens': to_epoch_minutes(self.market_opens),
            'market_closes': to_epoch_minutes(self.market_closes),
            'ohlc_ratio': self.ohlc_ratio,
        }
        with open(self.metadata_path(rootdir), 'w+') as fp:
            json.dump(metadata, fp)
class BcolzMinuteBarWriter(object):
    """
    Class capable of writing minute OHLCV data to disk into bcolz format.

    Writes a bcolz directory for each individual sid, all contained within
    a root directory which also contains metadata about the entire dataset.

    Each individual asset's data is stored as a bcolz table with a column for
    each pricing field: (open, high, low, close, volume)

    The open, high, low, and close columns are integers which are 1000 times
    the quoted price, so that the data can be represented and stored as an
    np.uint32, supporting market prices quoted up to the thousands place.

    volume is a np.uint32 with no mutation of the tens place.

    The 'index' for each individual asset are a repeating period of minutes of
    length `minutes_per_day` starting from each market open.
    The file format does not account for half-days.

    e.g.:
    2016-01-19 14:31
    2016-01-19 14:32
    ...
    2016-01-19 20:59
    2016-01-19 21:00
    2016-01-20 14:31
    2016-01-20 14:32
    ...
    2016-01-20 20:59
    2016-01-20 21:00

    All assets are written with a common 'index', sharing a common first
    trading day. Assets that do not begin trading until after the first trading
    day will have zeros for all pricing data up and until data is traded.

    'index' is in quotations, because bcolz does not provide an index. The
    format allows index-like behavior by writing each minute's data into the
    corresponding position of the enumeration of the aforementioned datetime
    index.

    The market opens and closes from which those positions are derived are
    written in the metadata as integer minutes since the epoch.
    """
    COL_NAMES = ('open', 'high', 'low', 'close', 'volume')

    def __init__(self,
                 first_trading_day,
                 rootdir,
                 market_opens,
                 market_closes,
                 minutes_per_day,
                 ohlc_ratio=OHLC_RATIO,
                 expectedlen=DEFAULT_EXPECTEDLEN):
        """
        Parameters:
        -----------
        first_trading_day : datetime-like
            The first trading day in the data set.

        rootdir : string
            Path to the root directory into which to write the metadata and
            bcolz subdirectories.

        market_opens : pd.Series
            The market opens used as a starting point for each periodic span of
            minutes in the index.

            The index of the series is expected to be a DatetimeIndex of the
            UTC midnight of each trading day.

            The values are datetime64-like UTC market opens for each day in the
            index.

        market_closes : pd.Series
            The market closes that correspond with the market opens,

            The index of the series is expected to be a DatetimeIndex of the
            UTC midnight of each trading day.

            The values are datetime64-like UTC market closes for each day in
            the index.

            The closes are written so that the reader can filter out non-market
            minutes even though the tail end of early closes are written in
            the data arrays to keep a regular shape.

        minutes_per_day : int
            The number of minutes per each period, e.g. 390, the mode of
            minutes in NYSE trading days.

        ohlc_ratio : int
            The ratio by which to multiply the pricing data to convert the
            floats from floats to an integer to fit within the np.uint32.

            The default is 1000 to support pricing data which comes in to the
            thousands place.

        expectedlen : int
            The expected length of the dataset, used when creating the initial
            bcolz ctable.

            If the expectedlen is not used, the chunksize and corresponding
            compression ratios are not ideal.

            Defaults to supporting 15 years of NYSE equity market data.

            see: http://bcolz.blosc.org/opt-tips.html#informing-about-the-length-of-your-carrays # noqa
        """
        self._rootdir = rootdir
        self._first_trading_day = first_trading_day
        self._market_opens = market_opens[
            market_opens.index.slice_indexer(start=self._first_trading_day)]
        self._market_closes = market_closes[
            market_closes.index.slice_indexer(start=self._first_trading_day)]
        # Take the trading days from the *sliced* opens. Slot positions in the
        # minute index and in the metadata are counted from
        # `first_trading_day`, so counting days from an earlier start would
        # shift every padded and written block.
        self._trading_days = self._market_opens.index
        self._minutes_per_day = minutes_per_day
        self._expectedlen = expectedlen
        self._ohlc_ratio = ohlc_ratio

        self._minute_index = _calc_minute_index(
            self._market_opens, self._minutes_per_day)

        # Constructing a writer (re)writes the dataset metadata immediately.
        metadata = BcolzMinuteBarMetadata(
            self._first_trading_day,
            self._market_opens,
            self._market_closes,
            self._ohlc_ratio,
        )
        metadata.write(self._rootdir)

    @property
    def first_trading_day(self):
        """The first trading day in the data set."""
        return self._first_trading_day

    def sidpath(self, sid):
        """
        Parameters:
        -----------
        sid : int
            Asset identifier.

        Returns:
        --------
        out : string
            Full path to the bcolz rootdir for the given sid.
        """
        sid_subdir = _sid_subdir_path(sid)
        return join(self._rootdir, sid_subdir)

    def last_date_in_output_for_sid(self, sid):
        """
        Parameters:
        -----------
        sid : int
            Asset identifier.

        Returns:
        --------
        out : pd.Timestamp
            The midnight of the last date written in to the output for the
            given sid, or pd.NaT when nothing has been written yet.
        """
        sizes_path = "{0}/close/meta/sizes".format(self.sidpath(sid))
        if not os.path.exists(sizes_path):
            return pd.NaT
        with open(sizes_path, mode='r') as f:
            sizes = f.read()
        data = json.loads(sizes)
        # Floor division keeps the day count an integer on Python 3, where
        # `/` would produce a float that cannot be used as an index below.
        num_days = data['shape'][0] // self._minutes_per_day
        if num_days == 0:
            # empty container
            return pd.NaT
        return self._trading_days[num_days - 1]

    def _init_ctable(self, path):
        """
        Create empty ctable for given path.

        Parameters:
        -----------
        path : string
            The path to rootdir of the new ctable.

        Returns:
        --------
        out : bcolz.ctable
            The newly created, flushed, empty table.
        """
        # Only create the containing subdir on creation.
        # This is not to be confused with the `.bcolz` directory, but is the
        # directory up one level from the `.bcolz` directories.
        sid_containing_dirname = os.path.dirname(path)
        if not os.path.exists(sid_containing_dirname):
            # Other sids may have already created the containing directory.
            os.makedirs(sid_containing_dirname)
        initial_array = np.empty(0, np.uint32)
        table = ctable(
            rootdir=path,
            columns=[
                initial_array,
                initial_array,
                initial_array,
                initial_array,
                initial_array,
            ],
            names=[
                'open',
                'high',
                'low',
                'close',
                'volume'
            ],
            expectedlen=self._expectedlen,
            mode='w',
        )
        table.flush()
        return table

    def _ensure_ctable(self, sid):
        """Ensure that a ctable exists for ``sid``, then return it."""
        sidpath = self.sidpath(sid)
        if not os.path.exists(sidpath):
            return self._init_ctable(sidpath)
        return bcolz.ctable(rootdir=sidpath, mode='a')

    def _zerofill(self, table, numdays):
        """Append `numdays` days worth of zeros to every column of `table`."""
        num_to_prepend = numdays * self._minutes_per_day
        prepend_array = np.zeros(num_to_prepend, np.uint32)
        # Fill all OHLCV with zeros.
        table.append([prepend_array] * 5)
        table.flush()

    def pad(self, sid, date):
        """
        Fill sid container with empty data through the specified date.

        e.g. if the date is two days after the last date in the sid's existing
        output, 2 x `minute_per_day` worth of zeros will be added to the
        output.

        Parameters:
        -----------
        sid : int
            The asset identifier for the data being written.
        date : datetime-like
            The date used to calculate how many slots to be pad.
            The padding is done through the date, i.e. after the padding is
            done the `last_date_in_output_for_sid` will be equal to `date`
        """
        table = self._ensure_ctable(sid)

        last_date = self.last_date_in_output_for_sid(sid)

        tds = self._trading_days

        # Comparisons against NaT are always False, so an empty container
        # falls through the first test and is only skipped when `date`
        # precedes the first trading day.
        if date <= last_date or date < tds[0]:
            # No need to pad.
            return

        # NaT never compares equal to anything, itself included, so the
        # "no data yet" case has to be detected with a null check.
        if pd.isnull(last_date):
            # If there is no data, determine how many days to add so that
            # desired days are written to the correct slots.
            days_to_zerofill = tds[tds.slice_indexer(end=date)]
        else:
            days_to_zerofill = tds[tds.slice_indexer(
                start=last_date + tds.freq,
                end=date)]

        self._zerofill(table, len(days_to_zerofill))

        new_last_date = self.last_date_in_output_for_sid(sid)
        assert new_last_date == date, "new_last_date={0} != date={1}".format(
            new_last_date, date)

    def write(self, sid, df):
        """
        Write the OHLCV data for the given sid.

        If there is no bcolz ctable yet created for the sid, create it.
        If the length of the bcolz ctable is not exactly to the date before
        the first day provided, fill the ctable with 0s up to that date.

        Writes in blocks of the size of the days times minutes per day.

        Parameters:
        -----------
        sid : int
            The asset identifier for the data being written.
        df : pd.DataFrame
            DataFrame of market data with the following characteristics.
            columns : ('open', 'high', 'low', 'close', 'volume')
                open : float64
                high : float64
                low  : float64
                close : float64
                volume : float64|int64
            index : DatetimeIndex of market minutes.
        """
        cols = {
            'open': df.open.values,
            'high': df.high.values,
            'low': df.low.values,
            'close': df.close.values,
            'volume': df.volume.values,
        }
        dts = df.index.values
        # Call internal method, since DataFrame has already ensured matching
        # index and value lengths.
        self._write_cols(sid, dts, cols)

    def write_cols(self, sid, dts, cols):
        """
        Write the OHLCV data for the given sid.

        If there is no bcolz ctable yet created for the sid, create it.
        If the length of the bcolz ctable is not exactly to the date before
        the first day provided, fill the ctable with 0s up to that date.

        Writes in blocks of the size of the days times minutes per day.

        Parameters:
        -----------
        sid : int
            The asset identifier for the data being written.
        dts : datetime64 array
            The dts corresponding to values in cols.
        cols : dict of str -> np.array
            dict of market data with the following characteristics.
            keys are ('open', 'high', 'low', 'close', 'volume')
            open : float64
            high : float64
            low  : float64
            close : float64
            volume : float64|int64

        Raises:
        -------
        BcolzMinuteWriterColumnMismatch
            If any column's length differs from the length of `dts`.
        """
        if not all(len(dts) == len(cols[name]) for name in self.COL_NAMES):
            raise BcolzMinuteWriterColumnMismatch(
                "Length of dts={0} should match cols: {1}".format(
                    len(dts),
                    " ".join("{0}={1}".format(name, len(cols[name]))
                             for name in self.COL_NAMES)))
        self._write_cols(sid, dts, cols)

    def _write_cols(self, sid, dts, cols):
        """
        Internal method for `write_cols` and `write`.

        Parameters:
        -----------
        sid : int
            The asset identifier for the data being written.
        dts : datetime64 array
            The dts corresponding to values in cols.
        cols : dict of str -> np.array
            dict of market data with the following characteristics.
            keys are ('open', 'high', 'low', 'close', 'volume')
            open : float64
            high : float64
            low  : float64
            close : float64
            volume : float64|int64

        Raises:
        -------
        BcolzMinuteOverlappingData
            If the output for `sid` already reaches the first day of `dts`.
        """
        # Make sure the sid's table exists before inspecting what it holds.
        self._ensure_ctable(sid)

        tds = self._trading_days
        input_first_day = pd.Timestamp(dts[0].astype('datetime64[D]'),
                                       tz='UTC')
        input_last_day = pd.Timestamp(dts[-1].astype('datetime64[D]'),
                                      tz='UTC')

        last_date = self.last_date_in_output_for_sid(sid)

        # A NaT `last_date` (nothing written yet) compares False here.
        if last_date >= input_first_day:
            raise BcolzMinuteOverlappingData(
                "Data with last_date={0} already includes input start={1} for\n"
                "sid={2}".format(last_date, input_first_day, sid))

        # Zero fill any gap between the existing output and the new data so
        # that the new block lands in the correct slots.
        day_before_input = input_first_day - tds.freq

        self.pad(sid, day_before_input)
        # Reopen the table so that it reflects the padding just written.
        table = self._ensure_ctable(sid)

        days_to_write = tds[tds.slice_indexer(start=input_first_day,
                                              end=input_last_day)]

        minutes_count = len(days_to_write) * self._minutes_per_day

        all_minutes = self._minute_index
        indexer = all_minutes.slice_indexer(start=days_to_write[0])
        all_minutes_in_window = all_minutes[indexer]

        # Every slot starts as zero, i.e. "no trade"; only the slots matching
        # the input dts are overwritten below.
        open_col = np.zeros(minutes_count, dtype=np.uint32)
        high_col = np.zeros(minutes_count, dtype=np.uint32)
        low_col = np.zeros(minutes_count, dtype=np.uint32)
        close_col = np.zeros(minutes_count, dtype=np.uint32)
        vol_col = np.zeros(minutes_count, dtype=np.uint32)

        dt_ixs = np.searchsorted(all_minutes_in_window.values,
                                 dts.astype('datetime64[ns]'))

        ohlc_ratio = self._ohlc_ratio

        def convert_col(col):
            """Adapt float column into a uint32 column.
            """
            return (np.nan_to_num(col) * ohlc_ratio).astype(np.uint32)

        open_col[dt_ixs] = convert_col(cols['open'])
        high_col[dt_ixs] = convert_col(cols['high'])
        low_col[dt_ixs] = convert_col(cols['low'])
        close_col[dt_ixs] = convert_col(cols['close'])
        vol_col[dt_ixs] = cols['volume'].astype(np.uint32)

        table.append([
            open_col,
            high_col,
            low_col,
            close_col,
            vol_col
        ])
        table.flush()
class BcolzMinuteBarReader(object):
    """
    Reader for data written by BcolzMinuteBarWriter.
    """

    def __init__(self, rootdir):
        """
        Parameters:
        -----------
        rootdir : string
            The root directory containing the metadata and asset bcolz
            directories.
        """
        self._rootdir = rootdir

        metadata = self._get_metadata()

        self._first_trading_day = metadata.first_trading_day

        # Keep both the datetime form and the integer minutes-since-epoch
        # form; the latter is what the position lookup helpers are given.
        self._market_opens = metadata.market_opens
        self._market_open_values = metadata.market_opens.values.\
            astype('datetime64[m]').astype(np.int64)
        self._market_closes = metadata.market_closes
        self._market_close_values = metadata.market_closes.values.\
            astype('datetime64[m]').astype(np.int64)

        self._ohlc_inverse = 1.0 / metadata.ohlc_ratio

        # Cache of opened carrays, keyed by field and then by sid.
        self._carrays = {
            'open': {},
            'high': {},
            'low': {},
            'close': {},
            'volume': {},
        }

        # One entry cache of the last dt -> position lookup in `get_value`.
        self._last_get_value_dt_position = None
        self._last_get_value_dt_value = None

    def _get_metadata(self):
        """Read the dataset metadata stored in the root directory."""
        return BcolzMinuteBarMetadata.read(self._rootdir)

    @lazyval
    def last_available_dt(self):
        """The last market close recorded in the metadata."""
        return self._market_closes[-1]

    @property
    def first_trading_day(self):
        """The first trading day recorded in the metadata."""
        return self._first_trading_day

    def _minutes_to_exclude(self):
        """
        Calculate the minutes which should be excluded when a window
        occurs on days which had an early close, i.e. days where the close
        based on the regular period of minutes per day and the market close
        do not match.

        Returns:
        --------
        List of (market_open, early_close) tuples, one for each day whose
        span from open to close differs from the regular period.
        """
        market_opens = self._market_opens.values.astype('datetime64[m]')
        market_closes = self._market_closes.values.astype('datetime64[m]')
        minutes_per_day = (market_closes - market_opens).astype(np.int64)
        # The close is the last minute of the day, so a regular day spans one
        # minute less than the number of slots stored for it.
        early_indices = np.where(
            minutes_per_day != US_EQUITIES_MINUTES_PER_DAY - 1)[0]
        early_opens = self._market_opens[early_indices]
        early_closes = self._market_closes[early_indices]
        return list(zip(early_opens, early_closes))

    @lazyval
    def _minute_exclusion_tree(self):
        """
        Build an interval tree keyed by the start and end of each range
        of positions that should be dropped from windows. (These are the
        minutes between an early close and the minute which would be the close
        based on the regular period if there were no early close.)
        The value of each node is the same start and end position stored as
        a tuple.

        The data is stored as such in support of a fast answer to the question,
        does a given start and end position overlap any of the exclusion spans?

        Returns
        -------
        IntervalTree containing nodes which represent the minutes to exclude
        because of early closes.
        """
        itree = IntervalTree()
        for market_open, early_close in self._minutes_to_exclude():
            start_pos = self._find_position_of_minute(early_close) + 1
            end_pos = (
                self._find_position_of_minute(market_open)
                +
                US_EQUITIES_MINUTES_PER_DAY
                -
                1
            )
            data = (start_pos, end_pos)
            # The key is end-exclusive while the stored data keeps the
            # inclusive end position.
            itree[start_pos:end_pos + 1] = data
        return itree

    def _exclusion_indices_for_range(self, start_idx, end_idx):
        """
        Returns
        -------
        List of tuples of (start, stop) which represent the ranges of minutes
        which should be excluded when a market minute window is requested,
        or None when the range does not overlap any excluded minutes.
        """
        itree = self._minute_exclusion_tree
        if itree.overlaps(start_idx, end_idx):
            intervals = itree[start_idx:end_idx]
            return sorted(interval.data for interval in intervals)
        return None

    def _get_carray_path(self, sid, field):
        """Return the path of the carray holding `field` for `sid`."""
        sid_subdir = _sid_subdir_path(sid)
        # carrays are subdirectories of the sid's rootdir
        return os.path.join(self._rootdir, sid_subdir, field)

    def _open_minute_file(self, field, sid):
        """Return the (cached) read-only carray of `field` for `sid`."""
        sid = int(sid)

        try:
            carray = self._carrays[field][sid]
        except KeyError:
            carray = self._carrays[field][sid] = \
                bcolz.carray(rootdir=self._get_carray_path(sid, field),
                             mode='r')

        return carray

    def get_value(self, sid, dt, field):
        """
        Retrieve the pricing info for the given sid, dt, and field.

        Parameters:
        -----------
        sid : int
            Asset identifier.
        dt : datetime-like
            The datetime at which the trade occurred.
        field : string
            The type of pricing data to retrieve.
            ('open', 'high', 'low', 'close', 'volume')

        Returns:
        --------
        out : float|int

        The market data for the given sid, dt, and field coordinates.

        For OHLC:
            Returns a float if a trade occurred at the given dt.
            If no trade occurred, a np.nan is returned.

        For volume:
            Returns the integer value of the volume.
            (A volume of 0 signifies no trades for the given dt.)
        """
        # Consecutive lookups for the same dt reuse the position found by the
        # previous call.
        if self._last_get_value_dt_value == dt.value:
            minute_pos = self._last_get_value_dt_position
        else:
            minute_pos = self._find_position_of_minute(dt)
            self._last_get_value_dt_value = dt.value
            self._last_get_value_dt_position = minute_pos

        value = self._open_minute_file(field, sid)[minute_pos]
        # A stored zero means that no trade was recorded for the minute.
        if value == 0:
            if field == 'volume':
                return 0
            else:
                return np.nan
        if field != 'volume':
            value *= self._ohlc_inverse
        return value

    def get_last_traded_dt(self, asset, dt):
        """
        Return the minute of the last trade found for `asset` by
        `_find_last_traded_position` for `dt`, or pd.NaT if there is none.
        """
        minute_pos = self._find_last_traded_position(asset, dt)
        if minute_pos == -1:
            return pd.NaT
        return self._pos_to_minute(minute_pos)

    def _find_last_traded_position(self, asset, dt):
        """
        Return the position reported by `find_last_traded_position_internal`
        for `asset` and `dt`, or -1 when `dt` precedes the asset's start date.
        """
        volumes = self._open_minute_file('volume', asset)
        # Floor division keeps the minute counts integral on Python 3, where
        # `/` would hand floats to the compiled helper.
        start_date_minutes = asset.start_date.value // NANOS_IN_MINUTE
        dt_minutes = dt.value // NANOS_IN_MINUTE

        if dt_minutes < start_date_minutes:
            return -1

        return find_last_traded_position_internal(
            self._market_open_values,
            self._market_close_values,
            dt_minutes,
            start_date_minutes,
            volumes,
            US_EQUITIES_MINUTES_PER_DAY
        )

    def _pos_to_minute(self, pos):
        """Convert a slot position back into a UTC pd.Timestamp."""
        minute_epoch = minute_value(
            self._market_open_values,
            pos,
            US_EQUITIES_MINUTES_PER_DAY
        )

        return pd.Timestamp(minute_epoch, tz='UTC', unit="m")

    def _find_position_of_minute(self, minute_dt):
        """
        Internal method that returns the position of the given minute in the
        list of every trading minute since market open of the first trading
        day. Adjusts non market minutes to the last close.

        ex. this method would return 1 for 2002-01-02 9:32 AM Eastern, if
        2002-01-02 is the first trading day of the dataset.

        Parameters
        ----------
        minute_dt: pd.Timestamp
            The minute whose position should be calculated.

        Returns
        -------
        int: The position of the given minute in the list of all trading
        minutes since market open on the first trading day.
        """
        return find_position_of_minute(
            self._market_open_values,
            self._market_close_values,
            # Integer minutes since the epoch; `/` would yield a float on
            # Python 3.
            minute_dt.value // NANOS_IN_MINUTE,
            US_EQUITIES_MINUTES_PER_DAY,
        )

    def unadjusted_window(self, fields, start_dt, end_dt, sids):
        """
        Parameters
        ----------
        fields : list of str
           'open', 'high', 'low', 'close', or 'volume'
        start_dt: Timestamp
           Beginning of the window range.
        end_dt: Timestamp
           End of the window range.
        sids : list of int
           The asset identifiers in the window.

        Returns
        -------
        list of np.ndarray
            A list with an entry per field of ndarrays with shape
            (sids, minutes in range), containing the values for the
            respective field over start and end dt range. OHLC fields are
            float64 with np.nan where no value is stored; volume is uint32.
        """
        start_idx = self._find_position_of_minute(start_dt)
        end_idx = self._find_position_of_minute(end_dt)

        num_minutes = (end_idx - start_idx + 1)

        results = []

        indices_to_exclude = self._exclusion_indices_for_range(
            start_idx, end_idx)
        if indices_to_exclude is not None:
            # Slots after an early close are not market minutes, so they do
            # not count towards the width of the window.
            for excl_start, excl_stop in indices_to_exclude:
                length = excl_stop - excl_start + 1
                num_minutes -= length

        shape = (len(sids), num_minutes)

        for field in fields:
            if field != 'volume':
                out = np.full(shape, np.nan)
            else:
                out = np.zeros(shape, dtype=np.uint32)

            for i, sid in enumerate(sids):
                carray = self._open_minute_file(field, sid)
                values = carray[start_idx:end_idx + 1]
                if indices_to_exclude is not None:
                    # Delete from the back so that the offsets of the earlier
                    # exclusion ranges remain valid.
                    for excl_start, excl_stop in indices_to_exclude[::-1]:
                        excl_slice = np.s_[
                            excl_start - start_idx:excl_stop - start_idx + 1]
                        values = np.delete(values, excl_slice)
                # Zeros mean "no data"; leave the fill value in those slots.
                where = values != 0
                out[i, where] = values[where]
            if field != 'volume':
                out *= self._ohlc_inverse
            results.append(out)
        return results