mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 17:00:15 +08:00
WIP: Five Minute bars and FiveMinuteSimulationClock
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
Dear Zipline Maintainers,
|
||||
Dear Catalyst Maintainers,
|
||||
|
||||
Before I tell you about my issue, let me describe my environment:
|
||||
|
||||
@@ -7,7 +7,7 @@ Before I tell you about my issue, let me describe my environment:
|
||||
* Operating System: (Windows Version or `$ uname --all`)
|
||||
* Python Version: `$ python --version`
|
||||
* Python Bitness: `$ python -c 'import math, sys;print(int(math.log(sys.maxsize + 1, 2) + 1))'`
|
||||
* How did you install Zipline: (`pip`, `conda`, or `other (please explain)`)
|
||||
* How did you install Catalyst: (`pip`, `conda`, or `other (please explain)`)
|
||||
* Python packages: `$ pip freeze` or `$ conda list`
|
||||
|
||||
Now that you know a little about me, let me tell you about the issue I am
|
||||
|
||||
@@ -123,7 +123,7 @@ def ipython_only(option):
|
||||
)
|
||||
@click.option(
|
||||
'--data-frequency',
|
||||
type=click.Choice({'daily', 'minute'}),
|
||||
type=click.Choice({'daily', '5-minute', 'minute'}),
|
||||
default='daily',
|
||||
show_default=True,
|
||||
help='The data frequency of the simulation.',
|
||||
|
||||
+40
-15
@@ -133,7 +133,10 @@ from catalyst.utils.security_list import SecurityList
|
||||
import catalyst.protocol
|
||||
from catalyst.sources.requests_csv import PandasRequestsCSV
|
||||
|
||||
from catalyst.gens.sim_engine import MinuteSimulationClock
|
||||
from catalyst.gens.sim_engine import (
|
||||
MinuteSimulationClock,
|
||||
FiveMinuteSimulationClock,
|
||||
)
|
||||
from catalyst.sources.benchmark_source import BenchmarkSource
|
||||
from catalyst.catalyst_warnings import ZiplineDeprecationWarning
|
||||
|
||||
@@ -170,7 +173,7 @@ class TradingAlgorithm(object):
|
||||
algo_filename : str, optional
|
||||
The filename for the algoscript. This will be used in exception
|
||||
tracebacks. default: '<string>'.
|
||||
data_frequency : {'daily', 'minute'}, optional
|
||||
data_frequency : {'daily', '5-minute', 'minute'}, optional
|
||||
The duration of the bars.
|
||||
instant_fill : bool, optional
|
||||
Whether to fill orders immediately or on next bar. default: False
|
||||
@@ -223,7 +226,7 @@ class TradingAlgorithm(object):
|
||||
script : str
|
||||
Algoscript that contains initialize and
|
||||
handle_data function definition.
|
||||
data_frequency : {'daily', 'minute'}
|
||||
data_frequency : {'daily', '5-minute', 'minute'}
|
||||
The duration of the bars.
|
||||
capital_base : float <default: 1.0e5>
|
||||
How much capital to start with.
|
||||
@@ -449,7 +452,7 @@ class TradingAlgorithm(object):
|
||||
self._in_before_trading_start = True
|
||||
|
||||
with handle_non_market_minutes(data) if \
|
||||
self.data_frequency == "minute" else ExitStack():
|
||||
self.data_frequency in ('minute', '5-minute') else ExitStack():
|
||||
self._before_trading_start(self, data)
|
||||
|
||||
self._in_before_trading_start = False
|
||||
@@ -505,10 +508,11 @@ class TradingAlgorithm(object):
|
||||
market_closes = trading_o_and_c['market_close']
|
||||
minutely_emission = False
|
||||
|
||||
if self.sim_params.data_frequency == 'minute':
|
||||
if self.sim_params.data_frequency in set(('minute', '5-minute')):
|
||||
market_opens = trading_o_and_c['market_open']
|
||||
|
||||
minutely_emission = self.sim_params.emission_rate == "minute"
|
||||
minutely_emission = self.sim_params.emission_rate in \
|
||||
set(('minute', '5-minute'))
|
||||
else:
|
||||
# in daily mode, we want to have one bar per session, timestamped
|
||||
# as the last minute of the session.
|
||||
@@ -528,8 +532,17 @@ class TradingAlgorithm(object):
|
||||
# FIXME generalize these values
|
||||
before_trading_start_minutes = days_at_time(
|
||||
self.sim_params.sessions,
|
||||
time(8, 45),
|
||||
"US/Eastern"
|
||||
time(0, 0),
|
||||
'UTC',
|
||||
)
|
||||
|
||||
if self.sim_params.data_frequency == '5-minute':
|
||||
return FiveMinuteSimulationClock(
|
||||
self.sim_params.sessions,
|
||||
execution_opens,
|
||||
execution_closes,
|
||||
before_trading_start_minutes,
|
||||
minute_emission=minutely_emission,
|
||||
)
|
||||
|
||||
return MinuteSimulationClock(
|
||||
@@ -660,8 +673,11 @@ class TradingAlgorithm(object):
|
||||
# Assume data is daily if timestamp times are
|
||||
# standardized, otherwise assume minute bars.
|
||||
times = data.major_axis.time
|
||||
if np.all(times == times[0]):
|
||||
time_count = times.nunique()
|
||||
if time_count == 1:
|
||||
self.sim_params.data_frequency = 'daily'
|
||||
elif time_count == 288:
|
||||
self.sim_params.data_frequency = '5-minute'
|
||||
else:
|
||||
self.sim_params.data_frequency = 'minute'
|
||||
|
||||
@@ -683,6 +699,8 @@ class TradingAlgorithm(object):
|
||||
|
||||
if self.sim_params.data_frequency == 'daily':
|
||||
equity_reader_arg = 'equity_daily_reader'
|
||||
elif self.sim_params.data_frequency == '5-minute':
|
||||
equity_daily_reader = 'equity_5_minute_reader'
|
||||
elif self.sim_params.data_frequency == 'minute':
|
||||
equity_reader_arg = 'equity_minute_reader'
|
||||
equity_reader = PanelBarReader(
|
||||
@@ -926,9 +944,9 @@ class TradingAlgorithm(object):
|
||||
The arena from the simulation parameters. This will normally
|
||||
be ``'backtest'`` but some systems may use this distinguish
|
||||
live trading from backtesting.
|
||||
data_frequency : {'daily', 'minute'}
|
||||
data_frequency : {'daily', '5-minute', 'minute'}
|
||||
data_frequency tells the algorithm if it is running with
|
||||
daily data or minute data.
|
||||
daily, minute, or five-minute mode.
|
||||
start : datetime
|
||||
The start date for the simulation.
|
||||
end : datetime
|
||||
@@ -1103,10 +1121,17 @@ class TradingAlgorithm(object):
|
||||
'time_rule= when calling schedule_function without '
|
||||
'specifying a date_rule', stacklevel=3)
|
||||
|
||||
freq = self.sim_params.data_frequency
|
||||
|
||||
date_rule = date_rule or date_rules.every_day()
|
||||
time_rule = ((time_rule or time_rules.every_minute())
|
||||
if self.sim_params.data_frequency == 'minute' else
|
||||
# If we are in daily mode the time_rule is ignored.
|
||||
if freq is 'daily':
|
||||
# ignore time rule in daily mode
|
||||
time_rule = time_rules.every_minute()
|
||||
else:
|
||||
# use provided time rule or default to every minute or 5 minutes
|
||||
# based on desired data frequency.
|
||||
time_rule = time_rule or (time_rules.every_5_minutes()
|
||||
if freq is '5-minute' else
|
||||
time_rules.every_minute())
|
||||
|
||||
# Check the type of the algorithm's schedule before pulling calendar
|
||||
@@ -1782,7 +1807,7 @@ class TradingAlgorithm(object):
|
||||
|
||||
@data_frequency.setter
|
||||
def data_frequency(self, value):
|
||||
assert value in ('daily', 'minute')
|
||||
assert value in ('daily', '5-minute', 'minute')
|
||||
self.sim_params.data_frequency = value
|
||||
|
||||
@api_method
|
||||
|
||||
@@ -117,6 +117,7 @@ class AbstractBundle(object):
|
||||
|
||||
symbol_map = raw_metadata.symbol
|
||||
|
||||
if 'daily' in self.frequencies:
|
||||
daily_bar_writer.write(
|
||||
self._fetch_symbol_iter(
|
||||
api_key,
|
||||
@@ -132,9 +133,11 @@ class AbstractBundle(object):
|
||||
show_progress=show_progress,
|
||||
)
|
||||
|
||||
"""
|
||||
for data_frequency in self.frequencies:
|
||||
self._write_symbol_for_freq(
|
||||
metadata = self._post_process_metadata(raw_metadata, cache)
|
||||
asset_db_writer.write(metadata)
|
||||
|
||||
if '5-minute' in self.frequencies:
|
||||
minute_bar_writer.write(
|
||||
self._fetch_symbol_iter(
|
||||
api_key,
|
||||
cache,
|
||||
@@ -142,21 +145,11 @@ class AbstractBundle(object):
|
||||
calendar,
|
||||
start_session,
|
||||
end_session,
|
||||
data_frequency,
|
||||
'5-minute',
|
||||
retries,
|
||||
),
|
||||
data_frequency,
|
||||
daily_bar_writer,
|
||||
minute_bar_writer,
|
||||
assets=raw_metadata.index,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
|
||||
metadata = self._post_process_metadata(raw_metadata, cache)
|
||||
asset_db_writer.write(metadata)
|
||||
|
||||
adjustment_writer.write()
|
||||
else:
|
||||
@@ -188,7 +181,7 @@ class AbstractBundle(object):
|
||||
raw_iter = self._fetch_metadata_iter(api_key, cache, retries, environ)
|
||||
|
||||
def item_show_func(_, _it=iter(count())):
|
||||
return 'Downloading metadata page: {0}'.format(next(_it))
|
||||
return 'Downloading metadata: {0}'.format('.' * next(_it))
|
||||
|
||||
with maybe_show_progress(
|
||||
raw_iter,
|
||||
@@ -263,64 +256,107 @@ class AbstractBundle(object):
|
||||
calendar,
|
||||
start_session,
|
||||
end_session,
|
||||
frequency,
|
||||
data_frequency,
|
||||
retries):
|
||||
|
||||
for asset_id, symbol in symbol_map.iteritems():
|
||||
# Record start time of iteration, compare at end of iteration to
|
||||
# adhere to the datas source's rate limit policy.
|
||||
start_time = pd.Timestamp.utcnow()
|
||||
|
||||
# Fetch new data if cached data is absent or stale, otherwise
|
||||
# returns the cached data unaltered. The `should_sleep` flag
|
||||
# indicates that an API call was attempted, and that we should be
|
||||
# ensure aren't exceeding our rate limit before proceeding to the
|
||||
# next symbol. If the raw_data is updated, it is cached before being
|
||||
# returned.
|
||||
raw_data, should_sleep = self._maybe_update_symbol_frame(
|
||||
start_time,
|
||||
api_key,
|
||||
cache,
|
||||
symbol,
|
||||
start_session,
|
||||
end_session,
|
||||
data_frequency,
|
||||
)
|
||||
|
||||
# TODO(cfromknecht) further data validation?
|
||||
|
||||
# Pass asset_id and symbol data to writer.
|
||||
yield asset_id, raw_data
|
||||
|
||||
# If an API call was made during this iteration and the time to
|
||||
# reach this point was less than the inter-request `wait_time`,
|
||||
# sleep until after enough time has elapsed to prevent getting rate
|
||||
# limited.
|
||||
if should_sleep:
|
||||
remaining = pd.Timestamp.utcnow() - start_time + self.wait_time
|
||||
if remaining.value > 0:
|
||||
sleep(remaining.value / 10**9)
|
||||
|
||||
def _maybe_update_symbol_frame(self,
|
||||
start_time,
|
||||
api_key,
|
||||
cache,
|
||||
symbol,
|
||||
start_session,
|
||||
end_session,
|
||||
data_frequency):
|
||||
try:
|
||||
raw_data = cache[symbol]
|
||||
except KeyError:
|
||||
raw_data = None
|
||||
|
||||
if raw_data is not None and not raw_data.empty:
|
||||
last = raw_data.index[-1].tz_localize('UTC')
|
||||
else:
|
||||
last = start_session
|
||||
# Select the most recent date in cached dataset if it exists,
|
||||
# otherwise use the provided `start_session`.
|
||||
last = (
|
||||
raw_data.index[-1].tz_localize('UTC')
|
||||
if raw_data is not None and not raw_data.empty else
|
||||
start_session
|
||||
)
|
||||
|
||||
next_start_time = last + pd.Timedelta(minutes=5)
|
||||
if start_time > next_start_time:
|
||||
# Determine time at which cached data will be considered stale.
|
||||
cache_expiration = last + pd.Timedelta(minutes=5)
|
||||
if start_time <= cache_expiration:
|
||||
# Data is fresh enough to reuse, no need to update. Iterator can
|
||||
# proceed to next symbol directly since no API call was required.
|
||||
should_sleep = False
|
||||
else:
|
||||
# Data for symbol is old enough to attempt an update or is not
|
||||
# present in the cache. Fetch raw data for a single symbol
|
||||
# with requested intervals and frequency.
|
||||
raw_diff = self.fetch_raw_symbol_frame(
|
||||
api_key,
|
||||
symbol,
|
||||
last,
|
||||
end_session,
|
||||
frequency,
|
||||
data_frequency,
|
||||
)
|
||||
|
||||
# Filter incoming data to minimize overlap.
|
||||
raw_diff = raw_diff[
|
||||
(raw_diff.index >= last) &
|
||||
(raw_diff.index <= end_session)
|
||||
]
|
||||
|
||||
# Append incoming data to cached data if it exists,
|
||||
# otherwise treat incoming data as the entire raw dataset.
|
||||
raw_data = cache[symbol] = (
|
||||
raw_data.append(raw_diff)
|
||||
if raw_data is not None else
|
||||
raw_diff
|
||||
)
|
||||
|
||||
# Filter out any duplicates entries, keep last one as previous
|
||||
# one was probably an incomplete frame.
|
||||
raw_data = raw_data[~raw_data.index.duplicated(keep='last')]
|
||||
|
||||
# If we arrive here, we must have attempted an API call.
|
||||
# This flag tells the iterator to pause before starting the next
|
||||
# asset, that we don't exceed the data source's rate limit.
|
||||
should_sleep = True
|
||||
else:
|
||||
should_sleep = False
|
||||
|
||||
"""
|
||||
sessions = calendar.sessions_in_range(start_session, end_session)
|
||||
|
||||
print 'raw_data before:\n', raw_data.head()
|
||||
raw_data = raw_data.reindex(
|
||||
sessions,
|
||||
copy=False,
|
||||
).fillna(0.0)
|
||||
print 'raw_data after:\n', raw_data.head()
|
||||
"""
|
||||
|
||||
yield asset_id, raw_data
|
||||
|
||||
if should_sleep:
|
||||
remaining = pd.Timestamp.utcnow() - start_time + self.wait_time
|
||||
if remaining.value > 0:
|
||||
sleep(remaining.value / 10**9)
|
||||
return raw_data, should_sleep
|
||||
|
||||
def _write_symbol_for_freq(self,
|
||||
pricing_iter,
|
||||
@@ -338,13 +374,11 @@ class AbstractBundle(object):
|
||||
elif data_frequency == '5-minute':
|
||||
minute_bar_writer.write(
|
||||
pricing_iter,
|
||||
assets=assets,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
elif data_frequency == 'minute':
|
||||
minute_bar_writer.write(
|
||||
pricing_iter,
|
||||
assets=assets,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
else:
|
||||
|
||||
@@ -24,6 +24,10 @@ from bcolz import ctable
|
||||
from intervaltree import IntervalTree
|
||||
import logbook
|
||||
import numpy as np
|
||||
from numpy import (
|
||||
iinfo,
|
||||
uint64,
|
||||
)
|
||||
import pandas as pd
|
||||
from pandas import HDFStore
|
||||
import tables
|
||||
@@ -39,23 +43,28 @@ from catalyst.data._minute_bar_internal import (
|
||||
from catalyst.gens.sim_engine import NANOS_IN_MINUTE
|
||||
|
||||
from catalyst.data.bar_reader import BarReader, NoDataOnDate
|
||||
from catalyst.data.us_equity_pricing import check_uint32_safe
|
||||
from catalyst.data.us_equity_pricing import (
|
||||
winsorise_uint64,
|
||||
check_uint64_safe,
|
||||
)
|
||||
from catalyst.utils.calendars import get_calendar
|
||||
from catalyst.utils.cli import maybe_show_progress
|
||||
from catalyst.utils.memoize import lazyval
|
||||
|
||||
|
||||
logger = logbook.Logger('FiveMinuteBars')
|
||||
|
||||
CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY = 288
|
||||
OPEN_FIVE_MINUTES_PER_DAY = 288
|
||||
US_EQUITIES_MINUTES_PER_DAY = 390
|
||||
FUTURES_MINUTES_PER_DAY = 1440
|
||||
|
||||
DEFAULT_EXPECTEDLEN = US_EQUITIES_MINUTES_PER_DAY * 252 * 15
|
||||
DEFAULT_EXPECTED_CRYPTO_LEN = CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY * 366 * 15
|
||||
DEFAULT_EXPECTEDLEN_CRYPTO = OPEN_FIVE_MINUTES_PER_DAY * 366 * 15
|
||||
|
||||
OHLC_RATIO = 1000
|
||||
OHLC_RATIO = 1000000
|
||||
|
||||
OHLC = frozenset(['open', 'high', 'low', 'close'])
|
||||
OHLCV = frozenset(['open', 'high', 'low', 'close', 'volume'])
|
||||
|
||||
UINT64_MAX = iinfo(uint64).max
|
||||
|
||||
class BcolzFiveMinuteOverlappingData(Exception):
|
||||
pass
|
||||
@@ -68,7 +77,7 @@ class BcolzFiveMinuteWriterColumnMismatch(Exception):
|
||||
class FiveMinuteBarReader(BarReader):
|
||||
@property
|
||||
def data_frequency(self):
|
||||
return "five-minute"
|
||||
return "5-minute"
|
||||
|
||||
|
||||
def _calc_five_minute_index(market_opens, five_minutes_per_day):
|
||||
@@ -116,19 +125,19 @@ def _sid_subdir_path(sid):
|
||||
|
||||
|
||||
def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
|
||||
"""Adapt OHLCV columns into uint32 columns.
|
||||
"""Adapt OHLCV columns into uint64 columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cols : dict
|
||||
A dict mapping each column name (open, high, low, close, volume)
|
||||
to a float column to convert to uint32.
|
||||
to a float column to convert to uint64.
|
||||
scale_factor : int
|
||||
Factor to use to scale float values before converting to uint32.
|
||||
Factor to use to scale float values before converting to uint64.
|
||||
sid : int
|
||||
Sid of the relevant asset, for logging.
|
||||
invalid_data_behavior : str
|
||||
Specifies behavior when data cannot be converted to uint32.
|
||||
Specifies behavior when data cannot be converted to uint64.
|
||||
If 'raise', raises an exception.
|
||||
If 'warn', logs a warning and filters out incompatible values.
|
||||
If 'ignore', silently filters out incompatible values.
|
||||
@@ -137,6 +146,7 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
|
||||
scaled_highs = np.nan_to_num(cols['high']) * scale_factor
|
||||
scaled_lows = np.nan_to_num(cols['low']) * scale_factor
|
||||
scaled_closes = np.nan_to_num(cols['close']) * scale_factor
|
||||
volumes = np.nan_to_num(cols['volume'])
|
||||
|
||||
exclude_mask = np.zeros_like(scaled_opens, dtype=bool)
|
||||
|
||||
@@ -145,11 +155,12 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
|
||||
('high', scaled_highs),
|
||||
('low', scaled_lows),
|
||||
('close', scaled_closes),
|
||||
('volume', volumes),
|
||||
]:
|
||||
max_val = scaled_col.max()
|
||||
|
||||
try:
|
||||
check_uint32_safe(max_val, col_name)
|
||||
check_uint64_safe(max_val, col_name)
|
||||
except ValueError:
|
||||
if invalid_data_behavior == 'raise':
|
||||
raise
|
||||
@@ -157,20 +168,20 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
|
||||
if invalid_data_behavior == 'warn':
|
||||
logger.warn(
|
||||
'Values for sid={}, col={} contain some too large for '
|
||||
'uint32 (max={}), filtering them out',
|
||||
'uint64 (max={}), filtering them out',
|
||||
sid, col_name, max_val,
|
||||
)
|
||||
|
||||
# We want to exclude all rows that have an unsafe value in
|
||||
# this column.
|
||||
exclude_mask &= (scaled_col >= np.iinfo(np.uint32).max)
|
||||
exclude_mask &= (scaled_col >= iinfo(uint64).max)
|
||||
|
||||
# Convert all cols to uint32.
|
||||
opens = scaled_opens.astype(np.uint32)
|
||||
highs = scaled_highs.astype(np.uint32)
|
||||
lows = scaled_lows.astype(np.uint32)
|
||||
closes = scaled_closes.astype(np.uint32)
|
||||
volumes = cols['volume'].astype(np.uint32)
|
||||
# Convert all cols to uint64.
|
||||
opens = scaled_opens.astype(uint64)
|
||||
highs = scaled_highs.astype(uint64)
|
||||
lows = scaled_lows.astype(uint64)
|
||||
closes = scaled_closes.astype(uint64)
|
||||
volumes = volumes.astype(uint64)
|
||||
|
||||
# Exclude rows with unsafe values by setting to zero.
|
||||
opens[exclude_mask] = 0
|
||||
@@ -290,7 +301,7 @@ class BcolzFiveMinuteBarMetadata(object):
|
||||
ohlc_ratio : int
|
||||
The default ratio by which to multiply the pricing data to
|
||||
convert the floats from floats to an integer to fit within
|
||||
the np.uint32. If ohlc_ratios_per_sid is None or does not
|
||||
the np.uint64. If ohlc_ratios_per_sid is None or does not
|
||||
contain a mapping for a given sid, this ratio is used.
|
||||
ohlc_ratios_per_sid : dict
|
||||
A dict mapping each sid in the output to the factor by
|
||||
@@ -374,13 +385,13 @@ class BcolzFiveMinuteBarWriter(object):
|
||||
The last trading session in the data set.
|
||||
default_ohlc_ratio : int, optional
|
||||
The default ratio by which to multiply the pricing data to
|
||||
convert from floats to integers that fit within np.uint32. If
|
||||
convert from floats to integers that fit within np.uint64. If
|
||||
ohlc_ratios_per_sid is None or does not contain a mapping for a
|
||||
given sid, this ratio is used. Default is OHLC_RATIO (1000).
|
||||
ohlc_ratios_per_sid : dict, optional
|
||||
A dict mapping each sid in the output to the ratio by which to
|
||||
multiply the pricing data to convert the floats from floats to
|
||||
an integer to fit within the np.uint32.
|
||||
an integer to fit within the np.uint64.
|
||||
expectedlen : int, optional
|
||||
The expected length of the dataset, used when creating the initial
|
||||
bcolz ctable.
|
||||
@@ -405,9 +416,9 @@ class BcolzFiveMinuteBarWriter(object):
|
||||
|
||||
The open, high, low, and close columns are integers which are 1000 times
|
||||
the quoted price, so that the data can represented and stored as an
|
||||
np.uint32, supporting market prices quoted up to the thousands place.
|
||||
np.uint64, supporting market prices quoted up to the thousands place.
|
||||
|
||||
volume is a np.uint32 with no mutation of the tens place.
|
||||
volume is a np.uint64 with no mutation of the tens place.
|
||||
|
||||
The 'index' for each individual asset are a repeating period of minutes of
|
||||
length `minutes_per_day` starting from each market open.
|
||||
@@ -575,7 +586,7 @@ class BcolzFiveMinuteBarWriter(object):
|
||||
if not os.path.exists(sid_containing_dirname):
|
||||
# Other sids may have already created the containing directory.
|
||||
os.makedirs(sid_containing_dirname)
|
||||
initial_array = np.empty(0, np.uint32)
|
||||
initial_array = np.empty(0, np.uint64)
|
||||
table = ctable(
|
||||
rootdir=path,
|
||||
columns=[
|
||||
@@ -612,7 +623,7 @@ class BcolzFiveMinuteBarWriter(object):
|
||||
five_minute_offset = len(table) % self._five_minutes_per_day
|
||||
num_to_prepend = numdays * self._five_minutes_per_day - five_minute_offset
|
||||
|
||||
prepend_array = np.zeros(num_to_prepend, np.uint32)
|
||||
prepend_array = np.zeros(num_to_prepend, np.uint64)
|
||||
# Fill all OHLCV with zeros.
|
||||
table.append([prepend_array] * 5)
|
||||
table.flush()
|
||||
@@ -817,11 +828,11 @@ class BcolzFiveMinuteBarWriter(object):
|
||||
|
||||
minutes_count = all_minutes_in_window.size
|
||||
|
||||
open_col = np.zeros(minutes_count, dtype=np.uint32)
|
||||
high_col = np.zeros(minutes_count, dtype=np.uint32)
|
||||
low_col = np.zeros(minutes_count, dtype=np.uint32)
|
||||
close_col = np.zeros(minutes_count, dtype=np.uint32)
|
||||
vol_col = np.zeros(minutes_count, dtype=np.uint32)
|
||||
open_col = np.zeros(minutes_count, dtype=uint64)
|
||||
high_col = np.zeros(minutes_count, dtype=uint64)
|
||||
low_col = np.zeros(minutes_count, dtype=uint64)
|
||||
close_col = np.zeros(minutes_count, dtype=uint64)
|
||||
vol_col = np.zeros(minutes_count, dtype=uint64)
|
||||
|
||||
dt_ixs = np.searchsorted(all_minutes_in_window.values,
|
||||
dts.astype('datetime64[ns]'))
|
||||
@@ -1250,7 +1261,7 @@ class BcolzFiveMinuteBarReader(FiveMinuteBarReader):
|
||||
if field != 'volume':
|
||||
out = np.full(shape, np.nan)
|
||||
else:
|
||||
out = np.zeros(shape, dtype=np.uint32)
|
||||
out = np.zeros(shape, dtype=int64)
|
||||
|
||||
for i, sid in enumerate(sids):
|
||||
carray = self._open_minute_file(field, sid)
|
||||
|
||||
@@ -117,7 +117,15 @@ UINT64_MAX = iinfo(uint64).max
|
||||
def check_uint32_safe(value, colname):
|
||||
if value >= UINT32_MAX:
|
||||
raise ValueError(
|
||||
"Value %s from column '%s' is too large" % (value, colname)
|
||||
"Value %s from column '%s' is too large "
|
||||
"for uint32" % (value, colname)
|
||||
)
|
||||
|
||||
def check_uint64_safe(value, colname):
|
||||
if value >= UINT64_MAX:
|
||||
raise ValueError(
|
||||
"Value %s from column '%s' is too large "
|
||||
"for uint64" % (value, colname)
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,9 @@ cimport cython
|
||||
from cpython cimport bool
|
||||
|
||||
cdef np.int64_t _nanos_in_minute = 60000000000
|
||||
cdef np.int64_t _nanos_in_five_minutes = 5 * _nanos_in_minute
|
||||
NANOS_IN_MINUTE = _nanos_in_minute
|
||||
NANOS_IN_FIVE_MINUTES = _nanos_in_five_minutes
|
||||
|
||||
cpdef enum:
|
||||
BAR = 0
|
||||
@@ -115,3 +117,24 @@ cdef class MinuteSimulationClock:
|
||||
yield minute, BAR
|
||||
if minute_emission:
|
||||
yield minute, MINUTE_END
|
||||
|
||||
cdef class FiveMinuteSimulationClock(MinuteSimulationClock):
|
||||
@cython.boundscheck(False)
|
||||
@cython.wraparound(False)
|
||||
cdef dict calc_minutes_by_session(self):
|
||||
cdef dict five_minutes_by_session
|
||||
cdef int session_idx
|
||||
cdef np.int64_t session_nano
|
||||
cdef np.ndarray[np.int64_t, ndim=1] five_minutes_nanos
|
||||
|
||||
five_minutes_by_session = {}
|
||||
for session_idx, session_nano in enumerate(self.sessions_nanos):
|
||||
five_minutes_nanos = np.arange(
|
||||
self.market_opens_nanos[session_idx],
|
||||
self.market_closes_nanos[session_idx] + _nanos_in_five_minutes,
|
||||
_nanos_in_five_minutes
|
||||
)
|
||||
five_minutes_by_session[session_nano] = pd.to_datetime(
|
||||
five_minutes_nanos, utc=True, box=True
|
||||
)
|
||||
return five_minutes_by_session
|
||||
|
||||
@@ -34,6 +34,7 @@ class AlgorithmSimulator(object):
|
||||
|
||||
EMISSION_TO_PERF_KEY_MAP = {
|
||||
'minute': 'minute_perf',
|
||||
'5-minute': '5_minute_perf',
|
||||
'daily': 'daily_perf'
|
||||
}
|
||||
|
||||
@@ -201,7 +202,7 @@ class AlgorithmSimulator(object):
|
||||
stack.enter_context(self.processor)
|
||||
stack.enter_context(ZiplineAPI(self.algo))
|
||||
|
||||
if algo.data_frequency == 'minute':
|
||||
if algo.data_frequency in set(('minute', '5-minute')):
|
||||
def execute_order_cancellation_policy():
|
||||
algo.blotter.execute_cancel_policy(SESSION_END)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ _default_calendar_factories = {
|
||||
'us_futures': QuantopianUSFuturesCalendar,
|
||||
}
|
||||
_default_calendar_aliases = {
|
||||
'CATX': 'OPEN',
|
||||
'POLO': 'OPEN',
|
||||
'NASDAQ': 'NYSE',
|
||||
'BATS': 'NYSE',
|
||||
'CBOT': 'CME',
|
||||
|
||||
@@ -602,6 +602,7 @@ class date_rules(object):
|
||||
class time_rules(object):
|
||||
market_open = AfterOpen
|
||||
market_close = BeforeClose
|
||||
every_5_minutes = Always
|
||||
every_minute = Always
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user