mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 21:06:32 +08:00
Unit tested a daily reader/writer based on the minute bundle
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
from logging import Logger
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from catalyst.data.minute_bars import BcolzMinuteBarReader
|
||||
from catalyst.exchange.bundle_utils import get_bcolz_chunk
|
||||
from catalyst.exchange.exchange_bundle import ExchangeBundle
|
||||
from catalyst import get_calendar
|
||||
from catalyst.data.minute_bars import BcolzMinuteBarReader, \
|
||||
BcolzMinuteBarWriter
|
||||
from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods_range
|
||||
from catalyst.exchange.exchange_bundle import ExchangeBundle, \
|
||||
BUNDLE_NAME_TEMPLATE
|
||||
from catalyst.exchange.exchange_utils import get_exchange_folder
|
||||
from catalyst.exchange.init_utils import get_exchange
|
||||
from catalyst.utils.paths import ensure_directory
|
||||
|
||||
log = Logger('test_exchange_bundle')
|
||||
|
||||
@@ -144,6 +150,110 @@ class ExchangeBundleTestCase:
|
||||
)
|
||||
pass
|
||||
|
||||
def test_daily_data_to_minute_table(self):
|
||||
exchange_name = 'poloniex'
|
||||
|
||||
# Switch between daily and minute for testing
|
||||
data_frequency = 'daily'
|
||||
# data_frequency = 'minute'
|
||||
|
||||
exchange = get_exchange(exchange_name)
|
||||
assets = [
|
||||
exchange.get_asset('eth_btc'),
|
||||
exchange.get_asset('etc_btc'),
|
||||
]
|
||||
|
||||
start = pd.to_datetime('2017-9-1', utc=True)
|
||||
end = pd.to_datetime('2017-9-30', utc=True)
|
||||
|
||||
# Preparing the bundle folder
|
||||
root = get_exchange_folder(exchange.name)
|
||||
path = BUNDLE_NAME_TEMPLATE.format(
|
||||
root=root,
|
||||
frequency=data_frequency
|
||||
)
|
||||
ensure_directory(path)
|
||||
|
||||
exchange_bundle = ExchangeBundle(exchange)
|
||||
calendar = get_calendar('OPEN')
|
||||
|
||||
# We are using a BcolzMinuteBarWriter even though the data is daily
|
||||
# Each day has a maximum of one bar
|
||||
|
||||
# I tried setting the minutes_per_day to 1 will not create
|
||||
# unnecessary bars
|
||||
writer = BcolzMinuteBarWriter(
|
||||
rootdir=path,
|
||||
calendar=calendar,
|
||||
minutes_per_day=1,
|
||||
start_session=start,
|
||||
end_session=end,
|
||||
write_metadata=True,
|
||||
default_ohlc_ratio=exchange_bundle.default_ohlc_ratio
|
||||
)
|
||||
|
||||
# This will read the daily data in a bundle created by
|
||||
# the daily writer. It will write to the minute writer which
|
||||
# we are passing.
|
||||
|
||||
# Ingesting a second asset to ensure that multiple chunks
|
||||
# don't override each other
|
||||
for asset in assets:
|
||||
exchange_bundle.ingest_ctable(
|
||||
asset=asset,
|
||||
data_frequency=data_frequency,
|
||||
period='2017',
|
||||
start_dt=start,
|
||||
end_dt=end,
|
||||
writer=writer,
|
||||
empty_rows_behavior='strip'
|
||||
)
|
||||
|
||||
# Simplifying the data reader to play nice with 1 minute per day
|
||||
class BcolzDayBarReader(BcolzMinuteBarReader):
|
||||
def load_raw_arrays(self, fields, start_dt, end_dt, sids):
|
||||
start_idx = self._find_position_of_minute(start_dt)
|
||||
end_idx = self._find_position_of_minute(end_dt)
|
||||
|
||||
num_days = (end_idx - start_idx + 1)
|
||||
shape = num_days, len(sids)
|
||||
|
||||
data = []
|
||||
for field in fields:
|
||||
out = np.full(shape, np.nan)
|
||||
|
||||
for i, sid in enumerate(sids):
|
||||
carray = reader._open_minute_file(field, sid)
|
||||
a = carray[start_idx:end_idx + 1]
|
||||
|
||||
where = a != 0
|
||||
|
||||
out[:len(where), i][where] = (
|
||||
a[where] * self._ohlc_ratio_inverse_for_sid(sid)
|
||||
)
|
||||
|
||||
data.append(out)
|
||||
|
||||
return data
|
||||
|
||||
reader = BcolzDayBarReader(path)
|
||||
|
||||
# Reading the two assets to ensure that no data was lost
|
||||
for asset in assets:
|
||||
sid = asset.sid
|
||||
|
||||
daily_values = reader.load_raw_arrays(
|
||||
fields=['open', 'high', 'low', 'close', 'volume'],
|
||||
start_dt=start,
|
||||
end_dt=end,
|
||||
sids=[sid],
|
||||
)
|
||||
|
||||
print('found {} rows for last ingestion'.format(
|
||||
len(daily_values[0]))
|
||||
)
|
||||
pass
|
||||
|
||||
def test_minute_bundle(self):
|
||||
exchange_name = 'poloniex'
|
||||
data_frequency = 'minute'
|
||||
|
||||
Reference in New Issue
Block a user