From 86f892eade1dcae9eb363256cd9f3842734c2603 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 04:29:22 -0400 Subject: [PATCH] Unit tested a daily reader/writer based on the minute bundle --- tests/exchange/test_bundle.py | 116 +++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index e8af2148..99f10c24 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,11 +1,17 @@ from logging import Logger +import numpy as np import pandas as pd -from catalyst.data.minute_bars import BcolzMinuteBarReader -from catalyst.exchange.bundle_utils import get_bcolz_chunk -from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst import get_calendar +from catalyst.data.minute_bars import BcolzMinuteBarReader, \ + BcolzMinuteBarWriter +from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods_range +from catalyst.exchange.exchange_bundle import ExchangeBundle, \ + BUNDLE_NAME_TEMPLATE +from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.exchange.init_utils import get_exchange +from catalyst.utils.paths import ensure_directory log = Logger('test_exchange_bundle') @@ -144,6 +150,110 @@ class ExchangeBundleTestCase: ) pass + def test_daily_data_to_minute_table(self): + exchange_name = 'poloniex' + + # Switch between daily and minute for testing + data_frequency = 'daily' + # data_frequency = 'minute' + + exchange = get_exchange(exchange_name) + assets = [ + exchange.get_asset('eth_btc'), + exchange.get_asset('etc_btc'), + ] + + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + # Preparing the bundle folder + root = get_exchange_folder(exchange.name) + path = BUNDLE_NAME_TEMPLATE.format( + root=root, + frequency=data_frequency + ) + ensure_directory(path) + + exchange_bundle = ExchangeBundle(exchange) + calendar = get_calendar('OPEN') + + # We are using a BcolzMinuteBarWriter even though the data is daily + # Each day has a maximum of one bar + + # I tried setting the minutes_per_day to 1 will not create + # unnecessary bars + writer = BcolzMinuteBarWriter( + rootdir=path, + calendar=calendar, + minutes_per_day=1, + start_session=start, + end_session=end, + write_metadata=True, + default_ohlc_ratio=exchange_bundle.default_ohlc_ratio + ) + + # This will read the daily data in a bundle created by + # the daily writer. It will write to the minute writer which + # we are passing. + + # Ingesting a second asset to ensure that multiple chunks + # don't override each other + for asset in assets: + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + period='2017', + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) + + # Simplifying the data reader to play nice with 1 minute per day + class BcolzDayBarReader(BcolzMinuteBarReader): + def load_raw_arrays(self, fields, start_dt, end_dt, sids): + start_idx = self._find_position_of_minute(start_dt) + end_idx = self._find_position_of_minute(end_dt) + + num_days = (end_idx - start_idx + 1) + shape = num_days, len(sids) + + data = [] + for field in fields: + out = np.full(shape, np.nan) + + for i, sid in enumerate(sids): + carray = reader._open_minute_file(field, sid) + a = carray[start_idx:end_idx + 1] + + where = a != 0 + + out[:len(where), i][where] = ( + a[where] * self._ohlc_ratio_inverse_for_sid(sid) + ) + + data.append(out) + + return data + + reader = BcolzDayBarReader(path) + + # Reading the two assets to ensure that no data was lost + for asset in assets: + sid = asset.sid + + daily_values = reader.load_raw_arrays( + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start, + end_dt=end, + sids=[sid], + ) + + print('found {} rows for last ingestion'.format( + len(daily_values[0])) + ) + pass + def test_minute_bundle(self): exchange_name = 'poloniex' data_frequency = 'minute'