From 51126fd7ae26cdce4cec5ee7d935162eb29ec95a Mon Sep 17 00:00:00 2001 From: Frederic Fortier Date: Thu, 18 Jan 2018 17:09:37 -0500 Subject: [PATCH] BLD: improved the bundle test suite and related adjustments --- catalyst/exchange/ccxt/ccxt_exchange.py | 24 +- catalyst/exchange/exchange.py | 11 +- catalyst/exchange/exchange_bundle.py | 6 +- catalyst/exchange/exchange_data_portal.py | 4 +- catalyst/exchange/utils/bundle_utils.py | 225 +----------- catalyst/exchange/utils/datetime_utils.py | 327 ++++++++++++++++++ catalyst/exchange/utils/exchange_utils.py | 83 +---- catalyst/exchange/utils/test_utils.py | 12 +- tests/exchange/test_bundle.py | 3 +- tests/exchange/test_ccxt.py | 4 +- .../exchange/test_suites/test_suite_bundle.py | 36 +- 11 files changed, 409 insertions(+), 326 deletions(-) create mode 100644 catalyst/exchange/utils/datetime_utils.py diff --git a/catalyst/exchange/ccxt/ccxt_exchange.py b/catalyst/exchange/ccxt/ccxt_exchange.py index edc497c6..01770413 100644 --- a/catalyst/exchange/ccxt/ccxt_exchange.py +++ b/catalyst/exchange/ccxt/ccxt_exchange.py @@ -22,8 +22,10 @@ from catalyst.exchange.exchange_errors import InvalidHistoryFrequencyError, \ UnsupportedHistoryFrequencyError from catalyst.exchange.exchange_execution import ExchangeLimitOrder from catalyst.exchange.utils.exchange_utils import mixin_market_params, \ - from_ms_timestamp, get_epoch, get_exchange_folder, get_catalyst_symbol, \ + get_exchange_folder, get_catalyst_symbol, \ get_exchange_auth +from exchange.utils.datetime_utils import from_ms_timestamp, get_epoch, \ + get_periods_range from catalyst.finance.order import Order, ORDER_STATUS from catalyst.finance.transaction import Transaction @@ -399,7 +401,7 @@ class CCXT(Exchange): timeframe, source='ccxt', raise_error=raise_error ) - def get_candles(self, freq, assets, bar_count=None, start_dt=None, + def get_candles(self, freq, assets, bar_count=1, start_dt=None, end_dt=None): is_single = (isinstance(assets, TradingPair)) if is_single: @@ -416,9 +418,25 @@ class CCXT(Exchange): freqs=freqs, ) + if start_dt is not None and end_dt is not None: + raise ValueError( + 'Please provide either start_dt or end_dt, not both.' + ) + + elif end_dt is not None: + dt_range = get_periods_range( + end_dt=end_dt, + periods=bar_count, + freq=freq, + ) + # skip the left bound of the range since the open range is + # on the right bound + start_dt = dt_range[1] + ms = None if start_dt is not None: - delta = start_dt - get_epoch() + if end_dt is not None: + delta = start_dt - get_epoch() ms = int(delta.total_seconds()) * 1000 candles = dict() diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index a0f3a4bf..c873962e 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -13,10 +13,10 @@ from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ PricingDataNotLoadedError, \ NoDataAvailableOnExchange, NoValueForField, LastCandleTooEarlyError, \ TickerNotFoundError, NotEnoughCashError -from catalyst.exchange.utils.bundle_utils import get_start_dt, \ - get_delta, get_periods, get_periods_range +from exchange.utils.datetime_utils import get_delta, get_periods_range, \ + get_periods, get_start_dt, get_frequency from catalyst.exchange.utils.exchange_utils import get_exchange_symbols, \ - get_frequency, resample_history_df, has_bundle + resample_history_df, has_bundle from logbook import Logger log = Logger('Exchange', level=LOG_LEVEL) @@ -433,7 +433,7 @@ class Exchange: series = pd.Series(values, index=dates) periods = get_periods_range( - start_dt, end_dt, data_frequency + start_dt=start_dt, end_dt=end_dt, freq=data_frequency ) # TODO: ensure that this working as expected, if not use fillna series = series.reindex( @@ -929,8 +929,7 @@ class Exchange: pass @abstractmethod - def get_candles(self, freq, assets, bar_count=None, - start_dt=None, end_dt=None): + def get_candles(self, freq, assets, bar_count, start_dt=None, end_dt=None): """ Retrieve OHLCV candles for the given assets diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 9844f751..9df3f42e 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -21,9 +21,9 @@ from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ NoDataAvailableOnExchange, \ PricingDataNotLoadedError, DataCorruptionError, PricingDataValueError from catalyst.exchange.utils.bundle_utils import range_in_bundle, \ - get_bcolz_chunk, get_month_start_end, \ - get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label, \ - get_delta, get_assets + get_bcolz_chunk, get_df_from_arrays, get_assets +from exchange.utils.datetime_utils import get_delta, get_start_dt, \ + get_period_label, get_month_start_end, get_year_start_end from catalyst.exchange.utils.exchange_utils import get_exchange_folder, \ save_exchange_symbols, mixin_market_params, get_catalyst_symbol from catalyst.utils.cli import maybe_show_progress diff --git a/catalyst/exchange/exchange_data_portal.py b/catalyst/exchange/exchange_data_portal.py index 4f73079e..dd4507f6 100644 --- a/catalyst/exchange/exchange_data_portal.py +++ b/catalyst/exchange/exchange_data_portal.py @@ -9,8 +9,8 @@ from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, PricingDataNotLoadedError) -from catalyst.exchange.utils.exchange_utils import get_frequency, \ - resample_history_df, group_assets_by_exchange +from catalyst.exchange.utils.exchange_utils import resample_history_df, group_assets_by_exchange +from exchange.utils.datetime_utils import get_frequency from logbook import Logger from redo import retry diff --git a/catalyst/exchange/utils/bundle_utils.py b/catalyst/exchange/utils/bundle_utils.py index e9207511..b65f1771 100644 --- a/catalyst/exchange/utils/bundle_utils.py +++ b/catalyst/exchange/utils/bundle_utils.py @@ -1,11 +1,19 @@ -import calendar import os import tarfile -from datetime import timedelta, datetime, date +from datetime import datetime import numpy as np import pandas as pd -import pytz + +from catalyst.data.bundles.core import download_without_progress +from catalyst.exchange.utils.exchange_utils import get_exchange_bundles_folder +import os +import tarfile +from datetime import datetime + +import numpy as np +import pandas as pd + from catalyst.data.bundles.core import download_without_progress from catalyst.exchange.utils.exchange_utils import get_exchange_bundles_folder @@ -13,41 +21,6 @@ EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' -def get_date_from_ms(ms): - """ - The date from the number of miliseconds from the epoch. - - Parameters - ---------- - ms: int - - Returns - ------- - datetime - - """ - return datetime.fromtimestamp(ms / 1000.0) - - -def get_seconds_from_date(date): - """ - The number of seconds from the epoch. - - Parameters - ---------- - date: datetime - - Returns - ------- - int - - """ - epoch = datetime.utcfromtimestamp(0) - epoch = epoch.replace(tzinfo=pytz.UTC) - - return int((date - epoch).total_seconds()) - - def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): """ Download and extract a bcolz bundle. @@ -77,8 +50,8 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): if not os.path.isdir(path): url = 'https://s3.amazonaws.com/enigmaco/catalyst-bundles/' \ 'exchange-{exchange}/{name}.tar.gz'.format( - exchange=exchange_name, - name=name) + exchange=exchange_name, + name=name) bytes = download_without_progress(url) with tarfile.open('r', fileobj=bytes) as tar: @@ -87,178 +60,6 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): return path -def get_delta(periods, data_frequency): - """ - Get a time delta based on the specified data frequency. - - Parameters - ---------- - periods: int - data_frequency: str - - Returns - ------- - timedelta - - """ - return timedelta(minutes=periods) \ - if data_frequency == 'minute' else timedelta(days=periods) - - -def get_periods_range(start_dt, end_dt, freq): - """ - Get a date range for the specified parameters. - - Parameters - ---------- - start_dt: datetime - end_dt: datetime - freq: str - - Returns - ------- - DateTimeIndex - - """ - if freq == 'minute': - freq = 'T' - - elif freq == 'daily': - freq = 'D' - - return pd.date_range(start_dt, end_dt, freq=freq) - - -def get_periods(start_dt, end_dt, freq): - """ - The number of periods in the specified range. - - Parameters - ---------- - start_dt: datetime - end_dt: datetime - freq: str - - Returns - ------- - int - - """ - return len(get_periods_range(start_dt, end_dt, freq)) - - -def get_start_dt(end_dt, bar_count, data_frequency, include_first=True): - """ - The start date based on specified end date and data frequency. - - Parameters - ---------- - end_dt: datetime - bar_count: int - data_frequency: str - - Returns - ------- - datetime - - """ - periods = bar_count - if periods > 1: - delta = get_delta(periods, data_frequency) - start_dt = end_dt - delta - - if not include_first: - start_dt += get_delta(1, data_frequency) - else: - start_dt = end_dt - - return start_dt - - -def get_period_label(dt, data_frequency): - """ - The period label for the specified date and frequency. - - Parameters - ---------- - dt: datetime - data_frequency: str - - Returns - ------- - str - - """ - if data_frequency == 'minute': - return '{}-{:02d}'.format(dt.year, dt.month) - else: - return '{}'.format(dt.year) - - -def get_month_start_end(dt, first_day=None, last_day=None): - """ - The first and last day of the month for the specified date. - - Parameters - ---------- - dt: datetime - first_day: datetime - last_day: datetime - - Returns - ------- - datetime, datetime - - """ - month_range = calendar.monthrange(dt.year, dt.month) - - if first_day: - month_start = first_day - else: - month_start = pd.to_datetime(datetime( - dt.year, dt.month, 1, 0, 0, 0, 0 - ), utc=True) - - if last_day: - month_end = last_day - else: - month_end = pd.to_datetime(datetime( - dt.year, dt.month, month_range[1], 23, 59, 0, 0 - ), utc=True) - - if month_end > pd.Timestamp.utcnow(): - month_end = pd.Timestamp.utcnow().floor('1D') - - return month_start, month_end - - -def get_year_start_end(dt, first_day=None, last_day=None): - """ - The first and last day of the year for the specified date. - - Parameters - ---------- - - dt: datetime - first_day: datetime - last_day: datetime - - Returns - ------- - datetime, datetime - - """ - year_start = first_day if first_day \ - else pd.to_datetime(date(dt.year, 1, 1), utc=True) - year_end = last_day if last_day \ - else pd.to_datetime(date(dt.year, 12, 31), utc=True) - - if year_end > pd.Timestamp.utcnow(): - year_end = pd.Timestamp.utcnow().floor('1D') - - return year_start, year_end - - def get_df_from_arrays(arrays, periods): """ A DataFrame from the specified OHCLV arrays. diff --git a/catalyst/exchange/utils/datetime_utils.py b/catalyst/exchange/utils/datetime_utils.py new file mode 100644 index 00000000..6e7b9d60 --- /dev/null +++ b/catalyst/exchange/utils/datetime_utils.py @@ -0,0 +1,327 @@ +import calendar +import re +from datetime import datetime, timedelta, date + +import pandas as pd +import pytz + +from exchange.exchange_errors import InvalidHistoryFrequencyError, \ + InvalidHistoryFrequencyAlias + + +def get_date_from_ms(ms): + """ + The date from the number of miliseconds from the epoch. + + Parameters + ---------- + ms: int + + Returns + ------- + datetime + + """ + return datetime.fromtimestamp(ms / 1000.0) + + +def get_seconds_from_date(date): + """ + The number of seconds from the epoch. + + Parameters + ---------- + date: datetime + + Returns + ------- + int + + """ + epoch = datetime.utcfromtimestamp(0) + epoch = epoch.replace(tzinfo=pytz.UTC) + + return int((date - epoch).total_seconds()) + + +def get_delta(periods, data_frequency): + """ + Get a time delta based on the specified data frequency. + + Parameters + ---------- + periods: int + data_frequency: str + + Returns + ------- + timedelta + + """ + return timedelta(minutes=periods) \ + if data_frequency == 'minute' else timedelta(days=periods) + + +def get_periods_range(freq, start_dt=None, end_dt=None, periods=None): + """ + Get a date range for the specified parameters. + + Parameters + ---------- + start_dt: datetime + end_dt: datetime + freq: str + + Returns + ------- + DateTimeIndex + + """ + if freq == 'minute': + freq = 'T' + + elif freq == 'daily': + freq = 'D' + + if start_dt is not None and end_dt is not None and periods is None: + + return pd.date_range(start_dt, end_dt, freq=freq) + + elif periods is not None and (start_dt is not None or end_dt is not None): + _, unit_periods, unit, _ = get_frequency(freq) + adj_periods = periods * unit_periods + + # TODO: standardize time aliases to avoid any mapping + unit = 'd' if unit == 'D' else 'm' + delta = pd.Timedelta(adj_periods, unit) + + if start_dt is not None: + return pd.date_range( + start=start_dt, + end=start_dt + delta, + freq=freq, + closed='left', + ) + + else: + return pd.date_range( + start=end_dt - delta, + end=end_dt, + freq=freq, + ) + + else: + raise ValueError( + 'Choose only two parameters between start_dt, end_dt ' + 'and periods.' + ) + + +def get_periods(start_dt, end_dt, freq): + """ + The number of periods in the specified range. + + Parameters + ---------- + start_dt: datetime + end_dt: datetime + freq: str + + Returns + ------- + int + + """ + return len(get_periods_range(start_dt=start_dt, end_dt=end_dt, freq=freq)) + + +def get_start_dt(end_dt, bar_count, data_frequency, include_first=True): + """ + The start date based on specified end date and data frequency. + + Parameters + ---------- + end_dt: datetime + bar_count: int + data_frequency: str + include_first + + Returns + ------- + datetime + + """ + periods = bar_count + if periods > 1: + delta = get_delta(periods, data_frequency) + start_dt = end_dt - delta + + if not include_first: + start_dt += get_delta(1, data_frequency) + else: + start_dt = end_dt + + return start_dt + + +def get_period_label(dt, data_frequency): + """ + The period label for the specified date and frequency. + + Parameters + ---------- + dt: datetime + data_frequency: str + + Returns + ------- + str + + """ + if data_frequency == 'minute': + return '{}-{:02d}'.format(dt.year, dt.month) + else: + return '{}'.format(dt.year) + + +def get_month_start_end(dt, first_day=None, last_day=None): + """ + The first and last day of the month for the specified date. + + Parameters + ---------- + dt: datetime + first_day: datetime + last_day: datetime + + Returns + ------- + datetime, datetime + + """ + month_range = calendar.monthrange(dt.year, dt.month) + + if first_day: + month_start = first_day + else: + month_start = pd.to_datetime(datetime( + dt.year, dt.month, 1, 0, 0, 0, 0 + ), utc=True) + + if last_day: + month_end = last_day + else: + month_end = pd.to_datetime(datetime( + dt.year, dt.month, month_range[1], 23, 59, 0, 0 + ), utc=True) + + if month_end > pd.Timestamp.utcnow(): + month_end = pd.Timestamp.utcnow().floor('1D') + + return month_start, month_end + + +def get_year_start_end(dt, first_day=None, last_day=None): + """ + The first and last day of the year for the specified date. + + Parameters + ---------- + + dt: datetime + first_day: datetime + last_day: datetime + + Returns + ------- + datetime, datetime + + """ + year_start = first_day if first_day \ + else pd.to_datetime(date(dt.year, 1, 1), utc=True) + year_end = last_day if last_day \ + else pd.to_datetime(date(dt.year, 12, 31), utc=True) + + if year_end > pd.Timestamp.utcnow(): + year_end = pd.Timestamp.utcnow().floor('1D') + + return year_start, year_end + + +def get_frequency(freq, data_frequency=None): + """ + Get the frequency parameters. + + Notes + ----- + We're trying to use Pandas convention for frequency aliases. + + Parameters + ---------- + freq: str + data_frequency: str + + Returns + ------- + str, int, str, str + + """ + if data_frequency is None: + data_frequency = 'daily' if freq.upper().endswith('D') else 'minute' + + if freq == 'minute': + unit = 'T' + candle_size = 1 + + elif freq == 'daily': + unit = 'D' + candle_size = 1 + + else: + freq_match = re.match(r'([0-9].*)?(m|M|d|D|h|H|T)', freq, re.M | re.I) + if freq_match: + candle_size = int(freq_match.group(1)) if freq_match.group(1) \ + else 1 + unit = freq_match.group(2) + + else: + raise InvalidHistoryFrequencyError(frequency=freq) + + # TODO: some exchanges support H and W frequencies but not bundles + # Find a way to pass-through these parameters to exchanges + # but resample from minute or daily in backtest mode + # see catalyst/exchange/ccxt/ccxt_exchange.py:242 for mapping between + # Pandas offet aliases (used by Catalyst) and the CCXT timeframes + if unit.lower() == 'd': + unit = 'D' + alias = '{}D'.format(candle_size) + + if data_frequency == 'minute': + data_frequency = 'daily' + + elif unit.lower() == 'm' or unit == 'T': + unit = 'T' + alias = '{}T'.format(candle_size) + + if data_frequency == 'daily': + data_frequency = 'minute' + + # elif unit.lower() == 'h': + # candle_size = candle_size * 60 + # + # alias = '{}T'.format(candle_size) + # if data_frequency == 'daily': + # data_frequency = 'minute' + + else: + raise InvalidHistoryFrequencyAlias(freq=freq) + + return alias, candle_size, unit, data_frequency + + +def from_ms_timestamp(ms): + return pd.to_datetime(ms, unit='ms', utc=True) + + +def get_epoch(): + return pd.to_datetime('1970-1-1', utc=True) diff --git a/catalyst/exchange/utils/exchange_utils.py b/catalyst/exchange/utils/exchange_utils.py index baf4c629..3c87b510 100644 --- a/catalyst/exchange/utils/exchange_utils.py +++ b/catalyst/exchange/utils/exchange_utils.py @@ -2,21 +2,20 @@ import hashlib import json import os import pickle -import re import shutil from datetime import date, datetime import pandas as pd from catalyst.assets._assets import TradingPair +from six import string_types +from six.moves.urllib import request + from catalyst.constants import DATE_FORMAT, SYMBOLS_URL -from catalyst.exchange.exchange_errors import ExchangeSymbolsNotFound, \ - InvalidHistoryFrequencyError, InvalidHistoryFrequencyAlias +from catalyst.exchange.exchange_errors import ExchangeSymbolsNotFound from catalyst.exchange.utils.serialization_utils import ExchangeJSONEncoder, \ ExchangeJSONDecoder from catalyst.utils.paths import data_root, ensure_directory, \ last_modified_time -from six import string_types -from six.moves.urllib import request def get_sid(symbol): @@ -513,72 +512,6 @@ def get_common_assets(exchanges): return assets -def get_frequency(freq, data_frequency): - """ - Get the frequency parameters. - - Notes - ----- - We're trying to use Pandas convention for frequency aliases. - - Parameters - ---------- - freq: str - data_frequency: str - - Returns - ------- - str, int, str, str - - """ - if freq == 'minute': - unit = 'T' - candle_size = 1 - - elif freq == 'daily': - unit = 'D' - candle_size = 1 - - else: - freq_match = re.match(r'([0-9].*)?(m|M|d|D|h|H|T)', freq, re.M | re.I) - if freq_match: - candle_size = int(freq_match.group(1)) if freq_match.group(1) \ - else 1 - unit = freq_match.group(2) - - else: - raise InvalidHistoryFrequencyError(frequency=freq) - - # TODO: some exchanges support H and W frequencies but not bundles - # Find a way to pass-through these parameters to exchanges - # but resample from minute or daily in backtest mode - # see catalyst/exchange/ccxt/ccxt_exchange.py:242 for mapping between - # Pandas offet aliases (used by Catalyst) and the CCXT timeframes - if unit.lower() == 'd': - alias = '{}D'.format(candle_size) - - if data_frequency == 'minute': - data_frequency = 'daily' - - elif unit.lower() == 'm' or unit == 'T': - alias = '{}T'.format(candle_size) - - if data_frequency == 'daily': - data_frequency = 'minute' - - # elif unit.lower() == 'h': - # candle_size = candle_size * 60 - # - # alias = '{}T'.format(candle_size) - # if data_frequency == 'daily': - # data_frequency = 'minute' - - else: - raise InvalidHistoryFrequencyAlias(freq=freq) - - return alias, candle_size, unit, data_frequency - - def resample_history_df(df, freq, field): """ Resample the OHCLV DataFrame using the specified frequency. @@ -652,14 +585,6 @@ def mixin_market_params(exchange_name, params, market): params['lot'] = params['min_trade_size'] -def from_ms_timestamp(ms): - return pd.to_datetime(ms, unit='ms', utc=True) - - -def get_epoch(): - return pd.to_datetime('1970-1-1', utc=True) - - def group_assets_by_exchange(assets): exchange_assets = dict() for asset in assets: diff --git a/catalyst/exchange/utils/test_utils.py b/catalyst/exchange/utils/test_utils.py index ac5021be..9f7f1a15 100644 --- a/catalyst/exchange/utils/test_utils.py +++ b/catalyst/exchange/utils/test_utils.py @@ -62,14 +62,14 @@ def output_df(df, assets, name=None): """ if isinstance(assets, TradingPair): - exchange_folder = assets.exchange - asset_folder = assets.symbol + asset_folder = '{}_{}'.format(assets.exchange, assets.symbol) else: - exchange_folder = ','.join([asset.exchange for asset in assets]) - asset_folder = ','.join([asset.symbol for asset in assets]) + asset_folder = ','.join( + ['{}_{}'.format(a.exchange, a.symbol) for a in assets] + ) folder = os.path.join( - tempfile.gettempdir(), 'catalyst', exchange_folder, asset_folder + tempfile.gettempdir(), 'catalyst', asset_folder ) ensure_directory(folder) @@ -79,4 +79,4 @@ def output_df(df, assets, name=None): path = os.path.join(folder, '{}.csv'.format(name)) df.to_csv(path) - return path + return path, folder diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 3864d531..c66fcfb4 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -10,7 +10,8 @@ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ from catalyst.exchange.exchange_bundle import ExchangeBundle, \ BUNDLE_NAME_TEMPLATE from catalyst.exchange.utils.bundle_utils import get_bcolz_chunk, \ - get_start_dt, get_df_from_arrays + get_df_from_arrays +from exchange.utils.datetime_utils import get_start_dt from catalyst.exchange.utils.exchange_utils import get_exchange_folder from catalyst.exchange.utils.factory import get_exchange from catalyst.exchange.utils.stats_utils import df_to_string diff --git a/tests/exchange/test_ccxt.py b/tests/exchange/test_ccxt.py index 04112675..67ba61d2 100644 --- a/tests/exchange/test_ccxt.py +++ b/tests/exchange/test_ccxt.py @@ -1,6 +1,8 @@ import pandas as pd from logbook import Logger +from catalyst.testing import ZiplineTestCase +from catalyst.testing.fixtures import WithLogger from .base import BaseExchangeTestCase from catalyst.exchange.ccxt.ccxt_exchange import CCXT from catalyst.exchange.exchange_execution import ExchangeLimitOrder @@ -59,7 +61,7 @@ class TestCCXT(BaseExchangeTestCase): freq='5T', assets=[self.exchange.get_asset('eth_btc')], bar_count=200, - start_dt=pd.to_datetime('2017-01-01', utc=True) + start_dt=pd.to_datetime('2017-09-01', utc=True) ) for asset in candles: diff --git a/tests/exchange/test_suites/test_suite_bundle.py b/tests/exchange/test_suites/test_suite_bundle.py index 1bc8efec..21ef60d5 100644 --- a/tests/exchange/test_suites/test_suite_bundle.py +++ b/tests/exchange/test_suites/test_suite_bundle.py @@ -1,5 +1,6 @@ import random +import os import pandas as pd from logbook import TestHandler from pandas.util.testing import assert_frame_equal @@ -11,7 +12,6 @@ from catalyst.exchange.utils.exchange_utils import get_candles_df from catalyst.exchange.utils.factory import get_exchange from catalyst.exchange.utils.test_utils import output_df, \ select_random_assets -from catalyst.testing.fixtures import WithLogger, ZiplineTestCase pd.set_option('display.expand_frame_repr', False) pd.set_option('precision', 8) @@ -19,7 +19,7 @@ pd.set_option('display.width', 1000) pd.set_option('display.max_colwidth', 1000) -class TestSuiteBundle(WithLogger, ZiplineTestCase): +class TestSuiteBundle: @staticmethod def get_data_portal(exchanges): open_calendar = get_calendar('OPEN') @@ -46,7 +46,9 @@ class TestSuiteBundle(WithLogger, ZiplineTestCase): assets end_dt bar_count - sample_minutes + freq + data_frequency + data_portal Returns ------- @@ -64,10 +66,6 @@ class TestSuiteBundle(WithLogger, ZiplineTestCase): field='close', data_frequency=data_frequency, ) - print('bundle data:\n{}'.format( - data['bundle'].tail(10)) - ) - candles = exchange.get_candles( end_dt=end_dt, freq=freq, @@ -81,19 +79,31 @@ class TestSuiteBundle(WithLogger, ZiplineTestCase): bar_count=bar_count, end_dt=end_dt, ) - print('exchange data:\n{}'.format( - data['exchange'].tail(10)) - ) for source in data: df = data[source] - path = output_df(df, assets, '{}_{}'.format(freq, source)) - print('saved {}:\n{}'.format(source, path)) + path, folder = output_df( + df, assets, '{}_{}'.format(freq, source) + ) assert_frame_equal( right=data['bundle'], left=data['exchange'], - check_less_precise=True, + check_less_precise=1, ) + try: + assert_frame_equal( + right=data['bundle'], + left=data['exchange'], + check_less_precise=min([a.decimals for a in assets]), + ) + except Exception as e: + print('Some differences were found within a 1 decimal point ' + 'interval of confidence: {}'.format(e)) + with open(os.path.join(folder, 'compare.txt'), 'w+') as handle: + handle.write(e.args[0]) + + print('saved test results: {}'.format(folder)) + pass def test_validate_bundles(self): # exchange_population = 3