From 36881b03e281024eb4f1c452c20467fb12b2782c Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 7 Sep 2017 23:54:11 -0400 Subject: [PATCH 001/110] Working on multi-exchange implementation (not fully tested) --- catalyst/exchange/algorithm_exchange.py | 134 ++++++++++++++++----- catalyst/exchange/asset_finder_exchange.py | 17 +-- catalyst/exchange/data_portal_exchange.py | 78 ++++++++++-- catalyst/exchange/exchange_errors.py | 7 ++ catalyst/utils/run_algo.py | 77 +++++++----- 5 files changed, 226 insertions(+), 87 deletions(-) diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/algorithm_exchange.py index e7bced7e..5321f971 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/algorithm_exchange.py @@ -23,6 +23,7 @@ import numpy as np import logbook import pandas as pd +from catalyst.utils.preprocess import preprocess import catalyst.protocol as zp from catalyst.algorithm import TradingAlgorithm @@ -45,7 +46,7 @@ from catalyst.gens.tradesimulation import AlgorithmSimulator from catalyst.utils.api_support import ( api_method, disallowed_in_before_trading_start) -from catalyst.utils.input_validation import error_keywords +from catalyst.utils.input_validation import error_keywords, ensure_upper_case log = logbook.Logger("ExchangeTradingAlgorithm") @@ -57,7 +58,7 @@ class ExchangeAlgorithmExecutor(AlgorithmSimulator): class ExchangeTradingAlgorithm(TradingAlgorithm): def __init__(self, *args, **kwargs): - self.exchange = kwargs.pop('exchange', None) + self.exchanges = kwargs.pop('exchanges', None) self.algo_namespace = kwargs.pop('algo_namespace', None) self.live_graph = kwargs.pop('live_graph', None) @@ -83,6 +84,7 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): self.stats_minutes = 5 super(self.__class__, self).__init__(*args, **kwargs) + # TODO: fix precision before re-enabling # self._create_minute_writer() signal.signal(signal.SIGINT, self.signal_handler) @@ -97,6 +99,7 @@ class 
ExchangeTradingAlgorithm(TradingAlgorithm): writer = BcolzMinuteBarWriter.open( root, self.sim_params.end_session) else: + # TODO: need to be able to write more precise numbers writer = BcolzMinuteBarWriter( rootdir=root, calendar=self.trading_calendar, @@ -163,13 +166,11 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): if self.live_graph: self._clock = LiveGraphClock( self.sim_params.sessions, - time_skew=self.exchange.time_skew, context=self ) else: self._clock = SimpleClock( self.sim_params.sessions, - time_skew=self.exchange.time_skew ) return self._clock @@ -202,27 +203,31 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): portfolio directly. :return: """ - return self.exchange.portfolio + # TODO: build cumulative portfolio + return self.perf_tracker.get_portfolio(False) def updated_account(self): - return self.exchange.account + return self.perf_tracker.get_account(False) def _synchronize_portfolio(self, attempt_index=0): try: - self.exchange.synchronize_portfolio() + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] - # Applying the updated last_sales_price to the positions - # in the performance tracker. This seems a bit redundant - # but it will make sense when we have multiple exchange portfolios - # feeding into the same performance tracker. - tracker = self.perf_tracker.todays_performance.position_tracker - for asset in self.exchange.portfolio.positions: - position = self.exchange.portfolio.positions[asset] - tracker.update_position( - asset=asset, - last_sale_date=position.last_sale_date, - last_sale_price=position.last_sale_price - ) + exchange.synchronize_portfolio() + + # Applying the updated last_sales_price to the positions + # in the performance tracker. This seems a bit redundant + # but it will make sense when we have multiple exchange portfolios + # feeding into the same performance tracker. 
+ tracker = self.perf_tracker.todays_performance.position_tracker + for asset in exchange.portfolio.positions: + position = exchange.portfolio.positions[asset] + tracker.update_position( + asset=asset, + last_sale_date=position.last_sale_date, + last_sale_price=position.last_sale_price + ) except ExchangeRequestError as e: log.warn( 'update portfolio attempt {}: {}'.format(attempt_index, e) @@ -239,7 +244,14 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): def _check_open_orders(self, attempt_index=0): try: - return self.exchange.check_open_orders() + orders = list() + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] + exchange_orders = exchange.check_open_orders() + + orders += exchange_orders + + return orders except ExchangeRequestError as e: log.warn( 'check open orders attempt {}: {}'.format(attempt_index, e) @@ -429,11 +441,13 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): log.warn('unable to save minute perfs to disk: {}'.format(e)) try: - save_algo_object( - algo_name=self.algo_namespace, - key='portfolio_{}'.format(self.exchange.name), - obj=self.exchange.portfolio - ) + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] + save_algo_object( + algo_name=self.algo_namespace, + key='portfolio_{}'.format(exchange_name), + obj=exchange.portfolio + ) except Exception as e: log.warn('unable to save portfolio to disk: {}'.format(e)) @@ -445,9 +459,10 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): style=None, attempt_index=0): try: - return self.exchange.order(asset, amount, limit_price, - stop_price, - style) + exchange = self.exchanges[asset.exchange] + return exchange.order(asset, amount, limit_price, + stop_price, + style) except ExchangeRequestError as e: log.warn( 'order attempt {}: {}'.format(attempt_index, e) @@ -500,7 +515,18 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): def _get_open_orders(self, asset=None, attempt_index=0): try: - return 
self.exchange.get_open_orders(asset) + if asset: + exchange = self.exchanges[asset.exchange] + return exchange.get_open_orders(asset) + + else: + open_orders = [] + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] + exchange_orders = exchange.get_open_orders() + open_orders.append(exchange_orders) + + return open_orders except ExchangeRequestError as e: log.warn( 'open orders attempt {}: {}'.format(attempt_index, e) @@ -522,12 +548,54 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): return self._get_open_orders(asset) @api_method - def get_order(self, order_id): - return self.exchange.get_order(order_id) + def get_order(self, order_id, exchange_name): + exchange = self.exchanges[exchange_name] + return exchange.get_order(order_id) @api_method - def cancel_order(self, order_param): + def cancel_order(self, order_param, exchange_name): + exchange = self.exchanges[exchange_name] + order_id = order_param if isinstance(order_param, zp.Order): order_id = order_param.id - self.exchange.cancel_order(order_id) + + exchange.cancel_order(order_id) + + @api_method + @preprocess(symbol_str=ensure_upper_case) + def symbol(self, symbol_str, exchange_name=None): + """Lookup an Equity by its ticker symbol. + + Parameters + ---------- + symbol_str : str + The ticker symbol for the equity to lookup. + + Returns + ------- + equity : Equity + The equity that held the ticker symbol on the current + symbol lookup date. + + Raises + ------ + SymbolNotFound + Raised when the symbols was not held on the current lookup date. + + See Also + -------- + :func:`catalyst.api.set_symbol_lookup_date` + """ + # If the user has not set the symbol lookup date, + # use the end_session as the date for sybmol->sid resolution. 
+ _lookup_date = self._symbol_lookup_date \ + if self._symbol_lookup_date is not None \ + else self.sim_params.end_session + + exchange = self.exchanges[exchange_name] + return self.asset_finder.lookup_symbol( + symbol_str, + as_of_date=_lookup_date, + exchange=exchange + ) diff --git a/catalyst/exchange/asset_finder_exchange.py b/catalyst/exchange/asset_finder_exchange.py index 2239f8a8..b95d1101 100644 --- a/catalyst/exchange/asset_finder_exchange.py +++ b/catalyst/exchange/asset_finder_exchange.py @@ -4,8 +4,7 @@ log = Logger('AssetFinderExchange') class AssetFinderExchange(object): - def __init__(self, exchange): - self.exchange = exchange + def __init__(self): self._asset_cache = {} @property @@ -47,7 +46,8 @@ class AssetFinderExchange(object): log.info('fetching asset: {}'.format(sid)) return list() - def lookup_symbol(self, symbol, as_of_date, fuzzy=False): + def lookup_symbol(self, symbol, as_of_date, exchange, + fuzzy=False): """Lookup an asset by symbol. Parameters @@ -81,11 +81,12 @@ class AssetFinderExchange(object): there are multiple candidates for the given ``symbol`` on the ``as_of_date``. """ - log.debug('looking up symbol: {}'.format(symbol)) + log.debug('looking up symbol: {} {}'.format(symbol, exchange.name)) - if symbol in self._asset_cache: - return self._asset_cache[symbol] + key = ','.join([exchange.name, symbol]) + if key in self._asset_cache: + return self._asset_cache[key] else: - asset = self.exchange.get_asset(symbol) - self._asset_cache[symbol] = asset + asset = exchange.get_asset(symbol) + self._asset_cache[key] = asset return asset diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 77a7cb76..b1d91f56 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -12,7 +12,8 @@ # limitations under the License. 
from time import sleep - +import pandas as pd +from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import DataPortal @@ -25,8 +26,8 @@ log = Logger('DataPortalExchange') class DataPortalExchange(DataPortal): - def __init__(self, exchange, *args, **kwargs): - self.exchange = exchange + def __init__(self, exchanges, *args, **kwargs): + self.exchanges = exchanges # TODO: put somewhere accessible by each algo self.retry_get_history_window = 5 @@ -45,14 +46,43 @@ class DataPortalExchange(DataPortal): ffill=True, attempt_index=0): try: - return self.exchange.get_history_window( - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill) + exchange_assets = dict() + for asset in assets: + if asset.exchange not in exchange_assets: + exchange_assets[asset.exchange] = list() + + exchange_assets[asset.exchange].append(asset) + + if len(exchange_assets) > 1: + df_list = [] + for exchange_name in exchange_assets: + exchange = self.exchanges[exchange_name] + assets = exchange_assets[exchange_name] + + df = exchange.get_history_window( + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + + df_list.append(df) + + return pd.concat(df_list) + + else: + exchange = self.exchanges[exchange_assets.keys()[0]] + return exchange.get_history_window( + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + except ExchangeRequestError as e: log.warn( 'get history attempt {}: {}'.format(attempt_index, e) @@ -93,8 +123,30 @@ class DataPortalExchange(DataPortal): def _get_spot_value(self, assets, field, dt, data_frequency, attempt_index=0): try: - return self.exchange.get_spot_value(assets, field, dt, - data_frequency) + if isinstance(assets, TradingPair): + exchange = self.exchanges[assets.exchange] + return exchange.get_spot_value( + assets, field, dt, data_frequency) + + else: + exchange_assets = dict() + for asset in assets: + if asset.exchange not in 
exchange_assets: + exchange_assets[asset.exchange] = list() + + exchange_assets[asset.exchange].append(asset) + + spot_values = [] + for exchange_name in exchange_assets: + exchange = self.exchanges[exchange_name] + assets = exchange_assets[exchange_name] + exchange_spot_values = exchange.get_spot_value( + assets, field, dt, data_frequency) + + spot_values += exchange_spot_values + + return spot_values + except ExchangeRequestError as e: log.warn( 'get spot value attempt {}: {}'.format(attempt_index, e) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 0cfa35b3..276be89c 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -34,6 +34,13 @@ class ExchangeTransactionError(ZiplineError): ).strip() +class ExchangeNotFoundError(ZiplineError): + msg = ( + 'Exchange {exchange_name} not found. Please specify exchanges ' + 'supported by Catalyst and verify spelling for accuracy.' + ).strip() + + class ExchangeAuthNotFound(ZiplineError): msg = ( 'Please create an auth.json file containing the api token and key for ' diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index fc9f6354..793c8919 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -45,7 +45,7 @@ from catalyst.exchange.exchange_portfolio import ExchangePortfolio from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeRequestErrorTooManyAttempts, - BaseCurrencyNotFoundError) + BaseCurrencyNotFoundError, ExchangeNotFoundError) from catalyst.exchange.exchange_utils import get_exchange_auth, \ get_algo_object from logbook import Logger @@ -150,37 +150,42 @@ def _run(handle_data, if live and exchange is not None: exchange_name = exchange + start = pd.Timestamp.utcnow() end = start + timedelta(minutes=1439) + exchange_list = [x.strip().lower() for x in exchange.split(',')] - portfolio = get_algo_object( - algo_name=algo_namespace, - 
key='portfolio_{}'.format(exchange_name), - environ=environ - ) - if portfolio is None: - portfolio = ExchangePortfolio( - start_date=pd.Timestamp.utcnow() + exchanges = dict() + for exchange_name in exchange_list: + + portfolio = get_algo_object( + algo_name=algo_namespace, + key='portfolio_{}'.format(exchange_name), + environ=environ ) - exchange_auth = get_exchange_auth(exchange_name) - if exchange_name == 'bitfinex': - exchange = Bitfinex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=base_currency, - portfolio=portfolio - ) - elif exchange_name == 'bittrex': - exchange = Bittrex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=base_currency, - portfolio=portfolio - ) - else: - raise NotImplementedError( - 'exchange not supported: %s' % exchange_name) + if portfolio is None: + portfolio = ExchangePortfolio( + start_date=pd.Timestamp.utcnow() + ) + + exchange_auth = get_exchange_auth(exchange_name) + if exchange_name == 'bitfinex': + exchanges[exchange_name] = Bitfinex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) + elif exchange_name == 'bittrex': + exchanges[exchange_name] = Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) + else: + raise ExchangeNotFoundError(exchange_name=exchange_name) open_calendar = get_calendar('OPEN') sim_params = create_simulation_parameters( @@ -197,23 +202,24 @@ def _run(handle_data, exchange_tz='UTC', asset_db_path=None ) - env.asset_finder = AssetFinderExchange(exchange) + env.asset_finder = AssetFinderExchange() data = DataPortalExchange( - exchange=exchange, + exchanges=exchanges, asset_finder=env.asset_finder, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) choose_loader = None - def fetch_capital_base(attempt_index=0): + def fetch_capital_base(exchange, attempt_index=0): """ 
Fetch the base currency amount required to bootstrap the algorithm against the exchange. The algorithm cannot continue without this value. + :param exchange: the targeted exchange :param attempt_index: :return capital_base: the amount of base currency available for trading @@ -240,10 +246,15 @@ def _run(handle_data, exchange=exchange_name ) + capital_base = 0 + for exchange_name in exchanges: + exchange = exchanges[exchange_name] + capital_base += fetch_capital_base(exchange) + sim_params = create_simulation_parameters( start=start, end=end, - capital_base=fetch_capital_base(), + capital_base=capital_base, emission_rate='minute', data_frequency='minute' ) @@ -339,9 +350,9 @@ def _run(handle_data, choose_loader = None TradingAlgorithmClass = ( - partial(ExchangeTradingAlgorithm, exchange=exchange, + partial(ExchangeTradingAlgorithm, exchanges=exchanges, algo_namespace=algo_namespace, live_graph=live_graph) - if live and exchange else TradingAlgorithm) + if live and exchanges else TradingAlgorithm) perf = TradingAlgorithmClass( namespace=namespace, From 7e280aeb5cd4aacb8aee9edd5d428a792ad4237e Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sun, 10 Sep 2017 20:20:34 -0400 Subject: [PATCH 002/110] Working on multiple exchanges and a sample algo for arbitrage --- catalyst/examples/arbitrage_with_interface.py | 190 ++++++++++++++++++ catalyst/exchange/algorithm_exchange.py | 58 ++++-- catalyst/exchange/bitfinex/bitfinex.py | 1 + catalyst/exchange/bitfinex/symbols.json | 12 ++ catalyst/exchange/bittrex/bittrex.py | 1 + catalyst/exchange/exchange.py | 10 +- catalyst/exchange/exchange_errors.py | 22 ++ catalyst/exchange/live_graph_clock.py | 30 ++- 8 files changed, 299 insertions(+), 25 deletions(-) create mode 100644 catalyst/examples/arbitrage_with_interface.py diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py new file mode 100644 index 00000000..66afd8ce --- /dev/null +++ 
b/catalyst/examples/arbitrage_with_interface.py @@ -0,0 +1,190 @@ +import talib +from logbook import Logger + +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'arbitrage_neo_eth' +log = Logger(algo_namespace) + + +def initialize(context): + log.info('initializing arbitrage algorithm') + + context.buying_exchange = 'bittrex' + context.selling_exchange = 'bitfinex' + + context.trading_pair_symbol = 'neo_eth' + context.trading_pairs = dict() + context.trading_pairs[context.buying_exchange] = \ + symbol(context.trading_pair_symbol, context.buying_exchange) + context.trading_pairs[context.selling_exchange] = \ + symbol(context.trading_pair_symbol, context.selling_exchange) + + context.entry_points = [ + dict(gap=0.001, amount=0.05), + dict(gap=0.002, amount=0.1), + ] + context.exit_points = [ + dict(gap=0, amount=0.05), + dict(gap=-0.001, amount=0.01), + ] + + context.MAX_POSITIONS = 50 + context.SLIPPAGE_ALLOWED = 0.02 + + pass + + +def place_order(context, amount, buying_price, selling_price, + action): + if action == 'enter': + buying_exchange = context.exchanges[context.buying_exchange] + buy_price = buying_price + + selling_exchange = context.exchanges[context.selling_exchange] + sell_price = selling_price + + elif action == 'exit': + buying_exchange = context.exchanges[context.selling_exchange] + buy_price = selling_price + + selling_exchange = context.exchanges[context.buying_exchange] + sell_price = buying_price + + else: + raise ValueError('invalid order action') + + base_currency = buying_exchange.base_currency + base_currency_amount = buying_exchange.portfolio.cash + + sell_balances = selling_exchange.get_balances() + sell_currency = context.trading_pairs[ + context.selling_exchange].market_currency + + if sell_currency in sell_balances: + market_currency_amount = 
sell_balances[sell_currency] + else: + log.warn('the selling exchange {} does not hold currency {}'.format( + selling_exchange.name, sell_currency + )) + return + + if base_currency_amount < amount: + log.warn('not enough {} ({}) to buy {}, adjusting the amount'.format( + base_currency, base_currency_amount, amount)) + amount = base_currency_amount + elif market_currency_amount < amount: + log.warn('not enough {} ({}) to sell {}, aborting'.format( + sell_currency, market_currency_amount, amount)) + return + + adj_buy_price = buy_price * (1 + context.SLIPPAGE_ALLOWED) + log.info('buying {} limit at {}{} on {}'.format( + amount, buying_price, context.trading_pair_symbol, + buying_exchange.name)) + order( + asset=context.trading_pairs[buying_exchange], + amount=amount, + limit_price=adj_buy_price + ) + + adj_sell_price = sell_price * (1 - context.SLIPPAGE_ALLOWED) + log.info('selling {} limit at {}{} on {}'.format( + amount, adj_sell_price, context.trading_pair_symbol, + selling_exchange.name)) + order( + asset=context.trading_pairs[selling_exchange], + amount=amount, + limit_price=adj_sell_price + ) + pass + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + + buying_price = data.current( + context.trading_pairs[context.buying_exchange], 'price') + log.info('price on buying exchange {exchange}: {price}'.format( + exchange=context.buying_exchange.upper(), + price=buying_price, + )) + + selling_price = data.current( + context.trading_pairs[context.selling_exchange], 'price') + + log.info('price on selling exchange {exchange}: {price}'.format( + exchange=context.selling_exchange.upper(), + price=selling_price, + )) + + # If for example, + # selling price = 50 + # buying price = 25 + # expected gap = 1 + + # If follows that, + # selling price - buying price / buying price + # 50 - 25 / 25 = 1 + gap = (selling_price - buying_price) / buying_price + log.info('the price gap: {} ({}%)'.format(gap, gap * 100)) + + # Consider the least 
ambitious entry point first + # Override of wider gap is found + entry_points = sorted( + context.entry_points, + key=lambda point: point['gap'], + ) + + buy_amount = None + for entry_point in entry_points: + if gap > entry_point['gap']: + buy_amount = entry_point['amount'] + + if buy_amount: + log.info('found buy trigger for amount: {}'.format(buy_amount)) + place_order(context, buy_amount, buying_price, selling_price, 'enter') + + else: + # Consider the narrowest exit gap first + # Override of wider gap is found + exit_points = sorted( + context.exit_points, + key=lambda point: point['gap'], + reverse=True + ) + + sell_amount = None + for exit_point in exit_points: + if gap < exit_point['gap']: + sell_amount = exit_point['amount'] + + if sell_amount: + log.info('found sell trigger for amount: {}'.format(sell_amount)) + place_order(context, sell_amount, buying_price, selling_price, + 'exit') + + +def analyze(context, stats): + log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass + + +run_algorithm( + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bittrex,bitfinex', + live=True, + algo_namespace=algo_namespace, + base_currency='eth', + live_graph=False +) diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/algorithm_exchange.py index 5321f971..86b836f2 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/algorithm_exchange.py @@ -11,42 +11,43 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os +import pickle import signal import sys -import pickle +from collections import deque from datetime import timedelta -from time import sleep from os import listdir from os.path import isfile, join -from collections import deque -import numpy as np +from time import sleep import logbook import pandas as pd -from catalyst.utils.preprocess import preprocess +from catalyst.assets._assets import TradingPair import catalyst.protocol as zp from catalyst.algorithm import TradingAlgorithm from catalyst.data.minute_bars import BcolzMinuteBarWriter, \ BcolzMinuteBarReader from catalyst.errors import OrderInBeforeTradingStart -from catalyst.exchange.simple_clock import SimpleClock -from catalyst.exchange.live_graph_clock import LiveGraphClock from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangePortfolioDataError, - ExchangeTransactionError -) + ExchangeTransactionError, + OrphanOrderError) from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root, \ save_algo_object, get_algo_object, get_algo_folder, get_algo_df, \ save_algo_df +from catalyst.exchange.live_graph_clock import LiveGraphClock +from catalyst.exchange.simple_clock import SimpleClock from catalyst.exchange.stats_utils import get_pretty_stats from catalyst.finance.performance.period import calc_period_stats from catalyst.gens.tradesimulation import AlgorithmSimulator from catalyst.utils.api_support import ( api_method, disallowed_in_before_trading_start) -from catalyst.utils.input_validation import error_keywords, ensure_upper_case +from catalyst.utils.input_validation import error_keywords, ensure_upper_case, \ + expect_types +from catalyst.utils.preprocess import preprocess log = logbook.Logger("ExchangeTradingAlgorithm") @@ -406,7 +407,9 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): self.minute_stats.append(minute_stats) self.add_pnl_stats(minute_stats) - self.add_custom_signals_stats(minute_stats) + if self.recorded_vars: + 
self.add_custom_signals_stats(minute_stats) + self.add_exposure_stats(minute_stats) print_df = pd.DataFrame(list(self.minute_stats)) @@ -481,23 +484,50 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): @api_method @disallowed_in_before_trading_start(OrderInBeforeTradingStart()) + @expect_types(asset=TradingPair) def order(self, asset, amount, limit_price=None, stop_price=None, style=None): + """ + We use the exchange specific portfolio to place orders. + The cumulative portfolio does not contain open orders but exchange + portfolios do. + + :param asset: TradingPair + :param amount: float + :param limit_price: float + :param stop_price: float + :param style: Style + :return order: Order + The catalyst order object or None + """ + amount, style = self._calculate_order(asset, amount, limit_price, stop_price, style) order_id = self._order(asset, amount, limit_price, stop_price, style) + exchange = self.exchanges[asset.exchange] + exchange_portfolio = exchange.portfolio if order_id is not None: - order = self.portfolio.open_orders[order_id] - self.perf_tracker.process_order(order) - return order + + if order_id in exchange_portfolio.open_orders: + order = exchange_portfolio.open_orders[order_id] + self.perf_tracker.process_order(order) + return order + + else: + raise OrphanOrderError( + order_id=order_id, + exchange=exchange.name + ) else: + log.warn('unable to order {} {} on exchange {}'.format( + amount, asset.symbol, asset.exchange)) return None def round_order(self, amount): diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index fe32991a..6d0fb09f 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -40,6 +40,7 @@ class Bitfinex(Exchange): self.key = key self.secret = secret.encode('UTF-8') self.name = 'bitfinex' + self.color = 'green' self.assets = {} self.load_assets() self.base_currency = base_currency diff --git a/catalyst/exchange/bitfinex/symbols.json 
b/catalyst/exchange/bitfinex/symbols.json index 8ab44191..2134bd9f 100644 --- a/catalyst/exchange/bitfinex/symbols.json +++ b/catalyst/exchange/bitfinex/symbols.json @@ -1,4 +1,16 @@ { + "neobtc": { + "symbol": "neo_btc", + "start_date": "2017-09-07" + }, + "neousd": { + "symbol": "neo_usd", + "start_date": "2017-09-07" + }, + "neoeth": { + "symbol": "neo_eth", + "start_date": "2017-09-07" + }, "btcusd": { "symbol": "btc_usd", "start_date": "2010-01-01" diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index f56073c5..87b1c437 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -22,6 +22,7 @@ class Bittrex(Exchange): def __init__(self, key, secret, base_currency, portfolio=None): self.api = Bittrex_api(key=key, secret=secret.encode('UTF-8')) self.name = 'bittrex' + self.color = 'blue' self.base_currency = base_currency self._portfolio = portfolio diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index b0d0f5e2..7aaff3fc 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -14,7 +14,7 @@ from catalyst.errors import ( SymbolNotFound, ) from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ - InvalidOrderStyle, BaseCurrencyNotFoundError + InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -30,7 +30,6 @@ class Exchange: def __init__(self): self.name = None - self.trading_pairs = None self.assets = {} self._portfolio = None self.minute_writer = None @@ -110,7 +109,12 @@ class Exchange: asset = self.assets[key] if not asset: - raise SymbolNotFound(symbol=symbol) + supported_symbols = [pair.symbol for pair in self.assets.values()] + raise SymbolNotFoundOnExchange( + symbol=symbol, + exchange=self.name, + 
supported_symbols=supported_symbols + ) return asset diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 276be89c..a0a137f9 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -94,6 +94,13 @@ class OrderNotFound(ZiplineError): ).strip() +class OrphanOrderError(ZiplineError): + msg = ( + 'Order {order_id} found in exchange {exchange} but not tracked by ' + 'the algorithm.' + ).strip() + + class OrderCancelError(ZiplineError): msg = ( 'Unable to cancel order {order_id} on exchange {exchange} {error}.' @@ -118,3 +125,18 @@ class MismatchingBaseCurrencies(ZiplineError): 'Unable to trade with base currency {base_currency} when the ' 'algorithm uses {algo_currency}.' ).strip() + + +class MismatchingBaseCurrenciesExchanges(ZiplineError): + msg = ( + 'Unable to trade with base currency {base_currency} when the ' + 'exchange {exchange_name} users {exchange_currency}.' + ).strip() + + +class SymbolNotFoundOnExchange(ZiplineError): + """ + Raised when a symbol() call contains a non-existant symbol. + """ + msg = ('Symbol {symbol} not found on exchange {exchange}. 
' + 'Choose from: {supported_symbols}').strip() diff --git a/catalyst/exchange/live_graph_clock.py b/catalyst/exchange/live_graph_clock.py index 877f8115..be3b80a8 100644 --- a/catalyst/exchange/live_graph_clock.py +++ b/catalyst/exchange/live_graph_clock.py @@ -22,6 +22,9 @@ from logbook import Logger from matplotlib import pyplot as plt from matplotlib import style +from catalyst.exchange.exchange_errors import \ + MismatchingBaseCurrenciesExchanges + log = Logger('LiveGraphClock') style.use('dark_background') @@ -154,17 +157,31 @@ class LiveGraphClock(object): context = self.context df = context.exposure_stats + # TODO: list exchanges in graph + base_currency = None + positions = [] + for exchange_name in context.exchanges: + exchange = context.exchanges[exchange_name] + + if not base_currency: + base_currency = exchange.base_currency + elif base_currency != exchange.base_currency: + raise MismatchingBaseCurrenciesExchanges( + base_currency=base_currency, + exchange_name=exchange.name, + exchange_currency=exchange.base_currency + ) + + positions += exchange.portfolio.positions + ax.clear() ax.set_title('Exposure') ax.plot(df.index, df['base_currency'], '-', color='green', linewidth=1.0, - label='Base Currency: {}'.format( - context.exchange.base_currency.upper() - ) + label='Base Currency: {}'.format(base_currency.upper()) ) - positions = context.exchange.portfolio.positions symbols = [] for position in positions: symbols.append(position.symbol) @@ -172,10 +189,7 @@ class LiveGraphClock(object): ax.plot(df.index, df['long_exposure'], '-', color='blue', linewidth=1.0, - label='Long Exposure: {}'.format( - ', '.join(symbols).upper() - ) - ) + label='Long Exposure: {}'.format(', '.join(symbols).upper())) self.set_legend(ax) self.format_ax(ax) From 3e2a8dd78be8ccadf2bc92efc8b8491863852a60 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 11 Sep 2017 18:03:58 -0400 Subject: [PATCH 003/110] Adjustments to the sample arbitrage algo --- 
catalyst/examples/arbitrage_with_interface.py | 165 ++++++++++++------ 1 file changed, 111 insertions(+), 54 deletions(-) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py index 66afd8ce..425f995b 100644 --- a/catalyst/examples/arbitrage_with_interface.py +++ b/catalyst/examples/arbitrage_with_interface.py @@ -1,12 +1,9 @@ -import talib from logbook import Logger from catalyst.api import ( order, - order_target_percent, symbol, - record, - get_open_orders, + get_open_orders ) from catalyst.exchange.stats_utils import get_pretty_stats from catalyst.utils.run_algo import run_algorithm @@ -18,23 +15,23 @@ log = Logger(algo_namespace) def initialize(context): log.info('initializing arbitrage algorithm') - context.buying_exchange = 'bittrex' - context.selling_exchange = 'bitfinex' + context.buying_exchange = context.exchanges['bittrex'] + context.selling_exchange = context.exchanges['bitfinex'] context.trading_pair_symbol = 'neo_eth' context.trading_pairs = dict() context.trading_pairs[context.buying_exchange] = \ - symbol(context.trading_pair_symbol, context.buying_exchange) + symbol(context.trading_pair_symbol, context.buying_exchange.name) context.trading_pairs[context.selling_exchange] = \ - symbol(context.trading_pair_symbol, context.selling_exchange) + symbol(context.trading_pair_symbol, context.selling_exchange.name) context.entry_points = [ - dict(gap=0.001, amount=0.05), - dict(gap=0.002, amount=0.1), + dict(gap=0.01, amount=0.05), + dict(gap=0.02, amount=0.1), ] context.exit_points = [ - dict(gap=0, amount=0.05), - dict(gap=-0.001, amount=0.01), + dict(gap=0.01, amount=0.05), + dict(gap=-0.02, amount=0.01), ] context.MAX_POSITIONS = 50 @@ -43,66 +40,95 @@ def initialize(context): pass -def place_order(context, amount, buying_price, selling_price, - action): +def place_order(context, amount, buying_price, selling_price, action): if action == 'enter': - buying_exchange = 
context.exchanges[context.buying_exchange] - buy_price = buying_price + enter_exchange = context.buying_exchange + entry_price = buying_price - selling_exchange = context.exchanges[context.selling_exchange] - sell_price = selling_price + exit_exchange = context.selling_exchange + exit_price = selling_price elif action == 'exit': - buying_exchange = context.exchanges[context.selling_exchange] - buy_price = selling_price + enter_exchange = context.selling_exchange + entry_price = selling_price - selling_exchange = context.exchanges[context.buying_exchange] - sell_price = buying_price + exit_exchange = context.buying_exchange + exit_price = buying_price else: raise ValueError('invalid order action') - base_currency = buying_exchange.base_currency - base_currency_amount = buying_exchange.portfolio.cash + base_currency = enter_exchange.base_currency + base_currency_amount = enter_exchange.portfolio.cash - sell_balances = selling_exchange.get_balances() - sell_currency = context.trading_pairs[ + exit_balances = exit_exchange.get_balances() + exit_currency = context.trading_pairs[ context.selling_exchange].market_currency - if sell_currency in sell_balances: - market_currency_amount = sell_balances[sell_currency] + if exit_currency in exit_balances: + market_currency_amount = exit_balances[exit_currency] else: - log.warn('the selling exchange {} does not hold currency {}'.format( - selling_exchange.name, sell_currency - )) + log.warn( + 'the selling exchange {exchange_name} does not hold ' + 'currency {currency}'.format( + exchange_name=exit_exchange.name, + currency=exit_currency + ) + ) return - if base_currency_amount < amount: - log.warn('not enough {} ({}) to buy {}, adjusting the amount'.format( - base_currency, base_currency_amount, amount)) - amount = base_currency_amount + if base_currency_amount < (amount * entry_price): + adj_amount = base_currency_amount / entry_price + log.warn( + 'not enough {base_currency} ({base_currency_amount}) to buy ' + '{amount}, 
adjusting the amount to {adj_amount}'.format( + base_currency=base_currency, + base_currency_amount=base_currency_amount, + amount=amount, + adj_amount=adj_amount + ) + ) + amount = adj_amount elif market_currency_amount < amount: - log.warn('not enough {} ({}) to sell {}, aborting'.format( - sell_currency, market_currency_amount, amount)) + log.warn( + 'not enough {currency} ({currency_amount}) to sell ' + '{amount}, aborting'.format( + currency=exit_currency, + currency_amount=market_currency_amount, + amount=amount + ) + ) return - adj_buy_price = buy_price * (1 + context.SLIPPAGE_ALLOWED) - log.info('buying {} limit at {}{} on {}'.format( - amount, buying_price, context.trading_pair_symbol, - buying_exchange.name)) + adj_buy_price = entry_price * (1 + context.SLIPPAGE_ALLOWED) + log.info( + 'buying {amount} {trading_pair} on {exchange_name} with price ' + 'limit {limit_price}'.format( + amount=amount, + trading_pair=context.trading_pair_symbol, + exchange_name=enter_exchange.name, + limit_price=adj_buy_price + ) + ) order( - asset=context.trading_pairs[buying_exchange], + asset=context.trading_pairs[enter_exchange], amount=amount, limit_price=adj_buy_price ) - adj_sell_price = sell_price * (1 - context.SLIPPAGE_ALLOWED) - log.info('selling {} limit at {}{} on {}'.format( - amount, adj_sell_price, context.trading_pair_symbol, - selling_exchange.name)) + adj_sell_price = exit_price * (1 - context.SLIPPAGE_ALLOWED) + log.info( + 'selling {amount} {trading_pair} on {exchange_name} with price ' + 'limit {limit_price}'.format( + amount=-amount, + trading_pair=context.trading_pair_symbol, + exchange_name=exit_exchange.name, + limit_price=adj_sell_price + ) + ) order( - asset=context.trading_pairs[selling_exchange], - amount=amount, + asset=context.trading_pairs[exit_exchange], + amount=-amount, limit_price=adj_sell_price ) pass @@ -113,8 +139,9 @@ def handle_data(context, data): buying_price = data.current( context.trading_pairs[context.buying_exchange], 'price') + 
log.info('price on buying exchange {exchange}: {price}'.format( - exchange=context.buying_exchange.upper(), + exchange=context.buying_exchange.name.upper(), price=buying_price, )) @@ -122,7 +149,7 @@ def handle_data(context, data): context.trading_pairs[context.selling_exchange], 'price') log.info('price on selling exchange {exchange}: {price}'.format( - exchange=context.selling_exchange.upper(), + exchange=context.selling_exchange.name.upper(), price=selling_price, )) @@ -135,7 +162,26 @@ def handle_data(context, data): # selling price - buying price / buying price # 50 - 25 / 25 = 1 gap = (selling_price - buying_price) / buying_price - log.info('the price gap: {} ({}%)'.format(gap, gap * 100)) + log.info( + 'the price gap: {gap} ({gap_percent}%)'.format( + gap=gap, + gap_percent=gap * 100 + ) + ) + + for exchange in context.trading_pairs: + asset = context.trading_pairs[exchange] + + orders = get_open_orders(asset) + if orders: + log.info( + 'found {order_count} open orders on {exchange_name} ' + 'skipping bar until all open orders execute'.format( + order_count=len(orders), + exchange_name=exchange.name + ) + ) + return # Consider the least ambitious entry point first # Override of wider gap is found @@ -151,7 +197,13 @@ def handle_data(context, data): if buy_amount: log.info('found buy trigger for amount: {}'.format(buy_amount)) - place_order(context, buy_amount, buying_price, selling_price, 'enter') + place_order( + context=context, + amount=buy_amount, + buying_price=buying_price, + selling_price=selling_price, + action='enter' + ) else: # Consider the narrowest exit gap first @@ -169,8 +221,13 @@ def handle_data(context, data): if sell_amount: log.info('found sell trigger for amount: {}'.format(sell_amount)) - place_order(context, sell_amount, buying_price, selling_price, - 'exit') + place_order( + context=context, + amount=sell_amount, + buying_price=buying_price, + selling_price=selling_price, + action='exit' + ) def analyze(context, stats): From 
41d9bbca1bdefbd8ea4f9b958bb874a738811489 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 11 Sep 2017 18:20:28 -0400 Subject: [PATCH 004/110] Adjustments to the sample arbitrage algo --- catalyst/examples/arbitrage_with_interface.py | 10 ++++++---- catalyst/exchange/algorithm_exchange.py | 9 ++++++++- catalyst/exchange/stats_utils.py | 6 +++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py index 425f995b..56a1e889 100644 --- a/catalyst/examples/arbitrage_with_interface.py +++ b/catalyst/examples/arbitrage_with_interface.py @@ -1,6 +1,7 @@ from logbook import Logger from catalyst.api import ( + record, order, symbol, get_open_orders @@ -26,12 +27,12 @@ def initialize(context): symbol(context.trading_pair_symbol, context.selling_exchange.name) context.entry_points = [ - dict(gap=0.01, amount=0.05), - dict(gap=0.02, amount=0.1), + dict(gap=0.03, amount=0.05), + dict(gap=0.04, amount=0.1), + dict(gap=0.05, amount=0.5), ] context.exit_points = [ - dict(gap=0.01, amount=0.05), - dict(gap=-0.02, amount=0.01), + dict(gap=-0.02, amount=0.5), ] context.MAX_POSITIONS = 50 @@ -168,6 +169,7 @@ def handle_data(context, data): gap_percent=gap * 100 ) ) + record(buying_price=buying_price, selling_price=selling_price, gap=gap) for exchange in context.trading_pairs: asset = context.trading_pairs[exchange] diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/algorithm_exchange.py index 86b836f2..88128091 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/algorithm_exchange.py @@ -409,6 +409,9 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): self.add_pnl_stats(minute_stats) if self.recorded_vars: self.add_custom_signals_stats(minute_stats) + recorded_cols = self.recorded_vars.keys() + else: + recorded_cols = None self.add_exposure_stats(minute_stats) @@ -416,7 +419,11 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): 
log.debug( 'statistics for the last {stats_minutes} minutes:\n{stats}'.format( stats_minutes=self.stats_minutes, - stats=get_pretty_stats(print_df, self.stats_minutes) + stats=get_pretty_stats( + stats_df=print_df, + recorded_cols=recorded_cols, + num_rows=self.stats_minutes + ) )) today = pd.to_datetime('today', utc=True) diff --git a/catalyst/exchange/stats_utils.py b/catalyst/exchange/stats_utils.py index eda1b2fd..2cf65b43 100644 --- a/catalyst/exchange/stats_utils.py +++ b/catalyst/exchange/stats_utils.py @@ -1,7 +1,7 @@ import pandas as pd -def get_pretty_stats(stats_df, num_rows=10): +def get_pretty_stats(stats_df, recorded_cols=None, num_rows=10): """ Format and print the last few rows of a statistics DataFrame. See the pyfolio project for the data structure. @@ -22,6 +22,10 @@ def get_pretty_stats(stats_df, num_rows=10): 'pnl', 'long_exposure', 'short_exposure', 'orders', 'transactions', 'positions'] + if recorded_cols is not None: + for column in recorded_cols: + columns.append(column) + def format_positions(positions): parts = [] for position in positions: From ff0dc5cff98dfcfb260531e03efc369882b45ad1 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 12 Sep 2017 14:25:04 -0400 Subject: [PATCH 005/110] Polishing the sample arbitrage algo --- catalyst/examples/arbitrage_with_interface.py | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py index 56a1e889..7ee9bf86 100644 --- a/catalyst/examples/arbitrage_with_interface.py +++ b/catalyst/examples/arbitrage_with_interface.py @@ -16,13 +16,22 @@ log = Logger(algo_namespace) def initialize(context): log.info('initializing arbitrage algorithm') + # The context contains a new "exchanges" attribute which is a dictionary + # of exchange objects by exchange name. This allow easy access to the + # exchanges. 
context.buying_exchange = context.exchanges['bittrex'] context.selling_exchange = context.exchanges['bitfinex'] context.trading_pair_symbol = 'neo_eth' context.trading_pairs = dict() + + # Note the second parameter of the symbol() method + # Passing the exchange name here returns a TradingPair object including + # the exchange information. This allow all other operations using + # the TradingPair to target the correct exchange. context.trading_pairs[context.buying_exchange] = \ symbol(context.trading_pair_symbol, context.buying_exchange.name) + context.trading_pairs[context.selling_exchange] = \ symbol(context.trading_pair_symbol, context.selling_exchange.name) @@ -35,13 +44,28 @@ def initialize(context): dict(gap=-0.02, amount=0.5), ] - context.MAX_POSITIONS = 50 context.SLIPPAGE_ALLOWED = 0.02 - pass -def place_order(context, amount, buying_price, selling_price, action): +def place_orders(context, amount, buying_price, selling_price, action): + """ + This method will always place two orders of the same amount to keep + the currency position the same as it moves between the two exchanges. + + :param context: TradingAlgorithm + :param amount: float + The trading pair amount to trade on both exchanges. + :param buying_price: float + The current trading pair price on the buying exchange. + :param selling_price: float + The current trading pair price on the selling exchange. 
+ :param action: string + "enter": buys on the buying exchange and sells on the selling exchange + "exit": buys on the selling exchange and sells on the buying exchange + + :return: + """ if action == 'enter': enter_exchange = context.buying_exchange entry_price = buying_price @@ -90,6 +114,7 @@ def place_order(context, amount, buying_price, selling_price, action): ) ) amount = adj_amount + elif market_currency_amount < amount: log.warn( 'not enough {currency} ({currency_amount}) to sell ' @@ -171,6 +196,7 @@ def handle_data(context, data): ) record(buying_price=buying_price, selling_price=selling_price, gap=gap) + # Waiting for orders to close before initiating new ones for exchange in context.trading_pairs: asset = context.trading_pairs[exchange] @@ -199,7 +225,7 @@ def handle_data(context, data): if buy_amount: log.info('found buy trigger for amount: {}'.format(buy_amount)) - place_order( + place_orders( context=context, amount=buy_amount, buying_price=buying_price, @@ -223,7 +249,7 @@ def handle_data(context, data): if sell_amount: log.info('found sell trigger for amount: {}'.format(sell_amount)) - place_order( + place_orders( context=context, amount=sell_amount, buying_price=buying_price, From 5a345a3abb3c53bcfa39c8a0e4b4959c0bcd57cc Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 15 Sep 2017 18:00:15 -0400 Subject: [PATCH 006/110] Documentation and cleanup from meeting with Victor --- catalyst/data/bundles/poloniex.py | 3 +++ catalyst/examples/arbitrage_with_interface.py | 2 +- catalyst/examples/buy_low_sell_high.py | 13 ++++++++----- catalyst/exchange/algorithm_exchange.py | 1 + catalyst/exchange/bitfinex/bitfinex.py | 4 ++-- catalyst/exchange/bitfinex/symbols.json | 3 ++- catalyst/exchange/bittrex/bittrex.py | 2 +- catalyst/exchange/data_portal_exchange.py | 1 + catalyst/exchange/exchange.py | 5 ++++- catalyst/exchange/live_graph_clock.py | 4 ++-- catalyst/utils/run_algo.py | 9 +++++++-- 11 files changed, 32 insertions(+), 15 deletions(-) diff --git 
a/catalyst/data/bundles/poloniex.py b/catalyst/data/bundles/poloniex.py index 4dcdf7bc..ad224492 100644 --- a/catalyst/data/bundles/poloniex.py +++ b/catalyst/data/bundles/poloniex.py @@ -90,6 +90,8 @@ class PoloniexBundle(BaseCryptoPricingBundle): start_date, end_date, frequency): + # TODO: replace this with direct exchange call + # The end date and frequency should be used to calculate the number of bars raw = pd.read_json( self._format_data_url( api_key, @@ -153,6 +155,7 @@ class PoloniexBundle(BaseCryptoPricingBundle): return self._format_polo_query(query_params) def _format_polo_query(self, query_params): + # TODO: got against the exchange object return 'https://poloniex.com/public?{query}'.format( query=urlencode(query_params), ) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py index 7ee9bf86..01baefa9 100644 --- a/catalyst/examples/arbitrage_with_interface.py +++ b/catalyst/examples/arbitrage_with_interface.py @@ -30,7 +30,7 @@ def initialize(context): # the exchange information. This allow all other operations using # the TradingPair to target the correct exchange. 
context.trading_pairs[context.buying_exchange] = \ - symbol(context.trading_pair_symbol, context.buying_exchange.name) + symbol('neo_eth', context.buying_exchange.name) context.trading_pairs[context.selling_exchange] = \ symbol(context.trading_pair_symbol, context.selling_exchange.name) diff --git a/catalyst/examples/buy_low_sell_high.py b/catalyst/examples/buy_low_sell_high.py index e1459ee6..feadf49b 100644 --- a/catalyst/examples/buy_low_sell_high.py +++ b/catalyst/examples/buy_low_sell_high.py @@ -38,6 +38,8 @@ def initialize(context): context.retry_update_portfolio = 10 context.retry_order = 5 + context.swallow_errors = True + context.errors = [] pass @@ -49,6 +51,7 @@ def _handle_data(context, data): bar_count=20, frequency='15m' ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] log.info('got rsi: {}'.format(rsi)) @@ -135,11 +138,11 @@ def _handle_data(context, data): def handle_data(context, data): log.info('handling bar {}'.format(data.current_dt)) - # try: - _handle_data(context, data) - # except Exception as e: - # log.warn('aborting the bar on error {}'.format(e)) - # context.errors.append(e) + try: + _handle_data(context, data) + except Exception as e: + log.warn('aborting the bar on error {}'.format(e)) + context.errors.append(e) log.info('completed bar {}, total execution errors {}'.format( data.current_dt, diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/algorithm_exchange.py index 88128091..f0193b12 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/algorithm_exchange.py @@ -544,6 +544,7 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): :param amount: :return: """ + # TODO: is this good enough? Victor has a better solution. 
return amount @api_method diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 6d0fb09f..a72b6167 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -224,7 +224,8 @@ class Bitfinex(Exchange): # TODO: fetch account data and keep in cache return None - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_date=None): """ Retrieve OHLVC candles from Bitfinex @@ -239,7 +240,6 @@ class Bitfinex(Exchange): '1M' """ - # TODO: use BcolzMinuteBarReader to read from cache freq_match = re.match(r'([0-9].*)(m|h|d)', data_frequency, re.M | re.I) if freq_match: number = int(freq_match.group(1)) diff --git a/catalyst/exchange/bitfinex/symbols.json b/catalyst/exchange/bitfinex/symbols.json index 2134bd9f..bd6951d1 100644 --- a/catalyst/exchange/bitfinex/symbols.json +++ b/catalyst/exchange/bitfinex/symbols.json @@ -1,7 +1,8 @@ { "neobtc": { "symbol": "neo_btc", - "start_date": "2017-09-07" + "start_date": "2017-09-07", + "precision":5 }, "neousd": { "symbol": "neo_usd", diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 87b1c437..3593e89c 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -208,7 +208,7 @@ class Bittrex(Exchange): error=status['message'] ) - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, start_date=None): """ Supported Intervals ------------------- diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index b1d91f56..b2214962 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -70,6 +70,7 @@ class DataPortalExchange(DataPortal): df_list.append(df) + # Merging the values values of each exchange return pd.concat(df_list) else: diff 
--git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 7aaff3fc..56bf786f 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -323,6 +323,7 @@ class Exchange: ) try: + #TODO: use victor's modified branch using int64 self.minute_writer.write_sid( sid=asset.sid, df=df @@ -592,7 +593,8 @@ class Exchange: pass @abstractmethod - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_date=None): """ Retrieve OHLCV candles for the given assets @@ -601,6 +603,7 @@ class Exchange: :param end_dt: :param bar_count: :param limit: + :param start_date: :return: """ pass diff --git a/catalyst/exchange/live_graph_clock.py b/catalyst/exchange/live_graph_clock.py index be3b80a8..ce3aa7df 100644 --- a/catalyst/exchange/live_graph_clock.py +++ b/catalyst/exchange/live_graph_clock.py @@ -27,8 +27,6 @@ from catalyst.exchange.exchange_errors import \ log = Logger('LiveGraphClock') -style.use('dark_background') - fmt = mdates.DateFormatter('%Y-%m-%d %H:%M') @@ -60,6 +58,8 @@ class LiveGraphClock(object): def __init__(self, sessions, context, time_skew=pd.Timedelta('0s')): + style.use('dark_background') + self.sessions = sessions self.time_skew = time_skew self._last_emit = None diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 793c8919..46b87845 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -152,12 +152,16 @@ def _run(handle_data, exchange_name = exchange start = pd.Timestamp.utcnow() - end = start + timedelta(minutes=1439) + + # TODO: fix the end data. 
+ end = start + timedelta(hours=8760) + exchange_list = [x.strip().lower() for x in exchange.split(',')] exchanges = dict() for exchange_name in exchange_list: + # Looking for the portfolio from the cache first portfolio = get_algo_object( algo_name=algo_namespace, key='portfolio_{}'.format(exchange_name), @@ -169,6 +173,7 @@ def _run(handle_data, start_date=pd.Timestamp.utcnow() ) + # This corresponds to the json file containing api token info exchange_auth = get_exchange_auth(exchange_name) if exchange_name == 'bitfinex': exchanges[exchange_name] = Bitfinex( @@ -352,7 +357,7 @@ def _run(handle_data, TradingAlgorithmClass = ( partial(ExchangeTradingAlgorithm, exchanges=exchanges, algo_namespace=algo_namespace, live_graph=live_graph) - if live and exchanges else TradingAlgorithm) + if live and exchanges else TradingAlgorithm) # TODO: backtest trading algo class perf = TradingAlgorithmClass( namespace=namespace, From 394777217d4529206142cc776262578a23b4b3a6 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 18 Sep 2017 13:56:30 -0400 Subject: [PATCH 007/110] Splitting the exchange_algorithm class to allow access to the symbol() method in backtesting mode --- ...ithm_exchange.py => exchange_algorithm.py} | 87 ++++++++++--------- 1 file changed, 48 insertions(+), 39 deletions(-) rename catalyst/exchange/{algorithm_exchange.py => exchange_algorithm.py} (98%) diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/exchange_algorithm.py similarity index 98% rename from catalyst/exchange/algorithm_exchange.py rename to catalyst/exchange/exchange_algorithm.py index f0193b12..0f9e4e47 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -57,7 +57,54 @@ class ExchangeAlgorithmExecutor(AlgorithmSimulator): super(self.__class__, self).__init__(*args, **kwargs) -class ExchangeTradingAlgorithm(TradingAlgorithm): +class ExchangeTradingAlgorithmBase(TradingAlgorithm): + def __init__(self, *args, **kwargs): + 
self.exchanges = kwargs.pop('exchanges', None) + super(self.__class__, self).__init__(*args, **kwargs) + + @api_method + @preprocess(symbol_str=ensure_upper_case) + def symbol(self, symbol_str, exchange_name=None): + """Lookup an Equity by its ticker symbol. + + Parameters + ---------- + symbol_str : str + The ticker symbol for the equity to lookup. + exchange_name: str + The name of the exchange containing the symbol + + Returns + ------- + equity : Equity + The equity that held the ticker symbol on the current + symbol lookup date. + + Raises + ------ + SymbolNotFound + Raised when the symbols was not held on the current lookup date. + + See Also + -------- + :func:`catalyst.api.set_symbol_lookup_date` + """ + # If the user has not set the symbol lookup date, + # use the end_session as the date for sybmol->sid resolution. + + _lookup_date = self._symbol_lookup_date \ + if self._symbol_lookup_date is not None \ + else self.sim_params.end_session + + exchange = self.exchanges[exchange_name] + return self.asset_finder.lookup_symbol( + symbol_str, + as_of_date=_lookup_date, + exchange=exchange + ) + + +class ExchangeTradingAlgorithm(ExchangeTradingAlgorithmBase): def __init__(self, *args, **kwargs): self.exchanges = kwargs.pop('exchanges', None) self.algo_namespace = kwargs.pop('algo_namespace', None) @@ -599,41 +646,3 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): order_id = order_param.id exchange.cancel_order(order_id) - - @api_method - @preprocess(symbol_str=ensure_upper_case) - def symbol(self, symbol_str, exchange_name=None): - """Lookup an Equity by its ticker symbol. - - Parameters - ---------- - symbol_str : str - The ticker symbol for the equity to lookup. - - Returns - ------- - equity : Equity - The equity that held the ticker symbol on the current - symbol lookup date. - - Raises - ------ - SymbolNotFound - Raised when the symbols was not held on the current lookup date. 
- - See Also - -------- - :func:`catalyst.api.set_symbol_lookup_date` - """ - # If the user has not set the symbol lookup date, - # use the end_session as the date for sybmol->sid resolution. - _lookup_date = self._symbol_lookup_date \ - if self._symbol_lookup_date is not None \ - else self.sim_params.end_session - - exchange = self.exchanges[exchange_name] - return self.asset_finder.lookup_symbol( - symbol_str, - as_of_date=_lookup_date, - exchange=exchange - ) From 1d6336afda599667603dc0d02782c946628dbd6d Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 18 Sep 2017 14:48:24 -0400 Subject: [PATCH 008/110] Splitting the exchange_algorithm class to allow access to the symbol() method in backtesting mode --- catalyst/exchange/asset_finder_exchange.py | 3 +- catalyst/exchange/data_portal_exchange.py | 100 +++++++++++++++++++-- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/catalyst/exchange/asset_finder_exchange.py b/catalyst/exchange/asset_finder_exchange.py index b95d1101..be8c4a6e 100644 --- a/catalyst/exchange/asset_finder_exchange.py +++ b/catalyst/exchange/asset_finder_exchange.py @@ -46,8 +46,7 @@ class AssetFinderExchange(object): log.info('fetching asset: {}'.format(sid)) return list() - def lookup_symbol(self, symbol, as_of_date, exchange, - fuzzy=False): + def lookup_symbol(self, symbol, exchange, as_of_date=None, fuzzy=False): """Lookup an asset by symbol. Parameters diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index b2214962..10df5389 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import abc from time import sleep import pandas as pd from catalyst.assets._assets import TradingPair @@ -25,7 +26,7 @@ from catalyst.exchange.exchange_errors import ( log = Logger('DataPortalExchange') -class DataPortalExchange(DataPortal): +class DataPortalExchangeBase(DataPortal): def __init__(self, exchanges, *args, **kwargs): self.exchanges = exchanges @@ -34,7 +35,7 @@ class DataPortalExchange(DataPortal): self.retry_get_spot_value = 5 self.retry_delay = 5 - super(DataPortalExchange, self).__init__(*args, **kwargs) + super(self.__class__, self).__init__(*args, **kwargs) def _get_history_window(self, assets, @@ -59,7 +60,8 @@ class DataPortalExchange(DataPortal): exchange = self.exchanges[exchange_name] assets = exchange_assets[exchange_name] - df = exchange.get_history_window( + df = self.get_exchange_spot_value_history_window( + exchange, assets, end_dt, bar_count, @@ -121,6 +123,18 @@ class DataPortalExchange(DataPortal): data_frequency, ffill) + @abc.abstractmethod + def get_exchange_spot_value_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + pass + def _get_spot_value(self, assets, field, dt, data_frequency, attempt_index=0): try: @@ -141,9 +155,13 @@ class DataPortalExchange(DataPortal): for exchange_name in exchange_assets: exchange = self.exchanges[exchange_name] assets = exchange_assets[exchange_name] - exchange_spot_values = exchange.get_spot_value( - assets, field, dt, data_frequency) - + exchange_spot_values = self.get_exchange_spot_value( + exchange, + assets, + field, + dt, + data_frequency + ) spot_values += exchange_spot_values return spot_values @@ -166,9 +184,77 @@ class DataPortalExchange(DataPortal): def get_spot_value(self, assets, field, dt, data_frequency): return self._get_spot_value(assets, field, dt, data_frequency) + @abc.abstractmethod + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + return + def get_adjusted_value(self, 
asset, field, dt, perspective_dt, data_frequency, spot_value=None): # TODO: does this pertain to cryptocurrencies? - raise NotImplementedError("get_adjusted_value is not implemented yet!") + log.warn('get_adjusted_value is not implemented yet!') + return spot_value + + +class DataPortalExchangeLive(DataPortalExchangeBase): + def __init__(self, exchanges, *args, **kwargs): + super(self.__class__, self).__init__(exchanges, *args, **kwargs) + + def get_exchange_spot_value_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + df = exchange.get_history_window( + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + return df + + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + exchange_spot_values = exchange.get_spot_value( + assets, field, dt, data_frequency) + + return exchange_spot_values + + +class DataPortalExchangeBacktest(DataPortalExchangeBase): + def __init__(self, exchanges, *args, **kwargs): + super(self.__class__, self).__init__(exchanges, *args, **kwargs) + + def get_exchange_spot_value_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + df = exchange.get_history_window( + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + return df + + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + exchange_spot_values = exchange.get_spot_value( + assets, field, dt, data_frequency) + + return exchange_spot_values From 555b7e95b55b0c8e2e2dbf5f4c56cede0f701d4a Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 18 Sep 2017 14:51:01 -0400 Subject: [PATCH 009/110] Working on adjusted the DataPortal class (unstable) --- catalyst/exchange/exchange_algorithm.py | 12 ++++-- catalyst/utils/run_algo.py | 4 +- tests/exchange/test_bitfinex.py | 2 +- tests/exchange/test_data_portal.py | 55 +++++++++++++++++++++++++ 4 
files changed, 66 insertions(+), 7 deletions(-) create mode 100644 tests/exchange/test_data_portal.py diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index 0f9e4e47..c09c17f2 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -96,11 +96,15 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): if self._symbol_lookup_date is not None \ else self.sim_params.end_session - exchange = self.exchanges[exchange_name] + if exchange_name is None: + exchange = self.exchanges.values()[0] + else: + exchange = self.exchanges[exchange_name] + return self.asset_finder.lookup_symbol( - symbol_str, - as_of_date=_lookup_date, - exchange=exchange + symbol=symbol_str, + exchange=exchange, + as_of_date=_lookup_date ) diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 46b87845..89e3e9e4 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -38,7 +38,7 @@ from catalyst.utils.factory import create_simulation_parameters import catalyst.utils.paths as pth from catalyst.exchange.algorithm_exchange import ExchangeTradingAlgorithm -from catalyst.exchange.data_portal_exchange import DataPortalExchange +from catalyst.exchange.data_portal_exchange import DataPortalExchangeLive from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -209,7 +209,7 @@ def _run(handle_data, ) env.asset_finder = AssetFinderExchange() - data = DataPortalExchange( + data = DataPortalExchangeLive( exchanges=exchanges, asset_finder=env.asset_finder, trading_calendar=open_calendar, diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index 6b9990c3..10b21bc7 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -14,7 +14,7 @@ log = Logger('test_bitfinex') class 
BitfinexTestCase(BaseExchangeTestCase): @classmethod def setup(self): - print ('creating bitfinex object') + log.info('creating bitfinex object') auth = get_exchange_auth('bitfinex') self.exchange = Bitfinex( key=auth['key'], diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py new file mode 100644 index 00000000..21890a44 --- /dev/null +++ b/tests/exchange/test_data_portal.py @@ -0,0 +1,55 @@ +import pandas as pd +from catalyst import get_calendar +from logbook import Logger + +from catalyst.exchange.asset_finder_exchange import AssetFinderExchange +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.data_portal_exchange import DataPortalExchangeBacktest, \ + DataPortalExchangeLive +from catalyst.exchange.exchange_utils import get_exchange_auth + +log = Logger('test_bitfinex') + + +class ExchangeDataPortalTestCase: + @classmethod + def setup(self): + log.info('creating bitfinex exchange') + auth_bitfinex = get_exchange_auth('bitfinex') + bitfinex = Bitfinex( + key=auth_bitfinex['key'], + secret=auth_bitfinex['secret'], + base_currency='usd' + ) + + log.info('creating bittrex exchange') + auth_bitfinex = get_exchange_auth('bittrex') + bittrex = Bittrex( + key=auth_bitfinex['key'], + secret=auth_bitfinex['secret'], + base_currency='usd' + ) + + open_calendar = get_calendar('OPEN') + asset_finder = AssetFinderExchange() + self.data_portal_live = DataPortalExchangeLive( + exchanges=dict(bitfinex=bitfinex, bittrex=bittrex), + asset_finder=asset_finder, + trading_calendar=open_calendar, + first_trading_day=pd.to_datetime('today', utc=True) + ) + + def test_history_window_live(self): + pass + + def test_spot_value_live(self): + asset_finder = self.data_portal_live.asset_finder + + now = pd.Timestamp.utcnow() + assets = [ + asset_finder.lookup_symbol('eth_usd',now,) + ] + value = self.data_portal_live.get_spot_value( + assets, field, dt, data_frequency) + 
pass From 18bfaff7c9a1106c7f5c63d04613a997e7723299 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 18 Sep 2017 15:37:10 -0400 Subject: [PATCH 010/110] Trying to stabilize refactoring an last few commits (still unstable) --- catalyst/exchange/data_portal_exchange.py | 10 +++++----- catalyst/exchange/exchange_algorithm.py | 5 ++--- catalyst/utils/run_algo.py | 2 +- tests/exchange/__init__.py | 0 tests/exchange/test_data_portal.py | 22 ++++++++++++---------- 5 files changed, 20 insertions(+), 19 deletions(-) delete mode 100644 tests/exchange/__init__.py diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 10df5389..3df15e98 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -27,15 +27,15 @@ log = Logger('DataPortalExchange') class DataPortalExchangeBase(DataPortal): - def __init__(self, exchanges, *args, **kwargs): - self.exchanges = exchanges + def __init__(self, *args, **kwargs): + self.exchanges = kwargs.pop('exchanges', None) # TODO: put somewhere accessible by each algo self.retry_get_history_window = 5 self.retry_get_spot_value = 5 self.retry_delay = 5 - super(self.__class__, self).__init__(*args, **kwargs) + super(DataPortalExchangeBase, self).__init__(*args, **kwargs) def _get_history_window(self, assets, @@ -199,8 +199,8 @@ class DataPortalExchangeBase(DataPortal): class DataPortalExchangeLive(DataPortalExchangeBase): - def __init__(self, exchanges, *args, **kwargs): - super(self.__class__, self).__init__(exchanges, *args, **kwargs) + def __init__(self, *args, **kwargs): + super(DataPortalExchangeLive, self).__init__(*args, **kwargs) def get_exchange_spot_value_history_window(self, exchange, diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index c09c17f2..030a3341 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -59,8 +59,7 @@ class 
ExchangeAlgorithmExecutor(AlgorithmSimulator): class ExchangeTradingAlgorithmBase(TradingAlgorithm): def __init__(self, *args, **kwargs): - self.exchanges = kwargs.pop('exchanges', None) - super(self.__class__, self).__init__(*args, **kwargs) + super(ExchangeTradingAlgorithmBase, self).__init__(*args, **kwargs) @api_method @preprocess(symbol_str=ensure_upper_case) @@ -135,7 +134,7 @@ class ExchangeTradingAlgorithm(ExchangeTradingAlgorithmBase): self.stats_minutes = 5 - super(self.__class__, self).__init__(*args, **kwargs) + super(ExchangeTradingAlgorithm, self).__init__(*args, **kwargs) # TODO: fix precision before re-enabling # self._create_minute_writer() diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 89e3e9e4..6877bc54 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -37,7 +37,7 @@ from catalyst.utils.calendars import get_calendar from catalyst.utils.factory import create_simulation_parameters import catalyst.utils.paths as pth -from catalyst.exchange.algorithm_exchange import ExchangeTradingAlgorithm +from catalyst.exchange.exchange_algorithm import ExchangeTradingAlgorithm from catalyst.exchange.data_portal_exchange import DataPortalExchangeLive from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.asset_finder_exchange import AssetFinderExchange diff --git a/tests/exchange/__init__.py b/tests/exchange/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 21890a44..d98b2ac7 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,5 +1,5 @@ import pandas as pd -from catalyst import get_calendar +# from catalyst import get_calendar from logbook import Logger from catalyst.exchange.asset_finder_exchange import AssetFinderExchange @@ -17,7 +17,7 @@ class ExchangeDataPortalTestCase: def setup(self): log.info('creating bitfinex exchange') auth_bitfinex 
= get_exchange_auth('bitfinex') - bitfinex = Bitfinex( + self.bitfinex = Bitfinex( key=auth_bitfinex['key'], secret=auth_bitfinex['secret'], base_currency='usd' @@ -25,31 +25,33 @@ class ExchangeDataPortalTestCase: log.info('creating bittrex exchange') auth_bitfinex = get_exchange_auth('bittrex') - bittrex = Bittrex( + self.bittrex = Bittrex( key=auth_bitfinex['key'], secret=auth_bitfinex['secret'], base_currency='usd' ) - open_calendar = get_calendar('OPEN') + # open_calendar = get_calendar('OPEN') + open_calendar = None asset_finder = AssetFinderExchange() self.data_portal_live = DataPortalExchangeLive( - exchanges=dict(bitfinex=bitfinex, bittrex=bittrex), + exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), asset_finder=asset_finder, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) - def test_history_window_live(self): + def test_get_history_window_live(self): pass - def test_spot_value_live(self): + def test_get_spot_value_live(self): asset_finder = self.data_portal_live.asset_finder - now = pd.Timestamp.utcnow() assets = [ - asset_finder.lookup_symbol('eth_usd',now,) + asset_finder.lookup_symbol('eth_usd', self.bitfinex), + asset_finder.lookup_symbol('eth_usd', self.bittrex) ] + now = pd.Timestamp.utcnow() value = self.data_portal_live.get_spot_value( - assets, field, dt, data_frequency) + assets, 'price', now, '1m') pass From b70ff3a74058fa6a724571a728966a1b75146200 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 18 Sep 2017 22:19:27 -0400 Subject: [PATCH 011/110] Bug fixes and working on unit tests for the data portal --- catalyst/exchange/data_portal_exchange.py | 60 +++++++++++------------ catalyst/exchange/exchange.py | 4 +- tests/exchange/base.py | 2 +- tests/exchange/test_bitfinex.py | 3 +- tests/exchange/test_bittrex.py | 2 +- tests/exchange/test_data_portal.py | 22 +++++++-- 6 files changed, 52 insertions(+), 41 deletions(-) diff --git a/catalyst/exchange/data_portal_exchange.py 
b/catalyst/exchange/data_portal_exchange.py index 3df15e98..10a85ba8 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -60,7 +60,7 @@ class DataPortalExchangeBase(DataPortal): exchange = self.exchanges[exchange_name] assets = exchange_assets[exchange_name] - df = self.get_exchange_spot_value_history_window( + df_exchange = self.get_exchange_history_window( exchange, assets, end_dt, @@ -70,7 +70,7 @@ class DataPortalExchangeBase(DataPortal): data_frequency, ffill) - df_list.append(df) + df_list.append(df_exchange) # Merging the values values of each exchange return pd.concat(df_list) @@ -113,7 +113,7 @@ class DataPortalExchangeBase(DataPortal): bar_count, frequency, field, - data_frequency, + data_frequency=None, ffill=True): return self._get_history_window(assets, end_dt, @@ -124,15 +124,15 @@ class DataPortalExchangeBase(DataPortal): ffill) @abc.abstractmethod - def get_exchange_spot_value_history_window(self, - exchange, - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill=True): + def get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): pass def _get_spot_value(self, assets, field, dt, data_frequency, @@ -202,15 +202,15 @@ class DataPortalExchangeLive(DataPortalExchangeBase): def __init__(self, *args, **kwargs): super(DataPortalExchangeLive, self).__init__(*args, **kwargs) - def get_exchange_spot_value_history_window(self, - exchange, - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill=True): + def get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): df = exchange.get_history_window( assets, end_dt, @@ -233,15 +233,15 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): def __init__(self, exchanges, *args, **kwargs): super(self.__class__, self).__init__(exchanges, *args, **kwargs) - 
def get_exchange_spot_value_history_window(self, - exchange, - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill=True): + def get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): df = exchange.get_history_window( assets, end_dt, diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 56bf786f..cf22b80a 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -346,7 +346,7 @@ class Exchange: bar_count, frequency, field, - data_frequency, + data_frequency=None, ffill=True): """ @@ -399,7 +399,7 @@ class Exchange: value_series = pd.Series(values, index=dates) series[asset] = value_series - df = pd.concat(series) + df = pd.DataFrame(series) return df def synchronize_portfolio(self): diff --git a/tests/exchange/base.py b/tests/exchange/base.py index 73c43017..b98c3ec1 100644 --- a/tests/exchange/base.py +++ b/tests/exchange/base.py @@ -2,7 +2,7 @@ import unittest from abc import ABCMeta, abstractmethod -class BaseExchangeTestCase(): +class BaseExchangeTestCase: __metaclass__ = ABCMeta @abstractmethod diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index 10b21bc7..7aaddc14 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -1,7 +1,6 @@ from catalyst.exchange.bitfinex.bitfinex import Bitfinex -from .base import BaseExchangeTestCase +from base import BaseExchangeTestCase from logbook import Logger -import pandas as pd from catalyst.finance.execution import (MarketOrder, LimitOrder, StopOrder, diff --git a/tests/exchange/test_bittrex.py b/tests/exchange/test_bittrex.py index 825af970..5d90d660 100644 --- a/tests/exchange/test_bittrex.py +++ b/tests/exchange/test_bittrex.py @@ -1,6 +1,6 @@ from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.finance.order import Order -from .base import BaseExchangeTestCase +from base import 
BaseExchangeTestCase from logbook import Logger from catalyst.exchange.exchange_utils import get_exchange_auth diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index d98b2ac7..b569799d 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,5 +1,5 @@ import pandas as pd -# from catalyst import get_calendar +from catalyst import get_calendar from logbook import Logger from catalyst.exchange.asset_finder_exchange import AssetFinderExchange @@ -31,8 +31,7 @@ class ExchangeDataPortalTestCase: base_currency='usd' ) - # open_calendar = get_calendar('OPEN') - open_calendar = None + open_calendar = get_calendar('OPEN') asset_finder = AssetFinderExchange() self.data_portal_live = DataPortalExchangeLive( exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), @@ -42,14 +41,27 @@ class ExchangeDataPortalTestCase: ) def test_get_history_window_live(self): + asset_finder = self.data_portal_live.asset_finder + + assets = [ + asset_finder.lookup_symbol('eth_btc', self.bitfinex), + asset_finder.lookup_symbol('eth_btc', self.bittrex) + ] + now = pd.Timestamp.utcnow() + data = self.data_portal_live.get_history_window( + assets, + now, + 10, + '1m', + 'price') pass def test_get_spot_value_live(self): asset_finder = self.data_portal_live.asset_finder assets = [ - asset_finder.lookup_symbol('eth_usd', self.bitfinex), - asset_finder.lookup_symbol('eth_usd', self.bittrex) + asset_finder.lookup_symbol('eth_btc', self.bitfinex), + asset_finder.lookup_symbol('eth_btc', self.bittrex) ] now = pd.Timestamp.utcnow() value = self.data_portal_live.get_spot_value( From 68546a0d8d605d35fb11f9a21c6377ea611e65cc Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 19 Sep 2017 03:49:34 -0400 Subject: [PATCH 012/110] Experimenting with simpler bundle and data portal approach (works in unit testing) --- catalyst/exchange/bitfinex/bitfinex.py | 13 +- catalyst/exchange/data_portal_exchange.py | 37 ++++- 
catalyst/exchange/exchange.py | 12 +- catalyst/exchange/exchange_bundle.py | 184 ++++++++++++++++++++++ tests/exchange/test_bundle.py | 60 +++++++ tests/exchange/test_data_portal.py | 35 +++- 6 files changed, 328 insertions(+), 13 deletions(-) create mode 100644 catalyst/exchange/exchange_bundle.py create mode 100644 tests/exchange/test_bundle.py diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index a72b6167..0397c2d8 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -4,6 +4,7 @@ import hmac import json import re import time +import datetime import numpy as np import pandas as pd @@ -47,6 +48,7 @@ class Bitfinex(Exchange): self._portfolio = portfolio self.minute_writer = None self.minute_reader = None + self.num_candles_limit = 100 def _request(self, operation, data, version='v1'): payload_object = { @@ -224,8 +226,7 @@ class Bitfinex(Exchange): # TODO: fetch account data and keep in cache return None - def get_candles(self, data_frequency, assets, bar_count=None, - start_date=None): + def get_candles(self, data_frequency, assets, bar_count=None, end_dt=None): """ Retrieve OHLVC candles from Bitfinex @@ -281,6 +282,14 @@ class Bitfinex(Exchange): if bar_count: is_list = True url += '/hist?limit={}'.format(int(bar_count)) + + if end_dt is not None: + epoch = datetime.datetime.utcfromtimestamp(0) + epoch = epoch.replace(tzinfo=pytz.UTC) + + end_ms = (end_dt - epoch).total_seconds() * 1000.0 + url += '&end={0:f}'.format(end_ms) + else: is_list = False url += '/last' diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 10a85ba8..3c1dd01d 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -13,15 +13,20 @@ import abc from time import sleep + +import os import pandas as pd from catalyst.assets._assets import TradingPair from logbook import Logger from 
catalyst.data.data_portal import DataPortal +from catalyst.data.minute_bars import BcolzMinuteBarReader from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeBarDataError ) +from catalyst.data.bundles.core import load +from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root log = Logger('DataPortalExchange') @@ -230,8 +235,14 @@ class DataPortalExchangeLive(DataPortalExchangeBase): class DataPortalExchangeBacktest(DataPortalExchangeBase): - def __init__(self, exchanges, *args, **kwargs): - super(self.__class__, self).__init__(exchanges, *args, **kwargs) + def __init__(self, *args, **kwargs): + + super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) + + self.minute_readers = dict() + for exchange_name in self.exchanges: + root = get_exchange_minute_writer_root(exchange_name) + self.minute_readers[exchange_name] = BcolzMinuteBarReader(root) def get_exchange_history_window(self, exchange, @@ -254,7 +265,23 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): def get_exchange_spot_value(self, exchange, assets, field, dt, data_frequency): - exchange_spot_values = exchange.get_spot_value( - assets, field, dt, data_frequency) - return exchange_spot_values + if data_frequency == 'minute': + reader = self.minute_readers[exchange.name] + else: + raise ValueError('Unsupported frequency') + + values = [] + for asset in assets: + try: + value = reader.get_value( + sid=asset.sid, + dt=dt, + field=field + ) + values.append(value) + except Exception as e: + log.warn('minute data not found: {}'.format(e)) + values.append(None) + + return values diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index cf22b80a..cf26c9ff 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -35,6 +35,7 @@ class Exchange: self.minute_writer = None self.minute_reader = None self.base_currency = None + self.num_candles_limit = 100 @property def positions(self): @@ -96,6 +97,15 @@ class 
Exchange: return symbols + def get_assets(self, symbols): + assets = [] + + for symbol in symbols: + asset = self.get_asset(symbol) + assets.append(asset) + + return assets + def get_asset(self, symbol): """ Find an Asset on the current exchange based on its Catalyst symbol @@ -323,7 +333,7 @@ class Exchange: ) try: - #TODO: use victor's modified branch using int64 + # TODO: use victor's modified branch using int64 self.minute_writer.write_sid( sid=asset.sid, df=df diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py new file mode 100644 index 00000000..1605f279 --- /dev/null +++ b/catalyst/exchange/exchange_bundle.py @@ -0,0 +1,184 @@ +from datetime import timedelta + +import pandas as pd +from logbook import Logger + +from catalyst.data.minute_bars import BcolzMinuteOverlappingData +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.exchange_errors import ExchangeNotFoundError +from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.utils.cli import maybe_show_progress + + +def _cachpath(symbol, type_): + return '-'.join([symbol, type_]) + + +log = Logger('exchange_bundle') + + +def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): + candles = exchange.get_candles( + data_frequency=data_frequency, + assets=assets, + bar_count=bar_count, + end_dt=end_dt + ) + + series = dict() + + for asset in assets: + asset_candles = candles[asset] + + asset_df = pd.DataFrame(asset_candles) + asset_df.set_index('last_traded', inplace=True, drop=True) + asset_df.sort_index(inplace=True) + + series[asset] = asset_df + + return series + + +def exchange_bundle(exchange_name, symbols, start=None, end=None): + """Create a data bundle ingest function for the specified exchange. + + Parameters + ---------- + exchange_name: str + The name of the exchange + symbols : iterable[str] + The ticker symbols to load data for. 
+ start : datetime, optional + The start date to query for. By default this pulls the full history + for the calendar. + end : datetime, optional + The end date to query for. By default this pulls the full history + for the calendar. + + Returns + ------- + ingest : callable + The bundle ingest function for the given set of symbols. + + Examples + -------- + This code should be added to ~/.catalyst/extension.py + + .. code-block:: python + + from catalyst.data.bundles import register + + symbols = ( + 'eth_btc', + 'etc_btc', + 'neo_btc', + ) + register('bitfinex_bundle', exchange_bundle('bitfinex', symbols)) + + Notes + ----- + The sids for each symbol will be the index into the symbols sequence. + """ + # strict this in memory so that we can reiterate over it + symbols = tuple(symbols) + + def ingest(environ, + asset_db_writer, + minute_bar_writer, # unused + daily_bar_writer, + adjustment_writer, + calendar, + start_session, + end_session, + cache, + show_progress, + output_dir, + # pass these as defaults to make them 'nonlocal' in py2 + start=start, + end=end): + + # TODO: I don't understand this session vs dates idea + if start is None: + start = start_session + if end is None: + end = None + + log.info('ingesting data from {} to {}'.format(start, end)) + + exchange_auth = get_exchange_auth(exchange_name) + if exchange_name == 'bitfinex': + exchange = Bitfinex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, # TODO: make optional at the exchange + portfolio=None + ) + elif exchange_name == 'bittrex': + exchange = Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, + portfolio=None + ) + else: + raise ExchangeNotFoundError(exchange_name=exchange_name) + + assets = exchange.get_assets(symbols) + + delta = end - start + delta_minutes = delta.total_seconds() / 60 + if delta_minutes > exchange.num_candles_limit: + bar_count = exchange.num_candles_limit + + chunks = [] + last_chunk_date = 
end + while last_chunk_date > start + timedelta(minutes=bar_count): + # TODO: account for the partial last bar + chunk = dict(end=last_chunk_date, bar_count=bar_count) + chunks.append(chunk) + + last_chunk_date = \ + last_chunk_date - timedelta(minutes=(bar_count + 1)) + + chunks.reverse() + + else: + chunks = [dict(end=end, bar_count=delta_minutes)] + + with maybe_show_progress( + chunks, + show_progress, + label='Fetching {} candles: '.format(exchange_name)) as it: + + for chunk in it: + asset_df = fetch_candles_chunk( + exchange=exchange, + assets=assets, + data_frequency='1m', + end_dt=chunk['end'], + bar_count=chunk['bar_count'] + ) + + data = [] + for asset in asset_df: + df = asset_df[asset] + sid = asset.sid + data.append((sid, df)) + + try: + log.debug( + 'writing chunk: {sid} start: {start} end: {end}'.format( + sid=sid, + start=chunk['end'] - timedelta( + minutes=chunk['bar_count']), + end=chunk['end'] + ) + ) + minute_bar_writer.write(data, show_progress=show_progress) + except KeyError: + minute_bar_writer.write(data, show_progress=show_progress) + except BcolzMinuteOverlappingData as e: + log.warn('Unable to write chunk {}: {}'.format(chunk, e)) + + return ingest diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py new file mode 100644 index 00000000..784aecea --- /dev/null +++ b/tests/exchange/test_bundle.py @@ -0,0 +1,60 @@ +from datetime import timedelta + +import os +import pandas as pd +from logging import Logger + +from catalyst import get_calendar + +from catalyst.data.minute_bars import BcolzMinuteBarWriter +from catalyst.exchange.exchange_bundle import exchange_bundle +from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root + +log = Logger('test_exchange_bundle') + + +class ExchangeBundleTestCase: + def test_ingest(self): + exchange_name = 'bitfinex' + + start = pd.Timestamp.utcnow() - timedelta(days=2) + end = pd.Timestamp.utcnow() + open_calendar = get_calendar('OPEN') + + root = 
get_exchange_minute_writer_root(exchange_name) + filename = os.path.join(root, 'metadata.json') + + if os.path.isfile(filename): + minute_bar_writer = BcolzMinuteBarWriter.open(root, end) + else: + # TODO: need to be able to write more precise numbers + minute_bar_writer = BcolzMinuteBarWriter( + rootdir=root, + calendar=open_calendar, + minutes_per_day=1440, + start_session=start.floor('1d'), + end_session=end, + write_metadata=True + ) + + ingest = exchange_bundle( + exchange_name=exchange_name, + symbols=['btc_usd'] + ) + + ingest( + environ=os.environ, + asset_db_writer=None, # TODO: nice to have + minute_bar_writer=minute_bar_writer, + daily_bar_writer=None, # TODO: add later + adjustment_writer=None, # Not applicable to crypto + calendar=open_calendar, + start_session=start, + end_session=end, + cache=dict(), + show_progress=True, + output_dir=exchange_name, # TODO: not sure + start=start, + end=end + ) + pass diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index b569799d..08980162 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,7 +1,10 @@ +from datetime import timedelta + import pandas as pd from catalyst import get_calendar from logbook import Logger +from catalyst.data.minute_bars import BcolzMinuteBarReader from catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex @@ -33,12 +36,19 @@ class ExchangeDataPortalTestCase: open_calendar = get_calendar('OPEN') asset_finder = AssetFinderExchange() + self.data_portal_live = DataPortalExchangeLive( exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), asset_finder=asset_finder, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) + self.data_portal_backtest = DataPortalExchangeBacktest( + exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), + 
asset_finder=asset_finder, + trading_calendar=open_calendar, + first_trading_day=pd.to_datetime('today', utc=True) + ) def test_get_history_window_live(self): asset_finder = self.data_portal_live.asset_finder @@ -49,11 +59,11 @@ class ExchangeDataPortalTestCase: ] now = pd.Timestamp.utcnow() data = self.data_portal_live.get_history_window( - assets, - now, - 10, - '1m', - 'price') + assets, + now, + 10, + '1m', + 'price') pass def test_get_spot_value_live(self): @@ -67,3 +77,18 @@ class ExchangeDataPortalTestCase: value = self.data_portal_live.get_spot_value( assets, 'price', now, '1m') pass + + def test_get_spot_value_backtest(self): + asset_finder = self.data_portal_backtest.asset_finder + + assets = [ + asset_finder.lookup_symbol('btc_usd', self.bitfinex), + ] + + date = pd.Timestamp.utcnow() - timedelta(hours=2) + value = self.data_portal_backtest.get_spot_value( + assets, 'close', date, 'minute') + pass + + def test_get_history_window_backtest(self): + pass From 3b655d466ec20e4eff94ba6b489bf620a72631ac Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 20 Sep 2017 05:11:54 -0400 Subject: [PATCH 013/110] Unit tested exchange loader extension and backtest data portal refactoring --- catalyst/exchange/bitfinex/bitfinex.py | 14 +- catalyst/exchange/bitfinex/symbols.json | 10 +- catalyst/exchange/bittrex/bittrex.py | 15 +- catalyst/exchange/data_portal_exchange.py | 113 +++++++--- catalyst/exchange/exchange.py | 53 ++++- catalyst/exchange/exchange_bundle.py | 238 ++++++++++++++++------ catalyst/exchange/exchange_errors.py | 7 + tests/exchange/test_bundle.py | 61 +++--- tests/exchange/test_data_portal.py | 11 +- 9 files changed, 393 insertions(+), 129 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 0397c2d8..cc34d436 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -48,7 +48,12 @@ class Bitfinex(Exchange): self._portfolio = portfolio 
self.minute_writer = None self.minute_reader = None - self.num_candles_limit = 100 + self.num_candles_limit = 1000 + + # Max is 90 but playing it safe + # https://www.bitfinex.com/posts/188 + self.max_requests_per_minute = 20 + self.request_cpt = dict() def _request(self, operation, data, version='v1'): payload_object = { @@ -176,6 +181,7 @@ class Bitfinex(Exchange): def get_balances(self): log.debug('retrieving wallets balances') try: + self.ask_request() response = self._request('balances', None) balances = response.json() except Exception as e: @@ -295,6 +301,7 @@ class Bitfinex(Exchange): url += '/last' try: + self.ask_request() response = requests.get(url) except Exception as e: raise ExchangeRequestError(error=e) @@ -377,6 +384,7 @@ class Bitfinex(Exchange): date = pd.Timestamp.utcnow() try: + self.ask_request() response = self._request('order/new', req) order_status = response.json() except Exception as e: @@ -418,6 +426,7 @@ class Bitfinex(Exchange): orders for this asset. """ try: + self.ask_request() response = self._request('orders', None) order_statuses = response.json() except Exception as e: @@ -452,6 +461,7 @@ class Bitfinex(Exchange): The order object. 
""" try: + self.ask_request() response = self._request( 'order/status', {'order_id': int(order_id)}) order_status = response.json() @@ -477,6 +487,7 @@ class Bitfinex(Exchange): if isinstance(order_param, Order) else order_param try: + self.ask_request() response = self._request('order/cancel', {'order_id': order_id}) status = response.json() except Exception as e: @@ -501,6 +512,7 @@ class Bitfinex(Exchange): log.debug('fetching tickers {}'.format(symbols)) try: + self.ask_request() response = requests.get( '{url}/v2/tickers?symbols={symbols}'.format( url=self.url, diff --git a/catalyst/exchange/bitfinex/symbols.json b/catalyst/exchange/bitfinex/symbols.json index bd6951d1..ab0f38f9 100644 --- a/catalyst/exchange/bitfinex/symbols.json +++ b/catalyst/exchange/bitfinex/symbols.json @@ -2,7 +2,7 @@ "neobtc": { "symbol": "neo_btc", "start_date": "2017-09-07", - "precision":5 + "precision": 5 }, "neousd": { "symbol": "neo_usd", @@ -30,19 +30,19 @@ }, "ethusd": { "symbol": "eth_usd", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "ethbtc": { "symbol": "eth_btc", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "etcbtc": { "symbol": "etc_btc", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "etcusd": { "symbol": "etc_usd", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "rrtusd": { "symbol": "rrt_usd", diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 3593e89c..86f0db88 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -26,6 +26,11 @@ class Bittrex(Exchange): self.base_currency = base_currency self._portfolio = portfolio + # Not sure what the rate limit is but trying to play it safe + # https://bitcoin.stackexchange.com/questions/53778/bittrex-api-rate-limit + self.max_requests_per_minute = 60 + self.request_cpt = dict() + self.minute_writer = None self.minute_reader = None @@ -62,6 +67,7 @@ class Bittrex(Exchange): """ symbol_map 
= dict() + self.ask_request() markets = self.api.getmarkets() for market in markets: exchange_symbol = market['MarketName'] @@ -79,6 +85,7 @@ class Bittrex(Exchange): def get_balances(self): try: log.debug('retrieving wallet balances') + self.ask_request() balances = self.api.getbalances() except Exception as e: raise ExchangeRequestError(error=e) @@ -99,6 +106,7 @@ class Bittrex(Exchange): price = style.get_limit_price(is_buy) try: + self.ask_request() if is_buy: order_status = self.api.buylimit(exchange_symbol, amount, price) @@ -139,6 +147,7 @@ class Bittrex(Exchange): def get_open_orders(self, asset): symbol = self.get_symbol(asset) try: + self.ask_request() open_orders = self.api.getopenorders(symbol) except Exception as e: raise ExchangeRequestError(error=e) @@ -182,6 +191,7 @@ class Bittrex(Exchange): def get_order(self, order_id): log.info('retrieving order {}'.format(order_id)) try: + self.ask_request() order_status = self.api.getorder(order_id) except Exception as e: raise ExchangeRequestError(error=e) @@ -197,6 +207,7 @@ class Bittrex(Exchange): log.info('cancelling order {}'.format(order_id)) try: + self.ask_request() status = self.api.cancel(order_id) except Exception as e: raise ExchangeRequestError(error=e) @@ -208,7 +219,8 @@ class Bittrex(Exchange): error=status['message'] ) - def get_candles(self, data_frequency, assets, bar_count=None, start_date=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_date=None): """ Supported Intervals ------------------- @@ -299,6 +311,7 @@ class Bittrex(Exchange): for asset in assets: symbol = self.get_symbol(asset) try: + self.ask_request() ticker = self.api.getticker(symbol) except Exception as e: raise ExchangeRequestError(error=e) diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 3c1dd01d..c9dc5bd0 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -12,21 +12,25 @@ # 
limitations under the License. import abc +import os from time import sleep -import os +import collections import pandas as pd from catalyst.assets._assets import TradingPair from logbook import Logger +from catalyst.data.bundles.core import load, from_bundle_ingest_dirname, \ + BundleData, minute_path, five_minute_path, daily_path from catalyst.data.data_portal import DataPortal +from catalyst.data.five_minute_bars import BcolzFiveMinuteBarReader from catalyst.data.minute_bars import BcolzMinuteBarReader +from catalyst.data.us_equity_pricing import BcolzDailyBarReader from catalyst.exchange.exchange_errors import ( ExchangeRequestError, - ExchangeBarDataError -) -from catalyst.data.bundles.core import load -from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root + ExchangeBarDataError, + BundleNotFoundError) +from catalyst.utils.paths import data_path log = Logger('DataPortalExchange') @@ -145,8 +149,8 @@ class DataPortalExchangeBase(DataPortal): try: if isinstance(assets, TradingPair): exchange = self.exchanges[assets.exchange] - return exchange.get_spot_value( - assets, field, dt, data_frequency) + return self.get_exchange_spot_value( + exchange, assets, field, dt, data_frequency) else: exchange_assets = dict() @@ -156,20 +160,29 @@ class DataPortalExchangeBase(DataPortal): exchange_assets[asset.exchange].append(asset) - spot_values = [] - for exchange_name in exchange_assets: - exchange = self.exchanges[exchange_name] - assets = exchange_assets[exchange_name] - exchange_spot_values = self.get_exchange_spot_value( - exchange, - assets, - field, - dt, - data_frequency - ) - spot_values += exchange_spot_values + if len(exchange_assets.keys()) == 1: + exchange = self.exchanges[exchange_assets.keys()[0]] + return self.get_exchange_spot_value( + exchange, assets, field, dt, data_frequency) - return spot_values + else: + spot_values = [] + for exchange_name in exchange_assets: + exchange = self.exchanges[exchange_name] + assets = 
exchange_assets[exchange_name] + exchange_spot_values = self.get_exchange_spot_value( + exchange, + assets, + field, + dt, + data_frequency + ) + if len(assets) == 1: + spot_values.append(exchange_spot_values) + else: + spot_values += exchange_spot_values + + return spot_values except ExchangeRequestError as e: log.warn( @@ -239,10 +252,53 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) - self.minute_readers = dict() + self.daily_bar_readers = dict() + self.minute_bar_readers = dict() + self.five_minute_bar_readers = dict() for exchange_name in self.exchanges: - root = get_exchange_minute_writer_root(exchange_name) - self.minute_readers[exchange_name] = BcolzMinuteBarReader(root) + name = 'exchange_{}'.format(exchange_name) + time_folder = \ + DataPortalExchangeBacktest.find_most_recent_time(name) + + if time_folder is None: + raise BundleNotFoundError(exchange=exchange_name) + + self.daily_bar_readers[exchange_name] = \ + BcolzDailyBarReader( + daily_path(name, time_folder), + ) + + self.five_minute_bar_readers[exchange_name] = \ + BcolzFiveMinuteBarReader( + five_minute_path(name, time_folder), + ) + + self.minute_bar_readers[exchange_name] = \ + BcolzMinuteBarReader( + minute_path(name, time_folder), + ) + + @staticmethod + def find_most_recent_time(bundle_name): + try: + bundle_folders = os.listdir( + data_path([bundle_name]), + ) + except OSError: + return None + + most_recent_bundle = dict() + for folder in bundle_folders: + date = from_bundle_ingest_dirname(folder) + if not most_recent_bundle or date > \ + most_recent_bundle[most_recent_bundle.keys()[0]]: + most_recent_bundle = dict() + most_recent_bundle[folder] = date + + if most_recent_bundle: + return most_recent_bundle.keys()[0] + else: + return None def get_exchange_history_window(self, exchange, @@ -267,7 +323,11 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): data_frequency): if data_frequency == 'minute': - 
reader = self.minute_readers[exchange.name] + reader = self.minute_bar_readers[exchange.name] + elif data_frequency == '5-minute': + reader = self.five_minute_bar_readers[exchange.name] + elif data_frequency == 'daily': + reader = self.daily_bar_readers[exchange.name] else: raise ValueError('Unsupported frequency') @@ -284,4 +344,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): log.warn('minute data not found: {}'.format(e)) values.append(None) - return values + if len(assets) == 1: + return values[0] + else: + return values diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index cf26c9ff..361413a5 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -2,6 +2,7 @@ import abc import collections import random from abc import ABCMeta, abstractmethod, abstractproperty +from datetime import timedelta from time import sleep import numpy as np @@ -10,9 +11,6 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS -from catalyst.errors import ( - SymbolNotFound, -) from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ @@ -35,7 +33,10 @@ class Exchange: self.minute_writer = None self.minute_reader = None self.base_currency = None - self.num_candles_limit = 100 + + self.num_candles_limit = None + self.max_requests_per_minute = None + self.request_cpt = None @property def positions(self): @@ -64,6 +65,50 @@ class Exchange: def time_skew(self): pass + def ask_request(self): + """ + Asks permission to issue a request to the exchange. + The primary purpose is to avoid hitting rate limits. + + The application will pause if the maximum requests per minute + permitted by the exchange is exceeded. 
+ + :return boolean: + + """ + now = pd.Timestamp.utcnow() + if not self.request_cpt: + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + cpt_date = self.request_cpt.keys()[0] + cpt = self.request_cpt[cpt_date] + + if now > cpt_date + timedelta(minutes=1): + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + if cpt >= self.max_requests_per_minute: + delta = now - cpt_date + + sleep_period = 60 - delta.total_seconds() + + # log.debug( + # 'max requests {} reached, sleeping for {} seconds'.format( + # self.max_requests_per_minute, + # sleep_period + # )) + sleep(sleep_period) + + now = pd.Timestamp.utcnow() + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + else: + self.request_cpt[cpt_date] += 1 + def get_symbol(self, asset): """ Get the exchange specific symbol of the given asset. diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 1605f279..2b4c6435 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,9 +1,17 @@ from datetime import timedelta -import pandas as pd -from logbook import Logger +from time import sleep -from catalyst.data.minute_bars import BcolzMinuteOverlappingData +import os +import pandas as pd +from catalyst.data.bundles.base_pricing import BaseCryptoPricingBundle + +from catalyst import get_calendar +from logbook import Logger, INFO + +from catalyst.data.five_minute_bars import BcolzFiveMinuteOverlappingData +from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ + BcolzMinuteBarReader from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.exchange_errors import ExchangeNotFoundError @@ -32,15 +40,111 @@ def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): asset_candles = candles[asset] asset_df = pd.DataFrame(asset_candles) - asset_df.set_index('last_traded', inplace=True, 
drop=True) - asset_df.sort_index(inplace=True) + if not asset_df.empty: + asset_df.set_index('last_traded', inplace=True, drop=True) + asset_df.sort_index(inplace=True) - series[asset] = asset_df + series[asset] = asset_df return series -def exchange_bundle(exchange_name, symbols, start=None, end=None): +def process_bar_data(exchange, assets, writer, data_frequency, + show_progress, start, end): + open_calendar = get_calendar('OPEN') + + writer.calendar = open_calendar + writer.minutes_per_day = 1440 + writer.write_metadata = True + + delta = end - start + if data_frequency == 'minute': + delta_periods = delta.total_seconds() / 60 + frequency = '1m' + + elif data_frequency == '5-minute': + delta_periods = delta.total_seconds() / 60 / 5 + frequency = '5m' + + elif data_frequency == 'daily': + delta_periods = delta.total_seconds() / 60 / 60 / 24 + frequency = '1d' + + else: + raise ValueError('frequency not supported') + + if delta_periods > exchange.num_candles_limit: + bar_count = exchange.num_candles_limit + + chunks = [] + last_chunk_date = end + while last_chunk_date > start + timedelta(minutes=bar_count): + # TODO: account for the partial last bar + chunk = dict(end=last_chunk_date, bar_count=bar_count) + chunks.append(chunk) + + last_chunk_date = \ + last_chunk_date - timedelta(minutes=(bar_count + 1)) + + chunks.reverse() + + else: + chunks = [dict(end=end, bar_count=delta_periods)] + + with maybe_show_progress( + chunks, + show_progress, + label='Fetching {exchange} {frequency} candles: '.format( + exchange=exchange.name, + frequency=data_frequency + )) as it: + + for chunk in it: + assets_candles_dict = fetch_candles_chunk( + exchange=exchange, + assets=assets, + data_frequency=frequency, + end_dt=chunk['end'], + bar_count=chunk['bar_count'] + ) + log.debug('requests counter {}'.format(exchange.request_cpt)) + + if not assets_candles_dict.keys(): + log.debug( + 'no data: {symbols} on {exchange}, date {end}'.format( + symbols=assets, + 
exchange=exchange.name, + end=chunk['end'] + ) + ) + continue + + data = [] + for asset in assets_candles_dict: + df = assets_candles_dict[asset] + sid = asset.sid + data.append((sid, df)) + + try: + log.debug( + 'writing chunk {start} to {end}'.format( + start=chunk['end'] - timedelta( + minutes=chunk['bar_count']), + end=chunk['end'] + ) + ) + writer.write( + data=data, + show_progress=False, + invalid_data_behavior='raise' + ) + except (BcolzMinuteOverlappingData, + BcolzFiveMinuteOverlappingData) as e: + log.warn('chunk already exists {}: {}'.format(chunk, e)) + + +def exchange_bundle(exchange_name, symbols=None, start=None, end=None, + log_level=INFO): """Create a data bundle ingest function for the specified exchange. Parameters @@ -68,24 +172,27 @@ def exchange_bundle(exchange_name, symbols, start=None, end=None): .. code-block:: python from catalyst.data.bundles import register + from catalyst.exchange.exchange_bundle import exchange_bundle symbols = ( + 'btc_usd', 'eth_btc', 'etc_btc', 'neo_btc', ) - register('bitfinex_bundle', exchange_bundle('bitfinex', symbols)) + register('exchange_bitfinex', exchange_bundle('bitfinex', symbols)) Notes ----- The sids for each symbol will be the index into the symbols sequence. 
""" # strict this in memory so that we can reiterate over it - symbols = tuple(symbols) + log.level = log_level def ingest(environ, asset_db_writer, - minute_bar_writer, # unused + minute_bar_writer, + five_minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, @@ -93,16 +200,23 @@ def exchange_bundle(exchange_name, symbols, start=None, end=None): end_session, cache, show_progress, + is_compile, output_dir, - # pass these as defaults to make them 'nonlocal' in py2 start=start, end=end): + log.info('ingesting bundle {}'.format(output_dir)) + # TODO: I don't understand this session vs dates idea if start is None: start = start_session if end is None: - end = None + end = end_session + + now = pd.Timestamp.utcnow() + if end > now: + log.info('adjusting the end date to now {}'.format(now)) + end = now log.info('ingesting data from {} to {}'.format(start, end)) @@ -124,61 +238,59 @@ def exchange_bundle(exchange_name, symbols, start=None, end=None): else: raise ExchangeNotFoundError(exchange_name=exchange_name) - assets = exchange.get_assets(symbols) - - delta = end - start - delta_minutes = delta.total_seconds() / 60 - if delta_minutes > exchange.num_candles_limit: - bar_count = exchange.num_candles_limit - - chunks = [] - last_chunk_date = end - while last_chunk_date > start + timedelta(minutes=bar_count): - # TODO: account for the partial last bar - chunk = dict(end=last_chunk_date, bar_count=bar_count) - chunks.append(chunk) - - last_chunk_date = \ - last_chunk_date - timedelta(minutes=(bar_count + 1)) - - chunks.reverse() - + if symbols is not None: + assets = exchange.get_assets(symbols) else: - chunks = [dict(end=end, bar_count=delta_minutes)] + assets = exchange.assets - with maybe_show_progress( - chunks, - show_progress, - label='Fetching {} candles: '.format(exchange_name)) as it: + earliest_trade = None + for asset in assets: + if earliest_trade is None or earliest_trade > asset.start_date: + earliest_trade = asset.start_date - for chunk in it: - 
asset_df = fetch_candles_chunk( - exchange=exchange, - assets=assets, - data_frequency='1m', - end_dt=chunk['end'], - bar_count=chunk['bar_count'] - ) + if earliest_trade > start: + log.info( + 'adjusting start date to earliest trade date found {}'.format( + earliest_trade + )) + start = earliest_trade - data = [] - for asset in asset_df: - df = asset_df[asset] - sid = asset.sid - data.append((sid, df)) + if start >= end: + raise ValueError('start date cannot be after end date') - try: - log.debug( - 'writing chunk: {sid} start: {start} end: {end}'.format( - sid=sid, - start=chunk['end'] - timedelta( - minutes=chunk['bar_count']), - end=chunk['end'] - ) - ) - minute_bar_writer.write(data, show_progress=show_progress) - except KeyError: - minute_bar_writer.write(data, show_progress=show_progress) - except BcolzMinuteOverlappingData as e: - log.warn('Unable to write chunk {}: {}'.format(chunk, e)) + if daily_bar_writer is not None: + process_bar_data( + exchange=exchange, + assets=assets, + writer=daily_bar_writer, + data_frequency='daily', + show_progress=show_progress, + start=start, + end=end + ) + + if five_minute_bar_writer is not None: + process_bar_data( + exchange=exchange, + assets=assets, + writer=five_minute_bar_writer, + data_frequency='5-minute', + show_progress=show_progress, + start=start, + end=end + ) + + if minute_bar_writer is not None: + process_bar_data( + exchange=exchange, + assets=assets, + writer=minute_bar_writer, + data_frequency='minute', + show_progress=show_progress, + start=start, + end=end + ) return ingest + + diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index a0a137f9..d823cd87 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -140,3 +140,10 @@ class SymbolNotFoundOnExchange(ZiplineError): """ msg = ('Symbol {symbol} not found on exchange {exchange}. 
' 'Choose from: {supported_symbols}').strip() + + +class BundleNotFoundError(ZiplineError): + msg = ('Unable to find bundle data for exchange {exchange}. ' + 'Please ingest data using the command ' + '`catalyst ingest -b exchange_{exchange}`. ' + 'See catalyst documentation for details.').strip() diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 784aecea..7cf236a2 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,14 +1,13 @@ -from datetime import timedelta - import os +from datetime import timedelta +from logging import Logger, DEBUG + import pandas as pd -from logging import Logger from catalyst import get_calendar - from catalyst.data.minute_bars import BcolzMinuteBarWriter from catalyst.exchange.exchange_bundle import exchange_bundle -from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root +from catalyst.utils.paths import ensure_directory, data_root log = Logger('test_exchange_bundle') @@ -17,44 +16,52 @@ class ExchangeBundleTestCase: def test_ingest(self): exchange_name = 'bitfinex' - start = pd.Timestamp.utcnow() - timedelta(days=2) + start = pd.Timestamp.utcnow() - timedelta(days=365) end = pd.Timestamp.utcnow() open_calendar = get_calendar('OPEN') - root = get_exchange_minute_writer_root(exchange_name) - filename = os.path.join(root, 'metadata.json') + root = data_root(os.environ) + output_dir = '{root}/exchange_{exchange}/test'.format( + root=root, + exchange=exchange_name + ) + ensure_directory(output_dir) + filename = os.path.join(output_dir, 'metadata.json') + + start_session = start.floor('1d') if os.path.isfile(filename): - minute_bar_writer = BcolzMinuteBarWriter.open(root, end) + minute_bar_writer = BcolzMinuteBarWriter.open(output_dir, end) else: # TODO: need to be able to write more precise numbers minute_bar_writer = BcolzMinuteBarWriter( - rootdir=root, + rootdir=output_dir, calendar=open_calendar, minutes_per_day=1440, - start_session=start.floor('1d'), + 
start_session=start_session, end_session=end, write_metadata=True ) ingest = exchange_bundle( exchange_name=exchange_name, - symbols=['btc_usd'] + symbols=['eth_btc'], + log_level=DEBUG ) - ingest( - environ=os.environ, - asset_db_writer=None, # TODO: nice to have - minute_bar_writer=minute_bar_writer, - daily_bar_writer=None, # TODO: add later - adjustment_writer=None, # Not applicable to crypto - calendar=open_calendar, - start_session=start, - end_session=end, - cache=dict(), - show_progress=True, - output_dir=exchange_name, # TODO: not sure - start=start, - end=end - ) + ingest(environ=os.environ, + asset_db_writer=None, + minute_bar_writer=minute_bar_writer, + five_minute_bar_writer=None, + daily_bar_writer=None, + adjustment_writer=None, + calendar=open_calendar, + start_session=start_session, + end_session=end, + cache=dict(), + show_progress=True, + is_compile=False, + output_dir=output_dir, + start=start, + end=end) pass diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 08980162..0ea2b8e9 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,5 +1,6 @@ from datetime import timedelta +import os import pandas as pd from catalyst import get_calendar from logbook import Logger @@ -10,7 +11,9 @@ from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.data_portal_exchange import DataPortalExchangeBacktest, \ DataPortalExchangeLive +from catalyst.exchange.exchange_bundle import exchange_bundle from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.utils.run_algo import load_extensions log = Logger('test_bitfinex') @@ -44,13 +47,14 @@ class ExchangeDataPortalTestCase: first_trading_day=pd.to_datetime('today', utc=True) ) self.data_portal_backtest = DataPortalExchangeBacktest( - exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), + exchanges=dict(bitfinex=self.bitfinex), 
asset_finder=asset_finder, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) def test_get_history_window_live(self): + asset_finder = self.data_portal_live.asset_finder assets = [ @@ -79,13 +83,14 @@ class ExchangeDataPortalTestCase: pass def test_get_spot_value_backtest(self): + asset_finder = self.data_portal_backtest.asset_finder assets = [ - asset_finder.lookup_symbol('btc_usd', self.bitfinex), + asset_finder.lookup_symbol('neo_btc', self.bitfinex), ] - date = pd.Timestamp.utcnow() - timedelta(hours=2) + date = pd.Timestamp.utcnow() - timedelta(hours=8) value = self.data_portal_backtest.get_spot_value( assets, 'close', date, 'minute') pass From 4e2d092123cf983e1a83b324196744b0f38ef184 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 20 Sep 2017 18:00:08 -0400 Subject: [PATCH 014/110] Trying to fix an issue with periodical bars --- catalyst/exchange/bitfinex/bitfinex.py | 20 +- catalyst/exchange/data_portal_exchange.py | 27 ++- catalyst/exchange/exchange_algorithm.py | 152 +++++++------- catalyst/exchange/exchange_bundle.py | 101 +++++---- catalyst/utils/run_algo.py | 236 ++++++++-------------- tests/exchange/test_data_portal.py | 4 +- 6 files changed, 263 insertions(+), 277 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index cc34d436..20c3f312 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -232,7 +232,8 @@ class Bitfinex(Exchange): # TODO: fetch account data and keep in cache return None - def get_candles(self, data_frequency, assets, bar_count=None, end_dt=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_dt=None, end_dt=None): """ Retrieve OHLVC candles from Bitfinex @@ -289,11 +290,18 @@ class Bitfinex(Exchange): is_list = True url += '/hist?limit={}'.format(int(bar_count)) - if end_dt is not None: + def get_ms(date): epoch = datetime.datetime.utcfromtimestamp(0) epoch = 
epoch.replace(tzinfo=pytz.UTC) - end_ms = (end_dt - epoch).total_seconds() * 1000.0 + return (date - epoch).total_seconds() * 1000.0 + + if start_dt is not None: + start_ms = get_ms(start_dt) + url += '&start={0:f}'.format(start_ms) + + if end_dt is not None: + end_ms = get_ms(end_dt) url += '&end={0:f}'.format(end_ms) else: @@ -315,6 +323,9 @@ class Bitfinex(Exchange): candles = response.json() def ohlc_from_candle(candle): + last_traded = pd.Timestamp.utcfromtimestamp( + candle[0] / 1000.0) + last_traded = last_traded.replace(tzinfo=pytz.UTC) ohlc = dict( open=np.float64(candle[1]), high=np.float64(candle[3]), @@ -322,8 +333,7 @@ class Bitfinex(Exchange): close=np.float64(candle[2]), volume=np.float64(candle[5]), price=np.float64(candle[2]), - last_traded=pd.Timestamp.utcfromtimestamp( - candle[0] / 1000.0) + last_traded=last_traded ) return ohlc diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index c9dc5bd0..3968370d 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -331,20 +331,29 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): else: raise ValueError('Unsupported frequency') - values = [] - for asset in assets: + if isinstance(assets, TradingPair): try: value = reader.get_value( - sid=asset.sid, + sid=assets.sid, dt=dt, field=field ) - values.append(value) + return value except Exception as e: log.warn('minute data not found: {}'.format(e)) - values.append(None) - - if len(assets) == 1: - return values[0] + return None else: - return values + values = [] + for asset in assets: + try: + value = reader.get_value( + sid=asset.sid, + dt=dt, + field=field + ) + values.append(value) + except Exception as e: + log.warn('minute data not found: {}'.format(e)) + values.append(None) + + return values diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index 030a3341..d20bed49 100644 --- 
a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -49,7 +49,7 @@ from catalyst.utils.input_validation import error_keywords, ensure_upper_case, \ expect_types from catalyst.utils.preprocess import preprocess -log = logbook.Logger("ExchangeTradingAlgorithm") +log = logbook.Logger('exchange_algorithm') class ExchangeAlgorithmExecutor(AlgorithmSimulator): @@ -59,6 +59,8 @@ class ExchangeAlgorithmExecutor(AlgorithmSimulator): class ExchangeTradingAlgorithmBase(TradingAlgorithm): def __init__(self, *args, **kwargs): + self.exchanges = kwargs.pop('exchanges', None) + super(ExchangeTradingAlgorithmBase, self).__init__(*args, **kwargs) @api_method @@ -106,10 +108,83 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): as_of_date=_lookup_date ) + def prepare_period_stats(self, start_dt, end_dt): + """ + Creates a dictionary representing the state of the tracker. -class ExchangeTradingAlgorithm(ExchangeTradingAlgorithmBase): + + I rewrote this in an attempt to better control the stats. + I don't want things to happen magically through complex logic + pertaining to backtesting. 
+ + """ + tracker = self.perf_tracker + period = tracker.todays_performance + + pos_stats = period.position_tracker.stats() + period_stats = calc_period_stats(pos_stats, period.ending_cash) + + stats = dict( + period_start=tracker.period_start, + period_end=tracker.period_end, + capital_base=tracker.capital_base, + progress=tracker.progress, + ending_value=period.ending_value, + ending_exposure=period.ending_exposure, + capital_used=period.cash_flow, + starting_value=period.starting_value, + starting_exposure=period.starting_exposure, + starting_cash=period.starting_cash, + ending_cash=period.ending_cash, + portfolio_value=period.ending_cash + period.ending_value, + pnl=period.pnl, + returns=period.returns, + period_open=period.period_open, + period_close=period.period_close, + gross_leverage=period_stats.gross_leverage, + net_leverage=period_stats.net_leverage, + short_exposure=pos_stats.short_exposure, + long_exposure=pos_stats.long_exposure, + short_value=pos_stats.short_value, + long_value=pos_stats.long_value, + longs_count=pos_stats.longs_count, + shorts_count=pos_stats.shorts_count, + ) + + # Merging cumulative risk + stats.update(tracker.cumulative_risk_metrics.to_dict()) + + # Merging latest recorded variables + stats.update(self.recorded_vars) + + stats['positions'] = period.position_tracker.get_positions_list() + + # we want the key to be absent, not just empty + # Only include transactions for given dt + stats['transactions'] = dict() + for date in period.processed_transactions: + if start_dt <= date < end_dt: + stats['transactions'][date] = \ + period.processed_transactions[date] + + stats['orders'] = dict() + for date in period.orders_by_modified: + if start_dt <= date < end_dt: + stats['orders'][date] = \ + period.orders_by_modified[date] + + return stats + + +class ExchangeTradingAlgorithmBacktest(ExchangeTradingAlgorithmBase): + def __init__(self, *args, **kwargs): + super(ExchangeTradingAlgorithmBacktest, self).__init__(*args, **kwargs) + + 
log.info('initialized trading algorithm in backtest mode') + + +class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): def __init__(self, *args, **kwargs): - self.exchanges = kwargs.pop('exchanges', None) self.algo_namespace = kwargs.pop('algo_namespace', None) self.live_graph = kwargs.pop('live_graph', None) @@ -134,13 +209,13 @@ class ExchangeTradingAlgorithm(ExchangeTradingAlgorithmBase): self.stats_minutes = 5 - super(ExchangeTradingAlgorithm, self).__init__(*args, **kwargs) + super(ExchangeTradingAlgorithmLive, self).__init__(*args, **kwargs) # TODO: fix precision before re-enabling # self._create_minute_writer() signal.signal(signal.SIGINT, self.signal_handler) - log.info('exchange trading algorithm successfully initialized') + log.info('initialized trading algorithm in live mode') def _create_minute_writer(self): root = get_exchange_minute_writer_root(self.exchange.name) @@ -360,73 +435,6 @@ class ExchangeTradingAlgorithm(ExchangeTradingAlgorithmBase): save_algo_df(self.algo_namespace, 'exposure_stats', self.exposure_stats) - def prepare_period_stats(self, start_dt, end_dt): - """ - Creates a dictionary representing the state of the tracker. - - - I rewrote this in an attempt to better control the stats. - I don't want things to happen magically through complex logic - pertaining to backtesting. 
- - """ - tracker = self.perf_tracker - period = tracker.todays_performance - - pos_stats = period.position_tracker.stats() - period_stats = calc_period_stats(pos_stats, period.ending_cash) - - stats = dict( - period_start=tracker.period_start, - period_end=tracker.period_end, - capital_base=tracker.capital_base, - progress=tracker.progress, - ending_value=period.ending_value, - ending_exposure=period.ending_exposure, - capital_used=period.cash_flow, - starting_value=period.starting_value, - starting_exposure=period.starting_exposure, - starting_cash=period.starting_cash, - ending_cash=period.ending_cash, - portfolio_value=period.ending_cash + period.ending_value, - pnl=period.pnl, - returns=period.returns, - period_open=period.period_open, - period_close=period.period_close, - gross_leverage=period_stats.gross_leverage, - net_leverage=period_stats.net_leverage, - short_exposure=pos_stats.short_exposure, - long_exposure=pos_stats.long_exposure, - short_value=pos_stats.short_value, - long_value=pos_stats.long_value, - longs_count=pos_stats.longs_count, - shorts_count=pos_stats.shorts_count, - ) - - # Merging cumulative risk - stats.update(tracker.cumulative_risk_metrics.to_dict()) - - # Merging latest recorded variables - stats.update(self.recorded_vars) - - stats['positions'] = period.position_tracker.get_positions_list() - - # we want the key to be absent, not just empty - # Only include transactions for given dt - stats['transactions'] = dict() - for date in period.processed_transactions: - if start_dt <= date < end_dt: - stats['transactions'][date] = \ - period.processed_transactions[date] - - stats['orders'] = dict() - for date in period.orders_by_modified: - if start_dt <= date < end_dt: - stats['orders'][date] = \ - period.orders_by_modified[date] - - return stats - def handle_data(self, data): if not self.is_running: return diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 2b4c6435..147c4d9d 100644 --- 
a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,17 +1,12 @@ from datetime import timedelta -from time import sleep - -import os import pandas as pd -from catalyst.data.bundles.base_pricing import BaseCryptoPricingBundle - -from catalyst import get_calendar +import numpy as np from logbook import Logger, INFO +from catalyst import get_calendar from catalyst.data.five_minute_bars import BcolzFiveMinuteOverlappingData -from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ - BcolzMinuteBarReader +from catalyst.data.minute_bars import BcolzMinuteOverlappingData from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.exchange_errors import ExchangeNotFoundError @@ -27,10 +22,12 @@ log = Logger('exchange_bundle') def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): + calc_start_dt = end_dt - timedelta(minutes=bar_count) candles = exchange.get_candles( data_frequency=data_frequency, assets=assets, bar_count=bar_count, + start_dt=calc_start_dt, end_dt=end_dt ) @@ -39,10 +36,34 @@ def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): for asset in assets: asset_candles = candles[asset] + candle_start_dt = None + candle_end_dt = None + for candle in asset_candles: + last_traded = candle['last_traded'] + + if candle_start_dt is None or candle_start_dt > last_traded: + candle_start_dt = last_traded + + if candle_end_dt is None or candle_end_dt < last_traded: + candle_end_dt = last_traded + + if candle_end_dt < end_dt: + asset_candles.append( + dict( + open=None, + high=None, + close=None, + low=None, + volume=None, + last_traded=end_dt + ) + ) + asset_df = pd.DataFrame(asset_candles) if not asset_df.empty: asset_df.set_index('last_traded', inplace=True, drop=True) asset_df.sort_index(inplace=True) + asset_df = asset_df.resample('1T').ffill() series[asset] = asset_df @@ -77,12 +98,13 @@ def 
process_bar_data(exchange, assets, writer, data_frequency, bar_count = exchange.num_candles_limit chunks = [] - last_chunk_date = end + last_chunk_date = end.floor('1 min') while last_chunk_date > start + timedelta(minutes=bar_count): # TODO: account for the partial last bar chunk = dict(end=last_chunk_date, bar_count=bar_count) chunks.append(chunk) + # TODO: base on frequency last_chunk_date = \ last_chunk_date - timedelta(minutes=(bar_count + 1)) @@ -119,20 +141,29 @@ def process_bar_data(exchange, assets, writer, data_frequency, ) continue + num_candles = 0 data = [] for asset in assets_candles_dict: df = assets_candles_dict[asset] sid = asset.sid + + num_candles += len(df.values) data.append((sid, df)) try: - log.debug( - 'writing chunk {start} to {end}'.format( - start=chunk['end'] - timedelta( - minutes=chunk['bar_count']), + log.info( + 'writing {num_candles} candles from {start} to {end}'.format( + num_candles=num_candles, + start=chunk['end'] - \ + timedelta(minutes=chunk['bar_count']), end=chunk['end'] ) ) + + for pair in data: + log.info('data for sid {}\n{}\n{}'.format( + pair[0], pair[1].head(2), pair[1].tail(2))) + writer.write( data=data, show_progress=False, @@ -258,27 +289,27 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, if start >= end: raise ValueError('start date cannot be after end date') - if daily_bar_writer is not None: - process_bar_data( - exchange=exchange, - assets=assets, - writer=daily_bar_writer, - data_frequency='daily', - show_progress=show_progress, - start=start, - end=end - ) - - if five_minute_bar_writer is not None: - process_bar_data( - exchange=exchange, - assets=assets, - writer=five_minute_bar_writer, - data_frequency='5-minute', - show_progress=show_progress, - start=start, - end=end - ) + # if daily_bar_writer is not None: + # process_bar_data( + # exchange=exchange, + # assets=assets, + # writer=daily_bar_writer, + # data_frequency='daily', + # show_progress=show_progress, + # start=start, + # 
end=end + # ) + # + # if five_minute_bar_writer is not None: + # process_bar_data( + # exchange=exchange, + # assets=assets, + # writer=five_minute_bar_writer, + # data_frequency='5-minute', + # show_progress=show_progress, + # start=start, + # end=end + # ) if minute_bar_writer is not None: process_bar_data( @@ -292,5 +323,3 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, ) return ingest - - diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 6877bc54..401bd2e2 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -1,14 +1,12 @@ import os -import re -from runpy import run_path import sys import warnings -from time import sleep from datetime import timedelta - -import pandas as pd +from runpy import run_path +from time import sleep import click +import pandas as pd from catalyst.exchange.bittrex.bittrex import Bittrex @@ -23,22 +21,15 @@ except: from toolz import valfilter, concatv from functools import partial -from catalyst.algorithm import TradingAlgorithm -from catalyst.data.bundles.core import load -from catalyst.data.data_portal import DataPortal -from catalyst.data.loader import load_crypto_market_data from catalyst.finance.trading import TradingEnvironment -from catalyst.pipeline.data import USEquityPricing, CryptoPricing -from catalyst.pipeline.loaders import ( - USEquityPricingLoader, - CryptoPricingLoader, -) from catalyst.utils.calendars import get_calendar from catalyst.utils.factory import create_simulation_parameters import catalyst.utils.paths as pth -from catalyst.exchange.exchange_algorithm import ExchangeTradingAlgorithm -from catalyst.exchange.data_portal_exchange import DataPortalExchangeLive +from catalyst.exchange.exchange_algorithm import ExchangeTradingAlgorithmLive, \ + ExchangeTradingAlgorithmBacktest +from catalyst.exchange.data_portal_exchange import DataPortalExchangeLive, \ + DataPortalExchangeBacktest from catalyst.exchange.bitfinex.bitfinex import Bitfinex from 
catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -148,49 +139,46 @@ def _run(handle_data, mode = 'live' if live else 'backtest' log.info('running algo in {mode} mode'.format(mode=mode)) - if live and exchange is not None: - exchange_name = exchange + exchange_name = exchange + if exchange_name is None: + raise ValueError('Please specify at least one exchange.') - start = pd.Timestamp.utcnow() - # TODO: fix the end data. - end = start + timedelta(hours=8760) + exchange_list = [x.strip().lower() for x in exchange.split(',')] - exchange_list = [x.strip().lower() for x in exchange.split(',')] + exchanges = dict() + for exchange_name in exchange_list: - exchanges = dict() - for exchange_name in exchange_list: + # Looking for the portfolio from the cache first + portfolio = get_algo_object( + algo_name=algo_namespace, + key='portfolio_{}'.format(exchange_name), + environ=environ + ) - # Looking for the portfolio from the cache first - portfolio = get_algo_object( - algo_name=algo_namespace, - key='portfolio_{}'.format(exchange_name), - environ=environ + if portfolio is None: + portfolio = ExchangePortfolio( + start_date=pd.Timestamp.utcnow() ) - if portfolio is None: - portfolio = ExchangePortfolio( - start_date=pd.Timestamp.utcnow() - ) - - # This corresponds to the json file containing api token info - exchange_auth = get_exchange_auth(exchange_name) - if exchange_name == 'bitfinex': - exchanges[exchange_name] = Bitfinex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=base_currency, - portfolio=portfolio - ) - elif exchange_name == 'bittrex': - exchanges[exchange_name] = Bittrex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=base_currency, - portfolio=portfolio - ) - else: - raise ExchangeNotFoundError(exchange_name=exchange_name) + # This corresponds to the json file containing api token info + exchange_auth = 
get_exchange_auth(exchange_name) + if exchange_name == 'bitfinex': + exchanges[exchange_name] = Bitfinex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) + elif exchange_name == 'bittrex': + exchanges[exchange_name] = Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) + else: + raise ExchangeNotFoundError(exchange_name=exchange_name) open_calendar = get_calendar('OPEN') sim_params = create_simulation_parameters( @@ -201,13 +189,19 @@ def _run(handle_data, emission_rate=data_frequency, ) - if live and exchange is not None: - env = TradingEnvironment( - environ=environ, - exchange_tz='UTC', - asset_db_path=None - ) - env.asset_finder = AssetFinderExchange() + env = TradingEnvironment( + environ=environ, + exchange_tz='UTC', + asset_db_path=None # We don't need an asset db, we have exchanges + ) + env.asset_finder = AssetFinderExchange() + choose_loader = None # TODO: use the DataPortal for in the algorithm class for this + + if live: + start = pd.Timestamp.utcnow() + + # TODO: fix the end data. 
+ end = start + timedelta(hours=8760) data = DataPortalExchangeLive( exchanges=exchanges, @@ -215,7 +209,6 @@ def _run(handle_data, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) - choose_loader = None def fetch_capital_base(exchange, attempt_index=0): """ @@ -264,102 +257,34 @@ def _run(handle_data, data_frequency='minute' ) - elif bundle is not None: - bundles = bundle.split(',') - - def get_trading_env_and_data(bundles): - env = data = None - - b = 'poloniex' - if len(bundles) == 0: - return env, data - elif len(bundles) == 1: - b = bundles[0] - - bundle_data = load( - b, - environ, - bundle_timestamp, - ) - - prefix, connstr = re.split( - r'sqlite:///', - str(bundle_data.asset_finder.engine.url), - maxsplit=1, - ) - if prefix: - raise ValueError( - "invalid url %r, must begin with 'sqlite:///'" % - str(bundle_data.asset_finder.engine.url), - ) - - env = TradingEnvironment( - load=partial(load_crypto_market_data, bundle=b, - bundle_data=bundle_data, environ=environ), - bm_symbol='USDT_BTC', - trading_calendar=open_calendar, - asset_db_path=connstr, - environ=environ, - ) - - first_trading_day = bundle_data.minute_bar_reader.first_trading_day - - data = DataPortal( - env.asset_finder, - open_calendar, - first_trading_day=first_trading_day, - minute_reader=bundle_data.minute_bar_reader, - five_minute_reader=bundle_data.five_minute_bar_reader, - daily_reader=bundle_data.daily_bar_reader, - adjustment_reader=bundle_data.adjustment_reader, - ) - - return env, data - - def get_loader_for_bundle(b): - bundle_data = load( - b, - environ, - bundle_timestamp, - ) - - if b == 'poloniex': - return CryptoPricingLoader( - bundle_data, - data_frequency, - CryptoPricing, - ) - elif b == 'quandl': - return USEquityPricingLoader( - bundle_data, - data_frequency, - USEquityPricing, - ) - raise ValueError( - "No PipelineLoader registered for bundle %s." 
% b - ) - - loaders = [get_loader_for_bundle(b) for b in bundles] - env, data = get_trading_env_and_data(bundles) - - def choose_loader(column): - for loader in loaders: - if column in loader.columns: - return loader - raise ValueError( - "No PipelineLoader registered for column %s." % column - ) - + algorithm_class = partial( + ExchangeTradingAlgorithmLive, + exchanges=exchanges, + algo_namespace=algo_namespace, + live_graph=live_graph + ) else: - env = TradingEnvironment(environ=environ) - choose_loader = None + # Removed the existing Poloniex fork to keep things simple + # We can add back the complexity if required. - TradingAlgorithmClass = ( - partial(ExchangeTradingAlgorithm, exchanges=exchanges, - algo_namespace=algo_namespace, live_graph=live_graph) - if live and exchanges else TradingAlgorithm) # TODO: backtest trading algo class + # I don't think that we should have arbitrary price data bundles + # Instead, we should center this data around exchanges. + # We still need to support bundles for other misc data, but we + # can handle this later. - perf = TradingAlgorithmClass( + data = DataPortalExchangeBacktest( + exchanges=exchanges, + asset_finder=env.asset_finder, + trading_calendar=open_calendar, + first_trading_day=start, + ) + + algorithm_class = partial( + ExchangeTradingAlgorithmBacktest, + exchanges=exchanges + ) + + perf = algorithm_class( namespace=namespace, env=env, get_pipeline_loader=choose_loader, @@ -530,6 +455,11 @@ def run_algorithm(initialize, """ load_extensions(default_extension, extensions, strict_extensions, environ) + # I'm not sure that we need this since the modified DataPortal + # does not require extensions to be explicitly loaded. + + # This will be useful for arbitrary non-pricing bundles but we may + # need to modify the logic. 
if not live: non_none_data = valfilter(bool, { 'data': data is not None, diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 0ea2b8e9..87357c2b 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -50,7 +50,7 @@ class ExchangeDataPortalTestCase: exchanges=dict(bitfinex=self.bitfinex), asset_finder=asset_finder, trading_calendar=open_calendar, - first_trading_day=pd.to_datetime('today', utc=True) + first_trading_day=pd.to_datetime('2017-09-10', utc=True) ) def test_get_history_window_live(self): @@ -90,7 +90,7 @@ class ExchangeDataPortalTestCase: asset_finder.lookup_symbol('neo_btc', self.bitfinex), ] - date = pd.Timestamp.utcnow() - timedelta(hours=8) + date = pd.to_datetime('2017-09-10 9:00', utc=True) value = self.data_portal_backtest.get_spot_value( assets, 'close', date, 'minute') pass From 10a5b5412e22c3476c65cb0382aea16df0c042d3 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 20 Sep 2017 23:48:57 -0400 Subject: [PATCH 015/110] Testing the same algo in live and backtest mode. Most of it works well. We need a commission model for the TradingPair currency type. 
--- .../buy_low_sell_high_neo_with_interface.py | 170 ++++++++++++++++++ catalyst/exchange/exchange_algorithm.py | 20 +-- catalyst/exchange/exchange_bundle.py | 134 ++++++++------ tests/exchange/test_data_portal.py | 2 +- 4 files changed, 256 insertions(+), 70 deletions(-) create mode 100644 catalyst/examples/buy_low_sell_high_neo_with_interface.py diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py new file mode 100644 index 00000000..c453786b --- /dev/null +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -0,0 +1,170 @@ +import talib +from logbook import Logger +import pandas as pd + +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'buy_low_sell_high_neo' +log = Logger(algo_namespace) + + +def initialize(context): + log.info('initializing algo') + context.asset = symbol('neo_btc', 'bitfinex') + + context.TARGET_POSITIONS = 50 + context.PROFIT_TARGET = 0.1 + context.SLIPPAGE_ALLOWED = 0.02 + + context.retry_check_open_orders = 10 + context.retry_update_portfolio = 10 + context.retry_order = 5 + + context.errors = [] + pass + + +def _handle_data(context, data): + prices = data.history( + context.asset, + fields='price', + bar_count=20, + frequency='30m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + log.info('got rsi: {}'.format(rsi)) + + # Buying more when RSI is low, this should lower our cost basis + if rsi <= 30: + buy_increment = 1 + elif rsi <= 40: + buy_increment = 0.5 + elif rsi <= 70: + buy_increment = 0.1 + else: + buy_increment = None + + cash = context.portfolio.cash + log.info('base currency available: {cash}'.format(cash=cash)) + + price = data.current(context.asset, 'close') + log.info('got price {price}'.format(price=price)) + + if price is None: + log.warn('no 
pricing data') + return + + record(price=price, rsi=rsi) + + orders = get_open_orders(context.asset) + if orders: + log.info('skipping bar until all open orders execute') + return + + is_buy = False + cost_basis = None + if context.asset in context.portfolio.positions: + position = context.portfolio.positions[context.asset] + + cost_basis = position.cost_basis + log.info( + 'found {amount} positions with cost basis {cost_basis}'.format( + amount=position.amount, + cost_basis=cost_basis + ) + ) + + if position.amount >= context.TARGET_POSITIONS: + log.info('reached positions target: {}'.format(position.amount)) + return + + if price < cost_basis: + is_buy = True + elif position.amount > 0 and \ + price > cost_basis * (1 + context.PROFIT_TARGET): + profit = (price * position.amount) - (cost_basis * position.amount) + log.info('closing position, taking profit: {}'.format(profit)) + order_target_percent( + asset=context.asset, + target=0, + limit_price=price * (1 - context.SLIPPAGE_ALLOWED), + ) + else: + log.info('no buy or sell opportunity found') + else: + is_buy = True + + if is_buy: + if buy_increment is None: + log.info('the rsi is too high to consider buying {}'.format(rsi)) + return + + if price * buy_increment > cash: + log.info('not enough base currency to consider buying') + return + + log.info( + 'buying position cheaper than cost basis {} < {}'.format( + price, + cost_basis + ) + ) + order( + asset=context.asset, + amount=buy_increment, + limit_price=price * (1 + context.SLIPPAGE_ALLOWED) + ) + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + # try: + _handle_data(context, data) + # except Exception as e: + # log.warn('aborting the bar on error {}'.format(e)) + # context.errors.append(e) + + log.info('completed bar {}, total execution errors {}'.format( + data.current_dt, + len(context.errors) + )) + + if len(context.errors) > 0: + log.info('the errors:\n{}'.format(context.errors)) + + +def analyze(context, stats): 
+ log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass + + +# run_algorithm( +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='bittrex,bitfinex', +# live=True, +# algo_namespace=algo_namespace, +# base_currency='eth', +# live_graph=True +# ) +run_algorithm( + capital_base=10000, + start=pd.to_datetime('2017-09-10', utc=True), + end=pd.to_datetime('2017-09-15', utc=True), + data_frequency='minute', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + algo_namespace=algo_namespace, + base_currency='btc' +) diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index d20bed49..2d902572 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -63,6 +63,16 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): super(ExchangeTradingAlgorithmBase, self).__init__(*args, **kwargs) + def round_order(self, amount): + """ + We need fractions with cryptocurrencies + + :param amount: + :return: + """ + # TODO: is this good enough? Victor has a better solution. + return amount + @api_method @preprocess(symbol_str=ensure_upper_case) def symbol(self, symbol_str, exchange_name=None): @@ -595,16 +605,6 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): amount, asset.symbol, asset.exchange)) return None - def round_order(self, amount): - """ - We need fractions with cryptocurrencies - - :param amount: - :return: - """ - # TODO: is this good enough? Victor has a better solution. 
- return amount - @api_method def batch_market_order(self, share_counts): raise NotImplementedError() diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 147c4d9d..056a6421 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -30,44 +30,34 @@ def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): start_dt=calc_start_dt, end_dt=end_dt ) + return candles - series = dict() - - for asset in assets: - asset_candles = candles[asset] - - candle_start_dt = None - candle_end_dt = None - for candle in asset_candles: - last_traded = candle['last_traded'] - - if candle_start_dt is None or candle_start_dt > last_traded: - candle_start_dt = last_traded - - if candle_end_dt is None or candle_end_dt < last_traded: - candle_end_dt = last_traded - - if candle_end_dt < end_dt: - asset_candles.append( - dict( - open=None, - high=None, - close=None, - low=None, - volume=None, - last_traded=end_dt - ) - ) - - asset_df = pd.DataFrame(asset_candles) - if not asset_df.empty: - asset_df.set_index('last_traded', inplace=True, drop=True) - asset_df.sort_index(inplace=True) - asset_df = asset_df.resample('1T').ffill() - - series[asset] = asset_df - - return series + # series = dict() + # + # for asset in assets: + # asset_candles = candles[asset] + # + # candle_start_dt = None + # candle_end_dt = None + # for candle in asset_candles: + # last_traded = candle['last_traded'] + # + # if candle_start_dt is None or candle_start_dt > last_traded: + # candle_start_dt = last_traded + # + # if candle_end_dt is None or candle_end_dt < last_traded: + # candle_end_dt = last_traded + # + # + # asset_df = pd.DataFrame(asset_candles) + # if not asset_df.empty: + # asset_df.set_index('last_traded', inplace=True, drop=True) + # asset_df.sort_index(inplace=True) + # asset_df = asset_df.resample('1T').ffill() + # + # series[asset] = asset_df + # + # return series def process_bar_data(exchange, assets, 
writer, data_frequency, @@ -121,47 +111,73 @@ def process_bar_data(exchange, assets, writer, data_frequency, frequency=data_frequency )) as it: + previous_candle = dict() for chunk in it: - assets_candles_dict = fetch_candles_chunk( + chunk_end = chunk['end'] + chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) + + candles = fetch_candles_chunk( exchange=exchange, assets=assets, data_frequency=frequency, - end_dt=chunk['end'], + end_dt=chunk_end, bar_count=chunk['bar_count'] ) log.debug('requests counter {}'.format(exchange.request_cpt)) - if not assets_candles_dict.keys(): - log.debug( - 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=assets, - exchange=exchange.name, - end=chunk['end'] - ) - ) - continue - num_candles = 0 data = [] - for asset in assets_candles_dict: - df = assets_candles_dict[asset] - sid = asset.sid + for asset in candles: + asset_candles = candles[asset] + if not asset_candles: + log.debug( + 'no data: {symbols} on {exchange}, date {end}'.format( + symbols=assets, + exchange=exchange.name, + end=chunk_end + ) + ) + continue - num_candles += len(df.values) - data.append((sid, df)) + all_dates = [] + all_candles = [] + date = chunk_start + while date <= chunk_end: + + previous = previous_candle[asset] \ + if asset in previous_candle else None + + candle = next((candle for candle in asset_candles \ + if candle['last_traded'] == date), previous) + + if candle is not None: + all_dates.append(date) + all_candles.append(candle) + + previous_candle[asset] = candle + + date += timedelta(minutes=1) + + df = pd.DataFrame(all_candles, index=all_dates) + if not df.empty: + df.sort_index(inplace=True) + + sid = asset.sid + num_candles += len(df.values) + + data.append((sid, df)) try: - log.info( + log.debug( 'writing {num_candles} candles from {start} to {end}'.format( num_candles=num_candles, - start=chunk['end'] - \ - timedelta(minutes=chunk['bar_count']), - end=chunk['end'] + start=chunk_start, + end=chunk_end ) ) for pair 
in data: - log.info('data for sid {}\n{}\n{}'.format( + log.debug('data for sid {}\n{}\n{}'.format( pair[0], pair[1].head(2), pair[1].tail(2))) writer.write( diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 87357c2b..25d123cc 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -90,7 +90,7 @@ class ExchangeDataPortalTestCase: asset_finder.lookup_symbol('neo_btc', self.bitfinex), ] - date = pd.to_datetime('2017-09-10 9:00', utc=True) + date = pd.to_datetime('2017-09-10', utc=True) value = self.data_portal_backtest.get_spot_value( assets, 'close', date, 'minute') pass From 7335810cc246f1ce34423062a3b7adad61a39533 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 21 Sep 2017 01:17:10 -0400 Subject: [PATCH 016/110] Defined the same commission model as with equities for now. We need to fix the data precision in the bundles. --- catalyst/algorithm.py | 2 +- catalyst/exchange/exchange_algorithm.py | 6 ++++++ catalyst/exchange/exchange_blotter.py | 19 +++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 catalyst/exchange/exchange_blotter.py diff --git a/catalyst/algorithm.py b/catalyst/algorithm.py index ed10fedd..93ea60bb 100644 --- a/catalyst/algorithm.py +++ b/catalyst/algorithm.py @@ -1135,7 +1135,7 @@ class TradingAlgorithm(object): 'date_rule. 
You should use keyword argument ' 'time_rule= when calling schedule_function without ' 'specifying a date_rule', stacklevel=3) - + freq = self.sim_params.data_frequency date_rule = date_rule or date_rules.every_day() diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index 2d902572..bddbf2f0 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -29,6 +29,7 @@ from catalyst.algorithm import TradingAlgorithm from catalyst.data.minute_bars import BcolzMinuteBarWriter, \ BcolzMinuteBarReader from catalyst.errors import OrderInBeforeTradingStart +from catalyst.exchange.exchange_blotter import ExchangeBlotter from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangePortfolioDataError, @@ -190,6 +191,11 @@ class ExchangeTradingAlgorithmBacktest(ExchangeTradingAlgorithmBase): def __init__(self, *args, **kwargs): super(ExchangeTradingAlgorithmBacktest, self).__init__(*args, **kwargs) + self.blotter = ExchangeBlotter( + data_frequency=self.data_frequency, + # Default to NeverCancel in catalyst + cancel_policy=self.cancel_policy, + ) log.info('initialized trading algorithm in backtest mode') diff --git a/catalyst/exchange/exchange_blotter.py b/catalyst/exchange/exchange_blotter.py new file mode 100644 index 00000000..dd771cca --- /dev/null +++ b/catalyst/exchange/exchange_blotter.py @@ -0,0 +1,19 @@ +from catalyst.finance.blotter import Blotter +from catalyst.finance.commission import PerShare +from catalyst.finance.slippage import VolumeShareSlippage +from catalyst.assets._assets import TradingPair + + +class ExchangeBlotter(Blotter): + def __init__(self, *args, **kwargs): + super(ExchangeBlotter, self).__init__(*args, **kwargs) + + # Using the equity models for now + # We may be able to define more sophisticated models based on the fee + # structure of each exchange. 
+ self.slippage_models = { + TradingPair: VolumeShareSlippage() + } + self.commission_models = { + TradingPair: PerShare() + } From 2f8768bb066a912ad052e552ee67151291cbd25c Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 21 Sep 2017 16:36:10 -0400 Subject: [PATCH 017/110] Merged Victor's hack for the minute writer precision --- catalyst/data/minute_bars.py | 106 +++++++++++++++++------------------ 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/catalyst/data/minute_bars.py b/catalyst/data/minute_bars.py index d2707122..9cae04af 100644 --- a/catalyst/data/minute_bars.py +++ b/catalyst/data/minute_bars.py @@ -39,7 +39,7 @@ from catalyst.data._minute_bar_internal import ( from catalyst.gens.sim_engine import NANOS_IN_MINUTE from catalyst.data.bar_reader import BarReader, NoDataOnDate -from catalyst.data.us_equity_pricing import check_uint32_safe +from catalyst.data.us_equity_pricing import check_uint64_safe from catalyst.utils.calendars import get_calendar from catalyst.utils.cli import maybe_show_progress from catalyst.utils.memoize import lazyval @@ -52,7 +52,7 @@ FUTURES_MINUTES_PER_DAY = 1440 DEFAULT_EXPECTEDLEN = US_EQUITIES_MINUTES_PER_DAY * 252 * 15 -OHLC_RATIO = 1000 +OHLC_RATIO = 100000000 class BcolzMinuteOverlappingData(Exception): @@ -114,15 +114,15 @@ def _sid_subdir_path(sid): def convert_cols(cols, scale_factor, sid, invalid_data_behavior): - """Adapt OHLCV columns into uint32 columns. + """Adapt OHLCV columns into uint64 columns. Parameters ---------- cols : dict A dict mapping each column name (open, high, low, close, volume) - to a float column to convert to uint32. + to a float column to convert to uint64. scale_factor : int - Factor to use to scale float values before converting to uint32. + Factor to use to scale float values before converting to uint64. sid : int Sid of the relevant asset, for logging. 
invalid_data_behavior : str @@ -135,6 +135,7 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior): scaled_highs = np.nan_to_num(cols['high']) * scale_factor scaled_lows = np.nan_to_num(cols['low']) * scale_factor scaled_closes = np.nan_to_num(cols['close']) * scale_factor + scaled_volumes = np.nan_to_num(cols['volume']) * scale_factor exclude_mask = np.zeros_like(scaled_opens, dtype=bool) @@ -143,11 +144,12 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior): ('high', scaled_highs), ('low', scaled_lows), ('close', scaled_closes), + ('volume', scaled_volumes), ]: max_val = scaled_col.max() try: - check_uint32_safe(max_val, col_name) + check_uint64_safe(max_val, col_name) except ValueError: if invalid_data_behavior == 'raise': raise @@ -155,20 +157,20 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior): if invalid_data_behavior == 'warn': logger.warn( 'Values for sid={}, col={} contain some too large for ' - 'uint32 (max={}), filtering them out', + 'uint64 (max={}), filtering them out', sid, col_name, max_val, ) # We want to exclude all rows that have an unsafe value in # this column. - exclude_mask &= (scaled_col >= np.iinfo(np.uint32).max) + exclude_mask &= (scaled_col >= np.iinfo(np.uint64).max) # Convert all cols to uint32. - opens = scaled_opens.astype(np.uint32) - highs = scaled_highs.astype(np.uint32) - lows = scaled_lows.astype(np.uint32) - closes = scaled_closes.astype(np.uint32) - volumes = cols['volume'].astype(np.uint32) + opens = scaled_opens.astype(np.uint64) + highs = scaled_highs.astype(np.uint64) + lows = scaled_lows.astype(np.uint64) + closes = scaled_closes.astype(np.uint64) + volumes = scaled_volumes.astype(np.uint64) # Exclude rows with unsafe values by setting to zero. 
opens[exclude_mask] = 0 @@ -260,14 +262,14 @@ class BcolzMinuteBarMetadata(object): ) def __init__( - self, - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - minutes_per_day, - version=FORMAT_VERSION, + self, + default_ohlc_ratio, + ohlc_ratios_per_sid, + calendar, + start_session, + end_session, + minutes_per_day, + version=FORMAT_VERSION, ): self.calendar = calendar self.start_session = start_session @@ -288,7 +290,7 @@ class BcolzMinuteBarMetadata(object): ohlc_ratio : int The default ratio by which to multiply the pricing data to convert the floats from floats to an integer to fit within - the np.uint32. If ohlc_ratios_per_sid is None or does not + the np.uint64. If ohlc_ratios_per_sid is None or does not contain a mapping for a given sid, this ratio is used. ohlc_ratios_per_sid : dict A dict mapping each sid in the output to the factor by @@ -340,10 +342,10 @@ class BcolzMinuteBarMetadata(object): 'first_trading_day': str(self.start_session.date()), 'market_opens': ( market_opens.values.astype('datetime64[m]'). - astype(np.int64).tolist()), + astype(np.int64).tolist()), 'market_closes': ( market_closes.values.astype('datetime64[m]'). - astype(np.int64).tolist()), + astype(np.int64).tolist()), } with open(self.metadata_path(rootdir), 'w+') as fp: json.dump(metadata, fp) @@ -372,13 +374,13 @@ class BcolzMinuteBarWriter(object): The last trading session in the data set. default_ohlc_ratio : int, optional The default ratio by which to multiply the pricing data to - convert from floats to integers that fit within np.uint32. If + convert from floats to integers that fit within np.uint64. If ohlc_ratios_per_sid is None or does not contain a mapping for a - given sid, this ratio is used. Default is OHLC_RATIO (1000). + given sid, this ratio is used. Default is OHLC_RATIO (10^8). 
ohlc_ratios_per_sid : dict, optional A dict mapping each sid in the output to the ratio by which to multiply the pricing data to convert the floats from floats to - an integer to fit within the np.uint32. + an integer to fit within the np.uint64. expectedlen : int, optional The expected length of the dataset, used when creating the initial bcolz ctable. @@ -401,11 +403,9 @@ class BcolzMinuteBarWriter(object): Each individual asset's data is stored as a bcolz table with a column for each pricing field: (open, high, low, close, volume) - The open, high, low, and close columns are integers which are 1000 times + The open, high, low, close and volume columns are integers which are 10^8 times the quoted price, so that the data can represented and stored as an - np.uint32, supporting market prices quoted up to the thousands place. - - volume is a np.uint32 with no mutation of the tens place. + np.uint64, supporting market prices quoted up to the 1/10^8-th place. The 'index' for each individual asset are a repeating period of minutes of length `minutes_per_day` starting from each market open. @@ -573,7 +573,7 @@ class BcolzMinuteBarWriter(object): if not os.path.exists(sid_containing_dirname): # Other sids may have already created the containing directory. os.makedirs(sid_containing_dirname) - initial_array = np.empty(0, np.uint32) + initial_array = np.empty(0, np.uint64) table = ctable( rootdir=path, columns=[ @@ -610,7 +610,7 @@ class BcolzMinuteBarWriter(object): minute_offset = len(table) % self._minutes_per_day num_to_prepend = numdays * self._minutes_per_day - minute_offset - prepend_array = np.zeros(num_to_prepend, np.uint32) + prepend_array = np.zeros(num_to_prepend, np.uint64) # Fill all OHLCV with zeros. 
table.append([prepend_array] * 5) table.flush() @@ -815,11 +815,11 @@ class BcolzMinuteBarWriter(object): minutes_count = all_minutes_in_window.size - open_col = np.zeros(minutes_count, dtype=np.uint32) - high_col = np.zeros(minutes_count, dtype=np.uint32) - low_col = np.zeros(minutes_count, dtype=np.uint32) - close_col = np.zeros(minutes_count, dtype=np.uint32) - vol_col = np.zeros(minutes_count, dtype=np.uint32) + open_col = np.zeros(minutes_count, dtype=np.uint64) + high_col = np.zeros(minutes_count, dtype=np.uint64) + low_col = np.zeros(minutes_count, dtype=np.uint64) + close_col = np.zeros(minutes_count, dtype=np.uint64) + vol_col = np.zeros(minutes_count, dtype=np.uint64) dt_ixs = np.searchsorted(all_minutes_in_window.values, dts.astype('datetime64[ns]')) @@ -914,10 +914,10 @@ class BcolzMinuteBarReader(MinuteBarReader): ) self._schedule = self.calendar.schedule[slicer] self._market_opens = self._schedule.market_open - self._market_open_values = self._market_opens.values.\ + self._market_open_values = self._market_opens.values. \ astype('datetime64[m]').astype(np.int64) self._market_closes = self._schedule.market_close - self._market_close_values = self._market_closes.values.\ + self._market_close_values = self._market_closes.values. 
\ astype('datetime64[m]').astype(np.int64) self._default_ohlc_inverse = 1.0 / metadata.default_ohlc_ratio @@ -1125,8 +1125,8 @@ class BcolzMinuteBarReader(MinuteBarReader): else: return np.nan - if field != 'volume': - value *= self._ohlc_ratio_inverse_for_sid(sid) + #if field != 'volume': + value *= self._ohlc_ratio_inverse_for_sid(sid) return value def get_last_traded_dt(self, asset, dt): @@ -1206,7 +1206,7 @@ class BcolzMinuteBarReader(MinuteBarReader): minute_dt.value / NANOS_IN_MINUTE, self._minutes_per_day, False, - ) + ) def load_raw_arrays(self, fields, start_dt, end_dt, sids): """ @@ -1248,7 +1248,7 @@ class BcolzMinuteBarReader(MinuteBarReader): if field != 'volume': out = np.full(shape, np.nan) else: - out = np.zeros(shape, dtype=np.uint32) + out = np.zeros(shape, dtype=np.uint64) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) @@ -1256,17 +1256,17 @@ class BcolzMinuteBarReader(MinuteBarReader): if indices_to_exclude is not None: for excl_start, excl_stop in indices_to_exclude[::-1]: excl_slice = np.s_[ - excl_start - start_idx:excl_stop - start_idx + 1] + excl_start - start_idx:excl_stop - start_idx + 1] values = np.delete(values, excl_slice) where = values != 0 # first slice down to len(where) because we might not have # written data for all the minutes requested - if field != 'volume': - out[:len(where), i][where] = ( - values[where] * self._ohlc_ratio_inverse_for_sid(sid)) - else: - out[:len(where), i][where] = values[where] + #if field != 'volume': + out[:len(where), i][where] = ( + values[where] * self._ohlc_ratio_inverse_for_sid(sid)) + #else: + # out[:len(where), i][where] = values[where] results.append(out) return results @@ -1319,9 +1319,9 @@ class H5MinuteBarUpdateWriter(object): def __init__(self, path, complevel=None, complib=None): self._complevel = complevel if complevel \ - is not None else self._COMPLEVEL + is not None else self._COMPLEVEL self._complib = complib if complib \ - is not None else self._COMPLIB + 
is not None else self._COMPLIB self._path = path def write(self, frames): @@ -1358,4 +1358,4 @@ class H5MinuteBarUpdateReader(MinuteBarUpdateReader): def read(self, dts, sids): panel = self._panel[sids, dts, :] - return panel.iteritems() + return panel.iteritems() \ No newline at end of file From ddecd6bb48cc114adc8a38ff98aa0a6cd6730257 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 21 Sep 2017 19:05:16 -0400 Subject: [PATCH 018/110] First working version with the backtest and live modes executing the same algorithm. --- .../buy_low_sell_high_neo_with_interface.py | 42 +++--- catalyst/exchange/bitfinex/bitfinex.py | 2 +- catalyst/exchange/data_portal_exchange.py | 42 ++++-- catalyst/exchange/exchange_algorithm.py | 40 +++++ catalyst/exchange/exchange_blotter.py | 141 +++++++++++++++++- catalyst/exchange/exchange_bundle.py | 1 + tests/exchange/test_bundle.py | 7 +- 7 files changed, 232 insertions(+), 43 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index c453786b..4fc81028 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -33,24 +33,25 @@ def initialize(context): def _handle_data(context, data): - prices = data.history( - context.asset, - fields='price', - bar_count=20, - frequency='30m' - ) - rsi = talib.RSI(prices.values, timeperiod=14)[-1] - log.info('got rsi: {}'.format(rsi)) + # prices = data.history( + # context.asset, + # fields='price', + # bar_count=20, + # frequency='30m' + # ) + # rsi = talib.RSI(prices.values, timeperiod=14)[-1] + # log.info('got rsi: {}'.format(rsi)) # Buying more when RSI is low, this should lower our cost basis - if rsi <= 30: - buy_increment = 1 - elif rsi <= 40: - buy_increment = 0.5 - elif rsi <= 70: - buy_increment = 0.1 - else: - buy_increment = None + # if rsi <= 30: + # buy_increment = 1 + # elif rsi <= 40: + # buy_increment = 0.5 + # 
elif rsi <= 70: + # buy_increment = 0.1 + # else: + # buy_increment = None + buy_increment = 0.1 cash = context.portfolio.cash log.info('base currency available: {cash}'.format(cash=cash)) @@ -62,10 +63,10 @@ def _handle_data(context, data): log.warn('no pricing data') return - record(price=price, rsi=rsi) + record(price=price) orders = get_open_orders(context.asset) - if orders: + if len(orders) > 0: log.info('skipping bar until all open orders execute') return @@ -104,7 +105,6 @@ def _handle_data(context, data): if is_buy: if buy_increment is None: - log.info('the rsi is too high to consider buying {}'.format(rsi)) return if price * buy_increment > cash: @@ -117,11 +117,13 @@ def _handle_data(context, data): cost_basis ) ) + limit_price = price * (1 + context.SLIPPAGE_ALLOWED) order( asset=context.asset, amount=buy_increment, - limit_price=price * (1 + context.SLIPPAGE_ALLOWED) + limit_price=limit_price ) + pass def handle_data(context, data): diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 20c3f312..466e3e50 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -448,7 +448,7 @@ class Bitfinex(Exchange): order_statuses['message']) ) - orders = list() + orders = [] for order_status in order_statuses: order, executed_price = self._create_order(order_status) if asset is None or asset == order.sid: diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 3968370d..e7c35a1b 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -15,13 +15,12 @@ import abc import os from time import sleep -import collections import pandas as pd from catalyst.assets._assets import TradingPair from logbook import Logger -from catalyst.data.bundles.core import load, from_bundle_ingest_dirname, \ - BundleData, minute_path, five_minute_path, daily_path +from catalyst.data.bundles.core import 
from_bundle_ingest_dirname, \ + minute_path, five_minute_path, daily_path from catalyst.data.data_portal import DataPortal from catalyst.data.five_minute_bars import BcolzFiveMinuteBarReader from catalyst.data.minute_bars import BcolzMinuteBarReader @@ -200,6 +199,9 @@ class DataPortalExchangeBase(DataPortal): ) def get_spot_value(self, assets, field, dt, data_frequency): + if field == 'price': + field = 'close' + return self._get_spot_value(assets, field, dt, data_frequency) @abc.abstractmethod @@ -263,20 +265,29 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): if time_folder is None: raise BundleNotFoundError(exchange=exchange_name) - self.daily_bar_readers[exchange_name] = \ - BcolzDailyBarReader( - daily_path(name, time_folder), - ) + try: + self.daily_bar_readers[exchange_name] = \ + BcolzDailyBarReader( + daily_path(name, time_folder), + ) + except IOError: + self.daily_bar_readers[exchange_name] = None - self.five_minute_bar_readers[exchange_name] = \ - BcolzFiveMinuteBarReader( - five_minute_path(name, time_folder), - ) + try: + self.five_minute_bar_readers[exchange_name] = \ + BcolzFiveMinuteBarReader( + five_minute_path(name, time_folder), + ) + except IOError: + self.five_minute_bar_readers[exchange_name] = None - self.minute_bar_readers[exchange_name] = \ - BcolzMinuteBarReader( - minute_path(name, time_folder), - ) + try: + self.minute_bar_readers[exchange_name] = \ + BcolzMinuteBarReader( + minute_path(name, time_folder), + ) + except IOError: + self.minute_bar_readers[exchange_name] = None @staticmethod def find_most_recent_time(bundle_name): @@ -309,6 +320,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field, data_frequency, ffill=True): + # TODO: implement in the bundle df = exchange.get_history_window( assets, end_dt, diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index bddbf2f0..c9db3a12 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ 
b/catalyst/exchange/exchange_algorithm.py @@ -35,12 +35,15 @@ from catalyst.exchange.exchange_errors import ( ExchangePortfolioDataError, ExchangeTransactionError, OrphanOrderError) +from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ + ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root, \ save_algo_object, get_algo_object, get_algo_folder, get_algo_df, \ save_algo_df from catalyst.exchange.live_graph_clock import LiveGraphClock from catalyst.exchange.simple_clock import SimpleClock from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.finance.execution import MarketOrder from catalyst.finance.performance.period import calc_period_stats from catalyst.gens.tradesimulation import AlgorithmSimulator from catalyst.utils.api_support import ( @@ -198,6 +201,43 @@ class ExchangeTradingAlgorithmBacktest(ExchangeTradingAlgorithmBase): ) log.info('initialized trading algorithm in backtest mode') + def _calculate_order(self, asset, amount, + limit_price=None, stop_price=None, style=None): + # Raises a ZiplineError if invalid parameters are detected. + self.validate_order_params(asset, + amount, + limit_price, + stop_price, + style) + + # Convert deprecated limit_price and stop_price parameters to use + # ExecutionStyle objects. + style = self.__convert_order_params_for_blotter(limit_price, + stop_price, + style) + return amount, style + + @staticmethod + def __convert_order_params_for_blotter(limit_price, stop_price, style): + """ + Helper method for converting deprecated limit_price and stop_price + arguments into ExecutionStyle instances. + + This function assumes that either style == None or (limit_price, + stop_price) == (None, None). 
+ """ + if style: + assert (limit_price, stop_price) == (None, None) + return style + if limit_price and stop_price: + return ExchangeStopLimitOrder(limit_price, stop_price) + if limit_price: + return ExchangeLimitOrder(limit_price) + if stop_price: + return ExchangeStopOrder(stop_price) + else: + return MarketOrder() + class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): def __init__(self, *args, **kwargs): diff --git a/catalyst/exchange/exchange_blotter.py b/catalyst/exchange/exchange_blotter.py index dd771cca..58994030 100644 --- a/catalyst/exchange/exchange_blotter.py +++ b/catalyst/exchange/exchange_blotter.py @@ -1,8 +1,141 @@ +from logbook import Logger + from catalyst.finance.blotter import Blotter -from catalyst.finance.commission import PerShare -from catalyst.finance.slippage import VolumeShareSlippage +from catalyst.finance.commission import PerShare, CommissionModel +from catalyst.finance.slippage import VolumeShareSlippage, SlippageModel, \ + LiquidityExceeded from catalyst.assets._assets import TradingPair +# It seems like we need to accept greate slippage risk in cryptos +# Orders won't often close at Equity levels. +# TODO: consider adjusting dynamically based on trading pair +from catalyst.finance.transaction import Transaction + +log = Logger('exchange_blotter') + +DEFAULT_SLIPPAGE_SPREAD = 0.02 +DEFAULT_MAKER_FEE = 0.001 +DEFAULT_TAKER_FEE = 0.002 + + +class TradingPairFeeSchedule(CommissionModel): + """ + Calculates a commission for a transaction based on a per percentage fee. + + Parameters + ---------- + fee : float, optional + The percentage fee. 
+ """ + + def __init__(self, + maker_fee=DEFAULT_MAKER_FEE, + taker_fee=DEFAULT_TAKER_FEE): + self.maker_fee = maker_fee + self.taker_fee = taker_fee + + def __repr__(self): + return ( + '{class_name}(maker_fee={maker_fee}, ' + 'taker_fee={taker_fee})' + .format( + class_name=self.__class__.__name__, + maker_fee=self.maker_fee, + taker_fee=self.taker_fee, + ) + ) + + def calculate(self, order, transaction): + """ + Calculate the final fee based on the order parameters. + + :param order: + :param transaction: + + :return float: + The total commission. + """ + cost = abs(transaction.amount) * transaction.price + + # Assuming just the taker fee for now + fee = cost * self.taker_fee + return fee + + +class TradingPairFixedSlippage(SlippageModel): + """ + Model slippage as a fixed spread. + + Parameters + ---------- + spread : float, optional + spread / 2 will be added to buys and subtracted from sells. + """ + + def __init__(self, spread=DEFAULT_SLIPPAGE_SPREAD): + super(TradingPairFixedSlippage, self).__init__() + self.spread = spread + + def __repr__(self): + return '{class_name}(spread={spread})'.format( + class_name=self.__class__.__name__, spread=self.spread, + ) + + def simulate(self, data, asset, orders_for_asset): + self._volume_for_bar = 0 + volume = data.current(asset, "volume") + + if volume == 0: + return + + # can use the close price, since we verified there's volume in this + # bar. 
+ price = data.current(asset, "close") + dt = data.current_dt + + for order in orders_for_asset: + if order.open_amount == 0: + continue + + order.check_triggers(price, dt) + if not order.triggered: + continue + + transaction = None + try: + execution_price, execution_volume = \ + self.process_order(data, order) + + if execution_price is not None: + transaction = Transaction( + asset=order.asset, + amount=abs(execution_volume), + dt=data.current_dt, + price=execution_price, + order_id=order.id + ) + + except LiquidityExceeded: + break + + if transaction: + self._volume_for_bar += abs(transaction.amount) + yield order, transaction + + def process_order(self, data, order): + price = data.current(order.asset, 'close') + + if order.amount > 0: + # Buy order + adj_price = price * (1 + self.spread) + else: + # Sell order + adj_price = price & (1 - self.spread) + + log.debug('added slippage to price: {} => {}'.format(price, adj_price)) + + return (adj_price, order.amount) + class ExchangeBlotter(Blotter): def __init__(self, *args, **kwargs): @@ -12,8 +145,8 @@ class ExchangeBlotter(Blotter): # We may be able to define more sophisticated models based on the fee # structure of each exchange. 
self.slippage_models = { - TradingPair: VolumeShareSlippage() + TradingPair: TradingPairFixedSlippage() } self.commission_models = { - TradingPair: PerShare() + TradingPair: TradingPairFeeSchedule() } diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 056a6421..a02363fd 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -64,6 +64,7 @@ def process_bar_data(exchange, assets, writer, data_frequency, show_progress, start, end): open_calendar = get_calendar('OPEN') + writer.default_ohlc_ratio = 1000000 writer.calendar = open_calendar writer.minutes_per_day = 1440 writer.write_metadata = True diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 7cf236a2..92a1ff30 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -16,12 +16,12 @@ class ExchangeBundleTestCase: def test_ingest(self): exchange_name = 'bitfinex' - start = pd.Timestamp.utcnow() - timedelta(days=365) + start = pd.to_datetime('2017-09-01', utc=True) end = pd.Timestamp.utcnow() open_calendar = get_calendar('OPEN') root = data_root(os.environ) - output_dir = '{root}/exchange_{exchange}/test'.format( + output_dir = '{root}/exchange_{exchange}/2017-09-21T05;34;37.274482'.format( root=root, exchange=exchange_name ) @@ -40,7 +40,8 @@ class ExchangeBundleTestCase: minutes_per_day=1440, start_session=start_session, end_session=end, - write_metadata=True + write_metadata=True, + default_ohlc_ratio=1000000 ) ingest = exchange_bundle( From bc65c10fc6269c218ed11ee28906026204cac195 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 22 Sep 2017 23:17:38 -0400 Subject: [PATCH 019/110] Implemented and tested the history() method in backtest mode. 
--- .../buy_low_sell_high_neo_with_interface.py | 39 +++--- catalyst/exchange/data_portal_exchange.py | 115 ++++++++++++++---- catalyst/exchange/exchange_blotter.py | 59 ++++----- catalyst/exchange/exchange_errors.py | 14 +++ tests/exchange/test_data_portal.py | 25 +++- 5 files changed, 167 insertions(+), 85 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 4fc81028..0e215aba 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -20,7 +20,7 @@ def initialize(context): log.info('initializing algo') context.asset = symbol('neo_btc', 'bitfinex') - context.TARGET_POSITIONS = 50 + context.TARGET_POSITIONS = 50000 context.PROFIT_TARGET = 0.1 context.SLIPPAGE_ALLOWED = 0.02 @@ -33,25 +33,24 @@ def initialize(context): def _handle_data(context, data): - # prices = data.history( - # context.asset, - # fields='price', - # bar_count=20, - # frequency='30m' - # ) - # rsi = talib.RSI(prices.values, timeperiod=14)[-1] - # log.info('got rsi: {}'.format(rsi)) + prices = data.history( + context.asset, + fields='price', + bar_count=30, + frequency='30m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + log.info('got rsi: {}'.format(rsi)) # Buying more when RSI is low, this should lower our cost basis - # if rsi <= 30: - # buy_increment = 1 - # elif rsi <= 40: - # buy_increment = 0.5 - # elif rsi <= 70: - # buy_increment = 0.1 - # else: - # buy_increment = None - buy_increment = 0.1 + if rsi <= 30: + buy_increment = 1 + elif rsi <= 40: + buy_increment = 0.5 + elif rsi <= 70: + buy_increment = 0.1 + else: + buy_increment = None cash = context.portfolio.cash log.info('base currency available: {cash}'.format(cash=cash)) @@ -159,8 +158,8 @@ def analyze(context, stats): # live_graph=True # ) run_algorithm( - capital_base=10000, - start=pd.to_datetime('2017-09-10', utc=True), + capital_base=250, + 
start=pd.to_datetime('2017-09-08', utc=True), end=pd.to_datetime('2017-09-15', utc=True), data_frequency='minute', initialize=initialize, diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index e7c35a1b..3dac7678 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -28,7 +28,8 @@ from catalyst.data.us_equity_pricing import BcolzDailyBarReader from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeBarDataError, - BundleNotFoundError) + BundleNotFoundError, PricingDataBeforeTradingError, + PricingDataNotLoadedError) from catalyst.utils.paths import data_path log = Logger('DataPortalExchange') @@ -85,7 +86,8 @@ class DataPortalExchangeBase(DataPortal): else: exchange = self.exchanges[exchange_assets.keys()[0]] - return exchange.get_history_window( + return self.get_exchange_history_window( + exchange, assets, end_dt, bar_count, @@ -123,6 +125,10 @@ class DataPortalExchangeBase(DataPortal): field, data_frequency=None, ffill=True): + + if field == 'price': + field = 'close' + return self._get_history_window(assets, end_dt, bar_count, @@ -251,12 +257,14 @@ class DataPortalExchangeLive(DataPortalExchangeBase): class DataPortalExchangeBacktest(DataPortalExchangeBase): def __init__(self, *args, **kwargs): - super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) self.daily_bar_readers = dict() self.minute_bar_readers = dict() self.five_minute_bar_readers = dict() + + self.history_loaders = dict() + self.minute_history_loaders = dict() for exchange_name in self.exchanges: name = 'exchange_{}'.format(exchange_name) time_folder = \ @@ -289,6 +297,13 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): except IOError: self.minute_bar_readers[exchange_name] = None + def _get_first_trading_day(self, assets): + first_date = None + for asset in assets: + if first_date is None or asset.start_date > first_date: + first_date = asset.start_date 
+ return first_date + @staticmethod def find_most_recent_time(bundle_name): try: @@ -320,30 +335,76 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field, data_frequency, ffill=True): - # TODO: implement in the bundle - df = exchange.get_history_window( - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill) + if data_frequency == 'minute' or data_frequency == '1m': + reader = self.minute_bar_readers[exchange.name] + dts = self.trading_calendar.minutes_window( + end_dt, -bar_count + ) + + self.ensure_after_first_day(dts[0], assets) + + elif data_frequency == '5-minute' or data_frequency == '5m': + reader = self.five_minute_bar_readers[exchange.name] + elif data_frequency == 'daily' or data_frequency == '1d': + reader = self.daily_bar_readers[exchange.name] + + session = self.trading_calendar.minute_to_session_label(end_dt) + dts = self._get_days_for_window(session, bar_count) + + self.ensure_after_first_day(dts[0], assets) + else: + raise ValueError('Unsupported frequency') + + try: + values = reader.load_raw_arrays( + [field], + dts[0], + dts[-1], + assets, + )[0] + except Exception: + raise PricingDataNotLoadedError( + field=field, + first_trading_day=self._get_first_trading_day(assets), + exchange=exchange.name, + symbols=[asset.symbol for asset in assets], + ) + + series = dict() + for index, asset in enumerate(assets): + asset_values = [] + for value in values: + asset_values.append(value[index]) + + value_series = pd.Series(asset_values, index=dts) + series[asset] = value_series + + df = pd.DataFrame(series) return df + def ensure_after_first_day(self, dt, assets): + first_trading_day = self._get_first_trading_day(assets) + if dt < first_trading_day: + raise PricingDataBeforeTradingError( + first_trading_day=first_trading_day, + exchange=assets[0].exchange, + symbols=[asset.symbol for asset in assets], + ) + def get_exchange_spot_value(self, exchange, assets, field, dt, data_frequency): - - if data_frequency == 'minute': 
+ if data_frequency == 'minute' or data_frequency == '1m': reader = self.minute_bar_readers[exchange.name] - elif data_frequency == '5-minute': + elif data_frequency == '5-minute' or data_frequency == '5m': reader = self.five_minute_bar_readers[exchange.name] - elif data_frequency == 'daily': + elif data_frequency == 'daily' or data_frequency == '1d': reader = self.daily_bar_readers[exchange.name] else: raise ValueError('Unsupported frequency') if isinstance(assets, TradingPair): + self.ensure_after_first_day(dt, [assets]) + try: value = reader.get_value( sid=assets.sid, @@ -351,10 +412,16 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field=field ) return value - except Exception as e: - log.warn('minute data not found: {}'.format(e)) - return None + except Exception: + raise PricingDataNotLoadedError( + field=field, + first_trading_day=self._get_first_trading_day([assets]), + exchange=exchange.name, + symbols=assets.symbol, + ) else: + self.ensure_after_first_day(dt, assets) + values = [] for asset in assets: try: @@ -364,8 +431,12 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field=field ) values.append(value) - except Exception as e: - log.warn('minute data not found: {}'.format(e)) - values.append(None) + except Exception: + raise PricingDataNotLoadedError( + field=field, + first_trading_day=self._get_first_trading_day(assets), + exchange=exchange.name, + symbols=[asset.symbol for asset in assets], + ) return values diff --git a/catalyst/exchange/exchange_blotter.py b/catalyst/exchange/exchange_blotter.py index 58994030..c4d451a1 100644 --- a/catalyst/exchange/exchange_blotter.py +++ b/catalyst/exchange/exchange_blotter.py @@ -1,18 +1,16 @@ +from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.finance.blotter import Blotter -from catalyst.finance.commission import PerShare, CommissionModel -from catalyst.finance.slippage import VolumeShareSlippage, SlippageModel, \ - LiquidityExceeded -from 
catalyst.assets._assets import TradingPair - -# It seems like we need to accept greate slippage risk in cryptos -# Orders won't often close at Equity levels. -# TODO: consider adjusting dynamically based on trading pair +from catalyst.finance.commission import CommissionModel +from catalyst.finance.slippage import SlippageModel from catalyst.finance.transaction import Transaction log = Logger('exchange_blotter') +# It seems like we need to accept greater slippage risk in cryptos +# Orders won't often close at Equity levels. +# TODO: consider adjusting dynamically based on trading pair DEFAULT_SLIPPAGE_SPREAD = 0.02 DEFAULT_MAKER_FEE = 0.001 DEFAULT_TAKER_FEE = 0.002 @@ -37,8 +35,7 @@ class TradingPairFeeSchedule(CommissionModel): def __repr__(self): return ( '{class_name}(maker_fee={maker_fee}, ' - 'taker_fee={taker_fee})' - .format( + 'taker_fee={taker_fee})'.format( class_name=self.__class__.__name__, maker_fee=self.maker_fee, taker_fee=self.taker_fee, @@ -83,44 +80,32 @@ class TradingPairFixedSlippage(SlippageModel): def simulate(self, data, asset, orders_for_asset): self._volume_for_bar = 0 - volume = data.current(asset, "volume") - if volume == 0: - return + price = data.current(asset, 'close') - # can use the close price, since we verified there's volume in this - # bar. 
- price = data.current(asset, "close") dt = data.current_dt - for order in orders_for_asset: if order.open_amount == 0: continue order.check_triggers(price, dt) if not order.triggered: + log.debug('order has not reached the trigger at current ' + 'price {}'.format(price)) continue - transaction = None - try: - execution_price, execution_volume = \ - self.process_order(data, order) + execution_price, execution_volume = self.process_order(data, order) - if execution_price is not None: - transaction = Transaction( - asset=order.asset, - amount=abs(execution_volume), - dt=data.current_dt, - price=execution_price, - order_id=order.id - ) + transaction = Transaction( + asset=order.asset, + amount=abs(execution_volume), + dt=dt, + price=execution_price, + order_id=order.id + ) - except LiquidityExceeded: - break - - if transaction: - self._volume_for_bar += abs(transaction.amount) - yield order, transaction + self._volume_for_bar += abs(transaction.amount) + yield order, transaction def process_order(self, data, order): price = data.current(order.asset, 'close') @@ -130,11 +115,11 @@ class TradingPairFixedSlippage(SlippageModel): adj_price = price * (1 + self.spread) else: # Sell order - adj_price = price & (1 - self.spread) + adj_price = price * (1 - self.spread) log.debug('added slippage to price: {} => {}'.format(price, adj_price)) - return (adj_price, order.amount) + return adj_price, order.amount class ExchangeBlotter(Blotter): diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index d823cd87..a6bfcbab 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -147,3 +147,17 @@ class BundleNotFoundError(ZiplineError): 'Please ingest data using the command ' '`catalyst ingest -b exchange_{exchange}`. 
' 'See catalyst documentation for details.').strip() + + +class PricingDataBeforeTradingError(ZiplineError): + msg = ('Pricing data for trading pairs {symbols} on exchange {exchange} ' + 'starts on {first_trading_day}.').strip() + + +class PricingDataNotLoadedError(ZiplineError): + msg = ('Pricing data {field} for trading pairs {symbols} trading on ' + 'exchange {exchange} since {first_trading_day} is unavailable. ' + 'The bundle data is either out-of-date or has not been loaded yet.' + 'Please ingest data using the command ' + '`catalyst ingest -b exchange_{exchange}`. ' + 'See catalyst documentation for details.').strip() diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 25d123cc..4e954acf 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -50,11 +50,10 @@ class ExchangeDataPortalTestCase: exchanges=dict(bitfinex=self.bitfinex), asset_finder=asset_finder, trading_calendar=open_calendar, - first_trading_day=pd.to_datetime('2017-09-10', utc=True) + first_trading_day=None # will set dynamically based on assets ) def test_get_history_window_live(self): - asset_finder = self.data_portal_live.asset_finder assets = [ @@ -82,8 +81,24 @@ class ExchangeDataPortalTestCase: assets, 'price', now, '1m') pass - def test_get_spot_value_backtest(self): + def test_get_history_window_backtest(self): + asset_finder = self.data_portal_live.asset_finder + assets = [ + asset_finder.lookup_symbol('neo_btc', self.bitfinex), + ] + + date = pd.to_datetime('2017-09-10', utc=True) + data = self.data_portal_backtest.get_history_window( + assets, + date, + 10, + '1m', + 'close', + 'minute') + pass + + def test_get_spot_value_backtest(self): asset_finder = self.data_portal_backtest.asset_finder assets = [ @@ -92,8 +107,6 @@ class ExchangeDataPortalTestCase: date = pd.to_datetime('2017-09-10', utc=True) value = self.data_portal_backtest.get_spot_value( - assets, 'close', date, 'minute') + assets[0], 'close', date, 
'minute') pass - def test_get_history_window_backtest(self): - pass From d6996b1e93c69ab3b77f5099257c68c9a376fd58 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sat, 23 Sep 2017 04:49:13 -0400 Subject: [PATCH 020/110] Refinements and documentation. --- catalyst/exchange/data_portal_exchange.py | 108 ++++++++++++---------- catalyst/exchange/exchange.py | 17 ++-- tests/exchange/test_data_portal.py | 2 +- 3 files changed, 66 insertions(+), 61 deletions(-) diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 3dac7678..c0af0e32 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -29,7 +29,7 @@ from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeBarDataError, BundleNotFoundError, PricingDataBeforeTradingError, - PricingDataNotLoadedError) + PricingDataNotLoadedError, InvalidHistoryFrequencyError) from catalyst.utils.paths import data_path log = Logger('DataPortalExchange') @@ -154,8 +154,9 @@ class DataPortalExchangeBase(DataPortal): try: if isinstance(assets, TradingPair): exchange = self.exchanges[assets.exchange] - return self.get_exchange_spot_value( - exchange, assets, field, dt, data_frequency) + spot_values = self.get_exchange_spot_value( + exchange, [assets], field, dt, data_frequency) + return spot_values[0] else: exchange_assets = dict() @@ -306,6 +307,15 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): @staticmethod def find_most_recent_time(bundle_name): + """ + Find most recent "time folder" for a given bundle. + + :param bundle_name: + The name of the targeted bundle. + + :return folder: + The name of the time folder. + """ try: bundle_folders = os.listdir( data_path([bundle_name]), @@ -326,6 +336,33 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): else: return None + def _get_reader(self, data_frequency, exchange_name): + """ + Pick from a collection of readers based of exchange name and frequency. 
+ + :param data_frequency: + The reader frequency: minute, 5-minute, daily. + + :param exchange_name: + The exchange name. + + :return reader: + A reader object. + """ + if data_frequency == 'minute': + reader = self.minute_bar_readers[exchange_name] + elif data_frequency == '5-minute': + reader = self.five_minute_bar_readers[exchange_name] + elif data_frequency == 'daily': + reader = self.daily_bar_readers[exchange_name] + else: + raise InvalidHistoryFrequencyError(frequency=data_frequency) + + if reader is None: + raise ValueError('reader not found') + + return reader + def get_exchange_history_window(self, exchange, assets, @@ -335,33 +372,31 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field, data_frequency, ffill=True): - if data_frequency == 'minute' or data_frequency == '1m': - reader = self.minute_bar_readers[exchange.name] + reader = self._get_reader(data_frequency, exchange.name) + if data_frequency == 'minute': dts = self.trading_calendar.minutes_window( end_dt, -bar_count ) self.ensure_after_first_day(dts[0], assets) - elif data_frequency == '5-minute' or data_frequency == '5m': - reader = self.five_minute_bar_readers[exchange.name] - elif data_frequency == 'daily' or data_frequency == '1d': - reader = self.daily_bar_readers[exchange.name] - + elif data_frequency == 'daily': session = self.trading_calendar.minute_to_session_label(end_dt) dts = self._get_days_for_window(session, bar_count) self.ensure_after_first_day(dts[0], assets) + else: - raise ValueError('Unsupported frequency') + raise InvalidHistoryFrequencyError(frequency=data_frequency) try: values = reader.load_raw_arrays( - [field], - dts[0], - dts[-1], - assets, + fields=[field], + start_dt=dts[0], + end_dt=dts[-1], + sids=[asset.sid for asset in assets], )[0] + except Exception: raise PricingDataNotLoadedError( field=field, @@ -393,50 +428,25 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): def get_exchange_spot_value(self, exchange, assets, field, dt, 
data_frequency): - if data_frequency == 'minute' or data_frequency == '1m': - reader = self.minute_bar_readers[exchange.name] - elif data_frequency == '5-minute' or data_frequency == '5m': - reader = self.five_minute_bar_readers[exchange.name] - elif data_frequency == 'daily' or data_frequency == '1d': - reader = self.daily_bar_readers[exchange.name] - else: - raise ValueError('Unsupported frequency') + reader = self._get_reader(data_frequency, exchange.name) - if isinstance(assets, TradingPair): - self.ensure_after_first_day(dt, [assets]) + self.ensure_after_first_day(dt, assets) + values = [] + for asset in assets: try: value = reader.get_value( - sid=assets.sid, + sid=asset.sid, dt=dt, field=field ) - return value + values.append(value) except Exception: raise PricingDataNotLoadedError( field=field, - first_trading_day=self._get_first_trading_day([assets]), + first_trading_day=self._get_first_trading_day(assets), exchange=exchange.name, - symbols=assets.symbol, + symbols=[asset.symbol for asset in assets], ) - else: - self.ensure_after_first_day(dt, assets) - values = [] - for asset in assets: - try: - value = reader.get_value( - sid=asset.sid, - dt=dt, - field=field - ) - values.append(value) - except Exception: - raise PricingDataNotLoadedError( - field=field, - first_trading_day=self._get_first_trading_day(assets), - exchange=exchange.name, - symbols=[asset.symbol for asset in assets], - ) - - return values + return values diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 361413a5..76319b04 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -306,19 +306,14 @@ class Exchange: '1D', '7D', '14D', '1M' """ if field not in BASE_FIELDS: - raise KeyError('Invalid column: ' + str(field)) + raise KeyError('Invalid column: {}'.format(field)) - if isinstance(assets, collections.Iterable): - values = list() - for asset in assets: - value = self.get_single_spot_value( - asset, field, data_frequency) - 
values.append(value) + values = [] + for asset in assets: + value = self.get_single_spot_value(asset, field, data_frequency) + values.append(value) - return values - else: - return self.get_single_spot_value( - assets, field, data_frequency) + return values def get_single_spot_value(self, asset, field, data_frequency): """ diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 4e954acf..2b881541 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -107,6 +107,6 @@ class ExchangeDataPortalTestCase: date = pd.to_datetime('2017-09-10', utc=True) value = self.data_portal_backtest.get_spot_value( - assets[0], 'close', date, 'minute') + assets, 'close', date, 'minute') pass From 4798fc75fb142edd3ef9065b31a7fcb5bf6de943 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 25 Sep 2017 12:18:22 -0400 Subject: [PATCH 021/110] Housekeeping and documentation --- .../buy_low_sell_high_neo_with_interface.py | 4 +- catalyst/exchange/exchange.py | 67 ++++++++++++++----- catalyst/exchange/exchange_bundle.py | 37 +++------- catalyst/utils/run_algo.py | 22 +++--- 4 files changed, 72 insertions(+), 58 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 0e215aba..88f22766 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -154,8 +154,8 @@ def analyze(context, stats): # exchange_name='bittrex,bitfinex', # live=True, # algo_namespace=algo_namespace, -# base_currency='eth', -# live_graph=True +# base_currency='btc', +# live_graph=False # ) run_algorithm( capital_base=250, diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 76319b04..ad4e89b9 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -437,6 +437,7 @@ class Exchange: data_frequency=frequency, assets=assets, 
bar_count=bar_count, + end_dt=end_dt ) series = dict() @@ -495,18 +496,6 @@ class Exchange: portfolio.portfolio_value = \ portfolio.positions_value + portfolio.cash - @abstractmethod - def get_balances(self): - """ - Retrieve wallet balances for the exchange - :return balances: A dict of currency => available balance - """ - pass - - @abstractmethod - def create_order(self, asset, amount, is_buy, style): - pass - def order(self, asset, amount, limit_price=None, stop_price=None, style=None): """Place an order. @@ -592,6 +581,34 @@ class Exchange: else: return None + # The methods below must be implemented for each exchange. + @abstractmethod + def get_balances(self): + """ + Retrieve wallet balances for the exchange + :return balances: A dict of currency => available balance + """ + pass + + @abstractmethod + def create_order(self, asset, amount, is_buy, style): + """ + Place an order on the exchange. + + :param asset : Asset + The asset that this order is for. + :param amount : int + The amount of shares to order. If ``amount`` is positive, this is + the number of shares to buy or cover. If ``amount`` is negative, + this is the number of shares to sell or short. + :param style : ExecutionStyle + The execution style for the order. + :param is_buy: boolean + Is it a buy order? + :return: + """ + pass + @abstractmethod def get_open_orders(self, asset): """Retrieve all of the current open orders. @@ -649,12 +666,28 @@ class Exchange: Retrieve OHLCV candles for the given assets :param data_frequency: - :param assets: - :param end_dt: + The candle frequency: minute, 5-minute or daily + :param assets: list[TradingPair] + The targeted assets. :param bar_count: - :param limit: - :param start_date: - :return: + The number of bar desired. (default 1) + :param end_dt: datetime, optional + The last bar date. + :param start_date: datetime, optional + The first bar date. + + :return dict[TradingPair, dict[str, Object]]: OHLCV data + A dictionary of OHLCV candles. 
Each TradingPair instance is + mapped to a list of dictionaries with this structure: + open: float + high: float + low: float + close: float + volume: float + last_traded: datetime + + See definition here: + http://www.investopedia.com/terms/o/ohlcchart.asp """ pass diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index a02363fd..9446a88a 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -32,33 +32,6 @@ def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): ) return candles - # series = dict() - # - # for asset in assets: - # asset_candles = candles[asset] - # - # candle_start_dt = None - # candle_end_dt = None - # for candle in asset_candles: - # last_traded = candle['last_traded'] - # - # if candle_start_dt is None or candle_start_dt > last_traded: - # candle_start_dt = last_traded - # - # if candle_end_dt is None or candle_end_dt < last_traded: - # candle_end_dt = last_traded - # - # - # asset_df = pd.DataFrame(asset_candles) - # if not asset_df.empty: - # asset_df.set_index('last_traded', inplace=True, drop=True) - # asset_df.sort_index(inplace=True) - # asset_df = asset_df.resample('1T').ffill() - # - # series[asset] = asset_df - # - # return series - def process_bar_data(exchange, assets, writer, data_frequency, show_progress, start, end): @@ -117,9 +90,15 @@ def process_bar_data(exchange, assets, writer, data_frequency, chunk_end = chunk['end'] chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) + chunk_assets = [] + for asset in assets: + if asset.start_date <= chunk_end: + chunk_assets.append(asset) + + # TODO: ensure correct behavior for assets starting in the chunk candles = fetch_candles_chunk( exchange=exchange, - assets=assets, + assets=chunk_assets, data_frequency=frequency, end_dt=chunk_end, bar_count=chunk['bar_count'] @@ -133,7 +112,7 @@ def process_bar_data(exchange, assets, writer, data_frequency, if not asset_candles: 
log.debug( 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=assets, + symbols=chunk_assets, exchange=exchange.name, end=chunk_end ) diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 401bd2e2..6a4e6f7c 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -143,7 +143,6 @@ def _run(handle_data, if exchange_name is None: raise ValueError('Please specify at least one exchange.') - exchange_list = [x.strip().lower() for x in exchange.split(',')] exchanges = dict() @@ -170,6 +169,7 @@ def _run(handle_data, base_currency=base_currency, portfolio=portfolio ) + elif exchange_name == 'bittrex': exchanges[exchange_name] = Bittrex( key=exchange_auth['key'], @@ -177,17 +177,11 @@ def _run(handle_data, base_currency=base_currency, portfolio=portfolio ) + else: raise ExchangeNotFoundError(exchange_name=exchange_name) open_calendar = get_calendar('OPEN') - sim_params = create_simulation_parameters( - start=start, - end=end, - capital_base=capital_base, - data_frequency=data_frequency, - emission_rate=data_frequency, - ) env = TradingEnvironment( environ=environ, @@ -274,9 +268,17 @@ def _run(handle_data, data = DataPortalExchangeBacktest( exchanges=exchanges, - asset_finder=env.asset_finder, + asset_finder=None, trading_calendar=open_calendar, - first_trading_day=start, + first_trading_day=None, + ) + + sim_params = create_simulation_parameters( + start=start, + end=end, + capital_base=capital_base, + data_frequency=data_frequency, + emission_rate=data_frequency, ) algorithm_class = partial( From f60abcd6363f6425173abb99446b778ce7ab7429 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 25 Sep 2017 11:28:06 -0600 Subject: [PATCH 022/110] WIP: Poloniex exchange class --- catalyst/exchange/poloniex/__init__.py | 0 catalyst/exchange/poloniex/poloniex.py | 508 +++++++++++++++++++++ catalyst/exchange/poloniex/poloniex_api.py | 126 +++++ catalyst/utils/run_algo.py | 10 +- 4 files changed, 643 insertions(+), 1 
deletion(-) create mode 100644 catalyst/exchange/poloniex/__init__.py create mode 100644 catalyst/exchange/poloniex/poloniex.py create mode 100644 catalyst/exchange/poloniex/poloniex_api.py diff --git a/catalyst/exchange/poloniex/__init__.py b/catalyst/exchange/poloniex/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py new file mode 100644 index 00000000..bf8069b8 --- /dev/null +++ b/catalyst/exchange/poloniex/poloniex.py @@ -0,0 +1,508 @@ +import base64 +import hashlib +import hmac +import json +import re +import time + +import numpy as np +import pandas as pd +import pytz +import requests +import six +from catalyst.assets._assets import TradingPair +from logbook import Logger + +from catalyst.exchange.poloniex.poloniex_api import Poloniex_api + + +# from websocket import create_connection +from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_errors import ( + ExchangeRequestError, + InvalidHistoryFrequencyError, + InvalidOrderStyle, OrderCancelError) +from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ + ExchangeStopLimitOrder, ExchangeStopOrder +from catalyst.finance.order import Order, ORDER_STATUS +from catalyst.protocol import Account +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename + + +log = Logger('Poloniex') + + +class Poloniex(Exchange): + def __init__(self, key, secret, base_currency, portfolio=None): + self.api = Poloniex_api(key=key, secret=secret.encode('UTF-8')) + self.name = 'poloniex' + self.assets = {} + #self.load_assets() + self.base_currency = base_currency + self._portfolio = portfolio + self.minute_writer = None + self.minute_reader = None + + def sanitize_curency_symbol(self, exchange_symbol): + """ + Helper method used to build the universal pair. + Include any symbol mapping here if appropriate. 
+ + :param exchange_symbol: + :return universal_symbol: + """ + return exchange_symbol.lower() + + ''' + def _create_order(self, order_status): + """ + Create a Catalyst order object from a Bitfinex order dictionary + :param order_status: + :return: Order + """ + if order_status['is_cancelled']: + status = ORDER_STATUS.CANCELLED + elif not order_status['is_live']: + log.info('found executed order {}'.format(order_status)) + status = ORDER_STATUS.FILLED + else: + status = ORDER_STATUS.OPEN + + amount = float(order_status['original_amount']) + filled = float(order_status['executed_amount']) + + if order_status['side'] == 'sell': + amount = -amount + filled = -filled + + price = float(order_status['price']) + order_type = order_status['type'] + + stop_price = None + limit_price = None + + # TODO: is this comprehensive enough? + if order_type.endswith('limit'): + limit_price = price + elif order_type.endswith('stop'): + stop_price = price + + executed_price = float(order_status['avg_execution_price']) + + # TODO: bitfinex does not specify comission. I could calculate it but not sure if it's worth it. 
+ commission = None + + date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) + date = pytz.utc.localize(date) + order = Order( + dt=date, + asset=self.assets[order_status['symbol']], + amount=amount, + stop=stop_price, + limit=limit_price, + filled=filled, + id=str(order_status['id']), + commission=commission + ) + order.status = status + + return order, executed_price + ''' + + def get_balances(self): + pass + ''' + log.debug('retrieving wallets balances') + try: + response = self._request('balances', None) + balances = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'message' in balances: + raise ExchangeRequestError( + error='unable to fetch balance {}'.format(balances['message']) + ) + + std_balances = dict() + for balance in balances: + currency = balance['currency'].lower() + std_balances[currency] = float(balance['available']) + + return std_balances + ''' + + @property + def account(self): + account = Account() + + account.settled_cash = None + account.accrued_interest = None + account.buying_power = None + account.equity_with_loan = None + account.total_positions_value = None + account.total_positions_exposure = None + account.regt_equity = None + account.regt_margin = None + account.initial_margin_requirement = None + account.maintenance_margin_requirement = None + account.available_funds = None + account.excess_liquidity = None + account.cushion = None + account.day_trades_remaining = None + account.leverage = None + account.net_leverage = None + account.net_liquidation = None + + return account + + @property + def time_skew(self): + # TODO: research the time skew conditions + return pd.Timedelta('0s') + + def get_account(self): + # TODO: fetch account data and keep in cache + return None + + def get_candles(self, data_frequency, assets, bar_count=None): + pass + ''' + """ + Retrieve OHLVC candles from Bitfinex + + :param data_frequency: + :param assets: + :param bar_count: + :return: + + Available 
Frequencies + --------------------- + '1m', '5m', '15m', '30m', '1h', '3h', '6h', '12h', '1D', '7D', '14D', + '1M' + """ + + # TODO: use BcolzMinuteBarReader to read from cache + freq_match = re.match(r'([0-9].*)(m|h|d)', data_frequency, re.M | re.I) + if freq_match: + number = int(freq_match.group(1)) + unit = freq_match.group(2) + + if unit == 'd': + converted_unit = 'D' + else: + converted_unit = unit + + frequency = '{}{}'.format(number, converted_unit) + allowed_frequencies = ['1m', '5m', '15m', '30m', '1h', '3h', '6h', + '12h', '1D', '7D', '14D', '1M'] + + if frequency not in allowed_frequencies: + raise InvalidHistoryFrequencyError( + frequency=data_frequency + ) + elif data_frequency == 'minute': + frequency = '1m' + elif data_frequency == 'daily': + frequency = '1D' + else: + raise InvalidHistoryFrequencyError( + frequency=data_frequency + ) + + # Making sure that assets are iterable + asset_list = [assets] if isinstance(assets, TradingPair) else assets + ohlc_map = dict() + for asset in asset_list: + symbol = self._get_v2_symbol(asset) + url = '{url}/v2/candles/trade:{frequency}:{symbol}'.format( + url=self.url, + frequency=frequency, + symbol=symbol + ) + + if bar_count: + is_list = True + url += '/hist?limit={}'.format(int(bar_count)) + else: + is_list = False + url += '/last' + + try: + response = requests.get(url) + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'error' in response.content: + raise ExchangeRequestError( + error='Unable to retrieve candles: {}'.format( + response.content) + ) + + candles = response.json() + + def ohlc_from_candle(candle): + ohlc = dict( + open=np.float64(candle[1]), + high=np.float64(candle[3]), + low=np.float64(candle[4]), + close=np.float64(candle[2]), + volume=np.float64(candle[5]), + price=np.float64(candle[2]), + last_traded=pd.Timestamp.utcfromtimestamp( + candle[0] / 1000.0) + ) + return ohlc + + if is_list: + ohlc_bars = [] + # We can to list candles from old to new + for candle in 
reversed(candles): + ohlc = ohlc_from_candle(candle) + ohlc_bars.append(ohlc) + + ohlc_map[asset] = ohlc_bars + + else: + ohlc = ohlc_from_candle(candles) + ohlc_map[asset] = ohlc + + return ohlc_map[assets] \ + if isinstance(assets, TradingPair) else ohlc_map + ''' + + def create_order(self, asset, amount, is_buy, style): + pass + ''' + """ + Creating order on the exchange. + + :param asset: + :param amount: + :param is_buy: + :param style: + :return: + """ + exchange_symbol = self.get_symbol(asset) + if isinstance(style, ExchangeLimitOrder) \ + or isinstance(style, ExchangeStopLimitOrder): + price = style.get_limit_price(is_buy) + order_type = 'limit' + + elif isinstance(style, ExchangeStopOrder): + price = style.get_stop_price(is_buy) + order_type = 'stop' + + else: + raise InvalidOrderStyle(exchange=self.name, + style=style.__class__.__name__) + + req = dict( + symbol=exchange_symbol, + amount=str(float(abs(amount))), + price="{:.20f}".format(float(price)), + side='buy' if is_buy else 'sell', + type='exchange ' + order_type, # TODO: support margin trades + exchange=self.name, + is_hidden=False, + is_postonly=False, + use_all_available=0, + ocoorder=False, + buy_price_oco=0, + sell_price_oco=0 + ) + + date = pd.Timestamp.utcnow() + try: + response = self._request('order/new', req) + order_status = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'message' in order_status: + raise ExchangeRequestError( + error='unable to create Bitfinex order {}'.format( + order_status['message']) + ) + + order_id = str(order_status['id']) + order = Order( + dt=date, + asset=asset, + amount=amount, + stop=style.get_stop_price(is_buy), + limit=style.get_limit_price(is_buy), + id=order_id + ) + + return order + ''' + + def get_open_orders(self, asset=None): + """Retrieve all of the current open orders. 
+ + Parameters + ---------- + asset : Asset + If passed and not None, return only the open orders for the given + asset instead of all open orders. + + Returns + ------- + open_orders : dict[list[Order]] or list[Order] + If no asset is passed this will return a dict mapping Assets + to a list containing all the open orders for the asset. + If an asset is passed then this will return a list of the open + orders for this asset. + """ + pass + ''' + try: + response = self._request('orders', None) + order_statuses = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'message' in order_statuses: + raise ExchangeRequestError( + error='Unable to retrieve open orders: {}'.format( + order_statuses['message']) + ) + + orders = list() + for order_status in order_statuses: + order, executed_price = self._create_order(order_status) + if asset is None or asset == order.sid: + orders.append(order) + + return orders + ''' + + def get_order(self, order_id): + """Lookup an order based on the order id returned from one of the + order functions. + + Parameters + ---------- + order_id : str + The unique identifier for the order. + + Returns + ------- + order : Order + The order object. + """ + pass + ''' + try: + response = self._request( + 'order/status', {'order_id': int(order_id)}) + order_status = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'message' in order_status: + raise ExchangeRequestError( + error='Unable to retrieve order status: {}'.format( + order_status['message']) + ) + return self._create_order(order_status) + ''' + + def cancel_order(self, order_param): + """Cancel an open order. + + Parameters + ---------- + order_param : str or Order + The order_id or order object to cancel. 
+ """ + pass + ''' + order_id = order_param.id \ + if isinstance(order_param, Order) else order_param + + try: + response = self._request('order/cancel', {'order_id': order_id}) + status = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'message' in status: + raise OrderCancelError( + order_id=order_id, + exchange=self.name, + error=status['message'] + ) + ''' + + def tickers(self, assets): + """ + Fetch ticket data for assets + https://docs.bitfinex.com/v2/reference#rest-public-tickers + + :param assets: + :return: + """ + pass + + ''' + symbols = self._get_v2_symbols(assets) + log.debug('fetching tickers {}'.format(symbols)) + + try: + response = requests.get( + '{url}/v2/tickers?symbols={symbols}'.format( + url=self.url, + symbols=','.join(symbols), + ) + ) + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'error' in response.content: + raise ExchangeRequestError( + error='Unable to retrieve tickers: {}'.format( + response.content) + ) + + tickers = response.json() + + ticks = dict() + for index, ticker in enumerate(tickers): + if not len(ticker) == 11: + raise ExchangeRequestError( + error='Invalid ticker in response: {}'.format(ticker) + ) + + ticks[assets[index]] = dict( + timestamp=pd.Timestamp.utcnow(), + bid=ticker[1], + ask=ticker[3], + last_price=ticker[7], + low=ticker[10], + high=ticker[9], + volume=ticker[8], + ) + + log.debug('got tickers {}'.format(ticks)) + return ticks + ''' + + def generate_symbols_json(self, filename=None): + symbol_map = {} + response = self.api.returnticker() + for exchange_symbol in response: + base, market = self.sanitize_curency_symbol(exchange_symbol).split('_') + symbol = '{market}_{base}'.format( market=market, base=base ) + symbol_map[exchange_symbol] = dict( + symbol = symbol, + start_date = '2010-01-01' + ) + + if(filename is None): + filename = get_exchange_symbols_filename(self.name) + + with open(filename,'w') as f: + json.dump(symbol_map, f, sort_keys=True, 
indent=2, separators=(',',':')) + diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py new file mode 100644 index 00000000..5094dd9f --- /dev/null +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +import json +import time +import hmac +import hashlib + +from six.moves import urllib + +# Workaround for backwards compatibility +# https://stackoverflow.com/questions/3745771/urllib-request-in-python-2-7 +urlopen = urllib.request.urlopen + + +class Poloniex_api(object): + def __init__(self, key, secret): + self.key = key + self.secret = secret + self.public = ['returnTicker', 'return24Volume', 'returnOrderBook', + 'returnTradeHistory', 'returnChartData', + 'returnCurrencies', 'returnLoanOrders'] + self.trading = ['returnBalances','returnCompleteBalances','returnDepositAddresses', + 'generateNewAddress','returnDepositsWithdrawals','returnOpenOrders', + 'returnTradeHistory','returnOrderTrades', + 'buy', 'sell', 'cancelOrder', 'moveOrder', + 'withdraw', 'returnFeeInfo','returnAvailableAccountBalances', + 'returnTradableBalances', 'transferBalance', + 'returnMarginAccountSummary','marginBuy','marginSell', + 'getMarginPosition', 'closeMarginPosition','createLoanOffer', + 'cancelLoanOffer','returnOpenLoanOffers','returnActiveLoans', + 'returnLendingHistory','toggleAutoRenew'] + + def query(self, method, values={}): + + if method in self.public: + url = 'https://poloniex.com/public?command=' + method + urllib.parse.urlencode(values) + headers = {} + post_data = None + elif method in self.trading: + url = 'https://poloniex.com/tradingApi' + req['command'] = method + req['nonce'] = int(time.time()*1000) + post_data = urllib.urlencode(req) + signature = hmac.new(self.secret, post_data, hashlib.sha512).hexdigest() + headers = { 'Sign': signature, 'Key': self.key} + + req = urllib.request.Request(url, data=post_data, headers=headers) + return json.loads(urlopen(req).read()) + + def 
returnticker(self): + return self.query('returnTicker') + + def return24volume(self): + return self.query('return24Volume') + + def returnOrderBook(self, market='all'): + return self.query('returnOrderBook', {'currencyPair': market}) + + def returntradehistory(self, market, start=None, end=None): + if(start is not None and end is not None): + return self.query('returntradehistory', + {'currencyPair': market, 'start': start, 'end': end }) + else: + return self.query('returntradehistory', {'currencyPair': market }) + + def returnchartdata(self, market, period, start, end): + return self.query('returnChartData', {'currencyPair': market, 'period': period, + 'start': start, 'end': end}) + + def returncurrencies(self): + return self.query('returnCurrencies') + + def returnloadorders(self, market): + return self.query('returnLoanOrders', {'market': market}) + + ''' + def buylimit(self, market, quantity, rate): + return self.query('buylimit', {'market': market, 'quantity': quantity, + 'rate': rate}) + + def buymarket(self, market, quantity): + return self.query('buymarket', + {'market': market, 'quantity': quantity}) + + def selllimit(self, market, quantity, rate): + return self.query('selllimit', {'market': market, 'quantity': quantity, + 'rate': rate}) + + def sellmarket(self, market, quantity): + return self.query('sellmarket', + {'market': market, 'quantity': quantity}) + + def cancel(self, uuid): + return self.query('cancel', {'uuid': uuid}) + + def getopenorders(self, market): + return self.query('getopenorders', {'market': market}) + + def getbalances(self): + return self.query('getbalances') + + def getbalance(self, currency): + return self.query('getbalance', {'currency': currency}) + + def getdepositaddress(self, currency): + return self.query('getdepositaddress', {'currency': currency}) + + def withdraw(self, currency, quantity, address): + return self.query('withdraw', + {'currency': currency, 'quantity': quantity, + 'address': address}) + + def getorder(self, 
uuid): + return self.query('getorder', {'uuid': uuid}) + + def getorderhistory(self, market, count): + return self.query('getorderhistory', + {'market': market, 'count': count}) + + def getwithdrawalhistory(self, currency, count): + return self.query('getwithdrawalhistory', + {'currency': currency, 'count': count}) + + def getdeposithistory(self, currency, count): + return self.query('getdeposithistory', + {'currency': currency, 'count': count}) + ''' diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index fc9f6354..63063c55 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -11,6 +11,8 @@ import pandas as pd import click from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.poloniex.poloniex import Poloniex try: from pygments import highlight @@ -39,7 +41,6 @@ import catalyst.utils.paths as pth from catalyst.exchange.algorithm_exchange import ExchangeTradingAlgorithm from catalyst.exchange.data_portal_exchange import DataPortalExchange -from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.exchange_portfolio import ExchangePortfolio from catalyst.exchange.exchange_errors import ( @@ -178,6 +179,13 @@ def _run(handle_data, base_currency=base_currency, portfolio=portfolio ) + elif exchange_name == 'poloniex': + exchange = Poloniex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) else: raise NotImplementedError( 'exchange not supported: %s' % exchange_name) From 5d1bdee4a62aabfc954c126e3f108cea743d4428 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 25 Sep 2017 14:35:58 -0600 Subject: [PATCH 023/110] WIP: Poloniex exchange - generating symbols.json --- catalyst/curate/poloniex.py | 39 ++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 
deletion(-) diff --git a/catalyst/curate/poloniex.py b/catalyst/curate/poloniex.py index 911ac25f..e2a88476 100644 --- a/catalyst/curate/poloniex.py +++ b/catalyst/curate/poloniex.py @@ -2,10 +2,13 @@ import json, time, csv from datetime import datetime import pandas as pd import os, time, shutil, requests, logbook +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename + DT_START = int(time.mktime(datetime(2010, 1, 1, 0, 0).timetuple())) DT_END = int(time.time()) CSV_OUT_FOLDER = '/var/tmp/catalyst/data/poloniex/' +CSV_OUT_FOLDER = '/Volumes/enigma/data/poloniex/' CONN_RETRIES = 2 logbook.StderrHandler().push_application() @@ -247,11 +250,45 @@ class PoloniexCurator(object): df.set_index('date', inplace=True) return df[start : end] + ''' + Generates a symbols.json file with corresponding start_date for each currencyPair + ''' + def generate_symbols_json(self, filename=None): + symbol_map = {} + + if(filename is None): + filename = get_exchange_symbols_filename('poloniex') + + with open(filename, 'w') as symbols: + for currencyPair in self.currency_pairs: + start = None + csv_fn = CSV_OUT_FOLDER + 'crypto_trades-' + currencyPair + '.csv' + with open(csv_fn, 'r') as f: + f.seek(0, os.SEEK_END) + if(f.tell() > 2): # First check file is not zero size + f.seek(-2, os.SEEK_END) # Jump to the second last byte. + while f.read(1) != b"\n": # Until EOL is found... + f.seek(-2, os.SEEK_CUR) # ...jump back the read byte plus one more. 
+ start = pd.to_datetime( f.readline().split(',')[1], infer_datetime_format=True) + + if(start is None): + start = time.gmtime() + base, market = currencyPair.lower().split('_') + symbol = '{market}_{base}'.format( market=market, base=base ) + symbol_map[currencyPair] = dict( + symbol = symbol, + start_date = start.strftime("%Y-%m-%d") + ) + json.dump(symbol_map, symbols, sort_keys=True, indent=2, separators=(',',':')) + if __name__ == '__main__': pc = PoloniexCurator() pc.get_currency_pairs() - + #pc.generate_symbols_json() + for currencyPair in pc.currency_pairs: pc.retrieve_trade_history(currencyPair) pc.write_ohlcv_file(currencyPair) + + \ No newline at end of file From cf20f78e5532048697df18c5c42d3d54f5564b4e Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 25 Sep 2017 22:01:04 -0600 Subject: [PATCH 024/110] WIP: Poloniex exchange - balances, candles & cancel --- catalyst/exchange/poloniex/poloniex.py | 197 +++++++++------------ catalyst/exchange/poloniex/poloniex_api.py | 29 +-- 2 files changed, 103 insertions(+), 123 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index bf8069b8..2b5b3586 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -9,7 +9,8 @@ import numpy as np import pandas as pd import pytz import requests -import six +#import six +from six import iteritems from catalyst.assets._assets import TradingPair from logbook import Logger @@ -37,12 +38,13 @@ class Poloniex(Exchange): self.api = Poloniex_api(key=key, secret=secret.encode('UTF-8')) self.name = 'poloniex' self.assets = {} - #self.load_assets() + self.load_assets() self.base_currency = base_currency self._portfolio = portfolio self.minute_writer = None self.minute_reader = None + def sanitize_curency_symbol(self, exchange_symbol): """ Helper method used to build the universal pair. 
@@ -53,47 +55,51 @@ class Poloniex(Exchange): """ return exchange_symbol.lower() - ''' + def _create_order(self, order_status): """ - Create a Catalyst order object from a Bitfinex order dictionary + Create a Catalyst order object from the Exchange order dictionary :param order_status: :return: Order """ - if order_status['is_cancelled']: - status = ORDER_STATUS.CANCELLED - elif not order_status['is_live']: - log.info('found executed order {}'.format(order_status)) - status = ORDER_STATUS.FILLED - else: - status = ORDER_STATUS.OPEN + #if order_status['is_cancelled']: + # status = ORDER_STATUS.CANCELLED + #elif not order_status['is_live']: + # log.info('found executed order {}'.format(order_status)) + # status = ORDER_STATUS.FILLED + #else: + status = ORDER_STATUS.OPEN - amount = float(order_status['original_amount']) - filled = float(order_status['executed_amount']) + amount = float(order_status['amount']) + #filled = float(order_status['executed_amount']) + filled = None - if order_status['side'] == 'sell': + if order_status['type'] == 'sell': amount = -amount - filled = -filled + #filled = -filled - price = float(order_status['price']) + price = float(order_status['rate']) order_type = order_status['type'] stop_price = None limit_price = None # TODO: is this comprehensive enough? - if order_type.endswith('limit'): - limit_price = price - elif order_type.endswith('stop'): - stop_price = price + #if order_type.endswith('limit'): + # limit_price = price + #elif order_type.endswith('stop'): + # stop_price = price - executed_price = float(order_status['avg_execution_price']) + #executed_price = float(order_status['avg_execution_price']) + executed_price = price # TODO: bitfinex does not specify comission. I could calculate it but not sure if it's worth it. 
commission = None - date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) - date = pytz.utc.localize(date) + #date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) + #date = pytz.utc.localize(date) + date = None + order = Order( dt=date, asset=self.assets[order_status['symbol']], @@ -101,36 +107,34 @@ class Poloniex(Exchange): stop=stop_price, limit=limit_price, filled=filled, - id=str(order_status['id']), + id=str(order_status['orderNumber']), commission=commission ) order.status = status return order, executed_price - ''' + def get_balances(self): - pass - ''' log.debug('retrieving wallets balances') try: - response = self._request('balances', None) - balances = response.json() + balances = self.api.returnbalances() except Exception as e: + log.debug(e) raise ExchangeRequestError(error=e) - if 'message' in balances: + if 'error' in balances: raise ExchangeRequestError( - error='unable to fetch balance {}'.format(balances['message']) + error='unable to fetch balance {}'.format(balances['error']) ) std_balances = dict() - for balance in balances: - currency = balance['currency'].lower() - std_balances[currency] = float(balance['available']) + for (key, value) in iteritems(balances): + currency = key.lower() + std_balances[currency] = float(value) return std_balances - ''' + @property def account(self): @@ -166,10 +170,8 @@ class Poloniex(Exchange): return None def get_candles(self, data_frequency, assets, bar_count=None): - pass - ''' """ - Retrieve OHLVC candles from Bitfinex + Retrieve OHLVC candles from Poloniex :param data_frequency: :param assets: @@ -178,33 +180,22 @@ class Poloniex(Exchange): Available Frequencies --------------------- - '1m', '5m', '15m', '30m', '1h', '3h', '6h', '12h', '1D', '7D', '14D', - '1M' + '5m', '15m', '30m', '2h', '4h', '1D' """ # TODO: use BcolzMinuteBarReader to read from cache - freq_match = re.match(r'([0-9].*)(m|h|d)', data_frequency, re.M | re.I) - if freq_match: - number = 
int(freq_match.group(1)) - unit = freq_match.group(2) - - if unit == 'd': - converted_unit = 'D' - else: - converted_unit = unit - - frequency = '{}{}'.format(number, converted_unit) - allowed_frequencies = ['1m', '5m', '15m', '30m', '1h', '3h', '6h', - '12h', '1D', '7D', '14D', '1M'] - - if frequency not in allowed_frequencies: - raise InvalidHistoryFrequencyError( - frequency=data_frequency - ) - elif data_frequency == 'minute': - frequency = '1m' - elif data_frequency == 'daily': - frequency = '1D' + if(data_frequency == '5m' or data_frequency == 'minute'): #TODO: Polo does not have '1m' + frequency = 300 + elif(data_frequency == '15m'): + frequency = 900 + elif(data_frequency == '30m'): + frequency = 1800 + elif(data_frequency == '2h'): + frequency = 7200 + elif(data_frequency == '4h'): + frequency = 14400 + elif(data_frequency == '1D' or data_frequency == 'daily'): + frequency = 86400 else: raise InvalidHistoryFrequencyError( frequency=data_frequency @@ -213,63 +204,51 @@ class Poloniex(Exchange): # Making sure that assets are iterable asset_list = [assets] if isinstance(assets, TradingPair) else assets ohlc_map = dict() + for asset in asset_list: - symbol = self._get_v2_symbol(asset) - url = '{url}/v2/candles/trade:{frequency}:{symbol}'.format( - url=self.url, - frequency=frequency, - symbol=symbol - ) - if bar_count: - is_list = True - url += '/hist?limit={}'.format(int(bar_count)) + end = int(time.time()) + if(bar_count is None): + start = end - 2 * frequency else: - is_list = False - url += '/last' + start = end - bar_count * frequency - try: - response = requests.get(url) + try: + response = self.api.returnchartdata(self.get_symbol(asset),frequency, start, end) except Exception as e: raise ExchangeRequestError(error=e) - if 'error' in response.content: + if 'error' in response: raise ExchangeRequestError( error='Unable to retrieve candles: {}'.format( response.content) ) - candles = response.json() - def ohlc_from_candle(candle): ohlc = dict( - 
open=np.float64(candle[1]), - high=np.float64(candle[3]), - low=np.float64(candle[4]), - close=np.float64(candle[2]), - volume=np.float64(candle[5]), - price=np.float64(candle[2]), - last_traded=pd.Timestamp.utcfromtimestamp( - candle[0] / 1000.0) + open=np.float64(candle['open']), + high=np.float64(candle['high']), + low=np.float64(candle['low']), + close=np.float64(candle['close']), + volume=np.float64(candle['volume']), + price=np.float64(candle['close']), + last_traded=pd.Timestamp.utcfromtimestamp( candle['date'] ) ) + return ohlc - if is_list: + if bar_count is None: + ohlc_map[asset] = ohlc_from_candle(response[0]) + else: ohlc_bars = [] - # We can to list candles from old to new - for candle in reversed(candles): + for candle in response: ohlc = ohlc_from_candle(candle) ohlc_bars.append(ohlc) - ohlc_map[asset] = ohlc_bars - else: - ohlc = ohlc_from_candle(candles) - ohlc_map[asset] = ohlc - return ohlc_map[assets] \ if isinstance(assets, TradingPair) else ohlc_map - ''' + def create_order(self, asset, amount, is_buy, style): pass @@ -338,7 +317,7 @@ class Poloniex(Exchange): return order ''' - def get_open_orders(self, asset=None): + def get_open_orders(self, asset='all'): """Retrieve all of the current open orders. Parameters @@ -355,28 +334,29 @@ class Poloniex(Exchange): If an asset is passed then this will return a list of the open orders for this asset. 
""" - pass - ''' try: - response = self._request('orders', None) - order_statuses = response.json() + if(asset=='all'): + response = self.api.returnopenorders('all') + else: + response = self.api.returnopenorders(self.get_symbol(asset)) except Exception as e: raise ExchangeRequestError(error=e) - if 'message' in order_statuses: + if 'error' in response: raise ExchangeRequestError( error='Unable to retrieve open orders: {}'.format( order_statuses['message']) ) + #TODO: Need to handle openOrders for 'all' orders = list() - for order_status in order_statuses: + for order_status in response: order, executed_price = self._create_order(order_status) if asset is None or asset == order.sid: orders.append(order) return orders - ''' + def get_order(self, order_id): """Lookup an order based on the order id returned from one of the @@ -417,24 +397,21 @@ class Poloniex(Exchange): order_param : str or Order The order_id or order object to cancel. """ - pass - ''' order_id = order_param.id \ if isinstance(order_param, Order) else order_param try: - response = self._request('order/cancel', {'order_id': order_id}) - status = response.json() + response = self.api.cancelorder(order_id) except Exception as e: raise ExchangeRequestError(error=e) - if 'message' in status: + if 'error' in response: raise OrderCancelError( order_id=order_id, exchange=self.name, - error=status['message'] + error=response['error'] ) - ''' + def tickers(self, assets): """ diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py index 5094dd9f..2efdd858 100644 --- a/catalyst/exchange/poloniex/poloniex_api.py +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -29,19 +29,22 @@ class Poloniex_api(object): 'cancelLoanOffer','returnOpenLoanOffers','returnActiveLoans', 'returnLendingHistory','toggleAutoRenew'] - def query(self, method, values={}): + def query(self, method, req={}): if method in self.public: - url = 'https://poloniex.com/public?command=' + method + 
urllib.parse.urlencode(values) + url = 'https://poloniex.com/public?command=' + method + '&' + urllib.parse.urlencode(req) headers = {} post_data = None elif method in self.trading: url = 'https://poloniex.com/tradingApi' req['command'] = method req['nonce'] = int(time.time()*1000) - post_data = urllib.urlencode(req) + post_data = urllib.parse.urlencode(req) + print(post_data) signature = hmac.new(self.secret, post_data, hashlib.sha512).hexdigest() headers = { 'Sign': signature, 'Key': self.key} + else: + raise ValueError('Method "' + method + '" not found in neither the Public API or Trading API endpoints') req = urllib.request.Request(url, data=post_data, headers=headers) return json.loads(urlopen(req).read()) @@ -70,7 +73,16 @@ class Poloniex_api(object): return self.query('returnCurrencies') def returnloadorders(self, market): - return self.query('returnLoanOrders', {'market': market}) + return self.query('returnLoanOrders', {'currency': market}) + + def returnbalances(self): + return self.query('returnBalances') + + def returnopenorders(self, market): + return self.query('returnOpenOrders', {'currencyPair': market}) + + def cancelorder(self, ordernumber): + return self.query('cancelOrder', {'orderNumber': ordernumber}) ''' def buylimit(self, market, quantity, rate): @@ -89,15 +101,6 @@ class Poloniex_api(object): return self.query('sellmarket', {'market': market, 'quantity': quantity}) - def cancel(self, uuid): - return self.query('cancel', {'uuid': uuid}) - - def getopenorders(self, market): - return self.query('getopenorders', {'market': market}) - - def getbalances(self): - return self.query('getbalances') - def getbalance(self, currency): return self.query('getbalance', {'currency': currency}) From 2c2c861a8f3629f994ba0dbbc1c7bb86256d4459 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 26 Sep 2017 11:40:25 -0600 Subject: [PATCH 025/110] WIP: Poloniex exchange - fix for multiple exchanges --- catalyst/exchange/poloniex/poloniex_api.py | 1 - 
catalyst/utils/run_algo.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py index 2efdd858..ee5f5198 100644 --- a/catalyst/exchange/poloniex/poloniex_api.py +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -40,7 +40,6 @@ class Poloniex_api(object): req['command'] = method req['nonce'] = int(time.time()*1000) post_data = urllib.parse.urlencode(req) - print(post_data) signature = hmac.new(self.secret, post_data, hashlib.sha512).hexdigest() headers = { 'Sign': signature, 'Key': self.key} else: diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index de8e64a5..9130d2af 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -170,7 +170,6 @@ def _run(handle_data, base_currency=base_currency, portfolio=portfolio ) - elif exchange_name == 'bittrex': exchanges[exchange_name] = Bittrex( key=exchange_auth['key'], @@ -179,7 +178,7 @@ def _run(handle_data, portfolio=portfolio ) elif exchange_name == 'poloniex': - exchange = Poloniex( + exchanges[exchange_name] = Poloniex( key=exchange_auth['key'], secret=exchange_auth['secret'], base_currency=base_currency, From 87ecf6114d237da4ad6ed831707c05d5fdf23948 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 26 Sep 2017 13:32:23 -0600 Subject: [PATCH 026/110] adding min_trade_size in TradingPair --- catalyst/assets/_assets.pyx | 17 ++++++++++++----- catalyst/exchange/exchange.py | 8 +++++++- catalyst/exchange/exchange_algorithm.py | 6 +++--- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/catalyst/assets/_assets.pyx b/catalyst/assets/_assets.pyx index 05144223..b98127f7 100644 --- a/catalyst/assets/_assets.pyx +++ b/catalyst/assets/_assets.pyx @@ -408,7 +408,8 @@ cdef class TradingPair(Asset): 'exchange_full', 'leverage', 'market_currency', - 'base_currency' + 'base_currency', + 'min_trade_size', }) def __init__(self, object symbol, @@ -420,7 +421,8 @@ cdef 
class TradingPair(Asset): object end_date=None, object first_traded=None, object auto_close_date=None, - object exchange_full=None): + object exchange_full=None, + object min_trade_size=None): """ Replicates the Asset constructor with some built-in conventions and a new 'leverage' attribute. @@ -476,6 +478,7 @@ cdef class TradingPair(Asset): :param first_traded: :param auto_close_date: :param exchange_full: + :param min_trade_size: """ symbol = symbol.lower() @@ -509,6 +512,7 @@ cdef class TradingPair(Asset): first_traded=first_traded, auto_close_date=auto_close_date, exchange_full=exchange_full, + min_trade_size=min_trade_size ) self.leverage = leverage @@ -518,14 +522,16 @@ cdef class TradingPair(Asset): 'Introduced On: {start_date}, ' \ 'Market Currency: {market_currency}, ' \ 'Base Currency: {base_currency}, ' \ - 'Exchange Leverage: {leverage}'.format( + 'Exchange Leverage: {leverage}, ' \ + 'Minimum Trade Size: {min_trade_size}'.format( symbol=self.symbol, sid=self.sid, exchange=self.exchange, start_date=self.start_date, market_currency=self.market_currency, base_currency=self.base_currency, - leverage=self.leverage + leverage=self.leverage, + min_trade_size=self.min_trade_size ) cpdef __reduce__(self): @@ -544,7 +550,8 @@ cdef class TradingPair(Asset): self.end_date, self.first_traded, self.auto_close_date, - self.exchange_full)) + self.exchange_full, + self.min_trade_size)) def make_asset_array(int size, Asset asset): cdef np.ndarray out = np.empty([size], dtype=object) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index ad4e89b9..ade14dfe 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -218,13 +218,19 @@ class Exchange: else: asset_name = None + if 'min_trade_size' in asset: + min_trade_size = asset['min_trade_size'] + else: + min_trade_size = 0.0000001 + trading_pair = TradingPair( symbol=asset['symbol'], exchange=self.name, start_date=start_date, end_date=end_date, leverage=leverage, - 
asset_name=asset_name + asset_name=asset_name, + min_trade_size=min_trade_size ) self.assets[exchange_symbol] = trading_pair diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index c9db3a12..3304fcc1 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -52,6 +52,7 @@ from catalyst.utils.api_support import ( from catalyst.utils.input_validation import error_keywords, ensure_upper_case, \ expect_types from catalyst.utils.preprocess import preprocess +from catalyst.utils.math_utils import round_nearest log = logbook.Logger('exchange_algorithm') @@ -67,15 +68,14 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): super(ExchangeTradingAlgorithmBase, self).__init__(*args, **kwargs) - def round_order(self, amount): + def round_order(self, amount, asset): """ We need fractions with cryptocurrencies :param amount: :return: """ - # TODO: is this good enough? Victor has a better solution. - return amount + return round_nearest(amount, asset.min_trade_size) @api_method @preprocess(symbol_str=ensure_upper_case) From 1d0faf693d0f57d6ffe22beb186e956adde97b61 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 26 Sep 2017 15:36:14 -0600 Subject: [PATCH 027/110] WIP: Poloniex exchange - create order --- catalyst/exchange/poloniex/poloniex.py | 121 ++++++++------------- catalyst/exchange/poloniex/poloniex_api.py | 95 +++++++++------- 2 files changed, 100 insertions(+), 116 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 2b5b3586..92204e27 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -251,8 +251,6 @@ class Poloniex(Exchange): def create_order(self, asset, amount, is_buy, style): - pass - ''' """ Creating order on the exchange. 
@@ -263,59 +261,43 @@ class Poloniex(Exchange): :return: """ exchange_symbol = self.get_symbol(asset) - if isinstance(style, ExchangeLimitOrder) \ - or isinstance(style, ExchangeStopLimitOrder): + + if isinstance(style, ExchangeLimitOrder) or isinstance(style, ExchangeStopLimitOrder): + if isinstance(style, ExchangeStopLimitOrder): + log.warn('{} will ignore the stop price'.format(self.name)) + price = style.get_limit_price(is_buy) - order_type = 'limit' - elif isinstance(style, ExchangeStopOrder): - price = style.get_stop_price(is_buy) - order_type = 'stop' + try: + if(is_buy): + response = self.api.buy(exchange_symbol, amount, price) + else: + reponse = self.api.sell(exchange_symbol, amount, price) + except Exception as e: + raise ExchangeRequestError(error=e) + date = pd.Timestamp.utcnow() + + print(response) + + if('orderNumber' in response): + order_id = str(response['orderNumber']) + order = Order( + dt=date, + asset=asset, + amount=amount, + stop=style.get_stop_price(is_buy), + limit=style.get_limit_price(is_buy), + id=order_id + ) + return order + else: + log.warn('{} order failed: {}'.format('buy' if is_buy else 'sell', response['error'])) + return None else: raise InvalidOrderStyle(exchange=self.name, style=style.__class__.__name__) - req = dict( - symbol=exchange_symbol, - amount=str(float(abs(amount))), - price="{:.20f}".format(float(price)), - side='buy' if is_buy else 'sell', - type='exchange ' + order_type, # TODO: support margin trades - exchange=self.name, - is_hidden=False, - is_postonly=False, - use_all_available=0, - ocoorder=False, - buy_price_oco=0, - sell_price_oco=0 - ) - - date = pd.Timestamp.utcnow() - try: - response = self._request('order/new', req) - order_status = response.json() - except Exception as e: - raise ExchangeRequestError(error=e) - - if 'message' in order_status: - raise ExchangeRequestError( - error='unable to create Bitfinex order {}'.format( - order_status['message']) - ) - - order_id = str(order_status['id']) - order = 
Order( - dt=date, - asset=asset, - amount=amount, - stop=style.get_stop_price(is_buy), - limit=style.get_limit_price(is_buy), - id=order_id - ) - - return order - ''' def get_open_orders(self, asset='all'): """Retrieve all of the current open orders. @@ -420,51 +402,40 @@ class Poloniex(Exchange): :param assets: :return: - """ - pass - - ''' - symbols = self._get_v2_symbols(assets) + """ + symbols = [] + for asset in assets: + symbols.append(self.get_symbol(asset)) log.debug('fetching tickers {}'.format(symbols)) try: - response = requests.get( - '{url}/v2/tickers?symbols={symbols}'.format( - url=self.url, - symbols=','.join(symbols), - ) - ) + response = self.api.returnticker() except Exception as e: raise ExchangeRequestError(error=e) - if 'error' in response.content: + if 'error' in response: raise ExchangeRequestError( error='Unable to retrieve tickers: {}'.format( - response.content) + response['error']) ) - tickers = response.json() - ticks = dict() - for index, ticker in enumerate(tickers): - if not len(ticker) == 11: - raise ExchangeRequestError( - error='Invalid ticker in response: {}'.format(ticker) - ) + + for index, symbol in enumerate(symbols): ticks[assets[index]] = dict( timestamp=pd.Timestamp.utcnow(), - bid=ticker[1], - ask=ticker[3], - last_price=ticker[7], - low=ticker[10], - high=ticker[9], - volume=ticker[8], + bid=float(response[symbol]['highestBid']), + ask=float(response[symbol]['lowestAsk']), + last_price=float(response[symbol]['last']), + low=float(response[symbol]['lowestAsk']), #TODO: Polo does not provide low + high=float(response[symbol]['highestBid']), #TODO: Polo does not provide high + volume=float(response[symbol]['baseVolume']), ) log.debug('got tickers {}'.format(ticks)) return ticks - ''' + def generate_symbols_json(self, filename=None): symbol_map = {} diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py index ee5f5198..3a181c07 100644 --- 
a/catalyst/exchange/poloniex/poloniex_api.py +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -49,10 +49,10 @@ class Poloniex_api(object): return json.loads(urlopen(req).read()) def returnticker(self): - return self.query('returnTicker') + return self.query('returnTicker', {}) def return24volume(self): - return self.query('return24Volume') + return self.query('return24Volume', {}) def returnOrderBook(self, market='all'): return self.query('returnOrderBook', {'currencyPair': market}) @@ -69,7 +69,7 @@ class Poloniex_api(object): 'start': start, 'end': end}) def returncurrencies(self): - return self.query('returnCurrencies') + return self.query('returnCurrencies', {}) def returnloadorders(self, market): return self.query('returnLoanOrders', {'currency': market}) @@ -77,52 +77,65 @@ class Poloniex_api(object): def returnbalances(self): return self.query('returnBalances') + def returncompletebalances(self, account): + if(account): + return self.query('returnCompleteBalances', {'account': account}) + else: + return self.query('returnCompleteBalances') + + def returndepositaddresses(self): + return self.query('returnDepositAddresses') + + def generatenewaddress(self, currency): + return self.query('generateNewAddress', {'currency': currency}) + + def returnDepositsWithdrawals(self, start, end): + return self.query('returnDepositsWithdrawals', {'start': start, 'end': end}) + def returnopenorders(self, market): return self.query('returnOpenOrders', {'currencyPair': market}) + def returntradehistory(self, market): + #TODO: optional start and/or end and limit + return self.query('returnTradeHistory', {'currencyPair': market}) + + def returnordertrades(self, ordernumber): + return self.query('returnOrderTrades', {'orderNumber': ordernumber}) + + def buy(self, market, amount, rate, fillorkill=0, immediateorcancel=0, postonly=0): + if(fillorkill): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'fillOrKill': fillorkill, }) + 
elif(immediateorcancel): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'immediateOrCancel': immediateorcancel, }) + elif(postonly): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'postOnly': postonly, }) + else: + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, }) + + def sell(self, market, amount, rate, fillorkill=0, immediateorcancel=0, postonly=0): + if(fillorkill): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'fillOrKill': fillorkill, }) + elif(immediateorcancel): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'immediateOrCancel': immediateorcancel, }) + elif(postonly): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'postOnly': postonly, }) + else: + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, }) + def cancelorder(self, ordernumber): return self.query('cancelOrder', {'orderNumber': ordernumber}) - ''' - def buylimit(self, market, quantity, rate): - return self.query('buylimit', {'market': market, 'quantity': quantity, - 'rate': rate}) - - def buymarket(self, market, quantity): - return self.query('buymarket', - {'market': market, 'quantity': quantity}) - - def selllimit(self, market, quantity, rate): - return self.query('selllimit', {'market': market, 'quantity': quantity, - 'rate': rate}) - - def sellmarket(self, market, quantity): - return self.query('sellmarket', - {'market': market, 'quantity': quantity}) - - def getbalance(self, currency): - return self.query('getbalance', {'currency': currency}) - - def getdepositaddress(self, currency): - return self.query('getdepositaddress', {'currency': currency}) - def withdraw(self, currency, quantity, address): return self.query('withdraw', - {'currency': currency, 'quantity': quantity, + {'currency': currency, 'amount': quantity, 'address': address}) - 
def getorder(self, uuid): - return self.query('getorder', {'uuid': uuid}) + def returnfeeinfo(self): + return self.query('returnFeeInfo') - def getorderhistory(self, market, count): - return self.query('getorderhistory', - {'market': market, 'count': count}) - - def getwithdrawalhistory(self, currency, count): - return self.query('getwithdrawalhistory', - {'currency': currency, 'count': count}) - - def getdeposithistory(self, currency, count): - return self.query('getdeposithistory', - {'currency': currency, 'count': count}) - ''' From 3362dbf95c0c0cfb6492afe5ac040ff1d9757e95 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 27 Sep 2017 14:31:15 -0600 Subject: [PATCH 028/110] WIP: Poloniex exchange - placing orders, executing transactions --- catalyst/exchange/exchange_errors.py | 6 ++ catalyst/exchange/exchange_portfolio.py | 24 +++++ catalyst/exchange/poloniex/poloniex.py | 138 ++++++++++++++++++++---- 3 files changed, 146 insertions(+), 22 deletions(-) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index a6bfcbab..7e751981 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -101,6 +101,12 @@ class OrphanOrderError(ZiplineError): ).strip() +class OrphanOrderReverseError(ZiplineError): + msg = ( + 'Order {order_id} tracked by algorithm, but not found in exchange {exchange}.' + ).strip() + + class OrderCancelError(ZiplineError): msg = ( 'Unable to cancel order {order_id} on exchange {exchange} {error}.' 
diff --git a/catalyst/exchange/exchange_portfolio.py b/catalyst/exchange/exchange_portfolio.py index ded8a2a4..d7105480 100644 --- a/catalyst/exchange/exchange_portfolio.py +++ b/catalyst/exchange/exchange_portfolio.py @@ -70,6 +70,30 @@ class ExchangePortfolio(Portfolio): log.debug('updated portfolio with executed order') + def execute_transaction(self, transaction): + log.debug('executing transaction {}'.format(transaction.order_id)) + + order_position = self.positions[transaction.asset] \ + if transaction.asset in self.positions else None + + if order_position is None: + raise ValueError( + 'Trying to execute transaction for a position not held: %s' % transaction.order_id + ) + + self.capital_used += transaction.amount * transaction.price + + if transaction.amount > 0: + if order_position.cost_basis > 0: + order_position.cost_basis = np.average( + [order_position.cost_basis, transaction.price], + weights=[order_position.amount, transaction.amount] + ) + else: + order_position.cost_basis = transaction.price + + log.debug('updated portfolio with executed order') + def remove_order(self, order): log.info('removing cancelled order {}'.format(order.id)) del self.open_orders[order.id] diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 92204e27..2a0b8474 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -4,6 +4,7 @@ import hmac import json import re import time +from collections import defaultdict import numpy as np import pandas as pd @@ -16,13 +17,13 @@ from logbook import Logger from catalyst.exchange.poloniex.poloniex_api import Poloniex_api - # from websocket import create_connection from catalyst.exchange.exchange import Exchange from catalyst.exchange.exchange_errors import ( ExchangeRequestError, InvalidHistoryFrequencyError, - InvalidOrderStyle, OrderCancelError) + InvalidOrderStyle, OrderCancelError, + OrphanOrderReverseError) from 
catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ ExchangeStopLimitOrder, ExchangeStopOrder from catalyst.finance.order import Order, ORDER_STATUS @@ -43,6 +44,7 @@ class Poloniex(Exchange): self._portfolio = portfolio self.minute_writer = None self.minute_reader = None + self.transactions = defaultdict(list) def sanitize_curency_symbol(self, exchange_symbol): @@ -102,7 +104,7 @@ class Poloniex(Exchange): order = Order( dt=date, - asset=self.assets[order_status['symbol']], + asset=self.assets[order_status['symbol']], # No such field in Poloniex amount=amount, stop=stop_price, limit=limit_price, @@ -305,17 +307,25 @@ class Poloniex(Exchange): Parameters ---------- asset : Asset - If passed and not None, return only the open orders for the given + If passed and not 'all', return only the open orders for the given asset instead of all open orders. Returns ------- open_orders : dict[list[Order]] or list[Order] - If no asset is passed this will return a dict mapping Assets + If 'all' is passed this will return a dict mapping Assets to a list containing all the open orders for the asset. If an asset is passed then this will return a list of the open orders for this asset. """ + + return self.portfolio.open_orders + + """ + TODO: Why going to the exchange if we already have this info locally? + And why creating all these Orders if we later discard them? + """ + try: if(asset=='all'): response = self.api.returnopenorders('all') @@ -330,10 +340,12 @@ class Poloniex(Exchange): order_statuses['message']) ) + print(self.portfolio.open_orders) + #TODO: Need to handle openOrders for 'all' orders = list() for order_status in response: - order, executed_price = self._create_order(order_status) + order, executed_price = self._create_order(order_status) # will Throw error b/c Polo doesn't track order['symbol'] if asset is None or asset == order.sid: orders.append(order) @@ -354,22 +366,23 @@ class Poloniex(Exchange): order : Order The order object. 
""" - pass - ''' + try: - response = self._request( - 'order/status', {'order_id': int(order_id)}) - order_status = response.json() + order = self._portfolio.open_orders[order_id] + except Exception as e: + raise OrphanOrderError(order_id=order_id, exchange=self.name) + + try: + response = self.api.returnopenorders(self.get_symbol(order.sid)) except Exception as e: raise ExchangeRequestError(error=e) - if 'message' in order_status: - raise ExchangeRequestError( - error='Unable to retrieve order status: {}'.format( - order_status['message']) - ) - return self._create_order(order_status) - ''' + for order in response: + if(int(order['orderNumber'])==int(order_id)): + return True + + return None + def cancel_order(self, order_param): """Cancel an open order. @@ -394,6 +407,9 @@ class Poloniex(Exchange): error=response['error'] ) + self.portfolio.remove_order(order_param) #TODO: Verify this works + + def tickers(self, assets): """ @@ -403,9 +419,8 @@ class Poloniex(Exchange): :param assets: :return: """ - symbols = [] - for asset in assets: - symbols.append(self.get_symbol(asset)) + symbols = self.get_symbols(assets) + log.debug('fetching tickers {}'.format(symbols)) try: @@ -453,4 +468,83 @@ class Poloniex(Exchange): with open(filename,'w') as f: json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) - + + + def check_open_orders(self): + """ + Need to override this function for Poloniex: + + Loop through the list of open orders in the Portfolio object. + Check if any transactions have been executed: + If so, create a transaction and apply to the Portfolio. 
+ Check if the order is still open: + If not, remove it from open orders + + :return: + transactions: Transaction[] + """ + transactions = list() + if self.portfolio.open_orders: + for order_id in list(self.portfolio.open_orders): + + order = self._portfolio.open_orders[order_id] + log.debug('found open order: {}'.format(order_id)) + + try: + order_open = self.get_order(order_id) + except Exception as e: + raise ExchangeRequestError(error=e) + + if(order_open): + delta = pd.Timestamp.utcnow() - order.dt + log.info( + 'order {order_id} still open after {delta}'.format( + order_id=order_id, + delta=delta ) + ) + + try: + response = self.api.returnordertrades(order_id) + except Exception as e: + raise ExchangeRequestError(error=e) + + if(response['error']): + if(not order_open): + raise OrphanOrderReverseError(order_id=order_id, exchange=self.name) + else: + for tx in response: + """ + We maintain a list of dictionaries of transactions that correspond to + partially filled orders, indexed by order_id. Every time we query + executed transactions from the exchange, we check if we had that + transaction for that order already. If not, we process it. + + When an order if fully filled, we flush the dict of transactions + associated with that order. 
+ """ + if(not filter(lambda item: item['order_id'] == tx['tradeID'], self.transactions[order_id])): + log.debug('Got new transaction for order {}: amount {}, price {}'.format( + order_id, tx.amount, tx.rate)) + if(tx['type']=='sell'): + tx['amount'] = -tx['amount'] + transaction = Transaction( + asset=order.asset, + amount=tx['amount'], + dt=pd.to_datetime(tx['date'], utc=True), + price=tx['rate'], + order_id=tx['tradeID'], # it's a misnomer, but keeping it for compatibility + commission=tx['fee'] + ) + self.transactions[order_id].append(transaction) + self.portfolio.execute_transaction(transaction) + transactions.append(transaction) + + if(not order_open): + """ + Since transactions have been executed individually + the only thing left to do is remove them from list of open_orders + """ + del self.portfolio.open_orders[order_id] + del self.transactions[order_id] + + return transactions From 6bfe0eecd235ab5a9f424ac3c84f1ab8c2a24b3b Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 27 Sep 2017 17:27:40 -0400 Subject: [PATCH 029/110] Remove some 5-minute data and added example of extension.py. 
--- .../buy_low_sell_high_neo_with_interface.py | 33 ++++++++++--------- .../exchange/bittrex/extensions-example.py | 7 ++++ catalyst/exchange/data_portal_exchange.py | 10 ------ catalyst/exchange/exchange_bundle.py | 17 ++-------- 4 files changed, 27 insertions(+), 40 deletions(-) create mode 100644 catalyst/exchange/bittrex/extensions-example.py diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 88f22766..4a9cc4c5 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -144,28 +144,31 @@ def handle_data(context, data): def analyze(context, stats): log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass -# run_algorithm( -# initialize=initialize, -# handle_data=handle_data, -# analyze=analyze, -# exchange_name='bittrex,bitfinex', -# live=True, -# algo_namespace=algo_namespace, -# base_currency='btc', -# live_graph=False -# ) run_algorithm( - capital_base=250, - start=pd.to_datetime('2017-09-08', utc=True), - end=pd.to_datetime('2017-09-15', utc=True), - data_frequency='minute', initialize=initialize, handle_data=handle_data, analyze=analyze, exchange_name='bitfinex', + live=True, algo_namespace=algo_namespace, - base_currency='btc' + base_currency='btc', + live_graph=False ) + +# Backtest +# run_algorithm( +# capital_base=250, +# start=pd.to_datetime('2017-09-08', utc=True), +# end=pd.to_datetime('2017-09-15', utc=True), +# data_frequency='minute', +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='bitfinex', +# algo_namespace=algo_namespace, +# base_currency='btc' +# ) diff --git a/catalyst/exchange/bittrex/extensions-example.py b/catalyst/exchange/bittrex/extensions-example.py new file mode 100644 index 00000000..33ffb4f5 --- /dev/null +++ b/catalyst/exchange/bittrex/extensions-example.py @@ -0,0 +1,7 @@ +from catalyst.data.bundles import 
register +from catalyst.exchange.exchange_bundle import exchange_bundle + +symbols = ( + 'neo_btc', +) +register('exchange_bitfinex', exchange_bundle('bitfinex', symbols)) \ No newline at end of file diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index c0af0e32..88808fa6 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -282,14 +282,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): except IOError: self.daily_bar_readers[exchange_name] = None - try: - self.five_minute_bar_readers[exchange_name] = \ - BcolzFiveMinuteBarReader( - five_minute_path(name, time_folder), - ) - except IOError: - self.five_minute_bar_readers[exchange_name] = None - try: self.minute_bar_readers[exchange_name] = \ BcolzMinuteBarReader( @@ -351,8 +343,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): """ if data_frequency == 'minute': reader = self.minute_bar_readers[exchange_name] - elif data_frequency == '5-minute': - reader = self.five_minute_bar_readers[exchange_name] elif data_frequency == 'daily': reader = self.daily_bar_readers[exchange_name] else: diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 9446a88a..df640311 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -47,10 +47,6 @@ def process_bar_data(exchange, assets, writer, data_frequency, delta_periods = delta.total_seconds() / 60 frequency = '1m' - elif data_frequency == '5-minute': - delta_periods = delta.total_seconds() / 60 / 5 - frequency = '5m' - elif data_frequency == 'daily': delta_periods = delta.total_seconds() / 60 / 60 / 24 frequency = '1d' @@ -295,17 +291,8 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, # start=start, # end=end # ) - # - # if five_minute_bar_writer is not None: - # process_bar_data( - # exchange=exchange, - # assets=assets, - # 
writer=five_minute_bar_writer, - # data_frequency='5-minute', - # show_progress=show_progress, - # start=start, - # end=end - # ) + + # TODO: delete 5-minute writer everywhere if minute_bar_writer is not None: process_bar_data( From 15fa98420d158e02a9057c082904cc84fdfbb216 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 28 Sep 2017 09:14:22 -0600 Subject: [PATCH 030/110] Catching bitfinex Error: No JSON object could be decoded --- catalyst/exchange/bitfinex/bitfinex.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 434fd933..313e575e 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -539,7 +539,10 @@ class Bitfinex(Exchange): response.content) ) - tickers = response.json() + try: + tickers = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) ticks = dict() for index, ticker in enumerate(tickers): From 3b681d197d9a302aae423ecc54568dd59e41822a Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 28 Sep 2017 11:03:47 -0600 Subject: [PATCH 031/110] Purge 5-min implementation --- catalyst/__main__.py | 6 +- catalyst/algorithm.py | 52 +- catalyst/data/_minute_bar_internal.pyx | 78 - catalyst/data/bundles/base.py | 27 +- catalyst/data/bundles/base_pricing.py | 8 - catalyst/data/bundles/core.py | 32 +- catalyst/data/bundles/poloniex.py | 1 - catalyst/data/data_portal.py | 31 - catalyst/data/dispatch_bar_reader.py | 6 - catalyst/data/five_minute_bars.py | 1385 ----------------- catalyst/exchange/data_portal_exchange.py | 6 +- catalyst/exchange/exchange.py | 2 +- catalyst/exchange/exchange_bundle.py | 7 +- catalyst/exchange/poloniex/poloniex.py | 49 +- catalyst/exchange/poloniex/poloniex_api.py | 42 + catalyst/finance/performance/tracker.py | 16 - catalyst/gens/sim_engine.pyx | 23 - catalyst/gens/tradesimulation.py | 3 +- .../pipeline/loaders/crypto_pricing_loader.py | 4 
- .../pipeline/loaders/equity_pricing_loader.py | 5 - catalyst/sources/benchmark_source.py | 28 - catalyst/utils/calendars/trading_calendar.py | 55 - catalyst/utils/events.py | 1 - tests/exchange/test_bundle.py | 1 - tests/exchange/test_clock.py | 8 +- 25 files changed, 100 insertions(+), 1776 deletions(-) delete mode 100644 catalyst/data/five_minute_bars.py diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 7a9e6316..bdd9e537 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -126,7 +126,7 @@ def ipython_only(option): ) @click.option( '--data-frequency', - type=click.Choice({'daily', '5-minute', 'minute'}), + type=click.Choice({'daily', 'minute'}), default='daily', show_default=True, help='The data frequency of the simulation.', @@ -196,8 +196,8 @@ def ipython_only(option): @click.option( '-x', '--exchange-name', - type=click.Choice({'bitfinex', 'bittrex'}), - help='The name of the targeted exchange (supported: bitfinex, bittrex).', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', ) @click.option( '-n', diff --git a/catalyst/algorithm.py b/catalyst/algorithm.py index 51fdfdba..65e2fa60 100644 --- a/catalyst/algorithm.py +++ b/catalyst/algorithm.py @@ -134,10 +134,7 @@ from catalyst.utils.security_list import SecurityList import catalyst.protocol from catalyst.sources.requests_csv import PandasRequestsCSV -from catalyst.gens.sim_engine import ( - MinuteSimulationClock, - FiveMinuteSimulationClock, -) +from catalyst.gens.sim_engine import MinuteSimulationClock from catalyst.sources.benchmark_source import BenchmarkSource from catalyst.catalyst_warnings import ZiplineDeprecationWarning @@ -174,7 +171,7 @@ class TradingAlgorithm(object): algo_filename : str, optional The filename for the algoscript. This will be used in exception tracebacks. default: ''. 
- data_frequency : {'daily', '5-minute', 'minute'}, optional + data_frequency : {'daily', 'minute'}, optional The duration of the bars. instant_fill : bool, optional Whether to fill orders immediately or on next bar. default: False @@ -227,7 +224,7 @@ class TradingAlgorithm(object): script : str Algoscript that contains initialize and handle_data function definition. - data_frequency : {'daily', '5-minute', 'minute'} + data_frequency : {'daily', 'minute'} The duration of the bars. capital_base : float How much capital to start with. @@ -435,8 +432,6 @@ class TradingAlgorithm(object): if get_loader is not None: if data_frequency == 'daily': all_dates = self.trading_calendar.all_sessions - elif data_frequency == '5-minute': - all_dates = self.trading_calendar.all_five_minutes elif data_frequency == 'minute': all_dates = self.trading_calendar.all_minutes else: @@ -468,7 +463,7 @@ class TradingAlgorithm(object): self._in_before_trading_start = True with handle_non_market_minutes(data) if \ - self.data_frequency in ('minute', '5-minute') else ExitStack(): + self.data_frequency == 'minute' else ExitStack(): self._before_trading_start(self, data) self._in_before_trading_start = False @@ -524,11 +519,10 @@ class TradingAlgorithm(object): market_closes = trading_o_and_c['market_close'] minutely_emission = False - if self.sim_params.data_frequency in set(('minute', '5-minute')): + if self.sim_params.data_frequency == 'minute': market_opens = trading_o_and_c['market_open'] - minutely_emission = self.sim_params.emission_rate in \ - set(('minute', '5-minute')) + minutely_emission = self.sim_params.emission_rate == 'minute' else: # in daily mode, we want to have one bar per session, timestamped # as the last minute of the session. 
@@ -552,15 +546,6 @@ class TradingAlgorithm(object): 'UTC', ) - if self.sim_params.data_frequency == '5-minute': - return FiveMinuteSimulationClock( - self.sim_params.sessions, - execution_opens, - execution_closes, - before_trading_start_minutes, - minute_emission=minutely_emission, - ) - return MinuteSimulationClock( self.sim_params.sessions, execution_opens, @@ -692,8 +677,6 @@ class TradingAlgorithm(object): time_count = times.nunique() if time_count == 1: self.sim_params.data_frequency = 'daily' - elif time_count == 288: - self.sim_params.data_frequency = '5-minute' else: self.sim_params.data_frequency = 'minute' @@ -715,8 +698,6 @@ class TradingAlgorithm(object): if self.sim_params.data_frequency == 'daily': equity_reader_arg = 'equity_daily_reader' - elif self.sim_params.data_frequency == '5-minute': - equity_daily_reader = 'equity_5_minute_reader' elif self.sim_params.data_frequency == 'minute': equity_reader_arg = 'equity_minute_reader' equity_reader = PanelBarReader( @@ -960,9 +941,9 @@ class TradingAlgorithm(object): The arena from the simulation parameters. This will normally be ``'backtest'`` but some systems may use this distinguish live trading from backtesting. - data_frequency : {'daily', '5-minute', 'minute'} + data_frequency : {'daily', 'minute'} data_frequency tells the algorithm if it is running with - daily, minute, or five-minute mode. + daily or minute mode. start : datetime The start date for the simulation. end : datetime @@ -1137,18 +1118,11 @@ class TradingAlgorithm(object): 'time_rule= when calling schedule_function without ' 'specifying a date_rule', stacklevel=3) - freq = self.sim_params.data_frequency - date_rule = date_rule or date_rules.every_day() - if freq is 'daily': - # ignore time rule in daily mode - time_rule = time_rules.every_minute() - else: - # use provided time rule or default to every minute or 5 minutes - # based on desired data frequency. 
- time_rule = time_rule or (time_rules.every_5_minutes() - if freq is '5-minute' else - time_rules.every_minute()) + time_rule = ((time_rule or time_rules.every_minute()) + if self.sim_params.data_frequency == 'minute' else + # If we are in daily mode the time_rule is ignored. + time_rules.every_minute()) # Check the type of the algorithm's schedule before pulling calendar # Note that the ExchangeTradingSchedule is currently the only @@ -1819,7 +1793,7 @@ class TradingAlgorithm(object): @data_frequency.setter def data_frequency(self, value): - assert value in ('daily', '5-minute', 'minute') + assert value in ('daily', 'minute') self.sim_params.data_frequency = value @api_method diff --git a/catalyst/data/_minute_bar_internal.pyx b/catalyst/data/_minute_bar_internal.pyx index 9ebb0841..bea12bc2 100644 --- a/catalyst/data/_minute_bar_internal.pyx +++ b/catalyst/data/_minute_bar_internal.pyx @@ -35,17 +35,6 @@ def minute_value(ndarray[long_t, ndim=1] market_opens, return market_opens[q] + r -@cython.cdivision(True) -def five_minute_value(ndarray[long_t, ndim=1] market_opens, - Py_ssize_t pos, - short five_minutes_per_day): - - cdef short q, r - q = cython.cdiv(pos, five_minutes_per_day) - r = cython.cmod(pos, five_minutes_per_day) - - return market_opens[q] + r - def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, ndarray[long_t, ndim=1] market_closes, long_t minute_val, @@ -99,26 +88,6 @@ def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, return (market_open_loc * minutes_per_day) + delta -def find_position_of_five_minute(ndarray[long_t, ndim=1] market_opens, - ndarray[long_t, ndim=1] market_closes, - long_t five_minute_val, - short five_minutes_per_day, - bool forward_fill): - - cdef Py_ssize_t market_open_loc, market_open, delta - - market_open_loc = \ - searchsorted(market_opens, five_minute_val, side='right') - 1 - market_open = market_opens[market_open_loc] - market_close = market_closes[market_open_loc] - - if not forward_fill and 
((five_minute_val - market_open) >= five_minutes_per_day): - raise ValueError("Given five minutes is not between an open and a close") - - delta = int_min(five_minute_val - market_open, market_close - market_open) - - return (market_open_loc * five_minutes_per_day) + delta - def find_last_traded_position_internal( ndarray[long_t, ndim=1] market_opens, ndarray[long_t, ndim=1] market_closes, @@ -189,50 +158,3 @@ def find_last_traded_position_internal( # found a trade event return -1 -def find_last_traded_five_minute_position_internal( - ndarray[long_t, ndim=1] market_opens, - ndarray[long_t, ndim=1] market_closes, - long_t end_five_minute, - long_t start_five_minute, - volumes, - short five_minutes_per_day): - cdef Py_ssize_t minute_pos, current_minute, q - - five_minute_pos = int_min( - find_position_of_five_minute( - market_opens, - market_closes, - end_five_minute, - five_minutes_per_day, - True, - ), - len(volumes) - 1, - ) - - while five_minute_pos >= 0: - current_five_minute = five_minute_value( - market_opens, five_minute_pos, five_minutes_per_day - ) - - q = cython.cdiv(five_minute_pos, five_minutes_per_day) - if current_five_minute > market_closes[q]: - five_minute_pos = find_position_of_five_minute( - market_opens, - market_closes, - market_closes[q], - five_minutes_per_day, - False, - ) - continue - - if current_five_minute < start_five_minute: - return -1 - - if volumes[five_minute_pos] != 0: - return five_minute_pos - - five_minute_pos -= 1 - - # we've gone to the beginning of this asset's range, and still haven't - # found a trade event - return -1 diff --git a/catalyst/data/bundles/base.py b/catalyst/data/bundles/base.py index 135dd531..6af7a0eb 100644 --- a/catalyst/data/bundles/base.py +++ b/catalyst/data/bundles/base.py @@ -60,10 +60,6 @@ class BaseBundle(object): def minutes_per_day(self): raise NotImplementedError() - @lazyval - def five_minutes_per_day(self): - raise NotImplementedError() - @lazyval def frequencies(self): raise 
NotImplementedError() @@ -115,7 +111,6 @@ class BaseBundle(object): environ, asset_db_writer, minute_bar_writer, - five_minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, @@ -162,7 +157,7 @@ class BaseBundle(object): # Post-process metadata using cached symbol frames, and write to # disk. This metadata must be written before any attempt to write - # either minute or 5-minute data. + # minute data. metadata = self._post_process_metadata( raw_metadata, cache, @@ -170,26 +165,6 @@ class BaseBundle(object): ) asset_db_writer.write(metadata) - # Compile 5-minute symbol data if bundle supports 5-minute mode and - # persist the dataset to disk. - ''' - if '5-minute' in self.frequencies: - five_minute_bar_writer.write( - self._fetch_symbol_iter( - api_key, - cache, - symbol_map, - calendar, - start_session, - end_session, - '5-minute', - retries, - ), - length=len(symbol_map), - show_progress=show_progress, - ) - ''' - # Compile minute symbol data if bundle supports minute mode and # persist the dataset to disk. 
if 'minute' in self.frequencies: diff --git a/catalyst/data/bundles/base_pricing.py b/catalyst/data/bundles/base_pricing.py index c5281fdd..7b94e4bc 100644 --- a/catalyst/data/bundles/base_pricing.py +++ b/catalyst/data/bundles/base_pricing.py @@ -47,10 +47,6 @@ class BaseCryptoPricingBundle(BasePricingBundle): def minutes_per_day(self): return 1440 - @lazyval - def five_minutes_per_day(self): - return 288 - @property def splits(self): return [] @@ -68,10 +64,6 @@ class BaseEquityPricingBundle(BasePricingBundle): def minutes_per_day(self): return 390 - @lazyval - def five_minutes_per_day(self): - return 78 - @property def splits(self): return self._splits diff --git a/catalyst/data/bundles/core.py b/catalyst/data/bundles/core.py index 29aceb7a..a25591de 100644 --- a/catalyst/data/bundles/core.py +++ b/catalyst/data/bundles/core.py @@ -17,10 +17,6 @@ from ..us_equity_pricing import ( SQLiteAdjustmentReader, SQLiteAdjustmentWriter, ) -from ..five_minute_bars import ( - BcolzFiveMinuteBarReader, - BcolzFiveMinuteBarWriter, -) from ..minute_bars import ( BcolzMinuteBarReader, BcolzMinuteBarWriter, @@ -54,11 +50,6 @@ def minute_path(bundle_name, timestr, environ=None): environ=environ, ) -def five_minute_path(bundle_name, timestr, environ=None): - return pth.data_path( - five_minute_relative(bundle_name, timestr, environ), - environ=environ, - ) def daily_path(bundle_name, timestr, environ=None): return pth.data_path( @@ -92,8 +83,6 @@ def cache_relative(bundle_name, timestr, environ=None): def daily_relative(bundle_name, timestr, environ=None): return bundle_name, timestr, 'daily_equities.bcolz' -def five_minute_relative(bundle_name, timestr, environ=None): - return bundle_name, timestr, 'five_minute.bcolz' def minute_relative(bundle_name, timestr, environ=None): return bundle_name, timestr, 'minute_equities.bcolz' @@ -206,14 +195,13 @@ RegisteredBundle = namedtuple( 'start_session', 'end_session', 'minutes_per_day', - 'five_minutes_per_day', 'ingest', 
'create_writers'] ) BundleData = namedtuple( 'BundleData', - 'asset_finder minute_bar_reader five_minute_bar_reader daily_bar_reader ' + 'asset_finder minute_bar_reader daily_bar_reader ' 'adjustment_reader', ) @@ -303,7 +291,6 @@ def _make_bundle_core(): bundle.ingest, calendar_name=bundle.calendar_name, minutes_per_day=bundle.minutes_per_day, - five_minutes_per_day=bundle.five_minutes_per_day, start_session=start_session, end_session=end_session, create_writers=create_writers, @@ -316,7 +303,6 @@ def _make_bundle_core(): start_session=None, end_session=None, minutes_per_day=1440, - five_minutes_per_day=288, create_writers=True): """Register a data bundle ingest function. @@ -397,7 +383,6 @@ def _make_bundle_core(): start_session=start_session, end_session=end_session, minutes_per_day=minutes_per_day, - five_minutes_per_day=five_minutes_per_day, ingest=f, create_writers=create_writers, ) @@ -496,16 +481,6 @@ def _make_bundle_core(): # that it can compute the adjustment ratios for the dividends. 
daily_bar_writer.write(()) - five_minute_bar_writer = BcolzFiveMinuteBarWriter( - wd.ensure_dir(*five_minute_relative( - name, timestr, environ=environ) - ), - calendar, - start_session, - end_session, - five_minutes_per_day=bundle.five_minutes_per_day, - ) - minute_bar_writer = BcolzMinuteBarWriter( wd.ensure_dir(*minute_relative( name, timestr, environ=environ) @@ -532,7 +507,6 @@ def _make_bundle_core(): ) else: daily_bar_writer = None - five_minute_bar_writer = None minute_bar_writer = None asset_db_writer = None adjustment_db_writer = None @@ -544,7 +518,6 @@ def _make_bundle_core(): environ, asset_db_writer, minute_bar_writer, - five_minute_bar_writer, daily_bar_writer, adjustment_db_writer, calendar, @@ -631,9 +604,6 @@ def _make_bundle_core(): minute_bar_reader=BcolzMinuteBarReader( minute_path(name, timestr, environ=environ), ), - five_minute_bar_reader=BcolzFiveMinuteBarReader( - five_minute_path(name, timestr, environ=environ), - ), daily_bar_reader=BcolzDailyBarReader( daily_path(name, timestr, environ=environ), ), diff --git a/catalyst/data/bundles/poloniex.py b/catalyst/data/bundles/poloniex.py index e161df95..64cc2d27 100644 --- a/catalyst/data/bundles/poloniex.py +++ b/catalyst/data/bundles/poloniex.py @@ -148,7 +148,6 @@ class PoloniexBundle(BaseCryptoPricingBundle): data_frequency): period_map = { 'daily': 86400, -# '5-minute': 300, } try: diff --git a/catalyst/data/data_portal.py b/catalyst/data/data_portal.py index 69aa166e..6bfe6047 100644 --- a/catalyst/data/data_portal.py +++ b/catalyst/data/data_portal.py @@ -42,7 +42,6 @@ from catalyst.assets.roll_finder import ( ) from catalyst.data.dispatch_bar_reader import ( AssetDispatchMinuteBarReader, - AssetDispatchFiveMinuteBarReader, AssetDispatchSessionBarReader ) from catalyst.data.resample import ( @@ -120,10 +119,6 @@ class DataPortal(object): daily data backtests or daily history calls in a minute backetest. 
If a daily bar reader is not provided but a minute bar reader is, the minutes will be rolled up to serve the daily requests. - five_minute_reader : BcolzFiveMinuteBarReader, optional - The five minute bar reader for equities. This will be used to service - 5-minute data backtests or five-minute history calls. This can be used - to serve daily calls if no daily bar reader is provided. minute_reader : BcolzMinuteBarReader, optional The minute bar reader for equities. This will be used to service minute data backtests or minute history calls. This can be used @@ -150,7 +145,6 @@ class DataPortal(object): trading_calendar, first_trading_day, daily_reader=None, - five_minute_reader=None, minute_reader=None, future_daily_reader=None, future_minute_reader=None, @@ -202,7 +196,6 @@ class DataPortal(object): reader.last_available_dt for reader in [ minute_reader, - five_minute_reader, future_minute_reader, ] if reader is not None @@ -214,8 +207,6 @@ class DataPortal(object): aligned_minute_reader = self._ensure_reader_aligned( minute_reader) - aligned_five_minute_reader = self._ensure_reader_aligned( - five_minute_reader) aligned_session_reader = self._ensure_reader_aligned( daily_reader) aligned_future_minute_reader = self._ensure_reader_aligned( @@ -229,13 +220,10 @@ class DataPortal(object): } aligned_minute_readers = {} - aligned_five_minute_readers = {} aligned_session_readers = {} if aligned_minute_reader is not None: aligned_minute_readers[Equity] = aligned_minute_reader - if aligned_five_minute_reader is not None: - aligned_five_minute_readers[Equity] = aligned_five_minute_reader if aligned_session_reader is not None: aligned_session_readers[Equity] = aligned_session_reader @@ -267,13 +255,6 @@ class DataPortal(object): self._last_available_minute, ) - _dispatch_five_minute_reader = AssetDispatchFiveMinuteBarReader( - self.trading_calendar, - self.asset_finder, - aligned_five_minute_readers, - self._last_available_minute, - ) - _dispatch_session_reader = 
AssetDispatchSessionBarReader( self.trading_calendar, self.asset_finder, @@ -283,7 +264,6 @@ class DataPortal(object): self._pricing_readers = { 'minute': _dispatch_minute_reader, - '5-minute': _dispatch_five_minute_reader, 'daily': _dispatch_session_reader, } @@ -719,17 +699,6 @@ class DataPortal(object): spot_value=result ) - - def _get_five_minute_spot_value(self, asset, column, dt, ffill=False): - return self._get_minutely_spot_value( - asset, - column, - dt, - ffill, - '5-minute', - ) - - def _get_minute_spot_value(self, asset, column, dt, ffill=False): return self._get_minutely_spot_value( asset, diff --git a/catalyst/data/dispatch_bar_reader.py b/catalyst/data/dispatch_bar_reader.py index e545eef0..e72dab7b 100644 --- a/catalyst/data/dispatch_bar_reader.py +++ b/catalyst/data/dispatch_bar_reader.py @@ -135,12 +135,6 @@ class AssetDispatchMinuteBarReader(AssetDispatchBarReader): def _dt_window_size(self, start_dt, end_dt): return len(self.trading_calendar.minutes_in_range(start_dt, end_dt)) - -class AssetDispatchFiveMinuteBarReader(AssetDispatchBarReader): - - def _dt_window_size(self, start_dt, end_dt): - return len(self.trading_calendar.five_minutes_in_range(start_dt, end_dt)) - class AssetDispatchSessionBarReader(AssetDispatchBarReader): def _dt_window_size(self, start_dt, end_dt): diff --git a/catalyst/data/five_minute_bars.py b/catalyst/data/five_minute_bars.py deleted file mode 100644 index 9021dc0a..00000000 --- a/catalyst/data/five_minute_bars.py +++ /dev/null @@ -1,1385 +0,0 @@ -# Copyright 2016 Quantopian, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from abc import ABCMeta, abstractmethod -import json -import os -from glob import glob -from os.path import join -from textwrap import dedent - -from lru import LRU -import bcolz -from bcolz import ctable -from intervaltree import IntervalTree -import logbook -import numpy as np -from numpy import ( - iinfo, - uint64, -) -import pandas as pd -from pandas import HDFStore -import tables -from six import with_metaclass -from toolz import keymap, valmap - -from catalyst.data._minute_bar_internal import ( - five_minute_value, - find_position_of_five_minute, - find_last_traded_five_minute_position_internal, -) - -from catalyst.gens.sim_engine import NANOS_IN_MINUTE - -from catalyst.data.bar_reader import BarReader, NoDataOnDate -from catalyst.data.us_equity_pricing import ( - winsorise_uint64, - check_uint64_safe, -) -from catalyst.utils.calendars import get_calendar -from catalyst.utils.cli import ( - item_show_count, - maybe_show_progress, -) -from catalyst.utils.memoize import lazyval - -logger = logbook.Logger('FiveMinuteBars') - -OPEN_FIVE_MINUTES_PER_DAY = 288 - -DEFAULT_EXPECTEDLEN_CRYPTO = OPEN_FIVE_MINUTES_PER_DAY * 366 * 15 - -OHLC_RATIO = 1000000 - -OHLC = frozenset(['open', 'high', 'low', 'close']) -OHLCV = frozenset(['open', 'high', 'low', 'close', 'volume']) - -UINT64_MAX = iinfo(uint64).max - -NANOS_IN_FIVE_MINUTES = 5 * NANOS_IN_MINUTE - -class BcolzFiveMinuteOverlappingData(Exception): - pass - - -class BcolzFiveMinuteWriterColumnMismatch(Exception): - pass - - -class FiveMinuteBarReader(BarReader): - @property - def data_frequency(self): - 
return "5-minute" - - -def _calc_five_minute_index(market_opens, five_minutes_per_day): - five_minutes = np.zeros(len(market_opens) * five_minutes_per_day, - dtype='datetime64[ns]') - deltas = 5 * np.arange(0, five_minutes_per_day, dtype='timedelta64[m]') - for i, market_open in enumerate(market_opens): - start = market_open.asm8 - five_minute_values = start + deltas - start_ix = five_minutes_per_day * i - end_ix = start_ix + five_minutes_per_day - five_minutes[start_ix:end_ix] = five_minute_values - return pd.to_datetime(five_minutes, utc=True, box=True) - - -def _sid_subdir_path(sid): - """ - Format subdir path to limit the number directories in any given - subdirectory to 100. - - The number in each directory is designed to support at least 100000 - equities. - - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : string - A path for the bcolz rootdir, including subdirectory prefixes based on - the padded string representation of the given sid. - - e.g. 1 is formatted as 00/00/000001.bcolz - """ - padded_sid = format(sid, '06') - return os.path.join( - # subdir 1 00/XX - padded_sid[0:2], - # subdir 2 XX/00 - padded_sid[2:4], - "{0}.bcolz".format(str(padded_sid)) - ) - - -def convert_cols(cols, scale_factor, sid, invalid_data_behavior): - """Adapt OHLCV columns into uint64 columns. - - Parameters - ---------- - cols : dict - A dict mapping each column name (open, high, low, close, volume) - to a float column to convert to uint64. - scale_factor : int - Factor to use to scale float values before converting to uint64. - sid : int - Sid of the relevant asset, for logging. - invalid_data_behavior : str - Specifies behavior when data cannot be converted to uint64. - If 'raise', raises an exception. - If 'warn', logs a warning and filters out incompatible values. - If 'ignore', silently filters out incompatible values. 
- """ - scaled_opens = np.nan_to_num(cols['open']) * scale_factor - scaled_highs = np.nan_to_num(cols['high']) * scale_factor - scaled_lows = np.nan_to_num(cols['low']) * scale_factor - scaled_closes = np.nan_to_num(cols['close']) * scale_factor - volumes = np.nan_to_num(cols['volume']) - - exclude_mask = np.zeros_like(scaled_opens, dtype=bool) - - for col_name, scaled_col in [ - ('open', scaled_opens), - ('high', scaled_highs), - ('low', scaled_lows), - ('close', scaled_closes), - ('volume', volumes), - ]: - max_val = scaled_col.max() - - try: - check_uint64_safe(max_val, col_name) - except ValueError: - if invalid_data_behavior == 'raise': - raise - - if invalid_data_behavior == 'warn': - logger.warn( - 'Values for sid={}, col={} contain some too large for ' - 'uint64 (max={}), filtering them out', - sid, col_name, max_val, - ) - - # We want to exclude all rows that have an unsafe value in - # this column. - exclude_mask &= (scaled_col >= iinfo(uint64).max) - - # Convert all cols to uint64. - opens = scaled_opens.astype(uint64) - highs = scaled_highs.astype(uint64) - lows = scaled_lows.astype(uint64) - closes = scaled_closes.astype(uint64) - volumes = volumes.astype(uint64) - - # Exclude rows with unsafe values by setting to zero. - opens[exclude_mask] = 0 - highs[exclude_mask] = 0 - lows[exclude_mask] = 0 - closes[exclude_mask] = 0 - volumes[exclude_mask] = 0 - - return opens, highs, lows, closes, volumes - - -class BcolzFiveMinuteBarMetadata(object): - """ - Parameters - ---------- - ohlc_ratio : int - The factor by which the pricing data is multiplied so that the - float data can be stored as an integer. - calendar : catalyst.utils.calendars.trading_calendar.TradingCalendar - The TradingCalendar on which the five minute bars are based. - start_session : datetime - The first trading session in the data set. - end_session : datetime - The last trading session in the data set. - five_minutes_per_day : int - The number of minutes per each period. 
- """ - FORMAT_VERSION = 3 - - METADATA_FILENAME = 'five-minute-metadata.json' - - @classmethod - def metadata_path(cls, rootdir): - return os.path.join(rootdir, cls.METADATA_FILENAME) - - @classmethod - def read(cls, rootdir): - path = cls.metadata_path(rootdir) - with open(path) as fp: - raw_data = json.load(fp) - - try: - version = raw_data['version'] - except KeyError: - # Version was first written with version 1, assume 0, - # if version does not match. - version = 0 - - default_ohlc_ratio = raw_data['ohlc_ratio'] - - if version >= 1: - five_minutes_per_day = raw_data['five_minutes_per_day'] - else: - # version 0 always assumed crypto assets. - five_minutes_per_day = CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY - - if version >= 2: - calendar = get_calendar(raw_data['calendar_name']) - start_session = pd.Timestamp( - raw_data['start_session'], tz='UTC') - end_session = pd.Timestamp(raw_data['end_session'], tz='UTC') - else: - # No calendar info included in older versions, so - # default to OPEN. 
- calendar = get_calendar('OPEN') - - start_session = pd.Timestamp( - raw_data['first_trading_day'], tz='UTC') - end_session = calendar.minute_to_session_label( - pd.Timestamp( - raw_data['market_closes'][-1], unit='m', tz='UTC') - ) - - if version >= 3: - ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid'] - if ohlc_ratios_per_sid is not None: - ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid) - else: - ohlc_ratios_per_sid = None - - return cls( - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - five_minutes_per_day, - version=version, - ) - - def __init__( - self, - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - five_minutes_per_day, - version=FORMAT_VERSION, - ): - self.calendar = calendar - self.start_session = start_session - self.end_session = end_session - self.default_ohlc_ratio = default_ohlc_ratio - self.ohlc_ratios_per_sid = ohlc_ratios_per_sid - self.five_minutes_per_day = five_minutes_per_day - self.version = version - - def write(self, rootdir): - """ - Write the metadata to a JSON file in the rootdir. - - Values contained in the metadata are: - - version : int - The value of FORMAT_VERSION of this class. - ohlc_ratio : int - The default ratio by which to multiply the pricing data to - convert the floats from floats to an integer to fit within - the np.uint64. If ohlc_ratios_per_sid is None or does not - contain a mapping for a given sid, this ratio is used. - ohlc_ratios_per_sid : dict - A dict mapping each sid in the output to the factor by - which the pricing data is multiplied so that the float data - can be stored as an integer. - five_minutes_per_day : int - The number of minutes per each period. - calendar_name : str - The name of the TradingCalendar on which the minute bars are - based. - start_session : datetime - 'YYYY-MM-DD' formatted representation of the first trading - session in the data set. 
- end_session : datetime - 'YYYY-MM-DD' formatted representation of the last trading - session in the data set. - - Deprecated, but included for backwards compatibility: - - first_trading_day : string - 'YYYY-MM-DD' formatted representation of the first trading day - available in the dataset. - market_opens : list - List of int64 values representing UTC market opens as - minutes since epoch. - market_closes : list - List of int64 values representing UTC market closes as - minutes since epoch. - """ - - calendar = self.calendar - slicer = calendar.schedule.index.slice_indexer( - self.start_session, - self.end_session, - ) - schedule = calendar.schedule[slicer] - market_opens = schedule.market_open - market_closes = schedule.market_close - - metadata = { - 'version': self.version, - 'ohlc_ratio': self.default_ohlc_ratio, - 'ohlc_ratios_per_sid': self.ohlc_ratios_per_sid, - 'five_minutes_per_day': self.five_minutes_per_day, - 'calendar_name': self.calendar.name, - 'start_session': str(self.start_session.date()), - 'end_session': str(self.end_session.date()), - # Write these values for backwards compatibility - 'first_trading_day': str(self.start_session.date()), - 'market_opens': ( - market_opens.values.astype('datetime64[m]'). - astype(np.int64).tolist()), - 'market_closes': ( - market_closes.values.astype('datetime64[m]'). - astype(np.int64).tolist()), - } - with open(self.metadata_path(rootdir), 'w+') as fp: - json.dump(metadata, fp) - - -class BcolzFiveMinuteBarWriter(object): - """ - Class capable of writing minute OHLCV data to disk into bcolz format. - - Parameters - ---------- - rootdir : string - Path to the root directory into which to write the metadata and - bcolz subdirectories. - calendar : catalyst.utils.calendars.trading_calendar.TradingCalendar - The trading calendar on which to base the minute bars. 
Used to - get the market opens used as a starting point for each periodic - span of minutes in the index, and the market closes that - correspond with the market opens. - five_minutes_per_day : int - The number of minutes per each period. Defaults to 390, the mode - of minutes in NYSE trading days. - start_session : datetime - The first trading session in the data set. - end_session : datetime - The last trading session in the data set. - default_ohlc_ratio : int, optional - The default ratio by which to multiply the pricing data to - convert from floats to integers that fit within np.uint64. If - ohlc_ratios_per_sid is None or does not contain a mapping for a - given sid, this ratio is used. Default is OHLC_RATIO (1000). - ohlc_ratios_per_sid : dict, optional - A dict mapping each sid in the output to the ratio by which to - multiply the pricing data to convert the floats from floats to - an integer to fit within the np.uint64. - expectedlen : int, optional - The expected length of the dataset, used when creating the initial - bcolz ctable. - - If the expectedlen is not used, the chunksize and corresponding - compression ratios are not ideal. - - Defaults to supporting 15 years of NYSE equity market data. - see: http://bcolz.blosc.org/opt-tips.html#informing-about-the-length-of-your-carrays # noqa - write_metadata : bool, optional - If True, writes the minute bar metadata (on init of the writer). - If False, no metadata is written (existing metadata is - retained). Default is True. - - Notes - ----- - Writes a bcolz directory for each individual sid, all contained within - a root directory which also contains metadata about the entire dataset. 
- - Each individual asset's data is stored as a bcolz table with a column for - each pricing field: (open, high, low, close, volume) - - The open, high, low, and close columns are integers which are 1000 times - the quoted price, so that the data can represented and stored as an - np.uint64, supporting market prices quoted up to the thousands place. - - volume is a np.uint64 with no mutation of the tens place. - - The 'index' for each individual asset are a repeating period of minutes of - length `minutes_per_day` starting from each market open. - The file format does not account for half-days. - e.g.: - 2016-01-19 14:31 - 2016-01-19 14:32 - ... - 2016-01-19 20:59 - 2016-01-19 21:00 - 2016-01-20 14:31 - 2016-01-20 14:32 - ... - 2016-01-20 20:59 - 2016-01-20 21:00 - - All assets are written with a common 'index', sharing a common first - trading day. Assets that do not begin trading until after the first trading - day will have zeros for all pricing data up and until data is traded. - - 'index' is in quotations, because bcolz does not provide an index. The - format allows index-like behavior by writing each minute's data into the - corresponding position of the enumeration of the aforementioned datetime - index. - - The datetimes which correspond to each position are written in the metadata - as integer nanoseconds since the epoch into the `minute_index` key. 
- - See Also - -------- - catalyst.data.minute_bars.BcolzMinuteBarReader - """ - COL_NAMES = ('open', 'high', 'low', 'close', 'volume') - - def __init__(self, - rootdir, - calendar, - start_session, - end_session, - five_minutes_per_day, - default_ohlc_ratio=OHLC_RATIO, - ohlc_ratios_per_sid=None, - expectedlen=DEFAULT_EXPECTEDLEN_CRYPTO, - write_metadata=True): - - self._rootdir = rootdir - self._start_session = start_session - self._end_session = end_session - self._calendar = calendar - slicer = ( - calendar.schedule.index.slice_indexer(start_session, end_session)) - self._schedule = calendar.schedule[slicer] - self._session_labels = self._schedule.index - self._five_minutes_per_day = five_minutes_per_day - self._expectedlen = expectedlen - self._default_ohlc_ratio = default_ohlc_ratio - self._ohlc_ratios_per_sid = ohlc_ratios_per_sid - - self._five_minute_index = _calc_five_minute_index( - self._schedule.market_open, self._five_minutes_per_day) - - if write_metadata: - metadata = BcolzFiveMinuteBarMetadata( - self._default_ohlc_ratio, - self._ohlc_ratios_per_sid, - self._calendar, - self._start_session, - self._end_session, - self._five_minutes_per_day, - ) - metadata.write(self._rootdir) - - @classmethod - def open(cls, rootdir, end_session=None): - """ - Open an existing ``rootdir`` for writing. - - Parameters - ---------- - end_session : Timestamp (optional) - When appending, the intended new ``end_session``. 
- """ - metadata = BcolzFiveMinuteBarMetadata.read(rootdir) - return BcolzFiveMinuteBarWriter( - rootdir, - metadata.calendar, - metadata.start_session, - end_session if end_session is not None else metadata.end_session, - metadata.five_minutes_per_day, - metadata.default_ohlc_ratio, - metadata.ohlc_ratios_per_sid, - write_metadata=end_session is not None - ) - - @property - def first_trading_day(self): - return self._start_session - - def ohlc_ratio_for_sid(self, sid): - if self._ohlc_ratios_per_sid is not None: - try: - return self._ohlc_ratios_per_sid[sid] - except KeyError: - pass - - # If no ohlc_ratios_per_sid dict is passed, or if the specified - # sid is not in the dict, fallback to the general ohlc_ratio. - return self._default_ohlc_ratio - - def sidpath(self, sid): - """ - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : string - Full path to the bcolz rootdir for the given sid. - """ - sid_subdir = _sid_subdir_path(sid) - return join(self._rootdir, sid_subdir) - - def last_date_in_output_for_sid(self, sid): - """ - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : pd.Timestamp - The midnight of the last date written in to the output for the - given sid. - """ - sizes_path = "{0}/close/meta/sizes".format(self.sidpath(sid)) - if not os.path.exists(sizes_path): - return pd.NaT - with open(sizes_path, mode='r') as f: - sizes = f.read() - data = json.loads(sizes) - # use integer division so that the result is an int - # for pandas index later https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/base.py#L247 # noqa - num_days = data['shape'][0] // self._five_minutes_per_day - if num_days == 0: - # empty container - return pd.NaT - return self._session_labels[num_days - 1] - - def _init_ctable(self, path): - """ - Create empty ctable for given path. - - Parameters - ---------- - path : string - The path to rootdir of the new ctable. 
- """ - # Only create the containing subdir on creation. - # This is not to be confused with the `.bcolz` directory, but is the - # directory up one level from the `.bcolz` directories. - sid_containing_dirname = os.path.dirname(path) - if not os.path.exists(sid_containing_dirname): - # Other sids may have already created the containing directory. - os.makedirs(sid_containing_dirname) - initial_array = np.empty(0, np.uint64) - table = ctable( - rootdir=path, - columns=[ - initial_array, - initial_array, - initial_array, - initial_array, - initial_array, - ], - names=[ - 'open', - 'high', - 'low', - 'close', - 'volume' - ], - expectedlen=self._expectedlen, - mode='w', - ) - table.flush() - return table - - def _ensure_ctable(self, sid): - """Ensure that a ctable exists for ``sid``, then return it.""" - sidpath = self.sidpath(sid) - if not os.path.exists(sidpath): - return self._init_ctable(sidpath) - return bcolz.ctable(rootdir=sidpath, mode='a') - - def _zerofill(self, table, numdays): - # Compute the number of minutes to be filled, accounting for the - # possibility of a partial day's worth of minutes existing for - # the previous day. - five_minute_offset = len(table) % self._five_minutes_per_day - num_to_prepend = numdays * self._five_minutes_per_day - five_minute_offset - - prepend_array = np.zeros(num_to_prepend, np.uint64) - # Fill all OHLCV with zeros. - table.append([prepend_array] * 5) - table.flush() - - def pad(self, sid, date): - """ - Fill sid container with empty data through the specified date. - - If the last recorded trade is not at the close, then that day will be - padded with zeros until its close. Any day after that (up to and - including the specified date) will be padded with `minute_per_day` - worth of zeros - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - date : datetime-like - The date used to calculate how many slots to be pad. - The padding is done through the date, i.e. 
after the padding is - done the `last_date_in_output_for_sid` will be equal to `date` - """ - table = self._ensure_ctable(sid) - - last_date = self.last_date_in_output_for_sid(sid) - - tds = self._session_labels - - if date <= last_date or date < tds[0]: - # No need to pad. - return - - if last_date == pd.NaT: - # If there is no data, determine how many days to add so that - # desired days are written to the correct slots. - days_to_zerofill = tds[tds.slice_indexer(end=date)] - else: - days_to_zerofill = tds[tds.slice_indexer( - start=last_date + tds.freq, - end=date)] - - self._zerofill(table, len(days_to_zerofill)) - - new_last_date = self.last_date_in_output_for_sid(sid) - assert new_last_date == date, "new_last_date={0} != date={1}".format( - new_last_date, date) - - def set_sid_attrs(self, sid, **kwargs): - """Write all the supplied kwargs as attributes of the sid's file. - """ - table = self._ensure_ctable(sid) - for k, v in kwargs.items(): - table.attrs[k] = v - - def write(self, - data, - length=None, - show_progress=False, - invalid_data_behavior='warn'): - """Write a stream of minute data. - - Parameters - ---------- - data : iterable[(int, pd.DataFrame)] - The data to write. Each element should be a tuple of sid, data - where data has the following format: - columns : ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - index : DatetimeIndex of market minutes. - A given sid may appear more than once in ``data``; however, - the dates must be strictly increasing. - show_progress : bool, optional - Whether or not to show a progress bar while writing. 
- """ - with maybe_show_progress( - data, - length=length, - show_percent=False, - show_progress=show_progress, - item_show_func=item_show_count(length), - label='Compiling five-minute data', - ) as it: - write_sid = self.write_sid - for e in it: - write_sid(*e, invalid_data_behavior=invalid_data_behavior) - - def write_sid(self, sid, df, invalid_data_behavior='warn'): - """ - Write the OHLCV data for the given sid. - If there is no bcolz ctable yet created for the sid, create it. - If the length of the bcolz ctable is not exactly to the date before - the first day provided, fill the ctable with 0s up to that date. - - Parameters - ---------- - sid : int - The asset identifer for the data being written. - df : pd.DataFrame - DataFrame of market data with the following characteristics. - columns : ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - index : DatetimeIndex of market minutes. - """ - cols = { - 'open': df.open.values, - 'high': df.high.values, - 'low': df.low.values, - 'close': df.close.values, - 'volume': df.volume.values, - } - dts = df.index.values - # Call internal method, since DataFrame has already ensured matching - # index and value lengths. - self._write_cols(sid, dts, cols, invalid_data_behavior) - - def write_cols(self, sid, dts, cols, invalid_data_behavior='warn'): - """ - Write the OHLCV data for the given sid. - If there is no bcolz ctable yet created for the sid, create it. - If the length of the bcolz ctable is not exactly to the date before - the first day provided, fill the ctable with 0s up to that date. - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - dts : datetime64 array - The dts corresponding to values in cols. - cols : dict of str -> np.array - dict of market data with the following characteristics. 
- keys are ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - """ - if not all(len(dts) == len(cols[name]) for name in self.COL_NAMES): - raise BcolzFiveMinuteWriterColumnMismatch( - "Length of dts={0} should match cols: {1}".format( - len(dts), - " ".join("{0}={1}".format(name, len(cols[name])) - for name in self.COL_NAMES))) - self._write_cols(sid, dts, cols, invalid_data_behavior) - - def _write_cols(self, sid, dts, cols, invalid_data_behavior): - """ - Internal method for `write_cols` and `write`. - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - dts : datetime64 array - The dts corresponding to values in cols. - cols : dict of str -> np.array - dict of market data with the following characteristics. - keys are ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - """ - table = self._ensure_ctable(sid) - - tds = self._session_labels - input_first_day = self._calendar.minute_to_session_label( - pd.Timestamp(dts[0]), direction='previous') - - last_date = self.last_date_in_output_for_sid(sid) - - day_before_input = input_first_day - tds.freq - - self.pad(sid, day_before_input) - table = self._ensure_ctable(sid) - - # Get the number of minutes already recorded in this sid's ctable - num_rec_mins = table.size - - all_minutes = self._five_minute_index - # Get the latest minute we wish to write to the ctable - last_minute_to_write = pd.Timestamp(dts[-1], tz='UTC') - - #print 'all_minutes[-1]:', all_minutes[num_rec_mins-1] - #print 'last_minute_to_write:', last_minute_to_write - - # In the event that we've already written some minutely data to the - # ctable, guard against overwriting that data. 
- if num_rec_mins > 0: - last_recorded_minute = all_minutes[num_rec_mins - 1] - if last_minute_to_write <= last_recorded_minute: - raise BcolzMinuteOverlappingData(dedent(""" - Data with last_date={0} already includes input start={1} for - sid={2}""".strip()).format(last_date, input_first_day, sid)) - - latest_min_count = all_minutes.get_loc(last_minute_to_write) - - # Get all the minutes we wish to write (all market minutes after the - # latest currently written, up to and including last_minute_to_write) - all_minutes_in_window = all_minutes[num_rec_mins:latest_min_count + 1] - - minutes_count = all_minutes_in_window.size - - open_col = np.zeros(minutes_count, dtype=uint64) - high_col = np.zeros(minutes_count, dtype=uint64) - low_col = np.zeros(minutes_count, dtype=uint64) - close_col = np.zeros(minutes_count, dtype=uint64) - vol_col = np.zeros(minutes_count, dtype=uint64) - - dt_ixs = np.searchsorted(all_minutes_in_window.values, - dts.astype('datetime64[ns]')) - - ohlc_ratio = self.ohlc_ratio_for_sid(sid) - - ( - open_col[dt_ixs], - high_col[dt_ixs], - low_col[dt_ixs], - close_col[dt_ixs], - vol_col[dt_ixs], - ) = convert_cols(cols, ohlc_ratio, sid, invalid_data_behavior) - - table.append([ - open_col, - high_col, - low_col, - close_col, - vol_col - ]) - table.flush() - - def data_len_for_day(self, day): - """ - Return the number of data points up to and including the - provided day. - """ - day_ix = self._session_labels.get_loc(day) - # Add one to the 0-indexed day_ix to get the number of days. 
- num_days = day_ix + 1 - return num_days * self._five_minutes_per_day - - def truncate(self, date): - """Truncate data beyond this date in all ctables.""" - truncate_slice_end = self.data_len_for_day(date) - - glob_path = os.path.join(self._rootdir, "*", "*", "*.bcolz") - sid_paths = sorted(glob(glob_path)) - - for sid_path in sid_paths: - file_name = os.path.basename(sid_path) - - try: - table = bcolz.open(rootdir=sid_path) - except IOError: - continue - if table.len <= truncate_slice_end: - logger.info("{0} not past truncate date={1}.", file_name, date) - continue - - logger.info( - "Truncating {0} at end_date={1}", file_name, date.date() - ) - - table.resize(truncate_slice_end) - - # Update end session in metadata. - metadata = BcolzFiveMinuteBarMetadata.read(self._rootdir) - metadata.end_session = date - metadata.write(self._rootdir) - - -class BcolzFiveMinuteBarReader(FiveMinuteBarReader): - """ - Reader for data written by BcolzFiveMinuteBarWriter - - Parameters - ---------- - rootdir : string - The root directory containing the metadata and asset bcolz - directories. 
- - See Also - -------- - catalyst.data.minute_bars.BcolzFiveMinuteBarWriter - """ - FIELDS = ('open', 'high', 'low', 'close', 'volume') - - def __init__(self, rootdir, sid_cache_size=1000): - self._rootdir = rootdir - - metadata = self._get_metadata() - - self._start_session = metadata.start_session - self._end_session = metadata.end_session - - self.calendar = metadata.calendar - slicer = self.calendar.schedule.index.slice_indexer( - self._start_session, - self._end_session, - ) - self._schedule = self.calendar.schedule[slicer] - self._market_opens = self._schedule.market_open - self._market_open_values = self._market_opens.values.\ - astype('datetime64[m]').astype(np.int64) - self._market_closes = self._schedule.market_close - self._market_close_values = self._market_closes.values.\ - astype('datetime64[m]').astype(np.int64) - - self._default_ohlc_inverse = 1.0 / metadata.default_ohlc_ratio - ohlc_ratios = metadata.ohlc_ratios_per_sid - if ohlc_ratios: - self._ohlc_inverses_per_sid = ( - valmap(lambda x: 1.0 / x, ohlc_ratios)) - else: - self._ohlc_inverses_per_sid = None - - self._five_minutes_per_day = metadata.five_minutes_per_day - - self._carrays = { - field: LRU(sid_cache_size) - for field in self.FIELDS - } - - self._last_get_value_dt_position = None - self._last_get_value_dt_value = None - - # This is to avoid any bad data or other performance-killing situation - # where there a consecutive streak of 0 (no volume) starting at an - # asset's start date. - # if asset 1 started on 2015-01-03 but its first trade is 2015-01-06 - # 10:31 AM US/Eastern, this dict would store {1: 23675971}, - # which is the minute epoch of that date. 
- self._known_zero_volume_dict = {} - - def _get_metadata(self): - return BcolzFiveMinuteBarMetadata.read(self._rootdir) - - @property - def trading_calendar(self): - return self.calendar - - @lazyval - def last_available_dt(self): - _, close = self.calendar.open_and_close_for_session(self._end_session) - return close - - @property - def first_trading_day(self): - return self._start_session - - def _ohlc_ratio_inverse_for_sid(self, sid): - if self._ohlc_inverses_per_sid is not None: - try: - return self._ohlc_inverses_per_sid[sid] - except KeyError: - pass - - # If we can not get a sid-specific OHLC inverse for this sid, - # fallback to the default. - return self._default_ohlc_inverse - - def _minutes_to_exclude(self): - """ - Calculate the minutes which should be excluded when a window - occurs on days which had an early close, i.e. days where the close - based on the regular period of minutes per day and the market close - do not match. - - Returns - ------- - List of DatetimeIndex representing the minutes to exclude because - of early closes. - """ - market_opens = self._market_opens.values.astype('datetime64[m]') - market_closes = self._market_closes.values.astype('datetime64[m]') - minutes_per_day = (market_closes - market_opens).astype(np.int64) / 5 - early_indices = np.where( - minutes_per_day != self._five_minutes_per_day - 1)[0] - early_opens = self._market_opens[early_indices] - early_closes = self._market_closes[early_indices] - minutes = [(market_open, early_close) - for market_open, early_close - in zip(early_opens, early_closes)] - return minutes - - @lazyval - def _minute_exclusion_tree(self): - """ - Build an interval tree keyed by the start and end of each range - of positions should be dropped from windows. (These are the minutes - between an early close and the minute which would be the close based - on the regular period if there were no early close.) - The value of each node is the same start and end position stored as - a tuple. 
- - The data is stored as such in support of a fast answer to the question, - does a given start and end position overlap any of the exclusion spans? - - Returns - ------- - IntervalTree containing nodes which represent the minutes to exclude - because of early closes. - """ - itree = IntervalTree() - for market_open, early_close in self._minutes_to_exclude(): - start_pos = self._find_position_of_five_minute(early_close) + 1 - end_pos = ( - self._find_position_of_five_minute(market_open) - + - self._five_minutes_per_day - - - 1 - ) - data = (start_pos, end_pos) - itree[start_pos:end_pos + 1] = data - return itree - - def _exclusion_indices_for_range(self, start_idx, end_idx): - """ - Returns - ------- - List of tuples of (start, stop) which represent the ranges of minutes - which should be excluded when a market minute window is requested. - """ - itree = self._minute_exclusion_tree - if itree.overlaps(start_idx, end_idx): - ranges = [] - intervals = itree[start_idx:end_idx] - for interval in intervals: - ranges.append(interval.data) - return sorted(ranges) - else: - return None - - def _get_carray_path(self, sid, field): - sid_subdir = _sid_subdir_path(sid) - # carrays are subdirectories of the sid's rootdir - return os.path.join(self._rootdir, sid_subdir, field) - - def _open_minute_file(self, field, sid): - sid = int(sid) - - try: - carray = self._carrays[field][sid] - except KeyError: - carray = self._carrays[field][sid] = \ - bcolz.carray(rootdir=self._get_carray_path(sid, field), - mode='r') - - return carray - - def table_len(self, sid): - """Returns the length of the underlying table for this sid.""" - return len(self._open_minute_file('close', sid)) - - def get_sid_attr(self, sid, name): - sid_subdir = _sid_subdir_path(sid) - sid_path = os.path.join(self._rootdir, sid_subdir) - attrs = bcolz.attrs.attrs(sid_path, 'r') - try: - return attrs[name] - except KeyError: - return None - - def get_value(self, sid, dt, field): - """ - Retrieve the pricing info for 
the given sid, dt, and field. - - Parameters - ---------- - sid : int - Asset identifier. - dt : datetime-like - The datetime at which the trade occurred. - field : string - The type of pricing data to retrieve. - ('open', 'high', 'low', 'close', 'volume') - - Returns - ------- - out : float|int - - The market data for the given sid, dt, and field coordinates. - - For OHLC: - Returns a float if a trade occurred at the given dt. - If no trade occurred, a np.nan is returned. - - For volume: - Returns the integer value of the volume. - (A volume of 0 signifies no trades for the given dt.) - """ - if self._last_get_value_dt_value == dt.value: - minute_pos = self._last_get_value_dt_position - else: - try: - minute_pos = self._find_position_of_five_minute(dt) - except ValueError: - raise NoDataOnDate() - - self._last_get_value_dt_value = dt.value - self._last_get_value_dt_position = minute_pos - - try: - value = self._open_minute_file(field, sid)[minute_pos] - except IndexError: - value = 0 - if value == 0: - if field == 'volume': - return 0 - else: - return np.nan - - if field != 'volume': - value *= self._ohlc_ratio_inverse_for_sid(sid) - return value - - def get_last_traded_dt(self, asset, dt): - minute_pos = self._find_last_traded_five_minute_position(asset, dt) - if minute_pos == -1: - return pd.NaT - return self._pos_to_minute(minute_pos) - - def _find_last_traded_five_minute_position(self, asset, dt): - volumes = self._open_minute_file('volume', asset) - start_date_minute = asset.start_date.value / NANOS_IN_FIVE_MINUTE - dt_minute = dt.value / NANOS_IN_FIVE_MINUTE - - try: - # if we know of a dt before which this asset has no volume, - # don't look before that dt - earliest_dt_to_search = self._known_zero_volume_dict[asset.sid] - except KeyError: - earliest_dt_to_search = start_date_minute - - if dt_minute < earliest_dt_to_search: - return -1 - - pos = find_last_traded_five_minute_position_internal( - self._market_open_values, - self._market_close_values, - 
dt_minute, - earliest_dt_to_search, - volumes, - self._five_minutes_per_day, - ) - - if pos == -1: - # if we didn't find any volume before this dt, save it to avoid - # work in the future. - try: - self._known_zero_volume_dict[asset.sid] = max( - dt_minute, - self._known_zero_volume_dict[asset.sid] - ) - except KeyError: - self._known_zero_volume_dict[asset.sid] = dt_minute - - return pos - - def _pos_to_minute(self, pos): - minute_epoch = five_minute_value( - self._market_open_values, - pos, - self._five_minutes_per_day - ) - - return pd.Timestamp(minute_epoch, tz='UTC', unit="m") - - def _find_position_of_five_minute(self, minute_dt): - """ - Internal method that returns the position of the given minute in the - list of every trading minute since market open of the first trading - day. Adjusts non market minutes to the last close. - - ex. this method would return 1 for 2002-01-02 9:32 AM Eastern, if - 2002-01-02 is the first trading day of the dataset. - - Parameters - ---------- - minute_dt: pd.Timestamp - The minute whose position should be calculated. - - Returns - ------- - int: The position of the given minute in the list of all trading - minutes since market open on the first trading day. - """ - return find_position_of_five_minute( - self._market_open_values, - self._market_close_values, - minute_dt.value / NANOS_IN_FIVE_MINUTE, - self._five_minutes_per_day, - False, - ) - - def load_raw_arrays(self, fields, start_dt, end_dt, sids): - """ - Parameters - ---------- - fields : list of str - 'open', 'high', 'low', 'close', or 'volume' - start_dt: Timestamp - Beginning of the window range. - end_dt: Timestamp - End of the window range. - sids : list of int - The asset identifiers in the window. - - Returns - ------- - list of np.ndarray - A list with an entry per field of ndarrays with shape - (minutes in range, sids) with a dtype of float64, containing the - values for the respective field over start and end dt range. 
- """ - start_idx = self._find_position_of_five_minute(start_dt) - end_idx = self._find_position_of_five_minute(end_dt) - - num_minutes = (end_idx - start_idx + 1) - - results = [] - - indices_to_exclude = self._exclusion_indices_for_range( - start_idx, end_idx) - if indices_to_exclude is not None: - for excl_start, excl_stop in indices_to_exclude: - length = excl_stop - excl_start + 1 - num_minutes -= length - - shape = num_minutes, len(sids) - - for field in fields: - if field != 'volume': - out = np.full(shape, np.nan) - else: - out = np.zeros(shape, dtype=uint64) - - for i, sid in enumerate(sids): - carray = self._open_minute_file(field, sid) - values = carray[start_idx:end_idx + 1] - if indices_to_exclude is not None: - for excl_start, excl_stop in indices_to_exclude[::-1]: - excl_slice = np.s_[ - excl_start - start_idx:excl_stop - start_idx + 1] - values = np.delete(values, excl_slice) - - where = values != 0 - # first slice down to len(where) because we might not have - # written data for all the minutes requested - if field != 'volume': - out[:len(where), i][where] = ( - values[where] * self._ohlc_ratio_inverse_for_sid(sid)) - else: - out[:len(where), i][where] = values[where] - - results.append(out) - return results - - -class MinuteBarUpdateReader(with_metaclass(ABCMeta, object)): - """ - Abstract base class for minute update readers. - """ - - @abstractmethod - def read(self, dts, sids): - """ - Read and return pricing update data. - - Parameters - ---------- - dts : DatetimeIndex - The minutes for which to read the pricing updates. - sids : iter[int] - The sids for which to read the pricing updates. - - Returns - ------- - data : iter[(int, DataFrame)] - Returns an iterable of ``sid`` to the corresponding OHLCV data. - """ - raise NotImplementedError() - - -class H5MinuteBarUpdateWriter(object): - """ - Writer for files containing minute bar updates for consumption by a writer - for a ``MinuteBarReader`` format. 
- - Parameters - ---------- - path : str - The destination path. - complevel : int, optional - The HDF5 complevel, defaults to ``5``. - complib : str, optional - The HDF5 complib, defaults to ``zlib``. - """ - - FORMAT_VERSION = 0 - - _COMPLEVEL = 5 - _COMPLIB = 'zlib' - - def __init__(self, path, complevel=None, complib=None): - self._complevel = complevel if complevel \ - is not None else self._COMPLEVEL - self._complib = complib if complib \ - is not None else self._COMPLIB - self._path = path - - def write(self, frames): - """ - Write the frames to the target HDF5 file, using the format used by - ``pd.Panel.to_hdf`` - - Parameters - ---------- - frames : iter[(int, DataFrame)] or dict[int -> DataFrame] - An iterable or other mapping of sid to the corresponding OHLCV - pricing data. - """ - with HDFStore(self._path, 'w', - complevel=self._complevel, complib=self._complib) \ - as store: - panel = pd.Panel.from_dict(dict(frames)) - panel.to_hdf(store, 'updates') - with tables.open_file(self._path, mode='r+') as h5file: - h5file.set_node_attr('/', 'version', 0) - - -class H5MinuteBarUpdateReader(MinuteBarUpdateReader): - """ - Reader for minute bar updates stored in HDF5 files. - - Parameters - ---------- - path : str - The path of the HDF5 file from which to source data. 
- """ - def __init__(self, path): - self._panel = pd.read_hdf(path) - - def read(self, dts, sids): - panel = self._panel[sids, dts, :] - return panel.iteritems() diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 88808fa6..d2191920 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -20,9 +20,8 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.bundles.core import from_bundle_ingest_dirname, \ - minute_path, five_minute_path, daily_path + minute_path, daily_path from catalyst.data.data_portal import DataPortal -from catalyst.data.five_minute_bars import BcolzFiveMinuteBarReader from catalyst.data.minute_bars import BcolzMinuteBarReader from catalyst.data.us_equity_pricing import BcolzDailyBarReader from catalyst.exchange.exchange_errors import ( @@ -262,7 +261,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): self.daily_bar_readers = dict() self.minute_bar_readers = dict() - self.five_minute_bar_readers = dict() self.history_loaders = dict() self.minute_history_loaders = dict() @@ -333,7 +331,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): Pick from a collection of readers based of exchange name and frequency. :param data_frequency: - The reader frequency: minute, 5-minute, daily. + The reader frequency: minute, daily. :param exchange_name: The exchange name. diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index ade14dfe..0c4d7163 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -672,7 +672,7 @@ class Exchange: Retrieve OHLCV candles for the given assets :param data_frequency: - The candle frequency: minute, 5-minute or daily + The candle frequency: minute or daily :param assets: list[TradingPair] The targeted assets. 
:param bar_count: diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index df640311..9c3cccc1 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -5,7 +5,6 @@ import numpy as np from logbook import Logger, INFO from catalyst import get_calendar -from catalyst.data.five_minute_bars import BcolzFiveMinuteOverlappingData from catalyst.data.minute_bars import BcolzMinuteOverlappingData from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex @@ -161,8 +160,7 @@ def process_bar_data(exchange, assets, writer, data_frequency, show_progress=False, invalid_data_behavior='raise' ) - except (BcolzMinuteOverlappingData, - BcolzFiveMinuteOverlappingData) as e: + except BcolzMinuteOverlappingData as e: log.warn('chunk already exists {}: {}'.format(chunk, e)) @@ -215,7 +213,6 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, def ingest(environ, asset_db_writer, minute_bar_writer, - five_minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, @@ -292,8 +289,6 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, # end=end # ) - # TODO: delete 5-minute writer everywhere - if minute_bar_writer is not None: process_bar_data( exchange=exchange, diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 2a0b8474..4fc541de 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -29,6 +29,7 @@ from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ from catalyst.finance.order import Order, ORDER_STATUS from catalyst.protocol import Account from catalyst.exchange.exchange_utils import get_exchange_symbols_filename +from catalyst.finance.transaction import Transaction log = Logger('Poloniex') @@ -274,14 +275,12 @@ class Poloniex(Exchange): if(is_buy): response = self.api.buy(exchange_symbol, amount, price) 
else: - reponse = self.api.sell(exchange_symbol, amount, price) + response = self.api.sell(exchange_symbol, -amount, price) except Exception as e: raise ExchangeRequestError(error=e) date = pd.Timestamp.utcnow() - print(response) - if('orderNumber' in response): order_id = str(response['orderNumber']) order = Order( @@ -372,14 +371,19 @@ class Poloniex(Exchange): except Exception as e: raise OrphanOrderError(order_id=order_id, exchange=self.name) + return order + + # TODO: Need to decide whether we fetch orders locally or from exchnage + # The code below is ignored + try: response = self.api.returnopenorders(self.get_symbol(order.sid)) except Exception as e: raise ExchangeRequestError(error=e) - for order in response: - if(int(order['orderNumber'])==int(order_id)): - return True + for o in response: + if(int(o['orderNumber'])==int(order_id)): + return order return None @@ -392,23 +396,31 @@ class Poloniex(Exchange): order_param : str or Order The order_id or order object to cancel. """ - order_id = order_param.id \ - if isinstance(order_param, Order) else order_param + + if(isinstance(order_param, Order)): + order = order_param + else: + order = self._portfolio.open_orders[order_param] try: - response = self.api.cancelorder(order_id) + response = self.api.cancelorder(order.id) except Exception as e: raise ExchangeRequestError(error=e) if 'error' in response: - raise OrderCancelError( - order_id=order_id, + log.info('Unable to cancel order {order_id} on exchange {exchange} {error}.'.format( + order_id=order.id, exchange=self.name, error=response['error'] - ) - - self.portfolio.remove_order(order_param) #TODO: Verify this works + )) + #raise OrderCancelError( + # order_id=order.id, + # exchange=self.name, + # error=response['error'] + #) + + self.portfolio.remove_order(order) def tickers(self, assets): @@ -508,7 +520,7 @@ class Poloniex(Exchange): except Exception as e: raise ExchangeRequestError(error=e) - if(response['error']): + if('error' in response): if(not 
order_open): raise OrphanOrderReverseError(order_id=order_id, exchange=self.name) else: @@ -524,16 +536,17 @@ class Poloniex(Exchange): """ if(not filter(lambda item: item['order_id'] == tx['tradeID'], self.transactions[order_id])): log.debug('Got new transaction for order {}: amount {}, price {}'.format( - order_id, tx.amount, tx.rate)) + order_id, tx['amount'], tx['rate'])) + tx['amount']=float(tx['amount']) if(tx['type']=='sell'): tx['amount'] = -tx['amount'] transaction = Transaction( asset=order.asset, amount=tx['amount'], dt=pd.to_datetime(tx['date'], utc=True), - price=tx['rate'], + price=float(tx['rate']), order_id=tx['tradeID'], # it's a misnomer, but keeping it for compatibility - commission=tx['fee'] + commission=float(tx['fee']) ) self.transactions[order_id].append(transaction) self.portfolio.execute_transaction(transaction) diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py index 3a181c07..4fe04972 100644 --- a/catalyst/exchange/poloniex/poloniex_api.py +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -15,6 +15,10 @@ class Poloniex_api(object): def __init__(self, key, secret): self.key = key self.secret = secret + + self.max_requests_per_second = 6 + self.request_cpt = dict() + self.public = ['returnTicker', 'return24Volume', 'returnOrderBook', 'returnTradeHistory', 'returnChartData', 'returnCurrencies', 'returnLoanOrders'] @@ -29,6 +33,43 @@ class Poloniex_api(object): 'cancelLoanOffer','returnOpenLoanOffers','returnActiveLoans', 'returnLendingHistory','toggleAutoRenew'] + def ask_request(self): + """ + Asks permission to issue a request to the exchange. + The primary purpose is to avoid hitting rate limits. + + The application will pause if the maximum requests per minute + permitted by the exchange is exceeded. 
+ + :return boolean: + + """ + now = time.time() + if not self.request_cpt: + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + cpt_date = self.request_cpt.keys()[0] + cpt = self.request_cpt[cpt_date] + + if now > cpt_date + 1: + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + if cpt >= self.max_requests_per_second: + + log.debug('max requests 6 reached, sleeping for 1 seconds') + sleep(1) + + now = time.time() + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + else: + self.request_cpt[cpt_date] += 1 + def query(self, method, req={}): if method in self.public: @@ -45,6 +86,7 @@ class Poloniex_api(object): else: raise ValueError('Method "' + method + '" not found in neither the Public API or Trading API endpoints') + self.ask_request() req = urllib.request.Request(url, data=post_data, headers=headers) return json.loads(urlopen(req).read()) diff --git a/catalyst/finance/performance/tracker.py b/catalyst/finance/performance/tracker.py index 810f2c32..c6df9292 100644 --- a/catalyst/finance/performance/tracker.py +++ b/catalyst/finance/performance/tracker.py @@ -111,27 +111,11 @@ class PerformanceTracker(object): self.treasury_curves, self.trading_calendar ) - elif self.emission_rate == '5-minute': - self.all_benchmark_returns = pd.Series( - index=pd.date_range( - self.sim_params.first_open, - self.sim_params.last_close, - freq='5min' - ), - ) - self.cumulative_risk_metrics = \ - risk.RiskMetricsCumulative( - self.sim_params, - self.treasury_curves, - self.trading_calendar, - create_first_day_stats=True, - ) elif self.emission_rate == 'minute': self.all_benchmark_returns = pd.Series(index=pd.date_range( self.sim_params.first_open, self.sim_params.last_close, freq='Min') ) - self.cumulative_risk_metrics = \ risk.RiskMetricsCumulative( self.sim_params, diff --git a/catalyst/gens/sim_engine.pyx b/catalyst/gens/sim_engine.pyx index a318f292..aa3a9d51 100644 --- a/catalyst/gens/sim_engine.pyx +++ 
b/catalyst/gens/sim_engine.pyx @@ -20,9 +20,7 @@ cimport cython from cpython cimport bool cdef np.int64_t _nanos_in_minute = 60000000000 -cdef np.int64_t _nanos_in_five_minutes = 5 * _nanos_in_minute NANOS_IN_MINUTE = _nanos_in_minute -NANOS_IN_FIVE_MINUTES = _nanos_in_five_minutes cpdef enum: BAR = 0 @@ -117,24 +115,3 @@ cdef class MinuteSimulationClock: yield minute, BAR if minute_emission: yield minute, MINUTE_END - -cdef class FiveMinuteSimulationClock(MinuteSimulationClock): - @cython.boundscheck(False) - @cython.wraparound(False) - cdef dict calc_minutes_by_session(self): - cdef dict five_minutes_by_session - cdef int session_idx - cdef np.int64_t session_nano - cdef np.ndarray[np.int64_t, ndim=1] five_minutes_nanos - - five_minutes_by_session = {} - for session_idx, session_nano in enumerate(self.sessions_nanos): - five_minutes_nanos = np.arange( - self.market_opens_nanos[session_idx], - self.market_closes_nanos[session_idx], - _nanos_in_five_minutes - ) - five_minutes_by_session[session_nano] = pd.to_datetime( - five_minutes_nanos, utc=True, box=True - ) - return five_minutes_by_session diff --git a/catalyst/gens/tradesimulation.py b/catalyst/gens/tradesimulation.py index 1ef9dfbf..f7daa233 100644 --- a/catalyst/gens/tradesimulation.py +++ b/catalyst/gens/tradesimulation.py @@ -34,7 +34,6 @@ class AlgorithmSimulator(object): EMISSION_TO_PERF_KEY_MAP = { 'minute': 'minute_perf', - '5-minute': '5_minute_perf', 'daily': 'daily_perf' } @@ -202,7 +201,7 @@ class AlgorithmSimulator(object): stack.enter_context(self.processor) stack.enter_context(ZiplineAPI(self.algo)) - if algo.data_frequency in set(('minute', '5-minute')): + if algo.data_frequency == 'minute': def execute_order_cancellation_policy(): algo.blotter.execute_cancel_policy(SESSION_END) diff --git a/catalyst/pipeline/loaders/crypto_pricing_loader.py b/catalyst/pipeline/loaders/crypto_pricing_loader.py index 22c0ccf2..da98c988 100644 --- a/catalyst/pipeline/loaders/crypto_pricing_loader.py +++ 
b/catalyst/pipeline/loaders/crypto_pricing_loader.py @@ -41,10 +41,6 @@ class CryptoPricingLoader(PipelineLoader): reader = bundle.daily_bar_reader all_sessions = cal.all_sessions - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader - all_sessions = cal.all_five_minutes - elif data_frequency == 'minute': reader = bundle.minute_bar_reader all_sessions = cal.all_minutes diff --git a/catalyst/pipeline/loaders/equity_pricing_loader.py b/catalyst/pipeline/loaders/equity_pricing_loader.py index 4d9ec055..c7dbfd41 100644 --- a/catalyst/pipeline/loaders/equity_pricing_loader.py +++ b/catalyst/pipeline/loaders/equity_pricing_loader.py @@ -40,8 +40,6 @@ class USEquityPricingLoader(PipelineLoader): if data_frequency == 'daily': reader = bundle.daily_bar_reader - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader elif daily_bar_reader == 'minute': reader = bundle.minute_bar_reader else: @@ -53,9 +51,6 @@ class USEquityPricingLoader(PipelineLoader): if data_frequency == 'daily': all_sessions = cal.all_sessions - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader - all_sessions = cal.all_five_minutes elif daily_bar_reader == 'minute': reader = bundle.minute_bar_reader all_sessions = cal.all_minutes diff --git a/catalyst/sources/benchmark_source.py b/catalyst/sources/benchmark_source.py index 846b7eb5..05d5c601 100644 --- a/catalyst/sources/benchmark_source.py +++ b/catalyst/sources/benchmark_source.py @@ -65,19 +65,6 @@ class BenchmarkSource(object): ) self._precalculated_series = minute_series - elif self.emission_rate == '5-minute': - five_minutes = \ - trading_calendar.five_minutes_for_sessions_in_range( - sessions[0], - sessions[-1], - ) - - five_minute_series = daily_series.reindex( - index=five_minutes, - method='ffill', - ) - - self._precalculated_series = five_minute_series else: self._precalculated_series = daily_series else: @@ -168,21 +155,6 @@ class BenchmarkSource(object): ffill=True )[asset] 
- return benchmark_series.pct_change()[1:] - elif self.emission_rate == '5-minute': - five_minutes = trading_calendar.five_minutes_for_sessions_in_range( - self.sessions[0], self.sessions[-1] - ) - benchmark_series = data_portal.get_history_window( - [asset], - five_minutes[-1], - bar_count=len(five_minutes) + 1, - frequency='5m', - field='price', - data_frequency=self.emission_rate, - ffill=True, - )[asset] - return benchmark_series.pct_change()[1:] else: start_date = asset.start_date diff --git a/catalyst/utils/calendars/trading_calendar.py b/catalyst/utils/calendars/trading_calendar.py index a1489067..4748b1c2 100644 --- a/catalyst/utils/calendars/trading_calendar.py +++ b/catalyst/utils/calendars/trading_calendar.py @@ -117,9 +117,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): self._trading_minutes_nanos = self.all_minutes.values.\ astype(np.int64) - - self._trading_five_minutes_nanos = self.all_five_minutes.values.\ - astype(np.int64) self.first_trading_session = _all_days[0] self.last_trading_session = _all_days[-1] @@ -182,18 +179,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): """ return int(self._minutes_per_session[start_session:end_session].sum()) - @lazyval - def _five_minutes_per_session(self): - diff = self.schedule.market_close - self.schedule.market_open - diff = diff.astype('timedelta64[m]') - return (diff + 1) // 5 - - def five_minutes_count_for_sessions_in_range(self, - start_session, - end_session): - five_mins = self._five_minutes_per_session[start_session:end_session] - return int(five_mins.sum()) - @property def regular_holidays(self): """ @@ -386,10 +371,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): idx = next_divider_idx(self._trading_minutes_nanos, dt.value) return self.all_minutes[idx] - def next_five_minute(self, dt): - idx = next_divider_idx(self._trading_five_minutes_nanos, dt.values) - return self.all_five_mintutes[idx] - def previous_minute(self, dt): """ Given a dt, return the previous exchange minute. 
@@ -484,12 +465,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): end_minute=self.schedule.at[session_label, 'market_close'], ) - def five_minutes_for_session(self, session_label): - return self.five_minutes_in_range( - start_five_minute=self.schedule.at[session_label, 'market_open'], - end_five_minute=self.schedule.at[session_label, 'market_close'], - ) - def minutes_window(self, start_dt, count): start_dt_nanos = start_dt.value all_minutes_nanos = self._trading_minutes_nanos @@ -591,20 +566,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return abs(end_idx - start_idx) - def five_minutes_in_range(self, start_five_minute, end_five_minute): - start_idx = searchsorted(self._trading_five_minutes_nanos, - start_five_minute.value) - - end_idx = searchsorted(self._trading_five_minutes_nanos, - end_five_minute.value) - - if end_five_minute.value == self._trading_five_minutes_nanos[end_idx]: - # if the end minute is a market minute, increase by 1 - end_idx += 1 - - return self.all_five_minutes[start_idx:end_idx] - - def minutes_in_range(self, start_minute, end_minute): """ Given start and end minutes, return all the calendar minutes @@ -662,15 +623,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return self.minutes_in_range(first_minute, last_minute) - def five_minutes_for_sessions_in_range(self, - start_session_label, - end_session_label): - - first_minute, _ = self.open_and_close_for_session(start_session_label) - _, last_minute = self.open_and_close_for_session(end_session_label) - - return self.five_minutes_in_range(first_minute, last_minute) - def open_and_close_for_session(self, session_label): """ Returns a tuple of timestamps of the open and close of the session @@ -777,13 +729,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return DatetimeIndex(all_minutes).tz_localize("UTC") - @lazyval - def all_five_minutes(self): - """ - Returns a DatetimeIndex representing all the five minutes in this calendar. 
- """ - return self._all_minutes_with_interval(5) - @lazyval def all_minutes(self): """ diff --git a/catalyst/utils/events.py b/catalyst/utils/events.py index 3fc83dd4..10bb2f55 100644 --- a/catalyst/utils/events.py +++ b/catalyst/utils/events.py @@ -602,7 +602,6 @@ class date_rules(object): class time_rules(object): market_open = AfterOpen market_close = BeforeClose - every_5_minutes = Always every_minute = Always diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 92a1ff30..6856970c 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -53,7 +53,6 @@ class ExchangeBundleTestCase: ingest(environ=os.environ, asset_db_writer=None, minute_bar_writer=minute_bar_writer, - five_minute_bar_writer=None, daily_bar_writer=None, adjustment_writer=None, calendar=open_calendar, diff --git a/tests/exchange/test_clock.py b/tests/exchange/test_clock.py index 94414d27..ff74986b 100644 --- a/tests/exchange/test_clock.py +++ b/tests/exchange/test_clock.py @@ -1,7 +1,7 @@ from unittest import TestCase from logbook import Logger from mock import patch, sentinel -from catalyst.exchange.exchange_clock import ExchangeClock +from catalyst.exchange.simple_clock import SimpleClock from catalyst.utils.calendars.trading_calendar import days_at_time from datetime import time from collections import defaultdict @@ -35,9 +35,9 @@ class ExchangeClockTestCase(TestCase): return self.internal_clock def test_clock(self): - with patch('catalyst.exchange.exchange_clock.pd.to_datetime') as to_dt, \ - patch('catalyst.exchange.exchange_clock.sleep') as sleep: - clock = ExchangeClock(sessions=self.sessions) + with patch('catalyst.exchange.simple_clock.pd.to_datetime') as to_dt, \ + patch('catalyst.exchange.simple_clock.sleep') as sleep: + clock = SimpleClock(sessions=self.sessions) to_dt.side_effect = self.get_clock sleep.side_effect = self.advance_clock start_time = pd.Timestamp.utcnow() From 6d8b8307a11e8194ebaf3ebd2a71ec6dea743fb5 Mon Sep 17 
00:00:00 2001 From: Victor Grau Serrat Date: Thu, 28 Sep 2017 14:14:34 -0600 Subject: [PATCH 032/110] bitfinex autogeneration of symbols.json with optional sourcing of start_date --- catalyst/exchange/bitfinex/bitfinex.py | 78 +++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 313e575e..0f587bc9 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -24,7 +24,9 @@ from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ ExchangeStopLimitOrder, ExchangeStopOrder from catalyst.finance.order import Order, ORDER_STATUS from catalyst.protocol import Account -from catalyst.exchange.exchange_utils import get_exchange_symbols_filename +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols + # Trying to account for REST api instability # https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request @@ -564,14 +566,84 @@ class Bitfinex(Exchange): log.debug('got tickers {}'.format(ticks)) return ticks - def generate_symbols_json(self, filename=None): + def generate_symbols_json(self, filename=None, source_dates=False): symbol_map = {} response = self._request('symbols', None) + + if not source_dates: + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + for symbol in response.json(): - symbol_map[symbol]= {"symbol":symbol[:-3]+'_'+symbol[-3:], "start_date": "2010-01-01"} + if(source_dates): + start_date = self.get_symbol_start_date(symbol) + else: + try: + start_date = cached_symbols[symbol]['start_date'] + except KeyError as e: + start_date = time.strftime('%Y-%m-%d') + + symbol_map[symbol]= {"symbol":symbol[:-3]+'_'+symbol[-3:], "start_date": start_date} if(filename is None): filename = get_exchange_symbols_filename(self.name) with open(filename,'w') as 
f: json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + + def get_symbol_start_date(self, symbol): + + print(symbol) + symbol_v2 = 't' + symbol.upper() + + """ + For each symbol we retrieve candles with Monhtly resolution + We get the first month, and query again with daily resolution + around that date, and we get the first date + """ + url = '{url}/v2/candles/trade:1M:{symbol}/hist'.format( + url=self.url, + symbol=symbol_v2 + ) + + try: + self.ask_request() + response = requests.get(url) + except Exception as e: + raise ExchangeRequestError(error=e) + + """ + If we don't get any data back for our monthly-resolution query + it means that symbol started trading less than a month ago, so + arbitrarily set the ref. date to 15 days ago to be safe with + +/- 31 days + """ + if(len(response.json())): + startmonth = response.json()[-1][0] + else: + startmonth = int((time.time()-15*24*3600)*1000) + + """ + Query again with daily resolution setting the start and end around + the startmonth we got above. 
Avoid end dates greater than now: time.time() + """ + url = '{url}/v2/candles/trade:1D:{symbol}/hist?start={start}&end={end}'.format( + url=self.url, + symbol=symbol_v2, + start=startmonth - 3600 *24 *31 *1000, + end=min(startmonth + 3600 *24 *31 *1000, int(time.time()*1000)) + ) + + try: + self.ask_request() + response = requests.get(url) + except Exception as e: + raise ExchangeRequestError(error=e) + + return time.strftime('%Y-%m-%d', time.gmtime(int(response.json()[-1][0]/1000))) + + + + + From 336f062794cbce9a1791d4c87f43b8d31139e19c Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 28 Sep 2017 14:42:47 -0600 Subject: [PATCH 033/110] poloniex autogeneration of symbols.json with cached start_date --- catalyst/exchange/poloniex/poloniex.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 4fc541de..d280ef59 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -28,7 +28,8 @@ from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ ExchangeStopLimitOrder, ExchangeStopOrder from catalyst.finance.order import Order, ORDER_STATUS from catalyst.protocol import Account -from catalyst.exchange.exchange_utils import get_exchange_symbols_filename +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols from catalyst.finance.transaction import Transaction @@ -466,13 +467,25 @@ class Poloniex(Exchange): def generate_symbols_json(self, filename=None): symbol_map = {} + + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + response = self.api.returnticker() + for exchange_symbol in response: base, market = self.sanitize_curency_symbol(exchange_symbol).split('_') symbol = '{market}_{base}'.format( market=market, base=base ) + + try: + start_date = 
cached_symbols[exchange_symbol]['start_date'] + except KeyError as e: + start_date = time.strftime('%Y-%m-%d') + symbol_map[exchange_symbol] = dict( symbol = symbol, - start_date = '2010-01-01' + start_date = start_date ) if(filename is None): From b45339692fa723f4965032fd71bfb468c11bcd87 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 28 Sep 2017 16:14:27 -0600 Subject: [PATCH 034/110] poloniex autogeneration of symbols.json with optional sourcing of start_date --- catalyst/exchange/bitfinex/bitfinex.py | 8 ++++-- catalyst/exchange/poloniex/poloniex.py | 29 ++++++++++++++++------ catalyst/exchange/poloniex/poloniex_api.py | 2 +- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 0f587bc9..6c23fc93 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -568,13 +568,14 @@ class Bitfinex(Exchange): def generate_symbols_json(self, filename=None, source_dates=False): symbol_map = {} - response = self._request('symbols', None) if not source_dates: fn, r = download_exchange_symbols(self.name) with open(fn) as data_file: cached_symbols = json.load(data_file) + response = self._request('symbols', None) + for symbol in response.json(): if(source_dates): start_date = self.get_symbol_start_date(symbol) @@ -584,7 +585,10 @@ class Bitfinex(Exchange): except KeyError as e: start_date = time.strftime('%Y-%m-%d') - symbol_map[symbol]= {"symbol":symbol[:-3]+'_'+symbol[-3:], "start_date": start_date} + symbol_map[symbol]= dict( + symbol = symbol[:-3]+'_'+symbol[-3:], + start_date = start_date + ) if(filename is None): filename = get_exchange_symbols_filename(self.name) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index d280ef59..7d0f9986 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -465,12 +465,13 @@ class Poloniex(Exchange): 
return ticks - def generate_symbols_json(self, filename=None): + def generate_symbols_json(self, filename=None, source_dates=False): symbol_map = {} - fn, r = download_exchange_symbols(self.name) - with open(fn) as data_file: - cached_symbols = json.load(data_file) + if not source_dates: + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) response = self.api.returnticker() @@ -478,10 +479,13 @@ class Poloniex(Exchange): base, market = self.sanitize_curency_symbol(exchange_symbol).split('_') symbol = '{market}_{base}'.format( market=market, base=base ) - try: - start_date = cached_symbols[exchange_symbol]['start_date'] - except KeyError as e: - start_date = time.strftime('%Y-%m-%d') + if(source_dates): + start_date = self.get_symbol_start_date(exchange_symbol) + else: + try: + start_date = cached_symbols[exchange_symbol]['start_date'] + except KeyError as e: + start_date = time.strftime('%Y-%m-%d') symbol_map[exchange_symbol] = dict( symbol = symbol, @@ -494,6 +498,15 @@ class Poloniex(Exchange): with open(filename,'w') as f: json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + def get_symbol_start_date(self, symbol): + try: + r = self.api.returnchartdata(symbol,86400,pd.to_datetime('2010-1-1').value // 10 ** 9) + except Exception as e: + raise ExchangeRequestError(error=e) + + return time.strftime('%Y-%m-%d', time.gmtime(int(r[0]['date']))) + + def check_open_orders(self): """ diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py index 4fe04972..599a0b65 100644 --- a/catalyst/exchange/poloniex/poloniex_api.py +++ b/catalyst/exchange/poloniex/poloniex_api.py @@ -106,7 +106,7 @@ class Poloniex_api(object): else: return self.query('returntradehistory', {'currencyPair': market }) - def returnchartdata(self, market, period, start, end): + def returnchartdata(self, market, period, start, end=9999999999): return 
self.query('returnChartData', {'currencyPair': market, 'period': period, 'start': start, 'end': end}) From e11ecf9d7840b599514f8ca460bf91f855f50b21 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 29 Sep 2017 13:38:58 -0600 Subject: [PATCH 035/110] Added 'live' mode to CLI instead of option to 'run' --- catalyst/__main__.py | 190 ++++++++++++++++++++------ catalyst/exchange/exchange_utils.py | 3 + catalyst/exchange/live_graph_clock.py | 1 + 3 files changed, 152 insertions(+), 42 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index bdd9e537..c6322f37 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -38,6 +38,8 @@ except NameError: default=True, help="Don't load the default catalyst extension.py file in $ZIPLINE_HOME.", ) +@click.version_option() + def main(extension, strict_extensions, default_extension): """Top level catalyst entry point. """ @@ -187,12 +189,6 @@ def ipython_only(option): default=None, help='Should the algorithm methods be resolved in the local namespace.' )) -@click.option( - '--live/--no-live', - is_flag=True, - default=False, - help='Enable live trading.', -) @click.option( '-x', '--exchange-name', @@ -210,12 +206,6 @@ def ipython_only(option): help='The base currency used to calculate statistics ' '(e.g. usd, btc, eth).', ) -@click.option( - '--live-graph/--no-live-graph', - is_flag=True, - default=False, - help='Display live graph.', -) @click.pass_context def run(ctx, algofile, @@ -230,44 +220,34 @@ def run(ctx, output, print_algo, local_namespace, - live, exchange_name, algo_namespace, - base_currency, - live_graph): + base_currency): """Run a backtest for the given algorithm. 
""" - if live: - if exchange_name is None: - ctx.fail("must specify an exchange name '-x' in live execution " - "mode '--live'") - if algo_namespace is None: - ctx.fail("must specify an algorithm name '-n' in live execution " - "mode '--live'") - if base_currency is None: - ctx.fail("must specify a base currency '-c' in live " - "execution mode '--live'") - else: - # check that the start and end dates are passed correctly - if start is None and end is None: - # check both at the same time to avoid the case where a user - # does not pass either of these and then passes the first only - # to be told they need to pass the second argument also - ctx.fail( - "must specify dates with '-s' / '--start' and '-e' / '--end'", - ) - if start is None: - ctx.fail("must specify a start date with '-s' / '--start'") - if end is None: - ctx.fail("must specify an end date with '-e' / '--end'") - if (algotext is not None) == (algofile is not None): ctx.fail( "must specify exactly one of '-f' / '--algofile' or" " '-t' / '--algotext'", ) + # check that the start and end dates are passed correctly + if start is None and end is None: + # check both at the same time to avoid the case where a user + # does not pass either of these and then passes the first only + # to be told they need to pass the second argument also + ctx.fail( + "must specify dates with '-s' / '--start' and '-e' / '--end'", + ) + if start is None: + ctx.fail("must specify a start date with '-s' / '--start'") + if end is None: + ctx.fail("must specify an end date with '-e' / '--end'") + + if exchange_name is None: + ctx.fail("must specify an exchange name '-x'") + perf = _run( initialize=None, handle_data=None, @@ -287,11 +267,11 @@ def run(ctx, print_algo=print_algo, local_namespace=local_namespace, environ=os.environ, - live=live, + live=False, exchange=exchange_name, algo_namespace=algo_namespace, base_currency=base_currency, - live_graph=live_graph + live_graph=False ) if output == '-': @@ -334,6 +314,133 @@ def 
catalyst_magic(line, cell=None): if e.code: raise ValueError('main returned non-zero status code: %d' % e.code) +@main.command() +@click.option( + '-f', + '--algofile', + default=None, + type=click.File('r'), + help='The file that contains the algorithm to run.', +) +@click.option( + '-t', + '--algotext', + help='The algorithm script to run.', +) +@click.option( + '-D', + '--define', + multiple=True, + help="Define a name to be bound in the namespace before executing" + " the algotext. For example '-Dname=value'. The value may be any python" + " expression. These are evaluated in order so they may refer to previously" + " defined names.", +) +@click.option( + '-o', + '--output', + default='-', + metavar='FILENAME', + show_default=True, + help="The location to write the perf data. If this is '-' the perf will" + " be written to stdout.", +) +@click.option( + '--print-algo/--no-print-algo', + is_flag=True, + default=False, + help='Print the algorithm to stdout.', +) +@ipython_only(click.option( + '--local-namespace/--no-local-namespace', + is_flag=True, + default=None, + help='Should the algorithm methods be resolved in the local namespace.' +)) +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', +) +@click.option( + '-n', + '--algo-namespace', + help='A label assigned to the algorithm for data storage purposes.' +) +@click.option( + '-c', + '--base-currency', + help='The base currency used to calculate statistics ' + '(e.g. usd, btc, eth).', +) +@click.option( + '--live-graph/--no-live-graph', + is_flag=True, + default=False, + help='Display live graph.', +) + +@click.pass_context +def live(ctx, + algofile, + algotext, + define, + output, + print_algo, + local_namespace, + exchange_name, + algo_namespace, + base_currency, + live_graph): + """Trade live with the given algorithm. 
+ """ + if (algotext is not None) == (algofile is not None): + ctx.fail( + "must specify exactly one of '-f' / '--algofile' or" + " '-t' / '--algotext'", + ) + + if exchange_name is None: + ctx.fail("must specify an exchange name '-x'") + if algo_namespace is None: + ctx.fail("must specify an algorithm name '-n' in live execution mode") + if base_currency is None: + ctx.fail("must specify a base currency '-c' in live execution mode") + + perf = _run( + initialize=None, + handle_data=None, + before_trading_start=None, + analyze=None, + algofile=algofile, + algotext=algotext, + defines=define, + data_frequency=None, + capital_base=None, + data=None, + bundle=None, + bundle_timestamp=None, + start=None, + end=None, + output=output, + print_algo=print_algo, + local_namespace=local_namespace, + environ=os.environ, + live=True, + exchange=exchange_name, + algo_namespace=algo_namespace, + base_currency=base_currency, + live_graph=live_graph + ) + + if output == '-': + click.echo(str(perf)) + elif output != os.devnull: # make the catalyst magic not write any data + perf.to_pickle(output) + + return perf + @main.command() @click.option( @@ -416,7 +523,6 @@ def clean(bundle, before, after, keep_last): keep_last, ) - @main.command() def bundles(): """List all of the available data bundles. 
diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index ebb33023..2d3729ed 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -80,6 +80,9 @@ def get_algo_folder(algo_name, environ=None): def get_algo_object(algo_name, key, environ=None, rel_path=None): + if algo_name is None: + return None + folder = get_algo_folder(algo_name, environ) if rel_path is not None: diff --git a/catalyst/exchange/live_graph_clock.py b/catalyst/exchange/live_graph_clock.py index eab0d58c..06b1086c 100644 --- a/catalyst/exchange/live_graph_clock.py +++ b/catalyst/exchange/live_graph_clock.py @@ -54,6 +54,7 @@ class LiveGraphClock(object): def __init__(self, sessions, context, time_skew=pd.Timedelta('0s')): + global mdates, plt #TODO: Could be cleaner import matplotlib.dates as mdates from matplotlib import pyplot as plt from matplotlib import style From 751608c8ab196b0dd8240378acb7d57cdb2f96ab Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 4 Oct 2017 22:35:07 -0400 Subject: [PATCH 036/110] Mocking Victor's history API service --- catalyst/exchange/data_api.py | 73 +++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 catalyst/exchange/data_api.py diff --git a/catalyst/exchange/data_api.py b/catalyst/exchange/data_api.py new file mode 100644 index 00000000..717dbe82 --- /dev/null +++ b/catalyst/exchange/data_api.py @@ -0,0 +1,73 @@ +import datetime + + +def get_date_from_ms(ms): + return datetime.datetime.fromtimestamp(ms / 1000.0) + + +def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, + exchanges): + """ + Mocking the history API written by Victor by proxying the request + to Bitfinex. + + :param exchange_name: string + The name identifier of the exchange (e.g. bitfinex). + Only bitfinex is supported in this mock function. + :param data_frequency: string + The bar frequency (minute or daily) + :param symbol: string + The trading pair symbol. 
+ :param start_ms: float + The start date in milliseconds. + :param end_ms: float + The end date in milliseconds. + :param exchanges: MOCK ONLY + This won't be required in production mode since the exchange object + will be retrieved on the server. + :return ohlcv: list[dict[string, float]] + The open, high, low, volume collection for the resulting bars. + + Notes + ===== + Using milliseconds for the start and end dates for ease of use in the + URL query parameters. + + Sometimes, one minute goes by without completing a trade of the given + trading pair on the given exchange. To minimize the payload size, we + don't return identical sequential bars. Post-processing code will + forward fill missing bars outside of this function. + """ + + if exchange_name != 'bitfinex': + raise ValueError('get_history mock function only works with bitfinex') + + exchange = exchanges[exchange_name] + assets = [exchange.get_asset(symbol=symbol)] + start = get_date_from_ms(start_ms) + end = get_date_from_ms(end_ms) + + delta = end - start + + periods = delta.seconds % 3600 / 60.0 \ + if data_frequency == 'minute' else delta.days + + candles = exchange.get_candles( + data_frequency=data_frequency, + assets=assets, + bar_count=periods, + start_dt=start, + end_dt=end + ) + + ohlcv = [] + for candle in candles: + ohlcv.append(dict( + open=candle['open'], + high=candle['high'], + low=candle['low'], + close=candle['close'], + volume=candle['volume'], + last_traded=candle['last_traded'] + )) + return ohlcv From 874968bbbb28e3d6947f0e9213bf7feba44a53bc Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 5 Oct 2017 18:06:17 -0400 Subject: [PATCH 037/110] Refactoring the exchange bundle for incremental loading --- catalyst/__main__.py | 114 +++- catalyst/data/loader.py | 42 +- .../buy_low_sell_high_neo_with_interface.py | 34 +- .../exchange/{data_api.py => bundle_utils.py} | 26 +- catalyst/exchange/exchange.py | 1 - catalyst/exchange/exchange_bundle.py | 500 +++++++++--------- 
catalyst/exchange/exchange_utils.py | 1 + catalyst/exchange/init_utils.py | 24 + tests/exchange/test_bitfinex.py | 10 +- tests/exchange/test_bundle.py | 60 +-- tests/exchange/test_data_portal.py | 8 +- 11 files changed, 429 insertions(+), 391 deletions(-) rename catalyst/exchange/{data_api.py => bundle_utils.py} (74%) create mode 100644 catalyst/exchange/init_utils.py diff --git a/catalyst/__main__.py b/catalyst/__main__.py index c6322f37..f57a52b4 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -8,6 +8,7 @@ import pandas as pd from six import text_type from catalyst.data import bundles as bundles_module +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.utils.cli import Date, Timestamp from catalyst.utils.run_algo import _run, load_extensions @@ -39,7 +40,6 @@ except NameError: help="Don't load the default catalyst extension.py file in $ZIPLINE_HOME.", ) @click.version_option() - def main(extension, strict_extensions, default_extension): """Top level catalyst entry point. 
""" @@ -238,7 +238,7 @@ def run(ctx, # does not pass either of these and then passes the first only # to be told they need to pass the second argument also ctx.fail( - "must specify dates with '-s' / '--start' and '-e' / '--end'", + "must specify dates with '-s' / '--start' and '-e' / '--end'", ) if start is None: ctx.fail("must specify a start date with '-s' / '--start'") @@ -246,7 +246,7 @@ def run(ctx, ctx.fail("must specify an end date with '-e' / '--end'") if exchange_name is None: - ctx.fail("must specify an exchange name '-x'") + ctx.fail("must specify an exchange name '-x'") perf = _run( initialize=None, @@ -307,13 +307,14 @@ def catalyst_magic(line, cell=None): '%s%%catalyst' % ((cell or '') and '%'), # don't use system exit and propogate errors to the caller standalone_mode=False, - ) + ) except SystemExit as e: # https://github.com/mitsuhiko/click/pull/533 # even in standalone_mode=False `--help` really wants to kill us ;_; if e.code: raise ValueError('main returned non-zero status code: %d' % e.code) + @main.command() @click.option( '-f', @@ -380,33 +381,32 @@ def catalyst_magic(line, cell=None): default=False, help='Display live graph.', ) - @click.pass_context def live(ctx, - algofile, - algotext, - define, - output, - print_algo, - local_namespace, - exchange_name, - algo_namespace, - base_currency, - live_graph): + algofile, + algotext, + define, + output, + print_algo, + local_namespace, + exchange_name, + algo_namespace, + base_currency, + live_graph): """Trade live with the given algorithm. 
""" if (algotext is not None) == (algofile is not None): ctx.fail( "must specify exactly one of '-f' / '--algofile' or" " '-t' / '--algotext'", - ) + ) if exchange_name is None: - ctx.fail("must specify an exchange name '-x'") + ctx.fail("must specify an exchange name '-x'") if algo_namespace is None: - ctx.fail("must specify an algorithm name '-n' in live execution mode") + ctx.fail("must specify an algorithm name '-n' in live execution mode") if base_currency is None: - ctx.fail("must specify a base currency '-c' in live execution mode") + ctx.fail("must specify a base currency '-c' in live execution mode") perf = _run( initialize=None, @@ -442,15 +442,74 @@ def live(ctx, return perf +@main.command() +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the exchange bundle to ingest (supported: bitfinex,' + ' bittrex, poloniex).', +) +@click.option( + '--data-frequency', + type=click.Choice({'daily', 'minute', 'daily,minute'}), + default='daily', + show_default=True, + help='The data frequency of the desired OHLCV bars.', +) +@click.option( + '-s', + '--start', + default=None, + type=Date(tz='utc', as_timestamp=True), + help='The start date of the data range. (default: one year from end date)', +) +@click.option( + '-e', + '--end', + default=None, + type=Date(tz='utc', as_timestamp=True), + help='The end date of the data range. (default: today)', +) +@click.option( + '--show-progress/--no-show-progress', + default=True, + help='Print progress information to the terminal.' +) +def ingest_exchange(exchange_name, data_frequency, start, end, + show_progress): + """ + Ingest data for the given exchange. 
+ """ + click.echo('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle = ExchangeBundle( + exchange_name=exchange_name, + data_frequency=data_frequency, + include_symbols=None, + exclude_symbols=None, + start=start, + end=end, + show_progress=show_progress + ) + exchange_bundle.ingest() + + @main.command() @click.option( '-b', '--bundle', - default='poloniex', metavar='BUNDLE-NAME', - show_default=True, + default=None, + show_default=False, help='The data bundle to ingest.', ) +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the exchange bundle to ingest (supported: bitfinex,' + ' bittrex, poloniex).', +) @click.option( '-c', '--compile-locally', @@ -469,9 +528,12 @@ def live(ctx, default=True, help='Print progress information to the terminal.' ) -def ingest(bundle, compile_locally, assets_version, show_progress): +@click.pass_context +def ingest(ctx, bundle, exchange_name, compile_locally, assets_version, + show_progress): """Ingest the data for the given bundle. """ + bundles_module.ingest( bundle, os.environ, @@ -491,6 +553,13 @@ def ingest(bundle, compile_locally, assets_version, show_progress): show_default=True, help='The data bundle to clean.', ) +@click.option( + '-x', + '--exchange_name', + metavar='EXCHANGE-NAME', + show_default=True, + help='The exchange bundle name to clean.', +) @click.option( '-e', '--before', @@ -523,6 +592,7 @@ def clean(bundle, before, after, keep_last): keep_last, ) + @main.command() def bundles(): """List all of the available data bundles. diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index a9dd8167..48fd2b13 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -12,30 +12,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime import os from collections import OrderedDict import logbook import pandas as pd -import numpy as np -from pandas_datareader.data import DataReader -import datetime -import time import pytz +from pandas_datareader.data import DataReader from six import iteritems from six.moves.urllib_error import HTTPError -from .benchmarks import get_benchmark_returns +from catalyst.utils.calendars import get_calendar from . import treasuries, treasuries_can +from .benchmarks import get_benchmark_returns +from ..utils.deprecate import deprecated from ..utils.paths import ( cache_root, data_root, ) -from ..utils.deprecate import deprecated - -from catalyst.data.bundles.poloniex import PoloniexBundle -from catalyst.utils.calendars import get_calendar - logger = logbook.Logger('Loader') @@ -308,20 +303,21 @@ def ensure_crypto_benchmark_data(symbol, ('Downloading benchmark data for {symbol!r} from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date) + raise DeprecationWarning('poloniex bundle deprecated') # Load benchmark symbol from Poloniex API - try: - bundle = PoloniexBundle() - bench_raw = bundle._fetch_symbol_frame( - None, - symbol, - get_calendar(bundle.calendar_name), - first_date - trading_day, - last_date, - 'daily', - ) - except (OSError, IOError, HTTPError): - logger.exception('Failed to fetch new crypto benchmark returns') - raise + # try: + # bundle = PoloniexBundle() + # bench_raw = bundle._fetch_symbol_frame( + # None, + # symbol, + # get_calendar(bundle.calendar_name), + # first_date - trading_day, + # last_date, + # 'daily', + # ) + # except (OSError, IOError, HTTPError): + # logger.exception('Failed to fetch new crypto benchmark returns') + # raise # select close column and compute percent change between days daily_close = bench_raw[['close']] diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 4a9cc4c5..2e216cb5 100644 --- 
a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -148,27 +148,27 @@ def analyze(context, stats): pass -run_algorithm( - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bitfinex', - live=True, - algo_namespace=algo_namespace, - base_currency='btc', - live_graph=False -) - -# Backtest # run_algorithm( -# capital_base=250, -# start=pd.to_datetime('2017-09-08', utc=True), -# end=pd.to_datetime('2017-09-15', utc=True), -# data_frequency='minute', # initialize=initialize, # handle_data=handle_data, # analyze=analyze, # exchange_name='bitfinex', +# live=True, # algo_namespace=algo_namespace, -# base_currency='btc' +# base_currency='btc', +# live_graph=False # ) + +# Backtest +run_algorithm( + capital_base=250, + start=pd.to_datetime('2017-09-08', utc=True), + end=pd.to_datetime('2017-09-15', utc=True), + data_frequency='minute', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + algo_namespace=algo_namespace, + base_currency='btc' +) diff --git a/catalyst/exchange/data_api.py b/catalyst/exchange/bundle_utils.py similarity index 74% rename from catalyst/exchange/data_api.py rename to catalyst/exchange/bundle_utils.py index 717dbe82..eca43e6c 100644 --- a/catalyst/exchange/data_api.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,4 +1,16 @@ import datetime +from logging import Logger, DEBUG +import os +from dateutil.relativedelta import relativedelta +import pandas as pd + +from catalyst import get_calendar +from catalyst.data.minute_bars import BcolzMinuteBarWriter +from catalyst.data.us_equity_pricing import BcolzDailyBarWriter +from catalyst.exchange.exchange_utils import get_exchange_folder +from catalyst.utils.paths import data_root, ensure_directory + +log = Logger('test_exchange_bundle') def get_date_from_ms(ms): @@ -30,7 +42,7 @@ def get_history_mock(exchange_name, data_frequency, symbol, start_ms, 
end_ms, Notes ===== - Using milliseconds for the start and end dates for ease of use in the + Using milliseconds for the start and end dates for ease of use in URL query parameters. Sometimes, one minute goes by without completing a trade of the given @@ -71,3 +83,15 @@ def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, last_traded=candle['last_traded'] )) return ohlcv + + +def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): + calc_start_dt = end_dt - datetime.timedelta(minutes=bar_count) + candles = exchange.get_candles( + data_frequency=data_frequency, + assets=assets, + bar_count=bar_count, + start_dt=calc_start_dt, + end_dt=end_dt + ) + return candles diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 0c4d7163..9d1afe45 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -1,5 +1,4 @@ import abc -import collections import random from abc import ABCMeta, abstractmethod, abstractproperty from datetime import timedelta diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 9c3cccc1..668b3e57 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,16 +1,19 @@ +import os from datetime import timedelta import pandas as pd -import numpy as np -from logbook import Logger, INFO +from logbook import Logger from catalyst import get_calendar -from catalyst.data.minute_bars import BcolzMinuteOverlappingData -from catalyst.exchange.bitfinex.bitfinex import Bitfinex -from catalyst.exchange.bittrex.bittrex import Bittrex -from catalyst.exchange.exchange_errors import ExchangeNotFoundError -from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ + BcolzMinuteBarWriter, BcolzMinuteBarReader +from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ + BcolzDailyBarReader +from 
catalyst.exchange.bundle_utils import fetch_candles_chunk +from catalyst.exchange.exchange_utils import get_exchange_folder +from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import maybe_show_progress +from catalyst.utils.paths import ensure_directory def _cachpath(symbol, type_): @@ -20,251 +23,33 @@ def _cachpath(symbol, type_): log = Logger('exchange_bundle') -def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): - calc_start_dt = end_dt - timedelta(minutes=bar_count) - candles = exchange.get_candles( - data_frequency=data_frequency, - assets=assets, - bar_count=bar_count, - start_dt=calc_start_dt, - end_dt=end_dt - ) - return candles +class ExchangeBundle: + def __init__(self, exchange_name, data_frequency, include_symbols=None, + exclude_symbols=None, start=None, end=None, + show_progress=True, environ=os.environ): + self.exchange = get_exchange(exchange_name) + self.data_frequency = data_frequency + self.assets = self.get_assets(include_symbols, exclude_symbols) + self.start, self.end = self.get_adj_dates(start, end) + self.environ = environ + self.show_progress = show_progress + self.minutes_per_day = 1440 + self.default_ohlc_ratio = 1000000 + self._writer = None + self._reader = None + def get_assets(self, include_symbols, exclude_symbols): + # TODO: filter assets + return self.exchange.assets -def process_bar_data(exchange, assets, writer, data_frequency, - show_progress, start, end): - open_calendar = get_calendar('OPEN') - - writer.default_ohlc_ratio = 1000000 - writer.calendar = open_calendar - writer.minutes_per_day = 1440 - writer.write_metadata = True - - delta = end - start - if data_frequency == 'minute': - delta_periods = delta.total_seconds() / 60 - frequency = '1m' - - elif data_frequency == 'daily': - delta_periods = delta.total_seconds() / 60 / 60 / 24 - frequency = '1d' - - else: - raise ValueError('frequency not supported') - - if delta_periods > exchange.num_candles_limit: - bar_count = 
exchange.num_candles_limit - - chunks = [] - last_chunk_date = end.floor('1 min') - while last_chunk_date > start + timedelta(minutes=bar_count): - # TODO: account for the partial last bar - chunk = dict(end=last_chunk_date, bar_count=bar_count) - chunks.append(chunk) - - # TODO: base on frequency - last_chunk_date = \ - last_chunk_date - timedelta(minutes=(bar_count + 1)) - - chunks.reverse() - - else: - chunks = [dict(end=end, bar_count=delta_periods)] - - with maybe_show_progress( - chunks, - show_progress, - label='Fetching {exchange} {frequency} candles: '.format( - exchange=exchange.name, - frequency=data_frequency - )) as it: - - previous_candle = dict() - for chunk in it: - chunk_end = chunk['end'] - chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) - - chunk_assets = [] - for asset in assets: - if asset.start_date <= chunk_end: - chunk_assets.append(asset) - - # TODO: ensure correct behavior for assets starting in the chunk - candles = fetch_candles_chunk( - exchange=exchange, - assets=chunk_assets, - data_frequency=frequency, - end_dt=chunk_end, - bar_count=chunk['bar_count'] - ) - log.debug('requests counter {}'.format(exchange.request_cpt)) - - num_candles = 0 - data = [] - for asset in candles: - asset_candles = candles[asset] - if not asset_candles: - log.debug( - 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=chunk_assets, - exchange=exchange.name, - end=chunk_end - ) - ) - continue - - all_dates = [] - all_candles = [] - date = chunk_start - while date <= chunk_end: - - previous = previous_candle[asset] \ - if asset in previous_candle else None - - candle = next((candle for candle in asset_candles \ - if candle['last_traded'] == date), previous) - - if candle is not None: - all_dates.append(date) - all_candles.append(candle) - - previous_candle[asset] = candle - - date += timedelta(minutes=1) - - df = pd.DataFrame(all_candles, index=all_dates) - if not df.empty: - df.sort_index(inplace=True) - - sid = asset.sid - 
num_candles += len(df.values) - - data.append((sid, df)) - - try: - log.debug( - 'writing {num_candles} candles from {start} to {end}'.format( - num_candles=num_candles, - start=chunk_start, - end=chunk_end - ) - ) - - for pair in data: - log.debug('data for sid {}\n{}\n{}'.format( - pair[0], pair[1].head(2), pair[1].tail(2))) - - writer.write( - data=data, - show_progress=False, - invalid_data_behavior='raise' - ) - except BcolzMinuteOverlappingData as e: - log.warn('chunk already exists {}: {}'.format(chunk, e)) - - -def exchange_bundle(exchange_name, symbols=None, start=None, end=None, - log_level=INFO): - """Create a data bundle ingest function for the specified exchange. - - Parameters - ---------- - exchange_name: str - The name of the exchange - symbols : iterable[str] - The ticker symbols to load data for. - start : datetime, optional - The start date to query for. By default this pulls the full history - for the calendar. - end : datetime, optional - The end date to query for. By default this pulls the full history - for the calendar. - - Returns - ------- - ingest : callable - The bundle ingest function for the given set of symbols. - - Examples - -------- - This code should be added to ~/.catalyst/extension.py - - .. code-block:: python - - from catalyst.data.bundles import register - from catalyst.exchange.exchange_bundle import exchange_bundle - - symbols = ( - 'btc_usd', - 'eth_btc', - 'etc_btc', - 'neo_btc', - ) - register('exchange_bitfinex', exchange_bundle('bitfinex', symbols)) - - Notes - ----- - The sids for each symbol will be the index into the symbols sequence. 
- """ - # strict this in memory so that we can reiterate over it - log.level = log_level - - def ingest(environ, - asset_db_writer, - minute_bar_writer, - daily_bar_writer, - adjustment_writer, - calendar, - start_session, - end_session, - cache, - show_progress, - is_compile, - output_dir, - start=start, - end=end): - - log.info('ingesting bundle {}'.format(output_dir)) - - # TODO: I don't understand this session vs dates idea - if start is None: - start = start_session - if end is None: - end = end_session - + def get_adj_dates(self, start, end): now = pd.Timestamp.utcnow() if end > now: log.info('adjusting the end date to now {}'.format(now)) end = now - log.info('ingesting data from {} to {}'.format(start, end)) - - exchange_auth = get_exchange_auth(exchange_name) - if exchange_name == 'bitfinex': - exchange = Bitfinex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=None, # TODO: make optional at the exchange - portfolio=None - ) - elif exchange_name == 'bittrex': - exchange = Bittrex( - key=exchange_auth['key'], - secret=exchange_auth['secret'], - base_currency=None, - portfolio=None - ) - else: - raise ExchangeNotFoundError(exchange_name=exchange_name) - - if symbols is not None: - assets = exchange.get_assets(symbols) - else: - assets = exchange.assets - earliest_trade = None - for asset in assets: + for asset in self.assets: if earliest_trade is None or earliest_trade > asset.start_date: earliest_trade = asset.start_date @@ -278,26 +63,213 @@ def exchange_bundle(exchange_name, symbols=None, start=None, end=None, if start >= end: raise ValueError('start date cannot be after end date') - # if daily_bar_writer is not None: - # process_bar_data( - # exchange=exchange, - # assets=assets, - # writer=daily_bar_writer, - # data_frequency='daily', - # show_progress=show_progress, - # start=start, - # end=end - # ) + return start, end - if minute_bar_writer is not None: - process_bar_data( - exchange=exchange, - assets=assets, - 
writer=minute_bar_writer, - data_frequency='minute', - show_progress=show_progress, - start=start, - end=end + @property + def reader(self): + if self._reader is not None: + return self._reader + + root = get_exchange_folder(self.exchange.name) + input_dir = '{root}/{frequency}_bundle'.format( + root=root, + frequency=self.data_frequency + ) + + if self.data_frequency == 'minute': + try: + self._reader = BcolzMinuteBarReader(input_dir) + except IOError: + log.debug('no reader data found in {}'.format(input_dir)) + + elif self.data_frequency == 'daily': + try: + self._reader = BcolzDailyBarReader(input_dir) + except IOError: + log.debug('no reader data found in {}'.format(input_dir)) + else: + raise ValueError( + 'invalid frequency {}'.format(self.data_frequency) ) - return ingest + return self._reader + + @property + def writer(self): + if self._writer is not None: + return self._writer + + open_calendar = get_calendar('OPEN') + + root = get_exchange_folder(self.exchange.name) + output_dir = '{root}/{frequency}_bundle'.format( + root=root, + frequency=self.data_frequency + ) + ensure_directory(output_dir) + + if self.data_frequency == 'minute': + if len(os.listdir(output_dir)) > 0: + self._writer = BcolzMinuteBarWriter.open(output_dir, self.end) + else: + self._writer = BcolzMinuteBarWriter( + rootdir=output_dir, + calendar=open_calendar, + minutes_per_day=self.minutes_per_day, + start_session=self.start, + end_session=self.end, + write_metadata=True, + default_ohlc_ratio=self.default_ohlc_ratio + ) + elif self.data_frequency == 'daily': + if len(os.listdir(output_dir)) > 0: + self._writer = BcolzDailyBarWriter.open(output_dir, self.end) + else: + self._writer = BcolzDailyBarWriter( + filename=output_dir, + calendar=open_calendar, + start_session=self.start, + end_session=self.end + ) + else: + raise ValueError( + 'invalid frequency {}'.format(self.data_frequency) + ) + + return self._writer + + def ingest(self): + symbols = [] + log.debug( + 'ingesting trading 
pairs {symbols} on exchange {exchange} ' + 'from {start} to {end}'.format( + symbols=symbols, + exchange=self.exchange.name, + start=self.start, + end=self.end + ) + ) + + delta = self.end - self.start + if self.data_frequency == 'minute': + delta_periods = delta.total_seconds() / 60 + frequency = '1m' + + elif self.data_frequency == 'daily': + delta_periods = delta.total_seconds() / 60 / 60 / 24 + frequency = '1d' + + else: + raise ValueError('frequency not supported') + + if delta_periods > self.exchange.num_candles_limit: + bar_count = self.exchange.num_candles_limit + + chunks = [] + last_chunk_date = self.end.floor('1 min') + while last_chunk_date > self.start + timedelta(minutes=bar_count): + # TODO: account for the partial last bar + chunk = dict(end=last_chunk_date, bar_count=bar_count) + chunks.append(chunk) + + # TODO: base on frequency + last_chunk_date = \ + last_chunk_date - timedelta(minutes=(bar_count + 1)) + + chunks.reverse() + + else: + chunks = [dict(end=self.end, bar_count=delta_periods)] + + with maybe_show_progress( + chunks, + self.show_progress, + label='Fetching {exchange} {frequency} candles: '.format( + exchange=self.exchange.name, + frequency=self.data_frequency + )) as it: + + previous_candle = dict() + for chunk in it: + chunk_end = chunk['end'] + chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) + + chunk_assets = [] + for asset in self.assets: + if asset.start_date <= chunk_end: + chunk_assets.append(asset) + + # TODO: ensure correct behavior for assets starting in the chunk + candles = fetch_candles_chunk( + exchange=self.exchange, + assets=chunk_assets, + data_frequency=frequency, + end_dt=chunk_end, + bar_count=chunk['bar_count'] + ) + log.debug( + 'requests counter {}'.format(self.exchange.request_cpt)) + + num_candles = 0 + data = [] + for asset in candles: + asset_candles = candles[asset] + if not asset_candles: + log.debug( + 'no data: {symbols} on {exchange}, date {end}'.format( + symbols=chunk_assets, + 
exchange=self.exchange.name, + end=chunk_end + ) + ) + continue + + all_dates = [] + all_candles = [] + date = chunk_start + while date <= chunk_end: + + previous = previous_candle[asset] \ + if asset in previous_candle else None + + candle = next((candle for candle in asset_candles \ + if candle['last_traded'] == date), + previous) + + if candle is not None: + all_dates.append(date) + all_candles.append(candle) + + previous_candle[asset] = candle + + date += timedelta(minutes=1) + + df = pd.DataFrame(all_candles, index=all_dates) + if not df.empty: + df.sort_index(inplace=True) + + sid = asset.sid + num_candles += len(df.values) + + data.append((sid, df)) + + try: + log.debug( + 'writing {num_candles} candles from {start} to {end}'.format( + num_candles=num_candles, + start=chunk_start, + end=chunk_end + ) + ) + + for pair in data: + log.debug('data for sid {}\n{}\n{}'.format( + pair[0], pair[1].head(2), pair[1].tail(2))) + + self.writer.write( + data=data, + show_progress=False, + invalid_data_behavior='raise' + ) + except BcolzMinuteOverlappingData as e: + log.warn('chunk already exists {}: {}'.format(chunk, e)) diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 2d3729ed..c6884d73 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -3,6 +3,7 @@ import os import pickle import urllib from datetime import date, datetime + import pandas as pd from catalyst.exchange.exchange_errors import ExchangeAuthNotFound, \ diff --git a/catalyst/exchange/init_utils.py b/catalyst/exchange/init_utils.py new file mode 100644 index 00000000..446fe1cb --- /dev/null +++ b/catalyst/exchange/init_utils.py @@ -0,0 +1,24 @@ +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.exchange_errors import ExchangeNotFoundError +from catalyst.exchange.exchange_utils import get_exchange_auth + + +def get_exchange(exchange_name): + 
exchange_auth = get_exchange_auth(exchange_name) + if exchange_name == 'bitfinex': + return Bitfinex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, # TODO: make optional at the exchange + portfolio=None + ) + elif exchange_name == 'bittrex': + return Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, + portfolio=None + ) + else: + raise ExchangeNotFoundError(exchange_name=exchange_name) diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index 7aaddc14..7efd4884 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -1,11 +1,9 @@ -from catalyst.exchange.bitfinex.bitfinex import Bitfinex -from base import BaseExchangeTestCase from logbook import Logger -from catalyst.finance.execution import (MarketOrder, - LimitOrder, - StopOrder, - StopLimitOrder) + +from base import BaseExchangeTestCase +from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.finance.execution import (LimitOrder) log = Logger('test_bitfinex') diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 6856970c..0441f4a6 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,13 +1,8 @@ -import os -from datetime import timedelta -from logging import Logger, DEBUG +from logging import Logger import pandas as pd -from catalyst import get_calendar -from catalyst.data.minute_bars import BcolzMinuteBarWriter -from catalyst.exchange.exchange_bundle import exchange_bundle -from catalyst.utils.paths import ensure_directory, data_root +from catalyst.exchange.exchange_bundle import ExchangeBundle log = Logger('test_exchange_bundle') @@ -18,50 +13,15 @@ class ExchangeBundleTestCase: start = pd.to_datetime('2017-09-01', utc=True) end = pd.Timestamp.utcnow() - open_calendar = get_calendar('OPEN') - root = data_root(os.environ) - output_dir = 
'{root}/exchange_{exchange}/2017-09-21T05;34;37.274482'.format( - root=root, - exchange=exchange_name - ) - ensure_directory(output_dir) - - filename = os.path.join(output_dir, 'metadata.json') - - start_session = start.floor('1d') - if os.path.isfile(filename): - minute_bar_writer = BcolzMinuteBarWriter.open(output_dir, end) - else: - # TODO: need to be able to write more precise numbers - minute_bar_writer = BcolzMinuteBarWriter( - rootdir=output_dir, - calendar=open_calendar, - minutes_per_day=1440, - start_session=start_session, - end_session=end, - write_metadata=True, - default_ohlc_ratio=1000000 - ) - - ingest = exchange_bundle( + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle = ExchangeBundle( exchange_name=exchange_name, - symbols=['eth_btc'], - log_level=DEBUG + data_frequency='minute', + include_symbols=None, + exclude_symbols=None, + start=start, + end=end, + show_progress=True ) - - ingest(environ=os.environ, - asset_db_writer=None, - minute_bar_writer=minute_bar_writer, - daily_bar_writer=None, - adjustment_writer=None, - calendar=open_calendar, - start_session=start_session, - end_session=end, - cache=dict(), - show_progress=True, - is_compile=False, - output_dir=output_dir, - start=start, - end=end) pass diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 2b881541..8d8f90e6 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,19 +1,13 @@ -from datetime import timedelta - -import os import pandas as pd -from catalyst import get_calendar from logbook import Logger -from catalyst.data.minute_bars import BcolzMinuteBarReader +from catalyst import get_calendar from catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.data_portal_exchange import DataPortalExchangeBacktest, \ DataPortalExchangeLive 
-from catalyst.exchange.exchange_bundle import exchange_bundle from catalyst.exchange.exchange_utils import get_exchange_auth -from catalyst.utils.run_algo import load_extensions log = Logger('test_bitfinex') From 14f8c25c89a73955ef8bf226208af54ee0c3fab2 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 5 Oct 2017 17:28:19 -0600 Subject: [PATCH 038/110] get_history against AWS API --- catalyst/exchange/data_api.py | 72 ++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/catalyst/exchange/data_api.py b/catalyst/exchange/data_api.py index 717dbe82..d898cc9e 100644 --- a/catalyst/exchange/data_api.py +++ b/catalyst/exchange/data_api.py @@ -1,9 +1,78 @@ -import datetime +import datetime, requests +EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] +API_URL = 'http://data.enigma.co/api/v1' def get_date_from_ms(ms): return datetime.datetime.fromtimestamp(ms / 1000.0) +def get_history(exchange_name, data_frequency, symbol, start_ms = None, end_ms = None): + """ + History API provides OHLCV data for any of the supported exchanges up to yesterday. + + :param exchange_name: string + Required: The name identifier of the exchange (e.g. bitfinex, bittrex, poloniex). + :param data_frequency: string + Required: The bar frequency (minute or daily) + *** currently only 'daily' is supported *** + :param symbol: string + Required: The trading pair symbol. + :param start: float + Optional: The start date in milliseconds. + :param end: float + Optional: The end date in milliseconds. + + :return ohlcv: list[dict[string, float]] + Each row contains the following dictionary for the resulting bars: + 'ts' : int, the timestamp in seconds + 'open' : float + 'high' : float + 'low' : float + 'close' : float + 'volume' : float + + Notes + ===== + Using milliseconds for the start and end dates for ease of use in the + function query parameters. 
+ + Sometimes, one minute goes by without completing a trade of the given + trading pair on the given exchange. To minimize the payload size, we + don't return identical sequential bars. Post-processing code will + forward fill missing bars outside of this function. + """ + + if exchange_name not in EXCHANGE_NAMES: + raise ValueError('get_history function only supports the following exchanges: {}'.format(list(EXCHANGE_NAMES))) + + if data_frequency != 'daily': + raise ValueError('get_history currently only supports daily data.') + + url = '{api_url}/candles?exchange={exchange}&market={symbol}&freq={data_frequency}'.format( + api_url=API_URL, + exchange=exchange_name, + symbol=symbol, + data_frequency=data_frequency, + ) + + if start_ms: + url += '&start={}'.format(int(start_ms/1000)) + + if end_ms: + url += '&end={}'.format(int(end_ms/1000)) + + try: + response = requests.get(url) + except Exception as e: + raise ValueError(e) + + data = response.json() + + if 'error' in response: + raise ValueError(e) + + return data + def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, exchanges): @@ -71,3 +140,4 @@ def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, last_traded=candle['last_traded'] )) return ohlcv + From 50f075792c7bfe11520337c5721265038ad154e3 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 5 Oct 2017 21:03:39 -0400 Subject: [PATCH 039/110] Tested ingestion after refactoring --- catalyst/exchange/exchange.py | 12 ++++++++---- catalyst/exchange/exchange_bundle.py | 10 ++++++++-- tests/exchange/test_bundle.py | 3 ++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 9d1afe45..ef06c438 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -141,12 +141,16 @@ class Exchange: return symbols - def get_assets(self, symbols): + def get_assets(self, symbols=None): assets = [] - for symbol in symbols: - 
asset = self.get_asset(symbol) - assets.append(asset) + if symbols is not None: + for symbol in symbols: + asset = self.get_asset(symbol) + assets.append(asset) + else: + for key in self.assets: + assets.append(self.assets[key]) return assets diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 668b3e57..d608d202 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -39,8 +39,14 @@ class ExchangeBundle: self._reader = None def get_assets(self, include_symbols, exclude_symbols): - # TODO: filter assets - return self.exchange.assets + # TODO: filter exclude symbols assets + if include_symbols is not None: + include_symbols_list = include_symbols.split(',') + + return self.exchange.get_assets(include_symbols_list) + + else: + return self.exchange.get_assets() def get_adj_dates(self, start, end): now = pd.Timestamp.utcnow() diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 0441f4a6..a7337e94 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -18,10 +18,11 @@ class ExchangeBundleTestCase: exchange_bundle = ExchangeBundle( exchange_name=exchange_name, data_frequency='minute', - include_symbols=None, + include_symbols='neo_btc', exclude_symbols=None, start=start, end=end, show_progress=True ) + exchange_bundle.ingest() pass From 04fc7855d5334f183a5a6f6ae9d1f981faec085e Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 6 Oct 2017 14:29:25 -0400 Subject: [PATCH 040/110] Skipping data chunks if they already exist --- catalyst/exchange/exchange_bundle.py | 37 ++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index d608d202..4f5a5a20 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,6 +1,7 @@ import os from datetime import timedelta +import numpy as np import pandas 
as pd from logbook import Logger @@ -20,6 +21,7 @@ def _cachpath(symbol, type_): return '-'.join([symbol, type_]) +BUNDLE_NAME_TEMPLATE = '{root}/{frequency}_bundle' log = Logger('exchange_bundle') @@ -77,7 +79,7 @@ class ExchangeBundle: return self._reader root = get_exchange_folder(self.exchange.name) - input_dir = '{root}/{frequency}_bundle'.format( + input_dir = BUNDLE_NAME_TEMPLATE.format( root=root, frequency=self.data_frequency ) @@ -108,7 +110,7 @@ class ExchangeBundle: open_calendar = get_calendar('OPEN') root = get_exchange_folder(self.exchange.name) - output_dir = '{root}/{frequency}_bundle'.format( + output_dir = BUNDLE_NAME_TEMPLATE.format( root=root, frequency=self.data_frequency ) @@ -144,6 +146,32 @@ class ExchangeBundle: return self._writer + def check_data_exists(self, assets, start, end): + has_data = True + for asset in assets: + if has_data and self.reader is not None: + try: + start_close = self.reader.get_value( + asset.sid, start, 'close') + + if np.is_nan(start_close): + has_data = False + + else: + end_close = self.reader.get_value( + asset.sid, end, 'close') + + if np.is_nan(end_close): + has_data = False + + except Exception as e: + has_data = False + + else: + has_data = False + + return has_data + def ingest(self): symbols = [] log.debug( @@ -205,6 +233,11 @@ class ExchangeBundle: if asset.start_date <= chunk_end: chunk_assets.append(asset) + if self.check_data_exists( + chunk_assets, chunk_start, chunk_end): + log.debug('the data chunk already exists') + continue + # TODO: ensure correct behavior for assets starting in the chunk candles = fetch_candles_chunk( exchange=self.exchange, From a004a01cdb60fafa54408c25e20c1c7f3e0f9daf Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 6 Oct 2017 14:33:05 -0400 Subject: [PATCH 041/110] Skipping data chunks if they already exist (fix) --- catalyst/exchange/exchange_bundle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catalyst/exchange/exchange_bundle.py 
b/catalyst/exchange/exchange_bundle.py index 4f5a5a20..8dad4123 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -154,14 +154,14 @@ class ExchangeBundle: start_close = self.reader.get_value( asset.sid, start, 'close') - if np.is_nan(start_close): + if np.isnan(start_close): has_data = False else: end_close = self.reader.get_value( asset.sid, end, 'close') - if np.is_nan(end_close): + if np.isnan(end_close): has_data = False except Exception as e: From 0cc9d839d08d22a23eeb4ee16998f93e73b04559 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 6 Oct 2017 15:13:40 -0400 Subject: [PATCH 042/110] Optimize the existing data filter to filter by asset. --- catalyst/exchange/exchange_bundle.py | 36 +++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 8dad4123..890a7bbd 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -146,9 +146,24 @@ class ExchangeBundle: return self._writer - def check_data_exists(self, assets, start, end): - has_data = True + def filter_existing_assets(self, assets, start, end): + """ + For each asset, get the close on the start and end dates of the chunk. + If the data exists, the chunk ingestion is complete. + If any data is missing we ingest the data. + + :param assets: list[TradingPair] + The assets is scope. + :param start: + The chunk start date. + :param end: + The chunk end date. 
+ :return: list[TradingPair] + The assets missing from the bundle + """ + missing_assets = [] for asset in assets: + has_data = True if has_data and self.reader is not None: try: start_close = self.reader.get_value( @@ -170,7 +185,10 @@ class ExchangeBundle: else: has_data = False - return has_data + if not has_data: + missing_assets.append(asset) + + return missing_assets def ingest(self): symbols = [] @@ -233,21 +251,21 @@ class ExchangeBundle: if asset.start_date <= chunk_end: chunk_assets.append(asset) - if self.check_data_exists( - chunk_assets, chunk_start, chunk_end): + missing_assets = self.filter_existing_assets( + chunk_assets, chunk_start, chunk_end) + + if len(missing_assets) == 0: log.debug('the data chunk already exists') continue # TODO: ensure correct behavior for assets starting in the chunk candles = fetch_candles_chunk( exchange=self.exchange, - assets=chunk_assets, + assets=missing_assets, data_frequency=frequency, end_dt=chunk_end, bar_count=chunk['bar_count'] ) - log.debug( - 'requests counter {}'.format(self.exchange.request_cpt)) num_candles = 0 data = [] @@ -256,7 +274,7 @@ class ExchangeBundle: if not asset_candles: log.debug( 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=chunk_assets, + symbols=missing_assets, exchange=self.exchange.name, end=chunk_end ) From a1cf00e6fec64f38826ddc9cc9c41ea165f1ec27 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 6 Oct 2017 21:03:31 -0600 Subject: [PATCH 043/110] updated symbols.json for 3 exchanges: end_daily, end_minute --- catalyst/exchange/bitfinex/bitfinex.py | 16 ++++++++++++++-- catalyst/exchange/bittrex/bittrex.py | 24 +++++++++++++++++++++--- catalyst/exchange/poloniex/poloniex.py | 16 ++++++++++++++-- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 6c23fc93..d4d368a9 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ 
-585,9 +585,21 @@ class Bitfinex(Exchange): except KeyError as e: start_date = time.strftime('%Y-%m-%d') + try: + end_daily = cached_symbols[symbol]['end_daily'] + except KeyError as e: + end_daily ='N/A' + + try: + end_minute = cached_symbols[symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + symbol_map[symbol]= dict( - symbol = symbol[:-3]+'_'+symbol[-3:], - start_date = start_date + symbol = symbol[:-3]+'_'+symbol[-3:], + start_date = start_date, + end_daily = end_daily, + end_minute = end_minute, ) if(filename is None): diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 8ed92d85..e59281fa 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -12,8 +12,8 @@ from catalyst.exchange.exchange_errors import InvalidHistoryFrequencyError, \ CreateOrderError from catalyst.finance.execution import LimitOrder, StopLimitOrder from catalyst.finance.order import Order, ORDER_STATUS -from catalyst.exchange.exchange_utils import get_exchange_symbols_filename - +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols log = Logger('Bittrex') @@ -309,6 +309,11 @@ class Bittrex(Exchange): def generate_symbols_json(self, filename=None): symbol_map = {} + + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + markets = self.api.getmarkets() for market in markets: exchange_symbol = market['MarketName'] @@ -316,9 +321,22 @@ class Bittrex(Exchange): market=self.sanitize_curency_symbol(market['MarketCurrency']), base=self.sanitize_curency_symbol(market['BaseCurrency']) ) + + try: + end_daily = cached_symbols[exchange_symbol]['end_daily'] + except KeyError as e: + end_daily ='N/A' + + try: + end_minute = cached_symbols[exchange_symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + symbol_map[exchange_symbol] = dict( symbol=symbol, - 
start_date=pd.to_datetime(market['Created'], utc=True).strftime("%Y-%m-%d") + start_date=pd.to_datetime(market['Created'], utc=True).strftime("%Y-%m-%d"), + end_daily = end_daily, + end_minute = end_minute, ) if(filename is None): diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 7d0f9986..0260b823 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -487,9 +487,21 @@ class Poloniex(Exchange): except KeyError as e: start_date = time.strftime('%Y-%m-%d') + try: + end_daily = cached_symbols[exchange_symbol]['end_daily'] + except KeyError as e: + end_daily ='N/A' + + try: + end_minute = cached_symbols[exchange_symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + symbol_map[exchange_symbol] = dict( - symbol = symbol, - start_date = start_date + symbol = symbol, + start_date = start_date, + end_daily = end_daily, + end_minute = end_minute, ) if(filename is None): From 3335ae0ea94043d2ffb9c982f6e43b7abcd5d70d Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sun, 8 Oct 2017 01:13:47 -0400 Subject: [PATCH 044/110] Refactored the data portal to use the exchange bundles --- catalyst/__main__.py | 25 +- catalyst/exchange/bundle_utils.py | 41 ++- catalyst/exchange/data_portal_exchange.py | 100 +------ catalyst/exchange/exchange_bundle.py | 324 ++++++++++++---------- tests/exchange/test_bundle.py | 6 +- tests/exchange/test_data_portal.py | 4 +- 6 files changed, 247 insertions(+), 253 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index f57a52b4..fce7fa55 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -307,7 +307,7 @@ def catalyst_magic(line, cell=None): '%s%%catalyst' % ((cell or '') and '%'), # don't use system exit and propogate errors to the caller standalone_mode=False, - ) + ) except SystemExit as e: # https://github.com/mitsuhiko/click/pull/533 # even in standalone_mode=False `--help` really wants to kill us ;_; @@ -471,27 
+471,38 @@ def live(ctx, type=Date(tz='utc', as_timestamp=True), help='The end date of the data range. (default: today)', ) +@click.option( + '--include-symbols', + default=None, + help='A list of symbols to ingest (optional comma separated list)', +) +@click.option( + '--exclude-symbols', + default=None, + help='A list of symbols to exclude from the ingestion ' + '(optional comma separated list)', +) @click.option( '--show-progress/--no-show-progress', default=True, help='Print progress information to the terminal.' ) def ingest_exchange(exchange_name, data_frequency, start, end, - show_progress): + include_symbols, exclude_symbols, show_progress): """ Ingest data for the given exchange. """ + exchange_bundle = ExchangeBundle(exchange_name) + click.echo('ingesting exchange bundle {}'.format(exchange_name)) - exchange_bundle = ExchangeBundle( - exchange_name=exchange_name, + exchange_bundle.ingest( data_frequency=data_frequency, - include_symbols=None, - exclude_symbols=None, + include_symbols=include_symbols, + exclude_symbols=exclude_symbols, start=start, end=end, show_progress=show_progress ) - exchange_bundle.ingest() @main.command() diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index eca43e6c..e4d2d12e 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,14 +1,9 @@ import datetime -from logging import Logger, DEBUG import os -from dateutil.relativedelta import relativedelta -import pandas as pd +from logging import Logger -from catalyst import get_calendar -from catalyst.data.minute_bars import BcolzMinuteBarWriter -from catalyst.data.us_equity_pricing import BcolzDailyBarWriter -from catalyst.exchange.exchange_utils import get_exchange_folder -from catalyst.utils.paths import data_root, ensure_directory +from catalyst.data.bundles import from_bundle_ingest_dirname +from catalyst.utils.paths import data_path log = Logger('test_exchange_bundle') @@ -95,3 +90,33 @@ def 
fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): end_dt=end_dt ) return candles + +def find_most_recent_time(bundle_name): + """ + Find most recent "time folder" for a given bundle. + + :param bundle_name: + The name of the targeted bundle. + + :return folder: + The name of the time folder. + """ + try: + bundle_folders = os.listdir( + data_path([bundle_name]), + ) + except OSError: + return None + + most_recent_bundle = dict() + for folder in bundle_folders: + date = from_bundle_ingest_dirname(folder) + if not most_recent_bundle or date > \ + most_recent_bundle[most_recent_bundle.keys()[0]]: + most_recent_bundle = dict() + most_recent_bundle[folder] = date + + if most_recent_bundle: + return most_recent_bundle.keys()[0] + else: + return None diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index d2191920..6d124b9f 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -12,24 +12,19 @@ # limitations under the License. 
import abc -import os from time import sleep import pandas as pd from catalyst.assets._assets import TradingPair from logbook import Logger -from catalyst.data.bundles.core import from_bundle_ingest_dirname, \ - minute_path, daily_path from catalyst.data.data_portal import DataPortal -from catalyst.data.minute_bars import BcolzMinuteBarReader -from catalyst.data.us_equity_pricing import BcolzDailyBarReader +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeBarDataError, - BundleNotFoundError, PricingDataBeforeTradingError, + PricingDataBeforeTradingError, PricingDataNotLoadedError, InvalidHistoryFrequencyError) -from catalyst.utils.paths import data_path log = Logger('DataPortalExchange') @@ -259,34 +254,14 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): def __init__(self, *args, **kwargs): super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) - self.daily_bar_readers = dict() - self.minute_bar_readers = dict() + self.exchange_bundles = dict() self.history_loaders = dict() self.minute_history_loaders = dict() + for exchange_name in self.exchanges: - name = 'exchange_{}'.format(exchange_name) - time_folder = \ - DataPortalExchangeBacktest.find_most_recent_time(name) - - if time_folder is None: - raise BundleNotFoundError(exchange=exchange_name) - - try: - self.daily_bar_readers[exchange_name] = \ - BcolzDailyBarReader( - daily_path(name, time_folder), - ) - except IOError: - self.daily_bar_readers[exchange_name] = None - - try: - self.minute_bar_readers[exchange_name] = \ - BcolzMinuteBarReader( - minute_path(name, time_folder), - ) - except IOError: - self.minute_bar_readers[exchange_name] = None + self.exchange_bundles[exchange_name] = \ + ExchangeBundle(exchange_name) def _get_first_trading_day(self, assets): first_date = None @@ -295,62 +270,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): first_date = asset.start_date return first_date 
- @staticmethod - def find_most_recent_time(bundle_name): - """ - Find most recent "time folder" for a given bundle. - - :param bundle_name: - The name of the targeted bundle. - - :return folder: - The name of the time folder. - """ - try: - bundle_folders = os.listdir( - data_path([bundle_name]), - ) - except OSError: - return None - - most_recent_bundle = dict() - for folder in bundle_folders: - date = from_bundle_ingest_dirname(folder) - if not most_recent_bundle or date > \ - most_recent_bundle[most_recent_bundle.keys()[0]]: - most_recent_bundle = dict() - most_recent_bundle[folder] = date - - if most_recent_bundle: - return most_recent_bundle.keys()[0] - else: - return None - - def _get_reader(self, data_frequency, exchange_name): - """ - Pick from a collection of readers based of exchange name and frequency. - - :param data_frequency: - The reader frequency: minute, daily. - - :param exchange_name: - The exchange name. - - :return reader: - A reader object. - """ - if data_frequency == 'minute': - reader = self.minute_bar_readers[exchange_name] - elif data_frequency == 'daily': - reader = self.daily_bar_readers[exchange_name] - else: - raise InvalidHistoryFrequencyError(frequency=data_frequency) - - if reader is None: - raise ValueError('reader not found') - - return reader - def get_exchange_history_window(self, exchange, assets, @@ -360,7 +279,9 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field, data_frequency, ffill=True): - reader = self._get_reader(data_frequency, exchange.name) + bundle = self.exchange_bundles[exchange.name] + + reader = bundle.get_reader(data_frequency) if data_frequency == 'minute': dts = self.trading_calendar.minutes_window( end_dt, -bar_count @@ -416,7 +337,8 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): def get_exchange_spot_value(self, exchange, assets, field, dt, data_frequency): - reader = self._get_reader(data_frequency, exchange.name) + bundle = self.exchange_bundles[exchange.name] + reader = 
bundle.get_reader(data_frequency) self.ensure_after_first_day(dt, assets) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 890a7bbd..a63aca45 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -26,19 +26,12 @@ log = Logger('exchange_bundle') class ExchangeBundle: - def __init__(self, exchange_name, data_frequency, include_symbols=None, - exclude_symbols=None, start=None, end=None, - show_progress=True, environ=os.environ): + def __init__(self, exchange_name, ): self.exchange = get_exchange(exchange_name) - self.data_frequency = data_frequency - self.assets = self.get_assets(include_symbols, exclude_symbols) - self.start, self.end = self.get_adj_dates(start, end) - self.environ = environ - self.show_progress = show_progress self.minutes_per_day = 1440 self.default_ohlc_ratio = 1000000 - self._writer = None - self._reader = None + self._writers = dict() + self._readers = dict() def get_assets(self, include_symbols, exclude_symbols): # TODO: filter exclude symbols assets @@ -50,14 +43,14 @@ class ExchangeBundle: else: return self.exchange.get_assets() - def get_adj_dates(self, start, end): + def get_adj_dates(self, start, end, assets): now = pd.Timestamp.utcnow() if end > now: log.info('adjusting the end date to now {}'.format(now)) end = now earliest_trade = None - for asset in self.assets: + for asset in assets: if earliest_trade is None or earliest_trade > asset.start_date: earliest_trade = asset.start_date @@ -73,80 +66,90 @@ class ExchangeBundle: return start, end - @property - def reader(self): - if self._reader is not None: - return self._reader + def get_reader(self, data_frequency): + """ + Get a data writer object, either a new object or from cache + + :return: BcolzMinuteBarReader or BcolzDailyBarReader + """ + if data_frequency in self._readers: + return self._readers[data_frequency] root = get_exchange_folder(self.exchange.name) input_dir = 
BUNDLE_NAME_TEMPLATE.format( root=root, - frequency=self.data_frequency + frequency=data_frequency ) - if self.data_frequency == 'minute': + if data_frequency == 'minute': try: - self._reader = BcolzMinuteBarReader(input_dir) + self._readers[data_frequency] = BcolzMinuteBarReader(input_dir) except IOError: log.debug('no reader data found in {}'.format(input_dir)) - elif self.data_frequency == 'daily': + elif data_frequency == 'daily': try: - self._reader = BcolzDailyBarReader(input_dir) + self._readers[data_frequency] = BcolzDailyBarReader(input_dir) except IOError: log.debug('no reader data found in {}'.format(input_dir)) else: raise ValueError( - 'invalid frequency {}'.format(self.data_frequency) + 'invalid frequency {}'.format(data_frequency) ) - return self._reader + return self._readers[data_frequency] - @property - def writer(self): - if self._writer is not None: - return self._writer + def get_writer(self, data_frequency, start, end): + """ + Get a data writer object, either a new object or from cache + + :return: BcolzMinuteBarWriter or BcolzDailyBarWriter + """ + key = (data_frequency, start, end) + if key in self._writers: + return self._writers[key] open_calendar = get_calendar('OPEN') root = get_exchange_folder(self.exchange.name) output_dir = BUNDLE_NAME_TEMPLATE.format( root=root, - frequency=self.data_frequency + frequency=data_frequency ) ensure_directory(output_dir) - if self.data_frequency == 'minute': + if data_frequency == 'minute': if len(os.listdir(output_dir)) > 0: - self._writer = BcolzMinuteBarWriter.open(output_dir, self.end) + self._writers[key] = \ + BcolzMinuteBarWriter.open(output_dir, end) else: - self._writer = BcolzMinuteBarWriter( + self._writers[key] = BcolzMinuteBarWriter( rootdir=output_dir, calendar=open_calendar, minutes_per_day=self.minutes_per_day, - start_session=self.start, - end_session=self.end, + start_session=start, + end_session=end, write_metadata=True, default_ohlc_ratio=self.default_ohlc_ratio ) - elif 
self.data_frequency == 'daily': + elif data_frequency == 'daily': if len(os.listdir(output_dir)) > 0: - self._writer = BcolzDailyBarWriter.open(output_dir, self.end) + self._writers[key] = BcolzDailyBarWriter.open(output_dir, end) else: - self._writer = BcolzDailyBarWriter( + self._writers[key] = BcolzDailyBarWriter( filename=output_dir, calendar=open_calendar, - start_session=self.start, - end_session=self.end + start_session=start, + end_session=end ) else: raise ValueError( - 'invalid frequency {}'.format(self.data_frequency) + 'invalid frequency {}'.format(data_frequency) ) - return self._writer + return self._writers[key] - def filter_existing_assets(self, assets, start, end): + def filter_existing_assets(self, assets, start, end, data_frequency): """ For each asset, get the close on the start and end dates of the chunk. If the data exists, the chunk ingestion is complete. @@ -161,20 +164,19 @@ class ExchangeBundle: :return: list[TradingPair] The assets missing from the bundle """ + reader = self.get_reader(data_frequency) missing_assets = [] for asset in assets: has_data = True - if has_data and self.reader is not None: + if has_data and reader is not None: try: - start_close = self.reader.get_value( - asset.sid, start, 'close') + start_close = reader.get_value(asset.sid, start, 'close') if np.isnan(start_close): has_data = False else: - end_close = self.reader.get_value( - asset.sid, end, 'close') + end_close = reader.get_value(asset.sid, end, 'close') if np.isnan(end_close): has_data = False @@ -190,36 +192,148 @@ class ExchangeBundle: return missing_assets - def ingest(self): + def ingest_chunk(self, chunk, previous_candle, data_frequency, assets, + writer): + chunk_end = chunk['end'] + chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) + + chunk_assets = [] + for asset in assets: + if asset.start_date <= chunk_end: + chunk_assets.append(asset) + + missing_assets = self.filter_existing_assets( + assets=chunk_assets, + start=chunk_start, + 
end=chunk_end, + data_frequency=data_frequency + ) + + if len(missing_assets) == 0: + log.debug('the data chunk already exists') + return + + # TODO: ensure correct behavior for assets starting in the chunk + candles = fetch_candles_chunk( + exchange=self.exchange, + assets=missing_assets, + data_frequency=data_frequency, + end_dt=chunk_end, + bar_count=chunk['bar_count'] + ) + + num_candles = 0 + data = [] + for asset in candles: + asset_candles = candles[asset] + if not asset_candles: + log.debug( + 'no data: {symbols} on {exchange}, date {end}'.format( + symbols=missing_assets, + exchange=self.exchange.name, + end=chunk_end + ) + ) + continue + + all_dates = [] + all_candles = [] + date = chunk_start + while date <= chunk_end: + + previous = previous_candle[asset] \ + if asset in previous_candle else None + + candle = next((candle for candle in asset_candles \ + if candle['last_traded'] == date), + previous) + + if candle is not None: + all_dates.append(date) + all_candles.append(candle) + + previous_candle[asset] = candle + + date += timedelta(minutes=1) + + df = pd.DataFrame(all_candles, index=all_dates) + if not df.empty: + df.sort_index(inplace=True) + + sid = asset.sid + num_candles += len(df.values) + + data.append((sid, df)) + + try: + log.debug( + 'writing {num_candles} candles from {start} to {end}'.format( + num_candles=num_candles, + start=chunk_start, + end=chunk_end + ) + ) + + for pair in data: + log.debug('data for sid {}\n{}\n{}'.format( + pair[0], pair[1].head(2), pair[1].tail(2))) + + writer.write( + data=data, + show_progress=False, + invalid_data_behavior='raise' + ) + except BcolzMinuteOverlappingData as e: + log.warn('chunk already exists {}: {}'.format(chunk, e)) + + def ingest(self, data_frequency, include_symbols=None, + exclude_symbols=None, start=None, end=None, + show_progress=True, environ=os.environ): + """ + Ingest the bundle + + :param data_frequency: + :param include_symbols: + :param exclude_symbols: + :param start: + :param 
end: + :param show_progress: + :param environ: + :return: + """ + + assets = self.get_assets(include_symbols, exclude_symbols) + start, end = self.get_adj_dates(start, end, assets) + symbols = [] log.debug( 'ingesting trading pairs {symbols} on exchange {exchange} ' 'from {start} to {end}'.format( symbols=symbols, exchange=self.exchange.name, - start=self.start, - end=self.end + start=start, + end=end ) ) - delta = self.end - self.start - if self.data_frequency == 'minute': + delta = end - start + if data_frequency == 'minute': delta_periods = delta.total_seconds() / 60 - frequency = '1m' - elif self.data_frequency == 'daily': + elif data_frequency == 'daily': delta_periods = delta.total_seconds() / 60 / 60 / 24 - frequency = '1d' else: raise ValueError('frequency not supported') + writer = self.get_writer(data_frequency, start, end) + if delta_periods > self.exchange.num_candles_limit: bar_count = self.exchange.num_candles_limit chunks = [] - last_chunk_date = self.end.floor('1 min') - while last_chunk_date > self.start + timedelta(minutes=bar_count): + last_chunk_date = end.floor('1 min') + while last_chunk_date > start + timedelta(minutes=bar_count): # TODO: account for the partial last bar chunk = dict(end=last_chunk_date, bar_count=bar_count) chunks.append(chunk) @@ -231,102 +345,22 @@ class ExchangeBundle: chunks.reverse() else: - chunks = [dict(end=self.end, bar_count=delta_periods)] + chunks = [dict(end=end, bar_count=delta_periods)] with maybe_show_progress( chunks, - self.show_progress, + show_progress, label='Fetching {exchange} {frequency} candles: '.format( exchange=self.exchange.name, - frequency=self.data_frequency + frequency=data_frequency )) as it: previous_candle = dict() for chunk in it: - chunk_end = chunk['end'] - chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) - - chunk_assets = [] - for asset in self.assets: - if asset.start_date <= chunk_end: - chunk_assets.append(asset) - - missing_assets = self.filter_existing_assets( - 
chunk_assets, chunk_start, chunk_end) - - if len(missing_assets) == 0: - log.debug('the data chunk already exists') - continue - - # TODO: ensure correct behavior for assets starting in the chunk - candles = fetch_candles_chunk( - exchange=self.exchange, - assets=missing_assets, - data_frequency=frequency, - end_dt=chunk_end, - bar_count=chunk['bar_count'] + self.ingest_chunk( + chunk=chunk, + previous_candle=previous_candle, + data_frequency=data_frequency, + assets=assets, + writer=writer ) - - num_candles = 0 - data = [] - for asset in candles: - asset_candles = candles[asset] - if not asset_candles: - log.debug( - 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=missing_assets, - exchange=self.exchange.name, - end=chunk_end - ) - ) - continue - - all_dates = [] - all_candles = [] - date = chunk_start - while date <= chunk_end: - - previous = previous_candle[asset] \ - if asset in previous_candle else None - - candle = next((candle for candle in asset_candles \ - if candle['last_traded'] == date), - previous) - - if candle is not None: - all_dates.append(date) - all_candles.append(candle) - - previous_candle[asset] = candle - - date += timedelta(minutes=1) - - df = pd.DataFrame(all_candles, index=all_dates) - if not df.empty: - df.sort_index(inplace=True) - - sid = asset.sid - num_candles += len(df.values) - - data.append((sid, df)) - - try: - log.debug( - 'writing {num_candles} candles from {start} to {end}'.format( - num_candles=num_candles, - start=chunk_start, - end=chunk_end - ) - ) - - for pair in data: - log.debug('data for sid {}\n{}\n{}'.format( - pair[0], pair[1].head(2), pair[1].tail(2))) - - self.writer.write( - data=data, - show_progress=False, - invalid_data_behavior='raise' - ) - except BcolzMinuteOverlappingData as e: - log.warn('chunk already exists {}: {}'.format(chunk, e)) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index a7337e94..76696c3e 100644 --- a/tests/exchange/test_bundle.py +++ 
b/tests/exchange/test_bundle.py @@ -14,9 +14,10 @@ class ExchangeBundleTestCase: start = pd.to_datetime('2017-09-01', utc=True) end = pd.Timestamp.utcnow() + exchange_bundle = ExchangeBundle(exchange_name) + log.info('ingesting exchange bundle {}'.format(exchange_name)) - exchange_bundle = ExchangeBundle( - exchange_name=exchange_name, + exchange_bundle.ingest( data_frequency='minute', include_symbols='neo_btc', exclude_symbols=None, @@ -24,5 +25,4 @@ class ExchangeBundleTestCase: end=end, show_progress=True ) - exchange_bundle.ingest() pass diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 8d8f90e6..80ed5531 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -90,6 +90,8 @@ class ExchangeDataPortalTestCase: '1m', 'close', 'minute') + + log.info('found history window: {}'.format(data)) pass def test_get_spot_value_backtest(self): @@ -102,5 +104,5 @@ class ExchangeDataPortalTestCase: date = pd.to_datetime('2017-09-10', utc=True) value = self.data_portal_backtest.get_spot_value( assets, 'close', date, 'minute') + log.info('found spot value {}'.format(value)) pass - From 16cdc196b0a49094797085b9fb5dc072a5b0242a Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sun, 8 Oct 2017 01:18:40 -0400 Subject: [PATCH 045/110] Minor fixes after merging --- catalyst/exchange/bundle_utils.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index e4577706..d863fb94 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,5 +1,4 @@ import datetime, requests -import datetime import os from logging import Logger @@ -11,10 +10,13 @@ log = Logger('test_exchange_bundle') EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' + def get_date_from_ms(ms): return datetime.datetime.fromtimestamp(ms / 1000.0) -def 
get_history(exchange_name, data_frequency, symbol, start_ms = None, end_ms = None): + +def get_history(exchange_name, data_frequency, symbol, start_ms=None, + end_ms=None): """ History API provides OHLCV data for any of the supported exchanges up to yesterday. @@ -51,7 +53,9 @@ def get_history(exchange_name, data_frequency, symbol, start_ms = None, end_ms = """ if exchange_name not in EXCHANGE_NAMES: - raise ValueError('get_history function only supports the following exchanges: {}'.format(list(EXCHANGE_NAMES))) + raise ValueError( + 'get_history function only supports the following exchanges: {}'.format( + list(EXCHANGE_NAMES))) if data_frequency != 'daily': raise ValueError('get_history currently only supports daily data.') @@ -61,13 +65,13 @@ def get_history(exchange_name, data_frequency, symbol, start_ms = None, end_ms = exchange=exchange_name, symbol=symbol, data_frequency=data_frequency, - ) + ) if start_ms: - url += '&start={}'.format(int(start_ms/1000)) + url += '&start={}'.format(int(start_ms / 1000)) if end_ms: - url += '&end={}'.format(int(end_ms/1000)) + url += '&end={}'.format(int(end_ms / 1000)) try: response = requests.get(url) @@ -77,7 +81,7 @@ def get_history(exchange_name, data_frequency, symbol, start_ms = None, end_ms = data = response.json() if 'error' in response: - raise ValueError(e) + raise ValueError(response['error']) return data @@ -150,7 +154,6 @@ def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, return ohlcv - def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): calc_start_dt = end_dt - datetime.timedelta(minutes=bar_count) candles = exchange.get_candles( @@ -162,6 +165,7 @@ def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): ) return candles + def find_most_recent_time(bundle_name): """ Find most recent "time folder" for a given bundle. 
From 403be971435ac61f7eb192dff1871a23f32db7aa Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sun, 8 Oct 2017 02:27:13 -0400 Subject: [PATCH 046/110] Integrating with history api --- catalyst/assets/_assets.pyx | 25 +++++++++++++++++--- catalyst/exchange/bundle_utils.py | 31 ++++++++++++++++--------- catalyst/exchange/exchange.py | 15 +++++++++++- catalyst/exchange/exchange_bundle.py | 34 ++++++++++++++++++++-------- tests/exchange/test_bundle.py | 21 ++++++++++++++++- 5 files changed, 100 insertions(+), 26 deletions(-) diff --git a/catalyst/assets/_assets.pyx b/catalyst/assets/_assets.pyx index b98127f7..986d2174 100644 --- a/catalyst/assets/_assets.pyx +++ b/catalyst/assets/_assets.pyx @@ -395,6 +395,9 @@ cdef class TradingPair(Asset): cdef readonly float leverage cdef readonly object market_currency cdef readonly object base_currency + cdef readonly object end_daily + cdef readonly object end_minute + cdef readonly object exchange_symbol _kwargnames = frozenset({ 'sid', @@ -409,7 +412,10 @@ cdef class TradingPair(Asset): 'leverage', 'market_currency', 'base_currency', - 'min_trade_size', + 'end_daily', + 'end_minute', + 'exchange_symbol', + 'min_trade_size' }) def __init__(self, object symbol, @@ -418,7 +424,10 @@ cdef class TradingPair(Asset): object asset_name=None, int sid=0, float leverage=1.0, + object end_daily=None, + object end_minute=None, object end_date=None, + object exchange_symbol=None, object first_traded=None, object auto_close_date=None, object exchange_full=None, @@ -474,7 +483,10 @@ cdef class TradingPair(Asset): :param asset_name: :param sid: :param leverage: + :param end_daily + :param end_minute :param end_date: + :param exchange_symbol: :param first_traded: :param auto_close_date: :param exchange_full: @@ -516,6 +528,9 @@ cdef class TradingPair(Asset): ) self.leverage = leverage + self.end_daily = end_daily + self.end_minute = end_minute + self.exchange_symbol = exchange_symbol def __repr__(self): return 'Trading Pair {symbol}({sid}) 
Exchange: {exchange}, ' \ @@ -523,7 +538,9 @@ cdef class TradingPair(Asset): 'Market Currency: {market_currency}, ' \ 'Base Currency: {base_currency}, ' \ 'Exchange Leverage: {leverage}, ' \ - 'Minimum Trade Size: {min_trade_size}'.format( + 'Minimum Trade Size: {min_trade_size} ' \ + 'Last daily ingestion: {end_daily} ' \ + 'Last minutely ingestion: {end_minute}'.format( symbol=self.symbol, sid=self.sid, exchange=self.exchange, @@ -531,7 +548,9 @@ cdef class TradingPair(Asset): market_currency=self.market_currency, base_currency=self.base_currency, leverage=self.leverage, - min_trade_size=self.min_trade_size + min_trade_size=self.min_trade_size, + end_daily=self.end_daily, + end_minute=self.end_minute ) cpdef __reduce__(self): diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index d863fb94..fe71e025 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -2,6 +2,8 @@ import datetime, requests import os from logging import Logger +import pytz + from catalyst.data.bundles import from_bundle_ingest_dirname from catalyst.utils.paths import data_path @@ -15,8 +17,15 @@ def get_date_from_ms(ms): return datetime.datetime.fromtimestamp(ms / 1000.0) -def get_history(exchange_name, data_frequency, symbol, start_ms=None, - end_ms=None): +def get_seconds_from_date(date): + epoch = datetime.datetime.utcfromtimestamp(0) + epoch = epoch.replace(tzinfo=pytz.UTC) + + return int((date - epoch).total_seconds()) + + +def get_history(exchange_name, data_frequency, symbol, start_seconds=None, + end_seconds=None): """ History API provides OHLCV data for any of the supported exchanges up to yesterday. @@ -27,10 +36,10 @@ def get_history(exchange_name, data_frequency, symbol, start_ms=None, *** currently only 'daily' is supported *** :param symbol: string Required: The trading pair symbol. - :param start: float - Optional: The start date in milliseconds. - :param end: float - Optional: The end date in milliseconds. 
+ :param start_seconds: int + Optional: The start date in seconds. + :param end_seconds: int + Optional: The end date in seconds. :return ohlcv: list[dict[string, float]] Each row contains the following dictionary for the resulting bars: @@ -67,11 +76,11 @@ def get_history(exchange_name, data_frequency, symbol, start_ms=None, data_frequency=data_frequency, ) - if start_ms: - url += '&start={}'.format(int(start_ms / 1000)) + if start_seconds: + url += '&start={}'.format(start_seconds) - if end_ms: - url += '&end={}'.format(int(end_ms / 1000)) + if end_seconds: + url += '&end={}'.format(end_seconds) try: response = requests.get(url) @@ -80,7 +89,7 @@ def get_history(exchange_name, data_frequency, symbol, start_ms=None, data = response.json() - if 'error' in response: + if 'error' in data: raise ValueError(response['error']) return data diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index ef06c438..471215d1 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -226,6 +226,16 @@ class Exchange: else: min_trade_size = 0.0000001 + if 'end_daily' in asset and asset['end_daily'] != 'N/A': + end_daily = pd.to_datetime(asset['end_daily'], utc=True) + else: + end_daily = None + + if 'end_minute' in asset and asset['end_minute'] != 'N/A': + end_minute = pd.to_datetime(asset['end_minute'], utc=True) + else: + end_minute = None + trading_pair = TradingPair( symbol=asset['symbol'], exchange=self.name, @@ -233,7 +243,10 @@ class Exchange: end_date=end_date, leverage=leverage, asset_name=asset_name, - min_trade_size=min_trade_size + min_trade_size=min_trade_size, + end_daily=end_daily, + end_minute=end_minute, + exchange_symbol=exchange_symbol ) self.assets[exchange_symbol] = trading_pair diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index a63aca45..c5404499 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -10,7 +10,8 @@ from 
catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarWriter, BcolzMinuteBarReader from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader -from catalyst.exchange.bundle_utils import fetch_candles_chunk +from catalyst.exchange.bundle_utils import fetch_candles_chunk, get_history, \ + get_seconds_from_date from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import maybe_show_progress @@ -136,11 +137,12 @@ class ExchangeBundle: if len(os.listdir(output_dir)) > 0: self._writers[key] = BcolzDailyBarWriter.open(output_dir, end) else: + end_session = end.floor('1d') self._writers[key] = BcolzDailyBarWriter( filename=output_dir, calendar=open_calendar, start_session=start, - end_session=end + end_session=end_session ) else: raise ValueError( @@ -213,14 +215,26 @@ class ExchangeBundle: log.debug('the data chunk already exists') return - # TODO: ensure correct behavior for assets starting in the chunk - candles = fetch_candles_chunk( - exchange=self.exchange, - assets=missing_assets, - data_frequency=data_frequency, - end_dt=chunk_end, - bar_count=chunk['bar_count'] - ) + if data_frequency == 'minute': + # TODO: ensure correct behavior for assets starting in the chunk + candles = fetch_candles_chunk( + exchange=self.exchange, + assets=missing_assets, + data_frequency=data_frequency, + end_dt=chunk_end, + bar_count=chunk['bar_count'] + ) + else: + for asset in missing_assets: + # TODO: switch to Catalyst symbol convention + candles = get_history( + exchange_name=self.exchange.name, + data_frequency=data_frequency, + symbol=asset.exchange_symbol, + start_seconds=get_seconds_from_date(chunk_start), + end_seconds=get_seconds_from_date(chunk_end) + ) + pass num_candles = 0 data = [] diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 76696c3e..f0199425 100644 --- a/tests/exchange/test_bundle.py +++ 
b/tests/exchange/test_bundle.py @@ -8,7 +8,7 @@ log = Logger('test_exchange_bundle') class ExchangeBundleTestCase: - def test_ingest(self): + def test_ingest_minute(self): exchange_name = 'bitfinex' start = pd.to_datetime('2017-09-01', utc=True) @@ -26,3 +26,22 @@ class ExchangeBundleTestCase: show_progress=True ) pass + + def test_ingest_daily(self): + exchange_name = 'bitfinex' + + start = pd.to_datetime('2017-09-01', utc=True) + end = pd.Timestamp.utcnow() + + exchange_bundle = ExchangeBundle(exchange_name) + + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency='daily', + include_symbols='neo_btc', + exclude_symbols=None, + start=start, + end=end, + show_progress=True + ) + pass From 4f80ebee57f5671a2212217f158ea4b2aca9806b Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 9 Oct 2017 14:50:51 -0400 Subject: [PATCH 047/110] Naive integration with the consolidated exchanges api --- catalyst/exchange/bundle_utils.py | 27 +++++++++++++------ catalyst/exchange/exchange_bundle.py | 40 +++++++++++++++++----------- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index fe71e025..57a2d6ea 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,6 +1,7 @@ import datetime, requests import os from logging import Logger +import pandas as pd import pytz @@ -24,8 +25,7 @@ def get_seconds_from_date(date): return int((date - epoch).total_seconds()) -def get_history(exchange_name, data_frequency, symbol, start_seconds=None, - end_seconds=None): +def get_history(exchange_name, data_frequency, symbol, start=None, end=None): """ History API provides OHLCV data for any of the supported exchanges up to yesterday. @@ -36,10 +36,10 @@ def get_history(exchange_name, data_frequency, symbol, start_seconds=None, *** currently only 'daily' is supported *** :param symbol: string Required: The trading pair symbol. 
- :param start_seconds: int - Optional: The start date in seconds. - :param end_seconds: int - Optional: The end date in seconds. + :param start: datetime + Optional: The start date. + :param end: datetime + Optional: The end date. :return ohlcv: list[dict[string, float]] Each row contains the following dictionary for the resulting bars: @@ -61,13 +61,18 @@ def get_history(exchange_name, data_frequency, symbol, start_seconds=None, forward fill missing bars outside of this function. """ + start_seconds = get_seconds_from_date(start) if start else None + end_seconds = get_seconds_from_date(end) if end else None + if exchange_name not in EXCHANGE_NAMES: raise ValueError( 'get_history function only supports the following exchanges: {}'.format( list(EXCHANGE_NAMES))) - if data_frequency != 'daily': - raise ValueError('get_history currently only supports daily data.') + if data_frequency != 'daily' and data_frequency != 'minute': + raise ValueError( + 'get_history currently only supports daily and minute data.' 
+ ) url = '{api_url}/candles?exchange={exchange}&market={symbol}&freq={data_frequency}'.format( api_url=API_URL, @@ -92,6 +97,12 @@ def get_history(exchange_name, data_frequency, symbol, start_seconds=None, if 'error' in data: raise ValueError(response['error']) + for candle in data: + last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) + last_traded = last_traded.replace(tzinfo=pytz.UTC) + + candle['last_traded'] = last_traded + return data diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index c5404499..0b4d8fdd 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -215,26 +215,36 @@ class ExchangeBundle: log.debug('the data chunk already exists') return - if data_frequency == 'minute': - # TODO: ensure correct behavior for assets starting in the chunk - candles = fetch_candles_chunk( - exchange=self.exchange, - assets=missing_assets, - data_frequency=data_frequency, - end_dt=chunk_end, - bar_count=chunk['bar_count'] - ) - else: - for asset in missing_assets: + candles = dict() + for asset in missing_assets: + if chunk_start < asset.end_minute: + # TODO: fetch delta candles from exchanges + history_end = chunk_end \ + if chunk_end <= asset.end_minute else asset.end_minute + # TODO: switch to Catalyst symbol convention - candles = get_history( + candles[asset] = get_history( exchange_name=self.exchange.name, data_frequency=data_frequency, symbol=asset.exchange_symbol, - start_seconds=get_seconds_from_date(chunk_start), - end_seconds=get_seconds_from_date(chunk_end) + start=chunk_start, + end=history_end ) - pass + else: + log.debug( + 'no data in Catalyst api for chunk ' + '{} to {}'.format(chunk_start, chunk_end) + ) + # if data_frequency == 'minute': + # # TODO: ensure correct behavior for assets starting in the chunk + # candles = fetch_candles_chunk( + # exchange=self.exchange, + # assets=missing_assets, + # data_frequency=data_frequency, + # end_dt=chunk_end, + # 
bar_count=chunk['bar_count'] + # ) + # else: num_candles = 0 data = [] From 8811aa669a31ad067f078ae19089c39641f815be Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 9 Oct 2017 14:52:59 -0400 Subject: [PATCH 048/110] Naive integration with the consolidated exchanges api (minor fix) --- catalyst/exchange/bundle_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 57a2d6ea..9dd1e436 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -95,7 +95,7 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): data = response.json() if 'error' in data: - raise ValueError(response['error']) + raise ValueError(data['error']) for candle in data: last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) From 83af12c52c55da1e56eb1195f34b56e6a2d6f22f Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 9 Oct 2017 16:23:02 -0400 Subject: [PATCH 049/110] Added exchange get_history method which merge historical bars from the Catalyst and exchange APIs --- catalyst/exchange/bundle_utils.py | 53 +++++++++++---- catalyst/exchange/exchange.py | 97 +++++++++++++++++++++++++++- catalyst/exchange/exchange_bundle.py | 77 ++++++++-------------- 3 files changed, 162 insertions(+), 65 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 9dd1e436..5078f52d 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -6,6 +6,7 @@ import pandas as pd import pytz from catalyst.data.bundles import from_bundle_ingest_dirname +from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path log = Logger('test_exchange_bundle') @@ -106,6 +107,46 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): return data +def get_ffill_candles(candles, start_dt, end_dt, data_frequency, + previous_candle=None): + """ + 
Create candles for each period of the specified range, forward-filling + missing candles with the previous value. + + :param candles: + :param start_dt: + :param end_dt: + :param data_frequency: + :param previous_candle: + + :return: + """ + all_dates = [] + all_candles = [] + date = start_dt + + while date <= end_dt: + candle = next(( + candle for candle in candles if candle['last_traded'] == date + ), previous_candle) + + if candle is not None: + all_dates.append(date) + all_candles.append(candle) + + previous_candle = candle + + if data_frequency == 'minute': + date += datetime.timedelta(minutes=1) + elif data_frequency == 'daily': + date += datetime.timedelta(days=1) + else: + raise ValueError('invalid data frequency') + + return all_dates, all_candles + + +@deprecated def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, exchanges): """ @@ -174,18 +215,6 @@ def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, return ohlcv -def fetch_candles_chunk(exchange, assets, data_frequency, end_dt, bar_count): - calc_start_dt = end_dt - datetime.timedelta(minutes=bar_count) - candles = exchange.get_candles( - data_frequency=data_frequency, - assets=assets, - bar_count=bar_count, - start_dt=calc_start_dt, - end_dt=end_dt - ) - return candles - - def find_most_recent_time(bundle_name): """ Find most recent "time folder" for a given bundle. 
diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 471215d1..6bf6ee05 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -10,6 +10,7 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS +from catalyst.exchange import bundle_utils from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ @@ -412,6 +413,95 @@ class Exchange: return value + def get_history(self, assets, end_dt, bar_count, data_frequency): + """ + Retrieve OHLCV bars from the Catalyst and/or exchange API. + + If Catalyst does not have the full data set, retrieve the missing + portion from the exchange API if the exchanges supports + historical data. + + :param assets: list[TradingPair] + The TradingPair asset. + :param data_frequency: str + The bar frequency: daily or minute + :param bar_count: int + The number of bars desired. + :param end: datetime + The last trading date of the last bar. + + :return: + """ + candles = dict() + for asset in assets: + candles[asset] = self.get_asset_history( + asset=asset, + end=end_dt, + bar_count=bar_count, + data_frequency=data_frequency + ) + return candles + + def get_asset_history(self, asset, end, bar_count, data_frequency): + """ + Retrieve the OHLVC bars of a single asset. + + :param asset: TradingPair + The TradingPair asset. + :param data_frequency: str + The bar frequency: daily or minute + :param bar_count: int + The number of bars desired. + :param end: datetime + The last trading date of the last bar. 
+ :return: + """ + start = end - timedelta(minutes=bar_count) + + exchange_start = None + catalyst_end = None + + if start < asset.end_minute: + catalyst_start = start + if end <= asset.end_minute: + catalyst_end = end + else: + catalyst_end = asset.end_minute + + delta = timedelta(minutes=1) \ + if data_frequency == 'minute' else timedelta(days=1) + exchange_start = catalyst_end + delta + + exchange_end = end + + else: + exchange_start = start + exchange_end = end + + data = [] + if catalyst_end is not None: + # TODO: support multiple assets in the Catalyst API. + candles = bundle_utils.get_history( + exchange_name=self.name, + data_frequency=data_frequency, + symbol=asset.exchange_symbol, # TODO: use Catalyst symbol + start=catalyst_start, + end=catalyst_end + ) + data += candles + + if exchange_start is not None: + candles = self.get_candles( + data_frequency=data_frequency, + assets=[asset], + bar_count=bar_count, + start_dt=exchange_start, + end_dt=exchange_end + ) + data += candles[asset] + + return data + def get_history_window(self, assets, end_dt, @@ -455,11 +545,12 @@ class Exchange: A dataframe containing the requested data. 
""" - candles = self.get_candles( - data_frequency=frequency, + # TODO: try to read from bundle first + candles = self.get_history( assets=assets, + end_dt=end_dt, bar_count=bar_count, - end_dt=end_dt + data_frequency=data_frequency ) series = dict() diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 0b4d8fdd..0e6e30af 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -10,8 +10,7 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarWriter, BcolzMinuteBarReader from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader -from catalyst.exchange.bundle_utils import fetch_candles_chunk, get_history, \ - get_seconds_from_date +from catalyst.exchange.bundle_utils import get_ffill_candles from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import maybe_show_progress @@ -196,6 +195,16 @@ class ExchangeBundle: def ingest_chunk(self, chunk, previous_candle, data_frequency, assets, writer): + """ + Retrieve the specified OHLCV chunk and write it to the bundle + + :param chunk: + :param previous_candle: + :param data_frequency: + :param assets: + :param writer: + :return: + """ chunk_end = chunk['end'] chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) @@ -215,36 +224,12 @@ class ExchangeBundle: log.debug('the data chunk already exists') return - candles = dict() - for asset in missing_assets: - if chunk_start < asset.end_minute: - # TODO: fetch delta candles from exchanges - history_end = chunk_end \ - if chunk_end <= asset.end_minute else asset.end_minute - - # TODO: switch to Catalyst symbol convention - candles[asset] = get_history( - exchange_name=self.exchange.name, - data_frequency=data_frequency, - symbol=asset.exchange_symbol, - start=chunk_start, - end=history_end - ) - else: - log.debug( - 'no data in Catalyst api for 
chunk ' - '{} to {}'.format(chunk_start, chunk_end) - ) - # if data_frequency == 'minute': - # # TODO: ensure correct behavior for assets starting in the chunk - # candles = fetch_candles_chunk( - # exchange=self.exchange, - # assets=missing_assets, - # data_frequency=data_frequency, - # end_dt=chunk_end, - # bar_count=chunk['bar_count'] - # ) - # else: + candles = self.exchange.get_history( + assets=missing_assets, + end_dt=chunk_end, + bar_count=chunk['bar_count'], + data_frequency=data_frequency + ) num_candles = 0 data = [] @@ -260,25 +245,17 @@ class ExchangeBundle: ) continue - all_dates = [] - all_candles = [] - date = chunk_start - while date <= chunk_end: + previous = previous_candle[asset] \ + if asset in previous_candle else None - previous = previous_candle[asset] \ - if asset in previous_candle else None - - candle = next((candle for candle in asset_candles \ - if candle['last_traded'] == date), - previous) - - if candle is not None: - all_dates.append(date) - all_candles.append(candle) - - previous_candle[asset] = candle - - date += timedelta(minutes=1) + all_dates, all_candles = get_ffill_candles( + candles=asset_candles, + start_dt=chunk_start, + end_dt=chunk_end, + data_frequency=data_frequency, + previous_candle=previous + ) + previous_candle[asset] = all_candles[-1] df = pd.DataFrame(all_candles, index=all_dates) if not df.empty: From de409efd3e7e82c88b01f367843f3a2d77f2f10c Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 10 Oct 2017 14:22:49 -0600 Subject: [PATCH 050/110] API uses Catalyst naming convention --- catalyst/exchange/bundle_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 5078f52d..2a5f0e34 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -34,9 +34,8 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): Required: The name identifier of the exchange 
(e.g. bitfinex, bittrex, poloniex). :param data_frequency: string Required: The bar frequency (minute or daily) - *** currently only 'daily' is supported *** :param symbol: string - Required: The trading pair symbol. + Required: The trading pair symbol, using Catalyst naming convention :param start: datetime Optional: The start date. :param end: datetime From d3e33c44bfd77c9649fd34347de59f1a53d65558 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 11 Oct 2017 00:13:22 -0400 Subject: [PATCH 051/110] Reading data from bundles first and other fixes --- catalyst/__main__.py | 4 +- .../buy_low_sell_high_neo_with_interface.py | 38 ++--- catalyst/exchange/bitfinex/bitfinex.py | 1 - catalyst/exchange/bundle_utils.py | 46 ++++-- catalyst/exchange/data_portal_exchange.py | 4 +- catalyst/exchange/exchange.py | 90 +++++++++-- catalyst/exchange/exchange_bundle.py | 144 ++++++++++++------ tests/exchange/test_bundle.py | 5 +- 8 files changed, 236 insertions(+), 96 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index fce7fa55..60758487 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -9,6 +9,7 @@ from six import text_type from catalyst.data import bundles as bundles_module from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import Date, Timestamp from catalyst.utils.run_algo import _run, load_extensions @@ -492,7 +493,8 @@ def ingest_exchange(exchange_name, data_frequency, start, end, """ Ingest data for the given exchange. 
""" - exchange_bundle = ExchangeBundle(exchange_name) + exchange=get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) click.echo('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 2e216cb5..8cb1a5a6 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -36,8 +36,8 @@ def _handle_data(context, data): prices = data.history( context.asset, fields='price', - bar_count=30, - frequency='30m' + bar_count=50, + frequency='1m' ) rsi = talib.RSI(prices.values, timeperiod=14)[-1] log.info('got rsi: {}'.format(rsi)) @@ -148,27 +148,27 @@ def analyze(context, stats): pass -# run_algorithm( -# initialize=initialize, -# handle_data=handle_data, -# analyze=analyze, -# exchange_name='bitfinex', -# live=True, -# algo_namespace=algo_namespace, -# base_currency='btc', -# live_graph=False -# ) - -# Backtest run_algorithm( - capital_base=250, - start=pd.to_datetime('2017-09-08', utc=True), - end=pd.to_datetime('2017-09-15', utc=True), - data_frequency='minute', initialize=initialize, handle_data=handle_data, analyze=analyze, exchange_name='bitfinex', + live=True, algo_namespace=algo_namespace, - base_currency='btc' + base_currency='btc', + live_graph=False ) + +# Backtest +# run_algorithm( +# capital_base=250, +# start=pd.to_datetime('2017-09-08', utc=True), +# end=pd.to_datetime('2017-09-15', utc=True), +# data_frequency='minute', +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='bitfinex', +# algo_namespace=algo_namespace, +# base_currency='btc' +# ) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index d4d368a9..38785c5f 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -14,7 +14,6 @@ 
import six from catalyst.assets._assets import TradingPair from logbook import Logger -# from websocket import create_connection from catalyst.exchange.exchange import Exchange from catalyst.exchange.exchange_errors import ( ExchangeRequestError, diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 5078f52d..ade4f860 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,4 +1,5 @@ -import datetime, requests +import requests +from datetime import timedelta, datetime import os from logging import Logger import pandas as pd @@ -16,11 +17,11 @@ API_URL = 'http://data.enigma.co/api/v1' def get_date_from_ms(ms): - return datetime.datetime.fromtimestamp(ms / 1000.0) + return datetime.fromtimestamp(ms / 1000.0) def get_seconds_from_date(date): - epoch = datetime.datetime.utcfromtimestamp(0) + epoch = datetime.utcfromtimestamp(0) epoch = epoch.replace(tzinfo=pytz.UTC) return int((date - epoch).total_seconds()) @@ -107,14 +108,30 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): return data -def get_ffill_candles(candles, start_dt, end_dt, data_frequency, +def get_delta(periods, data_frequency): + return timedelta(minutes=periods) \ + if data_frequency == 'minute' else timedelta(days=periods) + + +def get_start_dt(end_dt, bar_count, data_frequency): + periods = bar_count - 1 + if periods > 1: + delta = get_delta(periods, data_frequency) + start_dt = end_dt - delta + else: + start_dt = end_dt + + return start_dt + + +def get_ffill_candles(candles, bar_count, end_dt, data_frequency, previous_candle=None): """ Create candles for each period of the specified range, forward-filling missing candles with the previous value. 
:param candles: - :param start_dt: + :param bar_count: :param end_dt: :param data_frequency: :param previous_candle: @@ -123,6 +140,8 @@ def get_ffill_candles(candles, start_dt, end_dt, data_frequency, """ all_dates = [] all_candles = [] + + start_dt = get_start_dt(end_dt, bar_count, data_frequency) date = start_dt while date <= end_dt: @@ -130,18 +149,15 @@ def get_ffill_candles(candles, start_dt, end_dt, data_frequency, candle for candle in candles if candle['last_traded'] == date ), previous_candle) - if candle is not None: - all_dates.append(date) - all_candles.append(candle) + if candle is None: + candle = candles[0] - previous_candle = candle + all_dates.append(date) + all_candles.append(candle) - if data_frequency == 'minute': - date += datetime.timedelta(minutes=1) - elif data_frequency == 'daily': - date += datetime.timedelta(days=1) - else: - raise ValueError('invalid data frequency') + previous_candle = candle + + date += get_delta(1, data_frequency) return all_dates, all_candles diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 6d124b9f..e47ca435 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -260,8 +260,8 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): self.minute_history_loaders = dict() for exchange_name in self.exchanges: - self.exchange_bundles[exchange_name] = \ - ExchangeBundle(exchange_name) + exchange = self.exchanges[exchange_name] + self.exchange_bundles[exchange_name] = ExchangeBundle(exchange) def _get_first_trading_day(self, assets): first_date = None diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 6bf6ee05..96969692 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -1,5 +1,6 @@ import abc import random +import re from abc import ABCMeta, abstractmethod, abstractproperty from datetime import timedelta from time import sleep @@ -11,8 +12,12 @@ from 
logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange import bundle_utils +from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ + get_delta +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ - InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange + InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ + InvalidHistoryFrequencyError from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -456,7 +461,7 @@ class Exchange: The last trading date of the last bar. :return: """ - start = end - timedelta(minutes=bar_count) + start = get_start_dt(end, bar_count, data_frequency) exchange_start = None catalyst_end = None @@ -475,8 +480,8 @@ class Exchange: exchange_end = end else: - exchange_start = start exchange_end = end + exchange_start = start data = [] if catalyst_end is not None: @@ -495,7 +500,7 @@ class Exchange: data_frequency=data_frequency, assets=[asset], bar_count=bar_count, - start_dt=exchange_start, + start_dt=exchange_start if bar_count > 1 else None, end_dt=exchange_end ) data += candles[asset] @@ -545,25 +550,86 @@ class Exchange: A dataframe containing the requested data. 
""" - # TODO: try to read from bundle first - candles = self.get_history( + bundle = ExchangeBundle(self) + + freq_match = re.match(r'([0-9].*)(m|M|d|D)', frequency, re.M | re.I) + if freq_match: + candle_size = int(freq_match.group(1)) + unit = freq_match.group(2) + else: + raise InvalidHistoryFrequencyError(frequency) + + if unit.lower() == 'd': + data_frequency = 'daily' + + elif unit.lower() == 'm': + data_frequency = 'minute' + + else: + raise InvalidHistoryFrequencyError(frequency) + + adj_bar_count = candle_size * bar_count + start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) + + missing_assets = bundle.filter_existing_assets( assets=assets, + start_dt=start_dt, end_dt=end_dt, - bar_count=bar_count, data_frequency=data_frequency ) + if len(missing_assets) > 0: + writer = bundle.get_writer(start_dt, end_dt, data_frequency) + + bundle.ingest_chunk( + bar_count=adj_bar_count, + end_dt=end_dt, + data_frequency=data_frequency, + assets=missing_assets, + writer=writer + ) + + reader = bundle.get_reader(data_frequency) + values = reader.load_raw_arrays( + fields=[field], + start_dt=start_dt, + end_dt=end_dt, + sids=[asset.sid for asset in assets], + )[0] + series = dict() - for asset in assets: - asset_candles = candles[asset] + for asset_index, asset in enumerate(assets): + all_dates = [] + asset_values = [] - values = map(lambda candle: candle[field], asset_candles) - dates = map(lambda candle: candle['last_traded'], asset_candles) + date = start_dt + for value in values: + all_dates.append(date) + asset_values.append(value[asset_index]) - value_series = pd.Series(values, index=dates) + date += get_delta(1, data_frequency) + + value_series = pd.Series(asset_values, index=all_dates) series[asset] = value_series df = pd.DataFrame(series) + + if candle_size > 1: + if field == 'open': + agg = 'first' + elif field == 'high': + agg = 'max' + elif field == 'low': + agg = 'min' + elif field == 'close': + agg = 'last' + elif field == 'volume': + agg = 'sum' + 
else: + raise ValueError('invalid field') + + df = df.resample('{}T'.format(candle_size)).agg(agg) + return df def synchronize_portfolio(self): diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 0e6e30af..7504f7c0 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -4,15 +4,15 @@ from datetime import timedelta import numpy as np import pandas as pd from logbook import Logger +from pandas import DatetimeIndex from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ - BcolzMinuteBarWriter, BcolzMinuteBarReader + BcolzMinuteBarWriter, BcolzMinuteBarReader, BcolzMinuteBarMetadata from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader -from catalyst.exchange.bundle_utils import get_ffill_candles +from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt from catalyst.exchange.exchange_utils import get_exchange_folder -from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -26,8 +26,8 @@ log = Logger('exchange_bundle') class ExchangeBundle: - def __init__(self, exchange_name, ): - self.exchange = get_exchange(exchange_name) + def __init__(self, exchange): + self.exchange = exchange self.minutes_per_day = 1440 self.default_ohlc_ratio = 1000000 self._writers = dict() @@ -72,7 +72,8 @@ class ExchangeBundle: :return: BcolzMinuteBarReader or BcolzDailyBarReader """ - if data_frequency in self._readers: + if data_frequency in self._readers \ + and self._readers[data_frequency] is not None: return self._readers[data_frequency] root = get_exchange_folder(self.exchange.name) @@ -81,6 +82,7 @@ class ExchangeBundle: frequency=data_frequency ) + self._readers[data_frequency] = None if data_frequency == 'minute': try: self._readers[data_frequency] = BcolzMinuteBarReader(input_dir) @@ -99,13 +101,16 @@ 
class ExchangeBundle: return self._readers[data_frequency] - def get_writer(self, data_frequency, start, end): + def update_metadata(self, writer, start_dt, end_dt): + pass + + def get_writer(self, start_dt, end_dt, data_frequency): """ Get a data writer object, either a new object or from cache :return: BcolzMinuteBarWriter or BcolzDailyBarWriter """ - key = (data_frequency, start, end) + key = data_frequency if key in self._writers: return self._writers[key] @@ -120,27 +125,59 @@ class ExchangeBundle: if data_frequency == 'minute': if len(os.listdir(output_dir)) > 0: + + metadata = BcolzMinuteBarMetadata.read(output_dir) + + write_metadata = False + if start_dt < metadata.start_session: + write_metadata = True + start_session = start_dt.floor('1d') + else: + start_session = metadata.start_session + + if end_dt > metadata.end_session: + write_metadata = True + + # TODO: workaround, improve the calendar logic? + if end_dt == start_dt: + end_dt += timedelta(days=1) + + end_session = end_dt.floor('1d') + else: + end_session = metadata.end_session + self._writers[key] = \ - BcolzMinuteBarWriter.open(output_dir, end) + BcolzMinuteBarWriter( + output_dir, + metadata.calendar, + start_session, + end_session, + metadata.minutes_per_day, + metadata.default_ohlc_ratio, + metadata.ohlc_ratios_per_sid, + write_metadata=write_metadata + ) else: self._writers[key] = BcolzMinuteBarWriter( rootdir=output_dir, calendar=open_calendar, minutes_per_day=self.minutes_per_day, - start_session=start, - end_session=end, + start_session=start_dt, + end_session=end_dt, write_metadata=True, default_ohlc_ratio=self.default_ohlc_ratio ) + elif data_frequency == 'daily': if len(os.listdir(output_dir)) > 0: - self._writers[key] = BcolzDailyBarWriter.open(output_dir, end) + self._writers[key] = \ + BcolzDailyBarWriter.open(output_dir, end_dt) else: - end_session = end.floor('1d') + end_session = end_dt.floor('1d') self._writers[key] = BcolzDailyBarWriter( filename=output_dir, 
calendar=open_calendar, - start_session=start, + start_session=start_dt, end_session=end_session ) else: @@ -150,7 +187,7 @@ class ExchangeBundle: return self._writers[key] - def filter_existing_assets(self, assets, start, end, data_frequency): + def filter_existing_assets(self, assets, start_dt, end_dt, data_frequency): """ For each asset, get the close on the start and end dates of the chunk. If the data exists, the chunk ingestion is complete. @@ -158,9 +195,9 @@ class ExchangeBundle: :param assets: list[TradingPair] The assets is scope. - :param start: + :param start_dt: The chunk start date. - :param end: + :param end_dt: The chunk end date. :return: list[TradingPair] The assets missing from the bundle @@ -171,13 +208,15 @@ class ExchangeBundle: has_data = True if has_data and reader is not None: try: - start_close = reader.get_value(asset.sid, start, 'close') + start_close = \ + reader.get_value(asset.sid, start_dt, 'close') if np.isnan(start_close): has_data = False else: - end_close = reader.get_value(asset.sid, end, 'close') + end_close = reader.get_value(asset.sid, end_dt, + 'close') if np.isnan(end_close): has_data = False @@ -193,8 +232,8 @@ class ExchangeBundle: return missing_assets - def ingest_chunk(self, chunk, previous_candle, data_frequency, assets, - writer): + def ingest_chunk(self, bar_count, end_dt, data_frequency, assets, + writer, previous_candle=dict()): """ Retrieve the specified OHLCV chunk and write it to the bundle @@ -205,18 +244,17 @@ class ExchangeBundle: :param writer: :return: """ - chunk_end = chunk['end'] - chunk_start = chunk_end - timedelta(minutes=chunk['bar_count']) chunk_assets = [] for asset in assets: - if asset.start_date <= chunk_end: + if asset.start_date <= end_dt: chunk_assets.append(asset) + start_dt = get_start_dt(end_dt, bar_count, data_frequency) missing_assets = self.filter_existing_assets( assets=chunk_assets, - start=chunk_start, - end=chunk_end, + start_dt=start_dt, + end_dt=end_dt, 
data_frequency=data_frequency ) @@ -226,8 +264,8 @@ class ExchangeBundle: candles = self.exchange.get_history( assets=missing_assets, - end_dt=chunk_end, - bar_count=chunk['bar_count'], + end_dt=end_dt, + bar_count=bar_count, data_frequency=data_frequency ) @@ -240,7 +278,7 @@ class ExchangeBundle: 'no data: {symbols} on {exchange}, date {end}'.format( symbols=missing_assets, exchange=self.exchange.name, - end=chunk_end + end=end_dt ) ) continue @@ -250,14 +288,18 @@ class ExchangeBundle: all_dates, all_candles = get_ffill_candles( candles=asset_candles, - start_dt=chunk_start, - end_dt=chunk_end, + bar_count=bar_count, + end_dt=end_dt, data_frequency=data_frequency, previous_candle=previous ) previous_candle[asset] = all_candles[-1] - df = pd.DataFrame(all_candles, index=all_dates) + df = pd.DataFrame( + data=all_candles, + index=all_dates, + columns=['open', 'high', 'low', 'close', 'volume'] + ) if not df.empty: df.sort_index(inplace=True) @@ -268,24 +310,37 @@ class ExchangeBundle: try: log.debug( - 'writing {num_candles} candles from {start} to {end}'.format( + 'writing {num_candles} candles for {bar_count} bars' + 'ending {end}'.format( num_candles=num_candles, - start=chunk_start, - end=chunk_end + bar_count=bar_count, + end=end_dt ) ) - for pair in data: - log.debug('data for sid {}\n{}\n{}'.format( - pair[0], pair[1].head(2), pair[1].tail(2))) - writer.write( data=data, show_progress=False, invalid_data_behavior='raise' ) except BcolzMinuteOverlappingData as e: - log.warn('chunk already exists {}: {}'.format(chunk, e)) + log.warn('chunk already exists: {}'.format(e)) + except Exception as e: + log.warn('error when writing data: {}, trying again'.format(e)) + + # This is workaround, there is an issue with empty + # session_label when using a newly created writer + del self._writers[data_frequency] + + # TODO: these are the dates of the chunk, not the job + writer = self.get_writer(start_dt, end_dt, data_frequency) + writer.write( + data=data, + 
show_progress=False, + invalid_data_behavior='raise' + ) + + return data def ingest(self, data_frequency, include_symbols=None, exclude_symbols=None, start=None, end=None, @@ -327,7 +382,7 @@ class ExchangeBundle: else: raise ValueError('frequency not supported') - writer = self.get_writer(data_frequency, start, end) + writer = self.get_writer(start, end, data_frequency) if delta_periods > self.exchange.num_candles_limit: bar_count = self.exchange.num_candles_limit @@ -359,9 +414,10 @@ class ExchangeBundle: previous_candle = dict() for chunk in it: self.ingest_chunk( - chunk=chunk, - previous_candle=previous_candle, + bar_count=chunk['bar_count'], + end_dt=chunk['end'], data_frequency=data_frequency, assets=assets, - writer=writer + writer=writer, + previous_candle=previous_candle, ) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index f0199425..643326a9 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -3,6 +3,7 @@ from logging import Logger import pandas as pd from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst.exchange.init_utils import get_exchange log = Logger('test_exchange_bundle') @@ -14,7 +15,7 @@ class ExchangeBundleTestCase: start = pd.to_datetime('2017-09-01', utc=True) end = pd.Timestamp.utcnow() - exchange_bundle = ExchangeBundle(exchange_name) + exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( @@ -33,7 +34,7 @@ class ExchangeBundleTestCase: start = pd.to_datetime('2017-09-01', utc=True) end = pd.Timestamp.utcnow() - exchange_bundle = ExchangeBundle(exchange_name) + exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( From 01aeb88e8f9d10c060b13b705438c8cc8311982d Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 11 Oct 2017 17:05:27 -0600 Subject: [PATCH 052/110] 
Raising Exceptions without traceback --- catalyst/exchange/exchange_errors.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 7e751981..f2629ef4 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -1,5 +1,18 @@ +import sys, inspect from catalyst.errors import ZiplineError +class ZiplineErrorSilent(ZiplineError): + def __init__(self, **kwargs): + msg = self.msg.format(**kwargs) + try: + ln = sys.exc_info()[-1].tb_lineno + fn = sys.exc_info()[-1].f_code.co_filename + except AttributeError: + ln = inspect.currentframe().f_back.f_lineno + fn = inspect.currentframe().f_back.f_code.co_filename + msg = "Error traceback: {1} (line {2})\n{0.__name__}: {3}.".format(type(self), fn, ln, msg) + sys.exit(msg) + class ExchangeRequestError(ZiplineError): msg = ( @@ -140,7 +153,7 @@ class MismatchingBaseCurrenciesExchanges(ZiplineError): ).strip() -class SymbolNotFoundOnExchange(ZiplineError): +class SymbolNotFoundOnExchange(ZiplineErrorSilent): """ Raised when a symbol() call contains a non-existant symbol. 
""" From c24918e2c881d161027e7330732b95db13fe0dbf Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 11 Oct 2017 22:05:29 -0400 Subject: [PATCH 053/110] Bug fixes --- catalyst/exchange/bundle_utils.py | 3 ++- catalyst/exchange/exchange.py | 2 +- catalyst/exchange/exchange_bundle.py | 8 ++------ catalyst/exchange/exchange_errors.py | 4 ++++ tests/exchange/test_bundle.py | 6 ++++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 56fa6359..ce0130a1 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -7,6 +7,7 @@ import pandas as pd import pytz from catalyst.data.bundles import from_bundle_ingest_dirname +from catalyst.exchange.exchange_errors import ApiCandlesError from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path @@ -96,7 +97,7 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): data = response.json() if 'error' in data: - raise ValueError(data['error']) + raise ApiCandlesError(error=data['error']) for candle in data: last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 96969692..36eb9628 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -489,7 +489,7 @@ class Exchange: candles = bundle_utils.get_history( exchange_name=self.name, data_frequency=data_frequency, - symbol=asset.exchange_symbol, # TODO: use Catalyst symbol + symbol=asset.symbol, # TODO: use Catalyst symbol start=catalyst_start, end=catalyst_end ) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 7504f7c0..34d5330b 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -131,18 +131,14 @@ class ExchangeBundle: write_metadata = False if start_dt < metadata.start_session: write_metadata = True - start_session 
= start_dt.floor('1d') + start_session = start_dt else: start_session = metadata.start_session if end_dt > metadata.end_session: write_metadata = True - # TODO: workaround, improve the calendar logic? - if end_dt == start_dt: - end_dt += timedelta(days=1) - - end_session = end_dt.floor('1d') + end_session = end_dt else: end_session = metadata.end_session diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 7e751981..2d5904cc 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -167,3 +167,7 @@ class PricingDataNotLoadedError(ZiplineError): 'Please ingest data using the command ' '`catalyst ingest -b exchange_{exchange}`. ' 'See catalyst documentation for details.').strip() + + +class ApiCandlesError(ZiplineError): + msg = ('Unable to fetch candles from the remote API: {error}.').strip() diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 643326a9..f99ced28 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,3 +1,4 @@ +from datetime import timedelta from logging import Logger import pandas as pd @@ -12,8 +13,9 @@ class ExchangeBundleTestCase: def test_ingest_minute(self): exchange_name = 'bitfinex' - start = pd.to_datetime('2017-09-01', utc=True) - end = pd.Timestamp.utcnow() + # start = pd.to_datetime('2017-09-01', utc=True) + end = pd.Timestamp.utcnow() - timedelta(minutes=5) + start = end - timedelta(minutes=30) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) From 4895bef392b75cda82d99f8936ad1e89f2a90b19 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 12 Oct 2017 00:51:18 -0400 Subject: [PATCH 054/110] Bug fixes and housekeeping from ingestion testing --- catalyst/__main__.py | 6 +- catalyst/exchange/bittrex/bittrex.py | 2 + catalyst/exchange/bundle_utils.py | 41 ++++++ catalyst/exchange/exchange.py | 17 +-- catalyst/exchange/exchange_bundle.py | 175 ++++++++++++++----------- 
catalyst/exchange/exchange_errors.py | 4 +- catalyst/exchange/poloniex/poloniex.py | 4 + tests/exchange/test_bundle.py | 19 +++ 8 files changed, 177 insertions(+), 91 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 60758487..d3ca57c8 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -443,7 +443,7 @@ def live(ctx, return perf -@main.command() +@main.command(name='ingest-exchange') @click.option( '-x', '--exchange-name', @@ -452,6 +452,7 @@ def live(ctx, ' bittrex, poloniex).', ) @click.option( + '-f', '--data-frequency', type=click.Choice({'daily', 'minute', 'daily,minute'}), default='daily', @@ -473,6 +474,7 @@ def live(ctx, help='The end date of the data range. (default: today)', ) @click.option( + '-i', '--include-symbols', default=None, help='A list of symbols to ingest (optional comma separated list)', @@ -493,7 +495,7 @@ def ingest_exchange(exchange_name, data_frequency, start, end, """ Ingest data for the given exchange. """ - exchange=get_exchange(exchange_name) + exchange = get_exchange(exchange_name) exchange_bundle = ExchangeBundle(exchange) click.echo('ingesting exchange bundle {}'.format(exchange_name)) diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index e59281fa..d81d49b7 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -28,6 +28,8 @@ class Bittrex(Exchange): self.base_currency = base_currency self._portfolio = portfolio + self.num_candles_limit = 2000 + # Not sure what the rate limit is but trying to play it safe # https://bitcoin.stackexchange.com/questions/53778/bittrex-api-rate-limit self.max_requests_per_minute = 60 diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index ce0130a1..d9b34ac8 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -3,6 +3,7 @@ from datetime import timedelta, datetime import os from logging import Logger import pandas as 
pd +import numpy as np import pytz @@ -113,6 +114,21 @@ def get_delta(periods, data_frequency): if data_frequency == 'minute' else timedelta(days=periods) +def get_periods(start_dt, end_dt, data_frequency): + delta = end_dt - start_dt + + if data_frequency == 'minute': + delta_periods = delta.total_seconds() / 60 + + elif data_frequency == 'daily': + delta_periods = delta.total_seconds() / 60 / 60 / 24 + + else: + raise ValueError('frequency not supported') + + return int(delta_periods) + + def get_start_dt(end_dt, bar_count, data_frequency): periods = bar_count - 1 if periods > 1: @@ -162,6 +178,31 @@ def get_ffill_candles(candles, bar_count, end_dt, data_frequency, return all_dates, all_candles +def range_in_bundle(asset, start_dt, end_dt, reader): + has_data = True + if has_data and reader is not None: + try: + start_close = \ + reader.get_value(asset.sid, start_dt, 'close') + + if np.isnan(start_close): + has_data = False + + else: + end_close = reader.get_value(asset.sid, end_dt, 'close') + + if np.isnan(end_close): + has_data = False + + except Exception: + has_data = False + + else: + has_data = False + + return has_data + + @deprecated def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, exchanges): diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 36eb9628..d89e980c 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -466,7 +466,7 @@ class Exchange: exchange_start = None catalyst_end = None - if start < asset.end_minute: + if asset.end_minute is not None and start < asset.end_minute: catalyst_start = start if end <= asset.end_minute: catalyst_end = end @@ -581,13 +581,14 @@ class Exchange: if len(missing_assets) > 0: writer = bundle.get_writer(start_dt, end_dt, data_frequency) - bundle.ingest_chunk( - bar_count=adj_bar_count, - end_dt=end_dt, - data_frequency=data_frequency, - assets=missing_assets, - writer=writer - ) + for asset in missing_assets: + bundle.ingest_chunk( 
+ bar_count=adj_bar_count, + end_dt=end_dt, + data_frequency=data_frequency, + asset=asset, + writer=writer + ) reader = bundle.get_reader(data_frequency) values = reader.load_raw_arrays( diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 34d5330b..dbd4a9ae 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,17 +1,16 @@ import os from datetime import timedelta -import numpy as np import pandas as pd -from logbook import Logger -from pandas import DatetimeIndex +from logbook import Logger, DEBUG, INFO from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarWriter, BcolzMinuteBarReader, BcolzMinuteBarMetadata from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader -from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt +from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ + get_periods, range_in_bundle from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -23,6 +22,7 @@ def _cachpath(symbol, type_): BUNDLE_NAME_TEMPLATE = '{root}/{frequency}_bundle' log = Logger('exchange_bundle') +log.level = INFO class ExchangeBundle: @@ -45,8 +45,8 @@ class ExchangeBundle: def get_adj_dates(self, start, end, assets): now = pd.Timestamp.utcnow() - if end > now: - log.info('adjusting the end date to now {}'.format(now)) + if end is None or end > now: + log.debug('adjusting the end date to now {}'.format(now)) end = now earliest_trade = None @@ -54,8 +54,8 @@ class ExchangeBundle: if earliest_trade is None or earliest_trade > asset.start_date: earliest_trade = asset.start_date - if earliest_trade > start: - log.info( + if start is None or earliest_trade > start: + log.debug( 'adjusting start date to earliest trade date found {}'.format( 
earliest_trade )) @@ -201,65 +201,30 @@ class ExchangeBundle: reader = self.get_reader(data_frequency) missing_assets = [] for asset in assets: - has_data = True - if has_data and reader is not None: - try: - start_close = \ - reader.get_value(asset.sid, start_dt, 'close') - - if np.isnan(start_close): - has_data = False - - else: - end_close = reader.get_value(asset.sid, end_dt, - 'close') - - if np.isnan(end_close): - has_data = False - - except Exception as e: - has_data = False - - else: - has_data = False + has_data = range_in_bundle(asset, start_dt, end_dt, reader) if not has_data: missing_assets.append(asset) return missing_assets - def ingest_chunk(self, bar_count, end_dt, data_frequency, assets, + def ingest_chunk(self, bar_count, end_dt, data_frequency, asset, writer, previous_candle=dict()): """ Retrieve the specified OHLCV chunk and write it to the bundle - :param chunk: - :param previous_candle: + :param bar_count: + :param end_dt: :param data_frequency: - :param assets: + :param asset: :param writer: + :param previous_candle :return: """ - chunk_assets = [] - for asset in assets: - if asset.start_date <= end_dt: - chunk_assets.append(asset) - - start_dt = get_start_dt(end_dt, bar_count, data_frequency) - missing_assets = self.filter_existing_assets( - assets=chunk_assets, - start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency - ) - - if len(missing_assets) == 0: - log.debug('the data chunk already exists') - return - + # The get_history method supports multiple asset candles = self.exchange.get_history( - assets=missing_assets, + assets=[asset], end_dt=end_dt, bar_count=bar_count, data_frequency=data_frequency @@ -272,7 +237,7 @@ class ExchangeBundle: if not asset_candles: log.debug( 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=missing_assets, + symbols=asset, exchange=self.exchange.name, end=end_dt ) @@ -329,6 +294,7 @@ class ExchangeBundle: del self._writers[data_frequency] # TODO: these are the dates of the chunk, 
not the job + start_dt = get_start_dt(end_dt, bar_count, data_frequency) writer = self.get_writer(start_dt, end_dt, data_frequency) writer.write( data=data, @@ -357,8 +323,8 @@ class ExchangeBundle: assets = self.get_assets(include_symbols, exclude_symbols) start, end = self.get_adj_dates(start, end, assets) - symbols = [] - log.debug( + symbols = list(map(lambda asset: asset.symbol, assets)) + log.info( 'ingesting trading pairs {symbols} on exchange {exchange} ' 'from {start} to {end}'.format( symbols=symbols, @@ -368,39 +334,88 @@ class ExchangeBundle: ) ) - delta = end - start - if data_frequency == 'minute': - delta_periods = delta.total_seconds() / 60 - - elif data_frequency == 'daily': - delta_periods = delta.total_seconds() / 60 / 60 / 24 - - else: - raise ValueError('frequency not supported') - writer = self.get_writer(start, end, data_frequency) + reader = self.get_reader(data_frequency) - if delta_periods > self.exchange.num_candles_limit: - bar_count = self.exchange.num_candles_limit + all_chunks = [] + for asset in assets: + try: + asset_start, asset_end = \ + self.get_adj_dates(start, end, [asset]) - chunks = [] - last_chunk_date = end.floor('1 min') - while last_chunk_date > start + timedelta(minutes=bar_count): - # TODO: account for the partial last bar - chunk = dict(end=last_chunk_date, bar_count=bar_count) - chunks.append(chunk) + except ValueError as e: + log.debug('asset outside of range {} {}'.format(asset, e)) + continue - # TODO: base on frequency - last_chunk_date = \ - last_chunk_date - timedelta(minutes=(bar_count + 1)) + asset_periods = get_periods(asset_start, asset_end, data_frequency) + if asset_periods > self.exchange.num_candles_limit: + bar_count = self.exchange.num_candles_limit - chunks.reverse() + chunks = [] - else: - chunks = [dict(end=end, bar_count=delta_periods)] + period_delta = timedelta(minutes=1) \ + if data_frequency == 'minute' else \ + timedelta(days=1) + + chunk_start = asset_start.floor('1 min') - period_delta + 
while chunk_start < asset_end: + delta = timedelta(minutes=bar_count) \ + if data_frequency == 'minute' else \ + timedelta(days=bar_count) + + chunk_end = chunk_start + delta \ + if chunk_start + delta < asset_end else asset_end + + chunk_periods = \ + get_periods(chunk_start, chunk_end, data_frequency) + + range_start = \ + get_start_dt(chunk_end, chunk_periods, data_frequency) + + if range_in_bundle(asset, range_start, chunk_end, reader): + log.debug( + 'chunk already ingested {symbol} ' + '{start} to {end}'.format( + symbol=asset.symbol, + start=range_start, + end=chunk_end + ) + ) + + chunk_start = chunk_end + period_delta + continue + + chunk = dict( + asset=asset, + end=chunk_end, + bar_count=chunk_periods + ) + chunks.append(chunk) + + chunk_start = chunk_end + period_delta + + all_chunks += chunks + + else: + if range_in_bundle(asset, asset_start, asset_end, reader): + log.debug( + 'asset already ingested {symbol} ' + '{start} to {end}'.format( + symbol=asset.symbol, + start=asset_start, + end=asset_end + ) + ) + continue + + all_chunks += [ + dict(asset=asset, end=asset_end, bar_count=asset_periods) + ] + + all_chunks.sort(key=lambda chunk: chunk['end']) with maybe_show_progress( - chunks, + all_chunks, show_progress, label='Fetching {exchange} {frequency} candles: '.format( exchange=self.exchange.name, @@ -413,7 +428,7 @@ class ExchangeBundle: bar_count=chunk['bar_count'], end_dt=chunk['end'], data_frequency=data_frequency, - assets=assets, + asset=chunk['asset'], writer=writer, previous_candle=previous_candle, ) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 2bb67c45..04cb9064 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -1,6 +1,7 @@ import sys, inspect from catalyst.errors import ZiplineError + class ZiplineErrorSilent(ZiplineError): def __init__(self, **kwargs): msg = self.msg.format(**kwargs) @@ -10,7 +11,8 @@ class ZiplineErrorSilent(ZiplineError): 
except AttributeError: ln = inspect.currentframe().f_back.f_lineno fn = inspect.currentframe().f_back.f_code.co_filename - msg = "Error traceback: {1} (line {2})\n{0.__name__}: {3}.".format(type(self), fn, ln, msg) + msg = "Error traceback: {1} (line {2})\n{0.__name__}: {3}.".format( + type(self), fn, ln, msg) sys.exit(msg) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 0260b823..151fecd8 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -48,6 +48,10 @@ class Poloniex(Exchange): self.minute_reader = None self.transactions = defaultdict(list) + self.num_candles_limit = 2000 + self.max_requests_per_minute = 20 + self.request_cpt = dict() + def sanitize_curency_symbol(self, exchange_symbol): """ diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index f99ced28..43895198 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -30,6 +30,25 @@ class ExchangeBundleTestCase: ) pass + def test_ingest_minute_all(self): + exchange_name = 'bitfinex' + + # start = pd.to_datetime('2017-09-01', utc=True) + start = pd.to_datetime('2017-10-01', utc=True) + end = pd.to_datetime('2017-10-05', utc=True) + + exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) + + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency='minute', + exclude_symbols=None, + start=start, + end=end, + show_progress=True + ) + pass + def test_ingest_daily(self): exchange_name = 'bitfinex' From 73378962aaa385f34cf28c5c91b957cacf94d6e9 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 12 Oct 2017 01:24:21 -0400 Subject: [PATCH 055/110] Bug fixes and housekeeping from ingestion testing --- catalyst/exchange/bitfinex/bitfinex.py | 2 +- catalyst/exchange/exchange.py | 11 +++++++---- catalyst/exchange/exchange_bundle.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git 
a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 38785c5f..a4179a82 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -50,7 +50,7 @@ class Bitfinex(Exchange): self._portfolio = portfolio self.minute_writer = None self.minute_reader = None - self.num_candles_limit = 1000 + self.num_candles_limit = 10000 # Max is 90 but playing it safe # https://www.bitfinex.com/posts/188 diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index d89e980c..3f03be8a 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -418,7 +418,8 @@ class Exchange: return value - def get_history(self, assets, end_dt, bar_count, data_frequency): + def get_history(self, assets, end_dt, bar_count, data_frequency, + fallback_exchange=True): """ Retrieve OHLCV bars from the Catalyst and/or exchange API. @@ -443,11 +444,13 @@ class Exchange: asset=asset, end=end_dt, bar_count=bar_count, - data_frequency=data_frequency + data_frequency=data_frequency, + fallback_exchange=fallback_exchange ) return candles - def get_asset_history(self, asset, end, bar_count, data_frequency): + def get_asset_history(self, asset, end, bar_count, data_frequency, + fallback_exchange=True): """ Retrieve the OHLVC bars of a single asset. 
@@ -495,7 +498,7 @@ class Exchange: ) data += candles - if exchange_start is not None: + if exchange_start is not None and fallback_exchange: candles = self.get_candles( data_frequency=data_frequency, assets=[asset], diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index dbd4a9ae..134657bc 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -227,7 +227,8 @@ class ExchangeBundle: assets=[asset], end_dt=end_dt, bar_count=bar_count, - data_frequency=data_frequency + data_frequency=data_frequency, + fallback_exchange=False ) num_candles = 0 From c67cbedfbf37f89d42244e3d09769daee331cdab Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 11 Oct 2017 23:31:08 -0600 Subject: [PATCH 056/110] FIX: Raising Exceptions without traceback --- catalyst/exchange/exchange_errors.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index f2629ef4..83e0254f 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -1,17 +1,16 @@ -import sys, inspect +import sys, traceback from catalyst.errors import ZiplineError -class ZiplineErrorSilent(ZiplineError): - def __init__(self, **kwargs): - msg = self.msg.format(**kwargs) - try: - ln = sys.exc_info()[-1].tb_lineno - fn = sys.exc_info()[-1].f_code.co_filename - except AttributeError: - ln = inspect.currentframe().f_back.f_lineno - fn = inspect.currentframe().f_back.f_code.co_filename - msg = "Error traceback: {1} (line {2})\n{0.__name__}: {3}.".format(type(self), fn, ln, msg) - sys.exit(msg) +def silent_except_hook(exctype, excvalue, exctraceback): + if exctype in [SymbolNotFoundOnExchange,]: + fn = traceback.extract_tb(exctraceback)[-1][0] + ln = traceback.extract_tb(exctraceback)[-1][1] + print "Error traceback: {1} (line {2})\n" \ + "{0.__name__}: {3}.".format(exctype, fn, ln, excvalue) + 
else: + sys.__excepthook__(exctype, excvalue, exctraceback) + +sys.excepthook = silent_except_hook class ExchangeRequestError(ZiplineError): @@ -153,7 +152,7 @@ class MismatchingBaseCurrenciesExchanges(ZiplineError): ).strip() -class SymbolNotFoundOnExchange(ZiplineErrorSilent): +class SymbolNotFoundOnExchange(ZiplineError): """ Raised when a symbol() call contains a non-existant symbol. """ From 1dcfd169fa15081f25d1e99a0deedd5e076a69d7 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 11 Oct 2017 23:40:06 -0600 Subject: [PATCH 057/110] FIX: Raising Exceptions without traceback --- catalyst/exchange/exchange_errors.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index fbd972c2..b6716228 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -1,7 +1,6 @@ import sys, traceback from catalyst.errors import ZiplineError -<<<<<<< HEAD def silent_except_hook(exctype, excvalue, exctraceback): if exctype in [SymbolNotFoundOnExchange,]: fn = traceback.extract_tb(exctraceback)[-1][0] @@ -12,21 +11,6 @@ def silent_except_hook(exctype, excvalue, exctraceback): sys.__excepthook__(exctype, excvalue, exctraceback) sys.excepthook = silent_except_hook -======= - -class ZiplineErrorSilent(ZiplineError): - def __init__(self, **kwargs): - msg = self.msg.format(**kwargs) - try: - ln = sys.exc_info()[-1].tb_lineno - fn = sys.exc_info()[-1].f_code.co_filename - except AttributeError: - ln = inspect.currentframe().f_back.f_lineno - fn = inspect.currentframe().f_back.f_code.co_filename - msg = "Error traceback: {1} (line {2})\n{0.__name__}: {3}.".format( - type(self), fn, ln, msg) - sys.exit(msg) ->>>>>>> 73378962aaa385f34cf28c5c91b957cacf94d6e9 class ExchangeRequestError(ZiplineError): @@ -170,7 +154,7 @@ class MismatchingBaseCurrenciesExchanges(ZiplineError): class SymbolNotFoundOnExchange(ZiplineError): """ - Raised when a 
symbol() call contains a non-existant symbol. + Raised when a symbol() call contains a non-existent symbol. """ msg = ('Symbol {symbol} not found on exchange {exchange}. ' 'Choose from: {supported_symbols}').strip() From 1a87d5a0c0ce62fb2c42bea0793b8ee15e655868 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 12 Oct 2017 09:15:45 -0600 Subject: [PATCH 058/110] Making errors more verbose and user-friendly --- catalyst/exchange/data_portal_exchange.py | 8 +++++--- catalyst/exchange/exchange_errors.py | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index e47ca435..fe9acd09 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -311,7 +311,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field=field, first_trading_day=self._get_first_trading_day(assets), exchange=exchange.name, - symbols=[asset.symbol for asset in assets], + symbols=[asset.symbol.encode('utf-8') for asset in assets], ) series = dict() @@ -332,7 +332,8 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): raise PricingDataBeforeTradingError( first_trading_day=first_trading_day, exchange=assets[0].exchange, - symbols=[asset.symbol for asset in assets], + symbols=[asset.symbol.encode('utf-8') for asset in assets], + dt = dt, ) def get_exchange_spot_value(self, exchange, assets, field, dt, @@ -356,7 +357,8 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field=field, first_trading_day=self._get_first_trading_day(assets), exchange=exchange.name, - symbols=[asset.symbol for asset in assets], + symbols=[asset.symbol.encode('utf-8') for asset in assets], + symbol_list = ''.join([asset.symbol.encode('utf-8') for asset in assets]) ) return values diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index b6716228..fc54ec9b 100644 --- 
a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -2,11 +2,12 @@ import sys, traceback from catalyst.errors import ZiplineError def silent_except_hook(exctype, excvalue, exctraceback): - if exctype in [SymbolNotFoundOnExchange,]: + if exctype in [ PricingDataBeforeTradingError, PricingDataNotLoadedError, + SymbolNotFoundOnExchange, ]: fn = traceback.extract_tb(exctraceback)[-1][0] ln = traceback.extract_tb(exctraceback)[-1][1] print "Error traceback: {1} (line {2})\n" \ - "{0.__name__}: {3}.".format(exctype, fn, ln, excvalue) + "{0.__name__}: {3}".format(exctype, fn, ln, excvalue) else: sys.__excepthook__(exctype, excvalue, exctraceback) @@ -169,15 +170,16 @@ class BundleNotFoundError(ZiplineError): class PricingDataBeforeTradingError(ZiplineError): msg = ('Pricing data for trading pairs {symbols} on exchange {exchange} ' - 'starts on {first_trading_day}.').strip() + 'starts on {first_trading_day}, but you are either trying to trade or ' + 'retrieve pricing data on {dt}. Adjust your dates accordingly.').strip() class PricingDataNotLoadedError(ZiplineError): msg = ('Pricing data {field} for trading pairs {symbols} trading on ' 'exchange {exchange} since {first_trading_day} is unavailable. ' - 'The bundle data is either out-of-date or has not been loaded yet.' + 'The bundle data is either out-of-date or has not been loaded yet. ' 'Please ingest data using the command ' - '`catalyst ingest -b exchange_{exchange}`. ' + '`catalyst ingest-exchange -x {exchange} -i {symbol_list}`. 
' 'See catalyst documentation for details.').strip() From e1c2f40ab9f61700987262aa7bd9250cdb40f6be Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 12 Oct 2017 14:06:47 -0400 Subject: [PATCH 059/110] Making some adjustments to the ingestion method after discussion with Victor --- .../buy_low_sell_high_neo_with_interface.py | 36 ++++++++-------- catalyst/exchange/bitfinex/bitfinex.py | 43 +++++++++---------- catalyst/exchange/bundle_utils.py | 17 +++++++- catalyst/exchange/exchange.py | 1 + catalyst/exchange/exchange_bundle.py | 2 + tests/exchange/test_bitfinex.py | 4 ++ tests/exchange/test_bundle.py | 4 +- 7 files changed, 64 insertions(+), 43 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 8cb1a5a6..b70e451a 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -36,7 +36,7 @@ def _handle_data(context, data): prices = data.history( context.asset, fields='price', - bar_count=50, + bar_count=1, frequency='1m' ) rsi = talib.RSI(prices.values, timeperiod=14)[-1] @@ -148,27 +148,27 @@ def analyze(context, stats): pass -run_algorithm( - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bitfinex', - live=True, - algo_namespace=algo_namespace, - base_currency='btc', - live_graph=False -) - -# Backtest # run_algorithm( -# capital_base=250, -# start=pd.to_datetime('2017-09-08', utc=True), -# end=pd.to_datetime('2017-09-15', utc=True), -# data_frequency='minute', # initialize=initialize, # handle_data=handle_data, # analyze=analyze, # exchange_name='bitfinex', +# live=True, # algo_namespace=algo_namespace, -# base_currency='btc' +# base_currency='btc', +# live_graph=False # ) + +# Backtest +run_algorithm( + capital_base=250, + start=pd.to_datetime('2017-10-01', utc=True), + end=pd.to_datetime('2017-10-15', utc=True), + data_frequency='minute', + 
initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + algo_namespace=algo_namespace, + base_currency='btc' +) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index a4179a82..ac1af89e 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -26,7 +26,6 @@ from catalyst.protocol import Account from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ download_exchange_symbols - # Trying to account for REST api instability # https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request requests.adapters.DEFAULT_RETRIES = 20 @@ -50,7 +49,9 @@ class Bitfinex(Exchange): self._portfolio = portfolio self.minute_writer = None self.minute_reader = None - self.num_candles_limit = 10000 + + # The candle limit for each request + self.num_candles_limit = 1000 # Max is 90 but playing it safe # https://www.bitfinex.com/posts/188 @@ -576,7 +577,7 @@ class Bitfinex(Exchange): response = self._request('symbols', None) for symbol in response.json(): - if(source_dates): + if (source_dates): start_date = self.get_symbol_start_date(symbol) else: try: @@ -587,25 +588,26 @@ class Bitfinex(Exchange): try: end_daily = cached_symbols[symbol]['end_daily'] except KeyError as e: - end_daily ='N/A' + end_daily = 'N/A' try: end_minute = cached_symbols[symbol]['end_minute'] except KeyError as e: end_minute = 'N/A' - symbol_map[symbol]= dict( - symbol = symbol[:-3]+'_'+symbol[-3:], - start_date = start_date, - end_daily = end_daily, - end_minute = end_minute, + symbol_map[symbol] = dict( + symbol=symbol[:-3] + '_' + symbol[-3:], + start_date=start_date, + end_daily=end_daily, + end_minute=end_minute, ) - if(filename is None): + if (filename is None): filename = get_exchange_symbols_filename(self.name) - with open(filename,'w') as f: - json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + with 
open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) def get_symbol_start_date(self, symbol): @@ -634,10 +636,10 @@ class Bitfinex(Exchange): arbitrarily set the ref. date to 15 days ago to be safe with +/- 31 days """ - if(len(response.json())): + if (len(response.json())): startmonth = response.json()[-1][0] else: - startmonth = int((time.time()-15*24*3600)*1000) + startmonth = int((time.time() - 15 * 24 * 3600) * 1000) """ Query again with daily resolution setting the start and end around @@ -646,8 +648,9 @@ class Bitfinex(Exchange): url = '{url}/v2/candles/trade:1D:{symbol}/hist?start={start}&end={end}'.format( url=self.url, symbol=symbol_v2, - start=startmonth - 3600 *24 *31 *1000, - end=min(startmonth + 3600 *24 *31 *1000, int(time.time()*1000)) + start=startmonth - 3600 * 24 * 31 * 1000, + end=min(startmonth + 3600 * 24 * 31 * 1000, + int(time.time() * 1000)) ) try: @@ -656,9 +659,5 @@ class Bitfinex(Exchange): except Exception as e: raise ExchangeRequestError(error=e) - return time.strftime('%Y-%m-%d', time.gmtime(int(response.json()[-1][0]/1000))) - - - - - + return time.strftime('%Y-%m-%d', + time.gmtime(int(response.json()[-1][0] / 1000))) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index d9b34ac8..2f87b6c9 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -29,6 +29,21 @@ def get_seconds_from_date(date): return int((date - epoch).total_seconds()) +def get_bcolz_chunk(exchange_name, data_frequency, symbol, period_a, period_b): + """ + + :param exchange_name: + :param data_frequency: + :param symbol: + :param period_a: + Example: 2017 + :param period_b: + Example: 10 + + Note: + Filename: bitfinex-daily-neo_eth-2017-10.tar.gz + :return: + """ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): """ History API provides OHLCV data for any of the supported exchanges up to yesterday. 
@@ -55,7 +70,7 @@ def get_history(exchange_name, data_frequency, symbol, start=None, end=None): Notes ===== - Using milliseconds for the start and end dates for ease of use in the + Using seconds for the start and end dates for ease of use in the function query parameters. Sometimes, one minute goes by without completing a trade of the given diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 3f03be8a..882aeb2b 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -559,6 +559,7 @@ class Exchange: if freq_match: candle_size = int(freq_match.group(1)) unit = freq_match.group(2) + else: raise InvalidHistoryFrequencyError(frequency) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 134657bc..15022384 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -13,6 +13,7 @@ from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ get_periods, range_in_bundle from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress +from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import ensure_directory @@ -208,6 +209,7 @@ class ExchangeBundle: return missing_assets + @deprecated def ingest_chunk(self, bar_count, end_dt, data_frequency, asset, writer, previous_candle=dict()): """ diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index 7efd4884..c8f969fd 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -47,6 +47,10 @@ class BitfinexTestCase(BaseExchangeTestCase): def test_get_candles(self): log.info('retrieving candles') + ohlcv_neo = self.exchange.get_candles( + data_frequency='1m', + assets=self.exchange.get_asset('neo_btc') + ) pass def test_tickers(self): diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 43895198..47ebf30d 100644 --- 
a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -14,8 +14,8 @@ class ExchangeBundleTestCase: exchange_name = 'bitfinex' # start = pd.to_datetime('2017-09-01', utc=True) - end = pd.Timestamp.utcnow() - timedelta(minutes=5) - start = end - timedelta(minutes=30) + start = pd.to_datetime('2017-10-01', utc=True) + end = pd.to_datetime('2017-10-06', utc=True) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) From c658d15fcbd806ca1863af642dadb147f0a91248 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 13 Oct 2017 00:50:25 -0400 Subject: [PATCH 060/110] Unit testing ingestion of bundles logic --- .../buy_low_sell_high_neo_with_interface.py | 14 +- catalyst/exchange/exchange_bundle.py | 124 +++++++++++++----- tests/exchange/test_bundle.py | 32 ++++- 3 files changed, 129 insertions(+), 41 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index b70e451a..7037afad 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -33,6 +33,13 @@ def initialize(context): def _handle_data(context, data): + price = data.current(context.asset, 'close') + log.info('got price {price}'.format(price=price)) + + if price is None: + log.warn('no pricing data') + return + prices = data.history( context.asset, fields='price', @@ -55,13 +62,6 @@ def _handle_data(context, data): cash = context.portfolio.cash log.info('base currency available: {cash}'.format(cash=cash)) - price = data.current(context.asset, 'close') - log.info('got price {price}'.format(price=price)) - - if price is None: - log.warn('no pricing data') - return - record(price=price) orders = get_open_orders(context.asset) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 15022384..a843a777 100644 --- a/catalyst/exchange/exchange_bundle.py +++ 
b/catalyst/exchange/exchange_bundle.py @@ -1,6 +1,8 @@ import os +import time from datetime import timedelta +import bcolz import pandas as pd from logbook import Logger, DEBUG, INFO @@ -209,7 +211,38 @@ class ExchangeBundle: return missing_assets - @deprecated + def _write(self, data, writer, data_frequency): + """ + Write data to the writer + + :param df: + :param writer: + :return: + """ + try: + writer.write( + data=data, + show_progress=False, + invalid_data_behavior='raise' + ) + except BcolzMinuteOverlappingData as e: + log.warn('chunk already exists: {}'.format(e)) + except Exception as e: + log.warn('error when writing data: {}, trying again'.format(e)) + + # This is workaround, there is an issue with empty + # session_label when using a newly created writer + del self._writers[data_frequency] + + # TODO: these are the dates of the chunk, not the job + writer = self.get_writer(writer._start_session, + writer._end_session, data_frequency) + writer.write( + data=data, + show_progress=False, + invalid_data_behavior='raise' + ) + def ingest_chunk(self, bar_count, end_dt, data_frequency, asset, writer, previous_candle=dict()): """ @@ -264,6 +297,7 @@ class ExchangeBundle: index=all_dates, columns=['open', 'high', 'low', 'close', 'volume'] ) + if not df.empty: df.sort_index(inplace=True) @@ -272,41 +306,67 @@ class ExchangeBundle: data.append((sid, df)) - try: - log.debug( - 'writing {num_candles} candles for {bar_count} bars' - 'ending {end}'.format( - num_candles=num_candles, - bar_count=bar_count, - end=end_dt - ) - ) - - writer.write( - data=data, - show_progress=False, - invalid_data_behavior='raise' - ) - except BcolzMinuteOverlappingData as e: - log.warn('chunk already exists: {}'.format(e)) - except Exception as e: - log.warn('error when writing data: {}, trying again'.format(e)) - - # This is workaround, there is an issue with empty - # session_label when using a newly created writer - del self._writers[data_frequency] - - # TODO: these are the dates of 
the chunk, not the job - start_dt = get_start_dt(end_dt, bar_count, data_frequency) - writer = self.get_writer(start_dt, end_dt, data_frequency) - writer.write( - data=data, - show_progress=False, - invalid_data_behavior='raise' + log.debug( + 'writing {num_candles} candles for {bar_count} bars' + 'ending {end}'.format( + num_candles=num_candles, + bar_count=bar_count, + end=end_dt ) + ) + self._write(data, writer, data_frequency) return data + def ingest_ctable(self, asset, data_frequency, path): + start_time = time.time() + + reader = BcolzMinuteBarReader(path) + + start = reader.first_trading_day + end = reader.last_available_dt + + open_calendar = get_calendar('OPEN') + periods = open_calendar.minutes_in_range(start, end) + + sid = 284 + arrays = reader.load_raw_arrays( + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start, + end_dt=end, + sids=[sid] + ) + + ohlcv = dict( + open=arrays[0].flatten(), + high=arrays[1].flatten(), + low=arrays[2].flatten(), + close=arrays[3].flatten(), + volume=arrays[4].flatten() + ) + + df = pd.DataFrame( + data=ohlcv, + index=periods + ) + + data = [] + if not df.empty: + df.sort_index(inplace=True) + + data.append((sid, df)) + + writer = self.get_writer(start, end, data_frequency) + + self._write(data, writer, data_frequency) + + end_time = time.time() + delta_time = end_time - start_time + + log.info('time elapsed {}'.format(delta_time)) + + pass + def ingest(self, data_frequency, include_symbols=None, exclude_symbols=None, start=None, end=None, show_progress=True, environ=os.environ): diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 47ebf30d..78e7fd31 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,6 +1,8 @@ -from datetime import timedelta +from datetime import timedelta, time from logging import Logger +import bcolz +from toolz.itertoolz import join as joinz import pandas as pd from catalyst.exchange.exchange_bundle import ExchangeBundle @@ 
-22,7 +24,7 @@ class ExchangeBundleTestCase: log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( data_frequency='minute', - include_symbols='neo_btc', + include_symbols='bcc_btc', exclude_symbols=None, start=start, end=end, @@ -67,3 +69,29 @@ class ExchangeBundleTestCase: show_progress=True ) pass + + def test_merge_ctables(self): + exchange_name = 'bitfinex' + + root = '/Users/fredfortier/.catalyst/data/exchanges/bitfinex/temp_bundles' + path = '00/02/000284.bcolz' + + august = '{}/{}'.format( + root, 'poloniex-minute-btc_usdt-2017-8' + ) + exchange = get_exchange(exchange_name) + asset = exchange.get_asset('btc_usd') + + exchange_bundle = ExchangeBundle(exchange) + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency='minute', + path=august + ) + + september = '{}/{}/{}'.format( + root, 'poloniex-minute-btc_usdt-2017-9', path + ) + zseptember = bcolz.open(september, mode='a') + + pass From 93f4d31399bf8e8e28dcdeae684bb07e66407479 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 13 Oct 2017 16:29:43 -0400 Subject: [PATCH 061/110] Unit tested ingestion of bundle chunks. This may not be stable yet. 
--- .../buy_low_sell_high_neo_with_interface.py | 1 + catalyst/exchange/bundle_utils.py | 41 ++++- catalyst/exchange/exchange_bundle.py | 161 ++++++------------ catalyst/exchange/exchange_utils.py | 8 + catalyst/exchange/init_utils.py | 8 + tests/exchange/test_bundle.py | 27 ++- 6 files changed, 118 insertions(+), 128 deletions(-) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index 7037afad..e02b50de 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -91,6 +91,7 @@ def _handle_data(context, data): elif position.amount > 0 and \ price > cost_basis * (1 + context.PROFIT_TARGET): profit = (price * position.amount) - (cost_basis * position.amount) + log.info('closing position, taking profit: {}'.format(profit)) order_target_percent( asset=context.asset, diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 2f87b6c9..92af9b10 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,3 +1,6 @@ +import gzip +import tarfile + import requests from datetime import timedelta, datetime import os @@ -8,7 +11,9 @@ import numpy as np import pytz from catalyst.data.bundles import from_bundle_ingest_dirname +from catalyst.data.bundles.core import download_without_progress from catalyst.exchange.exchange_errors import ApiCandlesError +from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path @@ -29,21 +34,43 @@ def get_seconds_from_date(date): return int((date - epoch).total_seconds()) -def get_bcolz_chunk(exchange_name, data_frequency, symbol, period_a, period_b): +def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): """ + Download and extract a bcolz bundle. 
:param exchange_name: - :param data_frequency: :param symbol: - :param period_a: - Example: 2017 - :param period_b: - Example: 10 + :param data_frequency: + :param period: + :return: Note: Filename: bitfinex-daily-neo_eth-2017-10.tar.gz - :return: """ + + root = get_exchange_bundles_folder(exchange_name) + name = '{exchange}-{frequency}-{symbol}-{period}'.format( + exchange=exchange_name, + frequency=data_frequency, + symbol=symbol, + period=period + ) + path = os.path.join(root, name) + + if not os.path.isdir(path): + url = 'https://s3.amazonaws.com/enigmaco/catalyst-bundles/' \ + 'exchange-{exchange}/{name}.tar.gz'.format( + exchange=exchange_name, + name=name + ) + + bytes = download_without_progress(url) + with tarfile.open('r', fileobj=bytes) as tar: + tar.extractall(path) + + return path + + def get_history(exchange_name, data_frequency, symbol, start=None, end=None): """ History API provides OHLCV data for any of the supported exchanges up to yesterday. diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index a843a777..b45f965e 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,10 +1,10 @@ +import calendar import os import time -from datetime import timedelta +from datetime import timedelta, datetime, date -import bcolz import pandas as pd -from logbook import Logger, DEBUG, INFO +from logbook import Logger, INFO from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ @@ -12,8 +12,9 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ - get_periods, range_in_bundle -from catalyst.exchange.exchange_utils import get_exchange_folder + get_periods, range_in_bundle, get_bcolz_chunk +from catalyst.exchange.exchange_utils import get_exchange_folder, \ + 
get_exchange_bundles_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import ensure_directory @@ -35,6 +36,7 @@ class ExchangeBundle: self.default_ohlc_ratio = 1000000 self._writers = dict() self._readers = dict() + self.calendar = get_calendar('OPEN') def get_assets(self, include_symbols, exclude_symbols): # TODO: filter exclude symbols assets @@ -117,8 +119,6 @@ class ExchangeBundle: if key in self._writers: return self._writers[key] - open_calendar = get_calendar('OPEN') - root = get_exchange_folder(self.exchange.name) output_dir = BUNDLE_NAME_TEMPLATE.format( root=root, @@ -159,7 +159,7 @@ class ExchangeBundle: else: self._writers[key] = BcolzMinuteBarWriter( rootdir=output_dir, - calendar=open_calendar, + calendar=self.calendar, minutes_per_day=self.minutes_per_day, start_session=start_dt, end_session=end_dt, @@ -175,7 +175,7 @@ class ExchangeBundle: end_session = end_dt.floor('1d') self._writers[key] = BcolzDailyBarWriter( filename=output_dir, - calendar=open_calendar, + calendar=self.calendar, start_session=start_dt, end_session=end_session ) @@ -318,18 +318,31 @@ class ExchangeBundle: return data - def ingest_ctable(self, asset, data_frequency, path): + def download_bundle(self, name): + """ + + :param name: + :return: + """ + + def ingest_ctable(self, asset, data_frequency, period, writer): start_time = time.time() + path = get_bcolz_chunk( + exchange_name=self.exchange.name, + symbol=asset.symbol, + data_frequency=data_frequency, + period=period + ) + reader = BcolzMinuteBarReader(path) start = reader.first_trading_day end = reader.last_available_dt - open_calendar = get_calendar('OPEN') - periods = open_calendar.minutes_in_range(start, end) + periods = self.calendar.minutes_in_range(start, end) - sid = 284 + sid = asset.sid arrays = reader.load_raw_arrays( fields=['open', 'high', 'low', 'close', 'volume'], start_dt=start, @@ -352,12 +365,10 @@ class ExchangeBundle: data = 
[] if not df.empty: - df.sort_index(inplace=True) + df.sort_index(inplace=True, ascending=False) data.append((sid, df)) - writer = self.get_writer(start, end, data_frequency) - self._write(data, writer, data_frequency) end_time = time.time() @@ -371,7 +382,6 @@ class ExchangeBundle: exclude_symbols=None, start=None, end=None, show_progress=True, environ=os.environ): """ - Ingest the bundle :param data_frequency: :param include_symbols: @@ -385,113 +395,54 @@ class ExchangeBundle: assets = self.get_assets(include_symbols, exclude_symbols) start, end = self.get_adj_dates(start, end, assets) - - symbols = list(map(lambda asset: asset.symbol, assets)) - log.info( - 'ingesting trading pairs {symbols} on exchange {exchange} ' - 'from {start} to {end}'.format( - symbols=symbols, - exchange=self.exchange.name, - start=start, - end=end - ) - ) - - writer = self.get_writer(start, end, data_frequency) reader = self.get_reader(data_frequency) - all_chunks = [] + chunks = [] + periods = [] for asset in assets: - try: - asset_start, asset_end = \ - self.get_adj_dates(start, end, [asset]) + asset_start, asset_end = self.get_adj_dates(start, end, [asset]) + sessions = self.calendar.sessions_in_range(asset_start, asset_end) - except ValueError as e: - log.debug('asset outside of range {} {}'.format(asset, e)) - continue + dt = sessions[0] + while dt <= sessions[-1]: + period = '{}-{}'.format(dt.year, dt.month) - asset_periods = get_periods(asset_start, asset_end, data_frequency) - if asset_periods > self.exchange.num_candles_limit: - bar_count = self.exchange.num_candles_limit + if period not in periods: + periods.append(period) - chunks = [] + month_range = calendar.monthrange(dt.year, dt.month) + month_start = date(dt.year, dt.month, month_range[0]) + month_end = date(dt.year, dt.month, month_range[1]) - period_delta = timedelta(minutes=1) \ - if data_frequency == 'minute' else \ - timedelta(days=1) - - chunk_start = asset_start.floor('1 min') - period_delta - while chunk_start < 
asset_end: - delta = timedelta(minutes=bar_count) \ - if data_frequency == 'minute' else \ - timedelta(days=bar_count) - - chunk_end = chunk_start + delta \ - if chunk_start + delta < asset_end else asset_end - - chunk_periods = \ - get_periods(chunk_start, chunk_end, data_frequency) - - range_start = \ - get_start_dt(chunk_end, chunk_periods, data_frequency) - - if range_in_bundle(asset, range_start, chunk_end, reader): - log.debug( - 'chunk already ingested {symbol} ' - '{start} to {end}'.format( - symbol=asset.symbol, - start=range_start, - end=chunk_end + if not range_in_bundle(asset, month_start, month_end, + reader): + log.debug('adding period: {}'.format(period)) + chunks.append( + dict( + asset=asset, + period_end=month_end, + period=period ) ) - chunk_start = chunk_end + period_delta - continue + dt += timedelta(days=1) - chunk = dict( - asset=asset, - end=chunk_end, - bar_count=chunk_periods - ) - chunks.append(chunk) - - chunk_start = chunk_end + period_delta - - all_chunks += chunks - - else: - if range_in_bundle(asset, asset_start, asset_end, reader): - log.debug( - 'asset already ingested {symbol} ' - '{start} to {end}'.format( - symbol=asset.symbol, - start=asset_start, - end=asset_end - ) - ) - continue - - all_chunks += [ - dict(asset=asset, end=asset_end, bar_count=asset_periods) - ] - - all_chunks.sort(key=lambda chunk: chunk['end']) + chunks.sort(key=lambda chunk: chunk['period_end']) + writer = self.get_writer(start, end, data_frequency) with maybe_show_progress( - all_chunks, + chunks, show_progress, label='Fetching {exchange} {frequency} candles: '.format( exchange=self.exchange.name, frequency=data_frequency )) as it: - previous_candle = dict() for chunk in it: - self.ingest_chunk( - bar_count=chunk['bar_count'], - end_dt=chunk['end'], - data_frequency=data_frequency, + self.ingest_ctable( asset=chunk['asset'], - writer=writer, - previous_candle=previous_candle, + data_frequency=data_frequency, + period=chunk['period'], + writer=writer ) + 
pass diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index c6884d73..1e73805a 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -162,6 +162,14 @@ def get_exchange_minute_writer_root(exchange_name, environ=None): return minute_data_folder +def get_exchange_bundles_folder(exchange_name, environ=None): + exchange_folder = get_exchange_folder(exchange_name, environ) + + temp_bundles = os.path.join(exchange_folder, 'temp_bundles') + ensure_directory(temp_bundles) + + return temp_bundles + def perf_serial(obj): """JSON serializer for objects not serializable by default json code""" diff --git a/catalyst/exchange/init_utils.py b/catalyst/exchange/init_utils.py index 446fe1cb..a37f0441 100644 --- a/catalyst/exchange/init_utils.py +++ b/catalyst/exchange/init_utils.py @@ -2,6 +2,7 @@ from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.exchange_errors import ExchangeNotFoundError from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.exchange.poloniex.poloniex import Poloniex def get_exchange(exchange_name): @@ -20,5 +21,12 @@ def get_exchange(exchange_name): base_currency=None, portfolio=None ) + elif exchange_name == 'poloniex': + return Poloniex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, + portfolio=None + ) else: raise ExchangeNotFoundError(exchange_name=exchange_name) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 78e7fd31..8f4e5a8f 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -71,27 +71,22 @@ class ExchangeBundleTestCase: pass def test_merge_ctables(self): - exchange_name = 'bitfinex' + exchange_name = 'poloniex' + data_frequency = 'minute' - root = '/Users/fredfortier/.catalyst/data/exchanges/bitfinex/temp_bundles' - path = '00/02/000284.bcolz' - - august = '{}/{}'.format( 
- root, 'poloniex-minute-btc_usdt-2017-8' - ) exchange = get_exchange(exchange_name) - asset = exchange.get_asset('btc_usd') + asset = exchange.get_asset('btc_usdt') + + start = pd.to_datetime('2017-09-01', utc=True) + end = pd.to_datetime('2017-09-06', utc=True) exchange_bundle = ExchangeBundle(exchange) + + writer = exchange_bundle.get_writer(start, end, data_frequency) exchange_bundle.ingest_ctable( asset=asset, - data_frequency='minute', - path=august + data_frequency=data_frequency, + period='2017-9', + writer=writer ) - - september = '{}/{}/{}'.format( - root, 'poloniex-minute-btc_usdt-2017-9', path - ) - zseptember = bcolz.open(september, mode='a') - pass From c52653c84e48c66fc5dca42fdc15c7dc7dc3232f Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 13 Oct 2017 21:00:47 -0400 Subject: [PATCH 062/110] Tested ingestion of minute data with a single market --- catalyst/examples/simple_loop.py | 30 ++++++++++++++ catalyst/exchange/bundle_utils.py | 2 +- catalyst/exchange/exchange_bundle.py | 61 ++++++++++++++++++++-------- catalyst/exchange/exchange_errors.py | 13 ++++-- tests/exchange/test_bundle.py | 20 ++++----- 5 files changed, 93 insertions(+), 33 deletions(-) create mode 100644 catalyst/examples/simple_loop.py diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py new file mode 100644 index 00000000..63501037 --- /dev/null +++ b/catalyst/examples/simple_loop.py @@ -0,0 +1,30 @@ +import pandas as pd + +from catalyst import run_algorithm +from catalyst.api import symbol + + +def initialize(context): + print('initializing') + context.asset = symbol('btc_usdt') + + +def handle_data(context, data): + print('handling bar: {}'.format(data.current_dt)) + + price = data.current(context.asset, 'close') + print('got price {price}'.format(price=price)) + + +run_algorithm( + capital_base=250, + start=pd.to_datetime('2017-1-1', utc=True), + end=pd.to_datetime('2017-1-31', utc=True), + data_frequency='minute', + initialize=initialize, + 
handle_data=handle_data, + analyze=None, + exchange_name='poloniex', + algo_namespace='simple_loop', + base_currency='btc' +) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 92af9b10..5a92669d 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -236,7 +236,7 @@ def range_in_bundle(asset, start_dt, end_dt, reader): if np.isnan(end_close): has_data = False - except Exception: + except Exception as e: has_data = False else: diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index b45f965e..bec8a1ca 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,9 +1,10 @@ import calendar import os -import time -from datetime import timedelta, datetime, date +import pytz +from datetime import timedelta, datetime import pandas as pd +import numpy as np from logbook import Logger, INFO from catalyst import get_calendar @@ -13,6 +14,7 @@ from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ get_periods, range_in_bundle, get_bcolz_chunk +from catalyst.exchange.exchange_errors import EmptyValuesInBundleError from catalyst.exchange.exchange_utils import get_exchange_folder, \ get_exchange_bundles_folder from catalyst.utils.cli import maybe_show_progress @@ -325,8 +327,18 @@ class ExchangeBundle: :return: """ - def ingest_ctable(self, asset, data_frequency, period, writer): - start_time = time.time() + def ingest_ctable(self, asset, data_frequency, period, writer, + verify=False): + """ + Merge a ctable bundle chunk into the main bundle for the exchange. 
+ + :param asset: TradingPair + :param data_frequency: str + :param period: str + :param writer: + :param verify: + :return: + """ path = get_bcolz_chunk( exchange_name=self.exchange.name, @@ -338,7 +350,10 @@ class ExchangeBundle: reader = BcolzMinuteBarReader(path) start = reader.first_trading_day - end = reader.last_available_dt + + # TODO: temp workaround, remove when the bundles are fixed + # end = reader.last_available_dt + end = reader.last_available_dt - timedelta(days=1) periods = self.calendar.minutes_in_range(start, end) @@ -363,20 +378,23 @@ class ExchangeBundle: index=periods ) + if verify: + nan_rows = df[df.isnull().T.any().T].index + if len(nan_rows) > 0: + raise EmptyValuesInBundleError( + path=path, + start=nan_rows[0], + end=nan_rows[-1] + ) + data = [] if not df.empty: - df.sort_index(inplace=True, ascending=False) + df.sort_index(inplace=True) data.append((sid, df)) self._write(data, writer, data_frequency) - - end_time = time.time() - delta_time = end_time - start_time - - log.info('time elapsed {}'.format(delta_time)) - - pass + return path def ingest(self, data_frequency, include_symbols=None, exclude_symbols=None, start=None, end=None, @@ -411,11 +429,19 @@ class ExchangeBundle: periods.append(period) month_range = calendar.monthrange(dt.year, dt.month) - month_start = date(dt.year, dt.month, month_range[0]) - month_end = date(dt.year, dt.month, month_range[1]) + month_start = pd.to_datetime( + datetime(dt.year, dt.month, 1, 0, 0, 0, 0), + utc=True) - if not range_in_bundle(asset, month_start, month_end, - reader): + # TODO: workaround, remove when bundles are fixed + month_end = pd.to_datetime( + datetime(dt.year, dt.month, month_range[1] - 1, + 23, 59, 0, 0), + utc=True) + has_data = \ + range_in_bundle(asset, month_start, month_end, reader) + + if not has_data: log.debug('adding period: {}'.format(period)) chunks.append( dict( @@ -445,4 +471,3 @@ class ExchangeBundle: period=chunk['period'], writer=writer ) - pass diff --git 
a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index fc54ec9b..aaed1b3a 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -1,16 +1,18 @@ import sys, traceback from catalyst.errors import ZiplineError + def silent_except_hook(exctype, excvalue, exctraceback): - if exctype in [ PricingDataBeforeTradingError, PricingDataNotLoadedError, - SymbolNotFoundOnExchange, ]: + if exctype in [PricingDataBeforeTradingError, PricingDataNotLoadedError, + SymbolNotFoundOnExchange, ]: fn = traceback.extract_tb(exctraceback)[-1][0] ln = traceback.extract_tb(exctraceback)[-1][1] print "Error traceback: {1} (line {2})\n" \ - "{0.__name__}: {3}".format(exctype, fn, ln, excvalue) + "{0.__name__}: {3}".format(exctype, fn, ln, excvalue) else: sys.__excepthook__(exctype, excvalue, exctraceback) + sys.excepthook = silent_except_hook @@ -168,6 +170,11 @@ class BundleNotFoundError(ZiplineError): 'See catalyst documentation for details.').strip() +class EmptyValuesInBundleError(ZiplineError): + msg = ('Found empty values in bundle {path} between ' + '{start} and {end}.').strip() + + class PricingDataBeforeTradingError(ZiplineError): msg = ('Pricing data for trading pairs {symbols} on exchange {exchange} ' 'starts on {first_trading_day}, but you are either trying to trade or ' diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 8f4e5a8f..29b843aa 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,8 +1,5 @@ -from datetime import timedelta, time from logging import Logger -import bcolz -from toolz.itertoolz import join as joinz import pandas as pd from catalyst.exchange.exchange_bundle import ExchangeBundle @@ -13,18 +10,18 @@ log = Logger('test_exchange_bundle') class ExchangeBundleTestCase: def test_ingest_minute(self): - exchange_name = 'bitfinex' + exchange_name = 'poloniex' # start = pd.to_datetime('2017-09-01', utc=True) - start = 
pd.to_datetime('2017-10-01', utc=True) - end = pd.to_datetime('2017-10-06', utc=True) + start = pd.to_datetime('2017-1-1', utc=True) + end = pd.to_datetime('2017-6-30', utc=True) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( data_frequency='minute', - include_symbols='bcc_btc', + include_symbols='btc_usdt', exclude_symbols=None, start=start, end=end, @@ -77,8 +74,8 @@ class ExchangeBundleTestCase: exchange = get_exchange(exchange_name) asset = exchange.get_asset('btc_usdt') - start = pd.to_datetime('2017-09-01', utc=True) - end = pd.to_datetime('2017-09-06', utc=True) + start = pd.to_datetime('2017-5-1', utc=True) + end = pd.to_datetime('2017-5-31', utc=True) exchange_bundle = ExchangeBundle(exchange) @@ -86,7 +83,8 @@ class ExchangeBundleTestCase: exchange_bundle.ingest_ctable( asset=asset, data_frequency=data_frequency, - period='2017-9', - writer=writer + period='2017-5', + writer=writer, + verify=True ) pass From bdbaad1c913f132e57df5270ecb4984072ddead2 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sat, 14 Oct 2017 02:06:26 -0400 Subject: [PATCH 063/110] Improvements and fixes to the ingestion component --- catalyst/examples/simple_loop.py | 6 ++-- catalyst/exchange/bundle_utils.py | 12 ++++++- catalyst/exchange/exchange_bundle.py | 52 ++++++++++++++++++++++------ tests/exchange/test_bundle.py | 5 +-- 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 63501037..50301281 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -6,7 +6,7 @@ from catalyst.api import symbol def initialize(context): print('initializing') - context.asset = symbol('btc_usdt') + context.asset = symbol('gno_btc') def handle_data(context, data): @@ -18,8 +18,8 @@ def handle_data(context, data): run_algorithm( capital_base=250, - start=pd.to_datetime('2017-1-1', 
utc=True), - end=pd.to_datetime('2017-1-31', utc=True), + start=pd.to_datetime('2017-5-1', utc=True), + end=pd.to_datetime('2017-5-31', utc=True), data_frequency='minute', initialize=initialize, handle_data=handle_data, diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 5a92669d..14a0870c 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,5 +1,5 @@ -import gzip import tarfile +import shutil import requests from datetime import timedelta, datetime @@ -221,6 +221,16 @@ def get_ffill_candles(candles, bar_count, end_dt, data_frequency, def range_in_bundle(asset, start_dt, end_dt, reader): + """ + Evaluate whether price data of an asset is included has been ingested in + the exchange bundle for the given date range. + + :param asset: + :param start_dt: + :param end_dt: + :param reader: + :return: + """ has_data = True if has_data and reader is not None: try: diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index bec8a1ca..e6163580 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,5 +1,7 @@ import calendar import os +import shutil + import pytz from datetime import timedelta, datetime @@ -50,17 +52,20 @@ class ExchangeBundle: else: return self.exchange.get_assets() - def get_adj_dates(self, start, end, assets): - now = pd.Timestamp.utcnow() - if end is None or end > now: - log.debug('adjusting the end date to now {}'.format(now)) - end = now + def get_adj_dates(self, start, end, assets, data_frequency): earliest_trade = None + last_entry = None for asset in assets: if earliest_trade is None or earliest_trade > asset.start_date: earliest_trade = asset.start_date + end_asset = asset.end_minute if data_frequency == 'minute' else \ + asset.end_daily + if end_asset is not None and \ + (last_entry is None or end_asset > last_entry): + last_entry = end_asset + if start is None or earliest_trade > start: 
log.debug( 'adjusting start date to earliest trade date found {}'.format( @@ -68,6 +73,10 @@ class ExchangeBundle: )) start = earliest_trade + if end is None or (last_entry is not None and end > last_entry): + log.debug('adjusting the end date to now {}'.format(last_entry)) + end = last_entry + if start >= end: raise ValueError('start date cannot be after end date') @@ -328,7 +337,7 @@ class ExchangeBundle: """ def ingest_ctable(self, asset, data_frequency, period, writer, - verify=False): + verify=False, cleanup=False): """ Merge a ctable bundle chunk into the main bundle for the exchange. @@ -336,7 +345,12 @@ class ExchangeBundle: :param data_frequency: str :param period: str :param writer: - :param verify: + :param verify: bool + Ensure that the bundle does not have any missing data. + + :param cleanup: bool + Remove the temp bundle directory after ingestion. + :return: """ @@ -390,10 +404,15 @@ class ExchangeBundle: data = [] if not df.empty: df.sort_index(inplace=True) - data.append((sid, df)) self._write(data, writer, data_frequency) + + if cleanup: + log.debug('removing bundle folder following ' + 'ingestion: {}'.format(path)) + shutil.rmtree(path) + return path def ingest(self, data_frequency, include_symbols=None, @@ -412,15 +431,22 @@ class ExchangeBundle: """ assets = self.get_assets(include_symbols, exclude_symbols) - start, end = self.get_adj_dates(start, end, assets) + start, end = self.get_adj_dates(start, end, assets, data_frequency) reader = self.get_reader(data_frequency) chunks = [] - periods = [] for asset in assets: - asset_start, asset_end = self.get_adj_dates(start, end, [asset]) + try: + asset_start, asset_end = \ + self.get_adj_dates(start, end, [asset], data_frequency) + + except ValueError: + dt += timedelta(days=1) + continue + sessions = self.calendar.sessions_in_range(asset_start, asset_end) + periods = [] dt = sessions[0] while dt <= sessions[-1]: period = '{}-{}'.format(dt.year, dt.month) @@ -438,6 +464,10 @@ class ExchangeBundle: 
datetime(dt.year, dt.month, month_range[1] - 1, 23, 59, 0, 0), utc=True) + + if month_end > asset_end: + month_end = asset_end + has_data = \ range_in_bundle(asset, month_start, month_end, reader) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 29b843aa..f2158e87 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -21,7 +21,8 @@ class ExchangeBundleTestCase: log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( data_frequency='minute', - include_symbols='btc_usdt', + include_symbols='gno_btc', + # include_symbols=None, exclude_symbols=None, start=start, end=end, @@ -72,7 +73,7 @@ class ExchangeBundleTestCase: data_frequency = 'minute' exchange = get_exchange(exchange_name) - asset = exchange.get_asset('btc_usdt') + asset = exchange.get_asset('gno_btc') start = pd.to_datetime('2017-5-1', utc=True) end = pd.to_datetime('2017-5-31', utc=True) From 403f951c771f76589eb438838e01a2357df42631 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sun, 15 Oct 2017 05:11:44 -0400 Subject: [PATCH 064/110] Added unit tests --- catalyst/exchange/bittrex/bittrex.py | 45 +++++- catalyst/exchange/bundle_utils.py | 15 ++ catalyst/exchange/exchange.py | 43 +++++- catalyst/exchange/exchange_bundle.py | 10 +- catalyst/exchange/poloniex/poloniex.py | 188 ++++++++++++++----------- tests/exchange/test_bitfinex.py | 4 +- tests/exchange/test_bittrex.py | 10 +- tests/exchange/test_poloniex.py | 90 ++++++++++++ 8 files changed, 297 insertions(+), 108 deletions(-) create mode 100644 tests/exchange/test_poloniex.py diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index d81d49b7..5e35346d 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -327,7 +327,7 @@ class Bittrex(Exchange): try: end_daily = cached_symbols[exchange_symbol]['end_daily'] except KeyError as e: - end_daily ='N/A' + end_daily = 'N/A' try: end_minute = 
cached_symbols[exchange_symbol]['end_minute'] @@ -336,13 +336,44 @@ class Bittrex(Exchange): symbol_map[exchange_symbol] = dict( symbol=symbol, - start_date=pd.to_datetime(market['Created'], utc=True).strftime("%Y-%m-%d"), - end_daily = end_daily, - end_minute = end_minute, + start_date=pd.to_datetime(market['Created'], + utc=True).strftime("%Y-%m-%d"), + end_daily=end_daily, + end_minute=end_minute, ) - if(filename is None): + if (filename is None): filename = get_exchange_symbols_filename(self.name) - with open(filename,'w') as f: - json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + with open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) + + def get_orderbook(self, asset, type='all'): + if type == 'all': + type = 'both' + elif type == 'bid': + type = 'buy' + elif type == 'ask': + type = 'sell' + else: + raise ValueError('invalid type') + + exchange_symbol = asset.exchange_symbol + data = self.api.getorderbook(market=exchange_symbol, type=type) + + result = dict() + for exchange_type in data: + if exchange_type == 'buy': + type = 'bid' + elif exchange_type == 'sell': + type = 'ask' + + result[type] = [] + for entry in data[exchange_type]: + result[type].append(dict( + rate=entry['Rate'], + quantity=entry['Quantity'] + )) + + return result diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 14a0870c..d38f3a8c 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -220,6 +220,21 @@ def get_ffill_candles(candles, bar_count, end_dt, data_frequency, return all_dates, all_candles +def get_trailing_candles_dt(asset, start_dt, end_dt, data_frequency): + missing_start = None + + if asset.end_minute is not None and start_dt < asset.end_minute: + if asset.end_minute < end_dt: + delta = get_delta(1, data_frequency) + + missing_start = asset.end_minute + delta + + else: + missing_start = start_dt + + return missing_start + + def 
range_in_bundle(asset, start_dt, end_dt, reader): """ Evaluate whether price data of an asset is included has been ingested in diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 882aeb2b..665dd0ce 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -12,8 +12,8 @@ from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange import bundle_utils -from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ - get_delta +from catalyst.exchange.bundle_utils import get_start_dt, \ + get_delta, get_trailing_candles_dt from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ @@ -24,6 +24,7 @@ from catalyst.exchange.exchange_portfolio import ExchangePortfolio from catalyst.exchange.exchange_utils import get_exchange_symbols from catalyst.finance.order import ORDER_STATUS from catalyst.finance.transaction import Transaction +from catalyst.utils.deprecate import deprecated log = Logger('Exchange') @@ -418,6 +419,7 @@ class Exchange: return value + @deprecated def get_history(self, assets, end_dt, bar_count, data_frequency, fallback_exchange=True): """ @@ -449,6 +451,7 @@ class Exchange: ) return candles + @deprecated def get_asset_history(self, asset, end, bar_count, data_frequency, fallback_exchange=True): """ @@ -586,14 +589,32 @@ class Exchange: writer = bundle.get_writer(start_dt, end_dt, data_frequency) for asset in missing_assets: - bundle.ingest_chunk( - bar_count=adj_bar_count, - end_dt=end_dt, - data_frequency=data_frequency, + # TODO: use this only for data too recent to be in a bundle + trailing_candles_dt = get_trailing_candles_dt( asset=asset, - writer=writer + start_dt=start_dt, + end_dt=end_dt, + data_frequency=data_frequency ) + if trailing_candles_dt is not None: + # The get_history method 
supports multiple asset + candles = self.get_candles( + data_frequency=data_frequency, + assets=[asset], + bar_count=bar_count, + start_dt=trailing_candles_dt, + end_dt=end_dt + ) + + bundle.ingest_candles( + candles=candles, + bar_count=adj_bar_count, + end_dt=end_dt, + data_frequency=data_frequency, + writer=writer + ) + reader = bundle.get_reader(data_frequency) values = reader.load_raw_arrays( fields=[field], @@ -892,3 +913,11 @@ class Exchange: :return: """ pass + + @abc.abstractmethod + def get_orderbook(self): + """ + Retrieve the account parameters. + :return: + """ + pass diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index e6163580..bff7bebb 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -254,7 +254,7 @@ class ExchangeBundle: invalid_data_behavior='raise' ) - def ingest_chunk(self, bar_count, end_dt, data_frequency, asset, + def ingest_candles(self, candles, bar_count, end_dt, data_frequency, writer, previous_candle=dict()): """ Retrieve the specified OHLCV chunk and write it to the bundle @@ -268,14 +268,6 @@ class ExchangeBundle: :return: """ - # The get_history method supports multiple asset - candles = self.exchange.get_history( - assets=[asset], - end_dt=end_dt, - bar_count=bar_count, - data_frequency=data_frequency, - fallback_exchange=False - ) num_candles = 0 data = [] diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 151fecd8..b8a6e522 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd import pytz import requests -#import six +# import six from six import iteritems from catalyst.assets._assets import TradingPair from logbook import Logger @@ -32,7 +32,6 @@ from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ download_exchange_symbols from catalyst.finance.transaction import 
Transaction - log = Logger('Poloniex') @@ -52,7 +51,6 @@ class Poloniex(Exchange): self.max_requests_per_minute = 20 self.request_cpt = dict() - def sanitize_curency_symbol(self, exchange_symbol): """ Helper method used to build the universal pair. @@ -63,28 +61,27 @@ class Poloniex(Exchange): """ return exchange_symbol.lower() - def _create_order(self, order_status): """ Create a Catalyst order object from the Exchange order dictionary :param order_status: :return: Order """ - #if order_status['is_cancelled']: + # if order_status['is_cancelled']: # status = ORDER_STATUS.CANCELLED - #elif not order_status['is_live']: + # elif not order_status['is_live']: # log.info('found executed order {}'.format(order_status)) # status = ORDER_STATUS.FILLED - #else: + # else: status = ORDER_STATUS.OPEN amount = float(order_status['amount']) - #filled = float(order_status['executed_amount']) + # filled = float(order_status['executed_amount']) filled = None if order_status['type'] == 'sell': amount = -amount - #filled = -filled + # filled = -filled price = float(order_status['rate']) order_type = order_status['type'] @@ -93,24 +90,25 @@ class Poloniex(Exchange): limit_price = None # TODO: is this comprehensive enough? - #if order_type.endswith('limit'): + # if order_type.endswith('limit'): # limit_price = price - #elif order_type.endswith('stop'): + # elif order_type.endswith('stop'): # stop_price = price - #executed_price = float(order_status['avg_execution_price']) + # executed_price = float(order_status['avg_execution_price']) executed_price = price # TODO: bitfinex does not specify comission. I could calculate it but not sure if it's worth it. 
commission = None - #date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) - #date = pytz.utc.localize(date) + # date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) + # date = pytz.utc.localize(date) date = None order = Order( dt=date, - asset=self.assets[order_status['symbol']], # No such field in Poloniex + asset=self.assets[order_status['symbol']], + # No such field in Poloniex amount=amount, stop=stop_price, limit=limit_price, @@ -121,7 +119,6 @@ class Poloniex(Exchange): order.status = status return order, executed_price - def get_balances(self): log.debug('retrieving wallets balances') @@ -143,7 +140,6 @@ class Poloniex(Exchange): return std_balances - @property def account(self): account = Account() @@ -192,17 +188,18 @@ class Poloniex(Exchange): """ # TODO: use BcolzMinuteBarReader to read from cache - if(data_frequency == '5m' or data_frequency == 'minute'): #TODO: Polo does not have '1m' + if ( + data_frequency == '5m' or data_frequency == 'minute'): # TODO: Polo does not have '1m' frequency = 300 - elif(data_frequency == '15m'): + elif (data_frequency == '15m'): frequency = 900 - elif(data_frequency == '30m'): + elif (data_frequency == '30m'): frequency = 1800 - elif(data_frequency == '2h'): + elif (data_frequency == '2h'): frequency = 7200 - elif(data_frequency == '4h'): + elif (data_frequency == '4h'): frequency = 14400 - elif(data_frequency == '1D' or data_frequency == 'daily'): + elif (data_frequency == '1D' or data_frequency == 'daily'): frequency = 86400 else: raise InvalidHistoryFrequencyError( @@ -216,13 +213,14 @@ class Poloniex(Exchange): for asset in asset_list: end = int(time.time()) - if(bar_count is None): + if (bar_count is None): start = end - 2 * frequency else: start = end - bar_count * frequency - try: - response = self.api.returnchartdata(self.get_symbol(asset),frequency, start, end) + try: + response = self.api.returnchartdata(self.get_symbol(asset), + frequency, start, end) except Exception as e: 
raise ExchangeRequestError(error=e) @@ -240,7 +238,7 @@ class Poloniex(Exchange): close=np.float64(candle['close']), volume=np.float64(candle['volume']), price=np.float64(candle['close']), - last_traded=pd.Timestamp.utcfromtimestamp( candle['date'] ) + last_traded=pd.Timestamp.utcfromtimestamp(candle['date']) ) return ohlc @@ -257,7 +255,6 @@ class Poloniex(Exchange): return ohlc_map[assets] \ if isinstance(assets, TradingPair) else ohlc_map - def create_order(self, asset, amount, is_buy, style): """ Creating order on the exchange. @@ -270,14 +267,15 @@ class Poloniex(Exchange): """ exchange_symbol = self.get_symbol(asset) - if isinstance(style, ExchangeLimitOrder) or isinstance(style, ExchangeStopLimitOrder): + if isinstance(style, ExchangeLimitOrder) or isinstance(style, + ExchangeStopLimitOrder): if isinstance(style, ExchangeStopLimitOrder): log.warn('{} will ignore the stop price'.format(self.name)) price = style.get_limit_price(is_buy) try: - if(is_buy): + if (is_buy): response = self.api.buy(exchange_symbol, amount, price) else: response = self.api.sell(exchange_symbol, -amount, price) @@ -286,7 +284,7 @@ class Poloniex(Exchange): date = pd.Timestamp.utcnow() - if('orderNumber' in response): + if ('orderNumber' in response): order_id = str(response['orderNumber']) order = Order( dt=date, @@ -298,13 +296,14 @@ class Poloniex(Exchange): ) return order else: - log.warn('{} order failed: {}'.format('buy' if is_buy else 'sell', response['error'])) + log.warn( + '{} order failed: {}'.format('buy' if is_buy else 'sell', + response['error'])) return None else: raise InvalidOrderStyle(exchange=self.name, style=style.__class__.__name__) - def get_open_orders(self, asset='all'): """Retrieve all of the current open orders. 
@@ -331,7 +330,7 @@ class Poloniex(Exchange): """ try: - if(asset=='all'): + if (asset == 'all'): response = self.api.returnopenorders('all') else: response = self.api.returnopenorders(self.get_symbol(asset)) @@ -346,15 +345,15 @@ class Poloniex(Exchange): print(self.portfolio.open_orders) - #TODO: Need to handle openOrders for 'all' + # TODO: Need to handle openOrders for 'all' orders = list() for order_status in response: - order, executed_price = self._create_order(order_status) # will Throw error b/c Polo doesn't track order['symbol'] + order, executed_price = self._create_order( + order_status) # will Throw error b/c Polo doesn't track order['symbol'] if asset is None or asset == order.sid: orders.append(order) return orders - def get_order(self, order_id): """Lookup an order based on the order id returned from one of the @@ -387,11 +386,10 @@ class Poloniex(Exchange): raise ExchangeRequestError(error=e) for o in response: - if(int(o['orderNumber'])==int(order_id)): + if (int(o['orderNumber']) == int(order_id)): return order - + return None - def cancel_order(self, order_param): """Cancel an open order. @@ -402,7 +400,7 @@ class Poloniex(Exchange): The order_id or order object to cancel. 
""" - if(isinstance(order_param, Order)): + if (isinstance(order_param, Order)): order = order_param else: order = self._portfolio.open_orders[order_param] @@ -413,20 +411,20 @@ class Poloniex(Exchange): raise ExchangeRequestError(error=e) if 'error' in response: - log.info('Unable to cancel order {order_id} on exchange {exchange} {error}.'.format( - order_id=order.id, - exchange=self.name, - error=response['error'] + log.info( + 'Unable to cancel order {order_id} on exchange {exchange} {error}.'.format( + order_id=order.id, + exchange=self.name, + error=response['error'] )) - #raise OrderCancelError( + # raise OrderCancelError( # order_id=order.id, # exchange=self.name, # error=response['error'] - #) - - self.portfolio.remove_order(order) + # ) + self.portfolio.remove_order(order) def tickers(self, assets): """ @@ -435,7 +433,7 @@ class Poloniex(Exchange): :param assets: :return: - """ + """ symbols = self.get_symbols(assets) log.debug('fetching tickers {}'.format(symbols)) @@ -454,21 +452,21 @@ class Poloniex(Exchange): ticks = dict() for index, symbol in enumerate(symbols): - ticks[assets[index]] = dict( timestamp=pd.Timestamp.utcnow(), bid=float(response[symbol]['highestBid']), ask=float(response[symbol]['lowestAsk']), last_price=float(response[symbol]['last']), - low=float(response[symbol]['lowestAsk']), #TODO: Polo does not provide low - high=float(response[symbol]['highestBid']), #TODO: Polo does not provide high + low=float(response[symbol]['lowestAsk']), + # TODO: Polo does not provide low + high=float(response[symbol]['highestBid']), + # TODO: Polo does not provide high volume=float(response[symbol]['baseVolume']), ) log.debug('got tickers {}'.format(ticks)) return ticks - def generate_symbols_json(self, filename=None, source_dates=False): symbol_map = {} @@ -480,10 +478,11 @@ class Poloniex(Exchange): response = self.api.returnticker() for exchange_symbol in response: - base, market = self.sanitize_curency_symbol(exchange_symbol).split('_') - symbol = 
'{market}_{base}'.format( market=market, base=base ) + base, market = self.sanitize_curency_symbol(exchange_symbol).split( + '_') + symbol = '{market}_{base}'.format(market=market, base=base) - if(source_dates): + if (source_dates): start_date = self.get_symbol_start_date(exchange_symbol) else: try: @@ -494,7 +493,7 @@ class Poloniex(Exchange): try: end_daily = cached_symbols[exchange_symbol]['end_daily'] except KeyError as e: - end_daily ='N/A' + end_daily = 'N/A' try: end_minute = cached_symbols[exchange_symbol]['end_minute'] @@ -502,28 +501,28 @@ class Poloniex(Exchange): end_minute = 'N/A' symbol_map[exchange_symbol] = dict( - symbol = symbol, - start_date = start_date, - end_daily = end_daily, - end_minute = end_minute, + symbol=symbol, + start_date=start_date, + end_daily=end_daily, + end_minute=end_minute, ) - if(filename is None): + if (filename is None): filename = get_exchange_symbols_filename(self.name) - with open(filename,'w') as f: - json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + with open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) def get_symbol_start_date(self, symbol): try: - r = self.api.returnchartdata(symbol,86400,pd.to_datetime('2010-1-1').value // 10 ** 9) + r = self.api.returnchartdata(symbol, 86400, pd.to_datetime( + '2010-1-1').value // 10 ** 9) except Exception as e: raise ExchangeRequestError(error=e) return time.strftime('%Y-%m-%d', time.gmtime(int(r[0]['date']))) - - def check_open_orders(self): """ Need to override this function for Poloniex: @@ -549,22 +548,23 @@ class Poloniex(Exchange): except Exception as e: raise ExchangeRequestError(error=e) - if(order_open): + if (order_open): delta = pd.Timestamp.utcnow() - order.dt log.info( 'order {order_id} still open after {delta}'.format( order_id=order_id, - delta=delta ) - ) + delta=delta) + ) try: response = self.api.returnordertrades(order_id) except Exception as e: raise ExchangeRequestError(error=e) - 
if('error' in response): - if(not order_open): - raise OrphanOrderReverseError(order_id=order_id, exchange=self.name) + if ('error' in response): + if (not order_open): + raise OrphanOrderReverseError(order_id=order_id, + exchange=self.name) else: for tx in response: """ @@ -576,25 +576,29 @@ class Poloniex(Exchange): When an order if fully filled, we flush the dict of transactions associated with that order. """ - if(not filter(lambda item: item['order_id'] == tx['tradeID'], self.transactions[order_id])): - log.debug('Got new transaction for order {}: amount {}, price {}'.format( - order_id, tx['amount'], tx['rate'])) - tx['amount']=float(tx['amount']) - if(tx['type']=='sell'): + if (not filter( + lambda item: item['order_id'] == tx['tradeID'], + self.transactions[order_id])): + log.debug( + 'Got new transaction for order {}: amount {}, price {}'.format( + order_id, tx['amount'], tx['rate'])) + tx['amount'] = float(tx['amount']) + if (tx['type'] == 'sell'): tx['amount'] = -tx['amount'] transaction = Transaction( asset=order.asset, amount=tx['amount'], dt=pd.to_datetime(tx['date'], utc=True), price=float(tx['rate']), - order_id=tx['tradeID'], # it's a misnomer, but keeping it for compatibility + order_id=tx['tradeID'], + # it's a misnomer, but keeping it for compatibility commission=float(tx['fee']) ) self.transactions[order_id].append(transaction) self.portfolio.execute_transaction(transaction) transactions.append(transaction) - if(not order_open): + if (not order_open): """ Since transactions have been executed individually the only thing left to do is remove them from list of open_orders @@ -603,3 +607,25 @@ class Poloniex(Exchange): del self.transactions[order_id] return transactions + + def get_orderbook(self, asset, type='all'): + exchange_symbol = asset.exchange_symbol + data = self.api.returnOrderBook(market=exchange_symbol) + + result = dict() + for exchange_type in data: + if exchange_type == 'bids': + type = 'bid' + elif exchange_type == 'asks': + type = 
'ask' + else: + continue + + result[type] = [] + for entry in data[exchange_type]: + if len(entry) == 2: + result[type].append(dict( + rate=float(entry[0]), + quantity=float(entry[1]) + )) + return result diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index c8f969fd..dda5bab9 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -56,8 +56,8 @@ class BitfinexTestCase(BaseExchangeTestCase): def test_tickers(self): log.info('retrieving tickers') tickers = self.exchange.tickers([ - self.exchange.get_asset('eth_usd'), - self.exchange.get_asset('btc_usd') + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') ]) pass diff --git a/tests/exchange/test_bittrex.py b/tests/exchange/test_bittrex.py index 5d90d660..f1becbcc 100644 --- a/tests/exchange/test_bittrex.py +++ b/tests/exchange/test_bittrex.py @@ -67,8 +67,8 @@ class BittrexTestCase(BaseExchangeTestCase): def test_tickers(self): log.info('retrieving tickers') tickers = self.exchange.tickers([ - self.exchange.get_asset('ubq_btc'), - self.exchange.get_asset('neo_btc') + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') ]) assert len(tickers) == 2 pass @@ -81,3 +81,9 @@ class BittrexTestCase(BaseExchangeTestCase): def test_get_account(self): log.info('testing account data') pass + + def test_orderbook(self): + log.info('testing order book for bittrex') + asset = self.exchange.get_asset('eth_btc') + orderbook = self.exchange.get_orderbook(asset) + pass diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py new file mode 100644 index 00000000..1da3313d --- /dev/null +++ b/tests/exchange/test_poloniex.py @@ -0,0 +1,90 @@ +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.poloniex.poloniex import Poloniex +from catalyst.finance.order import Order +from base import BaseExchangeTestCase +from logbook import Logger +from catalyst.exchange.exchange_utils import 
get_exchange_auth + +log = Logger('test_poloniex') + + +class PoloniexTestCase(BaseExchangeTestCase): + @classmethod + def setup(self): + print ('creating poloniex object') + auth = get_exchange_auth('poloniex') + self.exchange = Poloniex( + key=auth['key'], + secret=auth['secret'], + base_currency='btc' + ) + + def test_order(self): + log.info('creating order') + asset = self.exchange.get_asset('neo_btc') + order_id = self.exchange.order( + asset=asset, + limit_price=0.0005, + amount=1, + ) + log.info('order created {}'.format(order_id)) + assert order_id is not None + pass + + def test_open_orders(self): + log.info('retrieving open orders') + asset = self.exchange.get_asset('neo_btc') + orders = self.exchange.get_open_orders(asset) + pass + + def test_get_order(self): + log.info('retrieving order') + order = self.exchange.get_order( + u'2c584020-9caf-4af5-bde0-332c0bba17e2') + assert isinstance(order, Order) + pass + + def test_cancel_order(self, ): + log.info('cancel order') + self.exchange.cancel_order(u'dc7bcca2-5219-4145-8848-8a593d2a72f9') + pass + + def test_get_candles(self): + log.info('retrieving candles') + ohlcv_neo = self.exchange.get_candles( + data_frequency='5m', + assets=self.exchange.get_asset('neo_btc') + ) + ohlcv_neo_ubq = self.exchange.get_candles( + data_frequency='5m', + assets=[ + self.exchange.get_asset('neo_btc'), + self.exchange.get_asset('ubq_btc') + ], + bar_count=14 + ) + pass + + def test_tickers(self): + log.info('retrieving tickers') + tickers = self.exchange.tickers([ + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') + ]) + assert len(tickers) == 2 + pass + + def test_get_balances(self): + log.info('testing wallet balances') + balances = self.exchange.get_balances() + pass + + def test_get_account(self): + log.info('testing account data') + pass + + def test_orderbook(self): + log.info('testing order book for bittrex') + asset = self.exchange.get_asset('eth_btc') + orderbook = 
self.exchange.get_orderbook(asset) + pass From 1732b4a985df459eb9f4eaf03b2a70e4156e4d5b Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 16 Oct 2017 03:09:13 -0400 Subject: [PATCH 065/110] Testing related adjustments --- catalyst/examples/arbitrage_with_interface.py | 12 +- catalyst/examples/simple_loop.py | 14 +- catalyst/exchange/bitfinex/bitfinex.py | 24 ++ catalyst/exchange/bittrex/bittrex.py | 36 +-- catalyst/exchange/bundle_utils.py | 230 ++++++------------ catalyst/exchange/exchange.py | 49 +++- catalyst/exchange/exchange_bundle.py | 67 +++-- catalyst/exchange/poloniex/poloniex.py | 38 +-- catalyst/utils/run_algo.py | 5 +- tests/exchange/test_bitfinex.py | 6 + tests/exchange/test_bundle.py | 2 +- tests/exchange/test_poloniex.py | 3 +- 12 files changed, 257 insertions(+), 229 deletions(-) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py index 01baefa9..f3625e7a 100644 --- a/catalyst/examples/arbitrage_with_interface.py +++ b/catalyst/examples/arbitrage_with_interface.py @@ -9,7 +9,7 @@ from catalyst.api import ( from catalyst.exchange.stats_utils import get_pretty_stats from catalyst.utils.run_algo import run_algorithm -algo_namespace = 'arbitrage_neo_eth' +algo_namespace = 'arbitrage_eth_btc' log = Logger(algo_namespace) @@ -19,10 +19,10 @@ def initialize(context): # The context contains a new "exchanges" attribute which is a dictionary # of exchange objects by exchange name. This allow easy access to the # exchanges. - context.buying_exchange = context.exchanges['bittrex'] + context.buying_exchange = context.exchanges['poloniex'] context.selling_exchange = context.exchanges['bitfinex'] - context.trading_pair_symbol = 'neo_eth' + context.trading_pair_symbol = 'eth_btc' context.trading_pairs = dict() # Note the second parameter of the symbol() method @@ -30,7 +30,7 @@ def initialize(context): # the exchange information. 
This allow all other operations using # the TradingPair to target the correct exchange. context.trading_pairs[context.buying_exchange] = \ - symbol('neo_eth', context.buying_exchange.name) + symbol('eth_btc', context.buying_exchange.name) context.trading_pairs[context.selling_exchange] = \ symbol(context.trading_pair_symbol, context.selling_exchange.name) @@ -267,9 +267,9 @@ run_algorithm( initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='bittrex,bitfinex', + exchange_name='poloniex,bitfinex', live=True, algo_namespace=algo_namespace, - base_currency='eth', + base_currency='btc', live_graph=False ) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 50301281..84ecc488 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -1,4 +1,5 @@ import pandas as pd +import talib from catalyst import run_algorithm from catalyst.api import symbol @@ -15,11 +16,20 @@ def handle_data(context, data): price = data.current(context.asset, 'close') print('got price {price}'.format(price=price)) + prices = data.history( + context.asset, + fields='price', + bar_count=50, + frequency='1m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + print('got rsi: {}'.format(rsi)) + run_algorithm( capital_base=250, - start=pd.to_datetime('2017-5-1', utc=True), - end=pd.to_datetime('2017-5-31', utc=True), + start=pd.to_datetime('2017-9-5', utc=True), + end=pd.to_datetime('2017-9-30', utc=True), data_frequency='minute', initialize=initialize, handle_data=handle_data, diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index ac1af89e..d866e912 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -661,3 +661,27 @@ class Bitfinex(Exchange): return time.strftime('%Y-%m-%d', time.gmtime(int(response.json()[-1][0] / 1000))) + + def get_orderbook(self, asset, order_type='all'): + exchange_symbol = 
asset.exchange_symbol + try: + self.ask_request() + response = self._request( + 'book/{}'.format(exchange_symbol), None) + data = response.json() + + except Exception as e: + raise ExchangeRequestError(error=e) + + # TODO: filter by type + result = dict() + for order_type in data: + result[order_type] = [] + + for entry in data[order_type]: + result[order_type].append(dict( + rate=float(entry['price']), + quantity=float(entry['amount']) + )) + + return result diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 5e35346d..2979a71c 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -65,13 +65,19 @@ class Bittrex(Exchange): log.debug('retrieving wallet balances') self.ask_request() balances = self.api.getbalances() + except Exception as e: raise ExchangeRequestError(error=e) std_balances = dict() - for balance in balances: - currency = balance['Currency'].lower() - std_balances[currency] = balance['Available'] + try: + for balance in balances: + currency = balance['Currency'].lower() + std_balances[currency] = balance['Available'] + + except TypeError: + raise ExchangeRequestError(error=balances) + return std_balances def create_order(self, asset, amount, is_buy, style): @@ -349,29 +355,29 @@ class Bittrex(Exchange): json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',', ':')) - def get_orderbook(self, asset, type='all'): - if type == 'all': - type = 'both' - elif type == 'bid': - type = 'buy' - elif type == 'ask': - type = 'sell' + def get_orderbook(self, asset, order_type='all'): + if order_type == 'all': + order_type = 'both' + elif order_type == 'bid': + order_type = 'buy' + elif order_type == 'ask': + order_type = 'sell' else: raise ValueError('invalid type') exchange_symbol = asset.exchange_symbol - data = self.api.getorderbook(market=exchange_symbol, type=type) + data = self.api.getorderbook(market=exchange_symbol, type=order_type) result = dict() for 
exchange_type in data: if exchange_type == 'buy': - type = 'bid' + order_type = 'bids' elif exchange_type == 'sell': - type = 'ask' + order_type = 'asks' - result[type] = [] + result[order_type] = [] for entry in data[exchange_type]: - result[type].append(dict( + result[order_type].append(dict( rate=entry['Rate'], quantity=entry['Quantity'] )) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index d38f3a8c..8689a8d0 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -71,86 +71,6 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): return path -def get_history(exchange_name, data_frequency, symbol, start=None, end=None): - """ - History API provides OHLCV data for any of the supported exchanges up to yesterday. - - :param exchange_name: string - Required: The name identifier of the exchange (e.g. bitfinex, bittrex, poloniex). - :param data_frequency: string - Required: The bar frequency (minute or daily) - :param symbol: string - Required: The trading pair symbol, using Catalyst naming convention - :param start: datetime - Optional: The start date. - :param end: datetime - Optional: The end date. - - :return ohlcv: list[dict[string, float]] - Each row contains the following dictionary for the resulting bars: - 'ts' : int, the timestamp in seconds - 'open' : float - 'high' : float - 'low' : float - 'close' : float - 'volume' : float - - Notes - ===== - Using seconds for the start and end dates for ease of use in the - function query parameters. - - Sometimes, one minute goes by without completing a trade of the given - trading pair on the given exchange. To minimize the payload size, we - don't return identical sequential bars. Post-processing code will - forward fill missing bars outside of this function. 
- """ - - start_seconds = get_seconds_from_date(start) if start else None - end_seconds = get_seconds_from_date(end) if end else None - - if exchange_name not in EXCHANGE_NAMES: - raise ValueError( - 'get_history function only supports the following exchanges: {}'.format( - list(EXCHANGE_NAMES))) - - if data_frequency != 'daily' and data_frequency != 'minute': - raise ValueError( - 'get_history currently only supports daily and minute data.' - ) - - url = '{api_url}/candles?exchange={exchange}&market={symbol}&freq={data_frequency}'.format( - api_url=API_URL, - exchange=exchange_name, - symbol=symbol, - data_frequency=data_frequency, - ) - - if start_seconds: - url += '&start={}'.format(start_seconds) - - if end_seconds: - url += '&end={}'.format(end_seconds) - - try: - response = requests.get(url) - except Exception as e: - raise ValueError(e) - - data = response.json() - - if 'error' in data: - raise ApiCandlesError(error=data['error']) - - for candle in data: - last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) - last_traded = last_traded.replace(tzinfo=pytz.UTC) - - candle['last_traded'] = last_traded - - return data - - def get_delta(periods, data_frequency): return timedelta(minutes=periods) \ if data_frequency == 'minute' else timedelta(days=periods) @@ -270,75 +190,6 @@ def range_in_bundle(asset, start_dt, end_dt, reader): return has_data -@deprecated -def get_history_mock(exchange_name, data_frequency, symbol, start_ms, end_ms, - exchanges): - """ - Mocking the history API written by Victor by proxying the request - to Bitfinex. - - :param exchange_name: string - The name identifier of the exchange (e.g. bitfinex). - Only bitfinex is supported in this mock function. - :param data_frequency: string - The bar frequency (minute or daily) - :param symbol: string - The trading pair symbol. - :param start_ms: float - The start date in milliseconds. - :param end_ms: float - The end date in milliseconds. 
- :param exchanges: MOCK ONLY - This won't be required in production mode since the exchange object - will be retrieved on the server. - :return ohlcv: list[dict[string, float]] - The open, high, low, volume collection for the resulting bars. - - Notes - ===== - Using milliseconds for the start and end dates for ease of use in - URL query parameters. - - Sometimes, one minute goes by without completing a trade of the given - trading pair on the given exchange. To minimize the payload size, we - don't return identical sequential bars. Post-processing code will - forward fill missing bars outside of this function. - """ - - if exchange_name != 'bitfinex': - raise ValueError('get_history mock function only works with bitfinex') - - exchange = exchanges[exchange_name] - assets = [exchange.get_asset(symbol=symbol)] - start = get_date_from_ms(start_ms) - end = get_date_from_ms(end_ms) - - delta = end - start - - periods = delta.seconds % 3600 / 60.0 \ - if data_frequency == 'minute' else delta.days - - candles = exchange.get_candles( - data_frequency=data_frequency, - assets=assets, - bar_count=periods, - start_dt=start, - end_dt=end - ) - - ohlcv = [] - for candle in candles: - ohlcv.append(dict( - open=candle['open'], - high=candle['high'], - low=candle['low'], - close=candle['close'], - volume=candle['volume'], - last_traded=candle['last_traded'] - )) - return ohlcv - - def find_most_recent_time(bundle_name): """ Find most recent "time folder" for a given bundle. @@ -368,3 +219,84 @@ def find_most_recent_time(bundle_name): return most_recent_bundle.keys()[0] else: return None + + +@deprecated +def get_history(exchange_name, data_frequency, symbol, start=None, end=None): + """ + History API provides OHLCV data for any of the supported exchanges up to yesterday. + + :param exchange_name: string + Required: The name identifier of the exchange (e.g. bitfinex, bittrex, poloniex). 
+ :param data_frequency: string + Required: The bar frequency (minute or daily) + :param symbol: string + Required: The trading pair symbol, using Catalyst naming convention + :param start: datetime + Optional: The start date. + :param end: datetime + Optional: The end date. + + :return ohlcv: list[dict[string, float]] + Each row contains the following dictionary for the resulting bars: + 'ts' : int, the timestamp in seconds + 'open' : float + 'high' : float + 'low' : float + 'close' : float + 'volume' : float + + Notes + ===== + Using seconds for the start and end dates for ease of use in the + function query parameters. + + Sometimes, one minute goes by without completing a trade of the given + trading pair on the given exchange. To minimize the payload size, we + don't return identical sequential bars. Post-processing code will + forward fill missing bars outside of this function. + """ + + start_seconds = get_seconds_from_date(start) if start else None + end_seconds = get_seconds_from_date(end) if end else None + + if exchange_name not in EXCHANGE_NAMES: + raise ValueError( + 'get_history function only supports the following exchanges: {}'.format( + list(EXCHANGE_NAMES))) + + if data_frequency != 'daily' and data_frequency != 'minute': + raise ValueError( + 'get_history currently only supports daily and minute data.' 
+ ) + + url = '{api_url}/candles?exchange={exchange}&market={symbol}&freq={data_frequency}'.format( + api_url=API_URL, + exchange=exchange_name, + symbol=symbol, + data_frequency=data_frequency, + ) + + if start_seconds: + url += '&start={}'.format(start_seconds) + + if end_seconds: + url += '&end={}'.format(end_seconds) + + try: + response = requests.get(url) + except Exception as e: + raise ValueError(e) + + data = response.json() + + if 'error' in data: + raise ApiCandlesError(error=data['error']) + + for candle in data: + last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) + last_traded = last_traded.replace(tzinfo=pytz.UTC) + + candle['last_traded'] = last_traded + + return data diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 665dd0ce..84e71aba 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -13,7 +13,7 @@ from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange import bundle_utils from catalyst.exchange.bundle_utils import get_start_dt, \ - get_delta, get_trailing_candles_dt + get_delta, get_trailing_candles_dt, get_periods from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ @@ -505,9 +505,7 @@ class Exchange: candles = self.get_candles( data_frequency=data_frequency, assets=[asset], - bar_count=bar_count, - start_dt=exchange_start if bar_count > 1 else None, - end_dt=exchange_end + bar_count=bar_count ) data += candles[asset] @@ -588,8 +586,28 @@ class Exchange: if len(missing_assets) > 0: writer = bundle.get_writer(start_dt, end_dt, data_frequency) + chunks = bundle.prepare_chunks( + assets=assets, + data_frequency=data_frequency, + start_dt=start_dt, + end_dt=end_dt + ) + for chunk in chunks: + log.debug('ingesting chunk for pair {}, period {}'.format( + chunk['asset'], + chunk['period'] 
+ )) + self.ingest_ctable( + asset=chunk['asset'], + data_frequency=data_frequency, + period=chunk['period'], + writer=writer + ) + + # Adding bars too recent to be contained in the consolidated + # exchanges bundles. We go directly against the exchange + # to retrieve the candles. for asset in missing_assets: - # TODO: use this only for data too recent to be in a bundle trailing_candles_dt = get_trailing_candles_dt( asset=asset, start_dt=start_dt, @@ -598,18 +616,20 @@ class Exchange: ) if trailing_candles_dt is not None: + trailing_bar_count = \ + get_periods(start_dt, end_dt, data_frequency) + # The get_history method supports multiple asset candles = self.get_candles( data_frequency=data_frequency, assets=[asset], - bar_count=bar_count, - start_dt=trailing_candles_dt, + bar_count=trailing_bar_count, end_dt=end_dt ) bundle.ingest_candles( candles=candles, - bar_count=adj_bar_count, + bar_count=trailing_bar_count, end_dt=end_dt, data_frequency=data_frequency, writer=writer @@ -866,7 +886,7 @@ class Exchange: @abstractmethod def get_candles(self, data_frequency, assets, bar_count=None, - start_date=None): + start_dt=None, end_dt=None): """ Retrieve OHLCV candles for the given assets @@ -878,7 +898,7 @@ class Exchange: The number of bar desired. (default 1) :param end_dt: datetime, optional The last bar date. - :param start_date: datetime, optional + :param start_dt: datetime, optional The first bar date. :return dict[TradingPair, dict[str, Object]]: OHLCV data @@ -915,9 +935,14 @@ class Exchange: pass @abc.abstractmethod - def get_orderbook(self): + def get_orderbook(self, asset, order_type): """ - Retrieve the account parameters. + Retrieve the the orderbook for the given trading pair. 
+ + :param asset: TradingPair + :param order_type: str + The type of orders: bid, ask or all + :return: """ pass diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index bff7bebb..3f46d348 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -245,7 +245,6 @@ class ExchangeBundle: # session_label when using a newly created writer del self._writers[data_frequency] - # TODO: these are the dates of the chunk, not the job writer = self.get_writer(writer._start_session, writer._end_session, data_frequency) writer.write( @@ -255,9 +254,13 @@ class ExchangeBundle: ) def ingest_candles(self, candles, bar_count, end_dt, data_frequency, - writer, previous_candle=dict()): + writer, previous_candle=dict()): """ - Retrieve the specified OHLCV chunk and write it to the bundle + Ingest candles obtained via the get_candles API of an exchange. + + Since exchange APIs generally only do not return candles when there + are no transactions in the period, we ffill values using the + previous candle to ensure that each period has a candle. 
:param bar_count: :param end_dt: @@ -268,7 +271,6 @@ class ExchangeBundle: :return: """ - num_candles = 0 data = [] for asset in candles: @@ -357,9 +359,8 @@ class ExchangeBundle: start = reader.first_trading_day - # TODO: temp workaround, remove when the bundles are fixed # end = reader.last_available_dt - end = reader.last_available_dt - timedelta(days=1) + end = reader.last_available_dt periods = self.calendar.minutes_in_range(start, end) @@ -407,33 +408,25 @@ class ExchangeBundle: return path - def ingest(self, data_frequency, include_symbols=None, - exclude_symbols=None, start=None, end=None, - show_progress=True, environ=os.environ): + def prepare_chunks(self, assets, data_frequency, start_dt, end_dt): """ + :param assets: :param data_frequency: - :param include_symbols: - :param exclude_symbols: - :param start: - :param end: - :param show_progress: - :param environ: + :param start_dt: + :param end_dt: :return: """ - - assets = self.get_assets(include_symbols, exclude_symbols) - start, end = self.get_adj_dates(start, end, assets, data_frequency) reader = self.get_reader(data_frequency) chunks = [] for asset in assets: try: asset_start, asset_end = \ - self.get_adj_dates(start, end, [asset], data_frequency) + self.get_adj_dates(start_dt, end_dt, [asset], + data_frequency) except ValueError: - dt += timedelta(days=1) continue sessions = self.calendar.sessions_in_range(asset_start, asset_end) @@ -451,11 +444,11 @@ class ExchangeBundle: datetime(dt.year, dt.month, 1, 0, 0, 0, 0), utc=True) - # TODO: workaround, remove when bundles are fixed month_end = pd.to_datetime( - datetime(dt.year, dt.month, month_range[1] - 1, - 23, 59, 0, 0), - utc=True) + datetime( + dt.year, dt.month, month_range[1], 23, 59, 0, 0), + utc=True + ) if month_end > asset_end: month_end = asset_end @@ -477,7 +470,32 @@ class ExchangeBundle: chunks.sort(key=lambda chunk: chunk['period_end']) + return chunks + + def ingest(self, data_frequency, include_symbols=None, + exclude_symbols=None, 
start=None, end=None, + show_progress=True, environ=os.environ): + """ + + :param data_frequency: + :param include_symbols: + :param exclude_symbols: + :param start: + :param end: + :param show_progress: + :param environ: + :return: + """ + assets = self.get_assets(include_symbols, exclude_symbols) + start, end = self.get_adj_dates(start, end, assets, data_frequency) + writer = self.get_writer(start, end, data_frequency) + chunks = self.prepare_chunks( + assets=assets, + data_frequency=data_frequency, + start_dt=start, + end_dt=end + ) with maybe_show_progress( chunks, show_progress, @@ -485,7 +503,6 @@ class ExchangeBundle: exchange=self.exchange.name, frequency=data_frequency )) as it: - for chunk in it: self.ingest_ctable( asset=chunk['asset'], diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index b8a6e522..ca13e951 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -173,7 +173,8 @@ class Poloniex(Exchange): # TODO: fetch account data and keep in cache return None - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_dt=None, end_dt=None): """ Retrieve OHLVC candles from Poloniex @@ -187,9 +188,10 @@ class Poloniex(Exchange): '5m', '15m', '30m', '2h', '4h', '1D' """ - # TODO: use BcolzMinuteBarReader to read from cache + # TODO: implement end_dt and start_dt filters + if ( - data_frequency == '5m' or data_frequency == 'minute'): # TODO: Polo does not have '1m' + data_frequency == '5m' or data_frequency == 'minute'): # TODO: Polo does not have '1m' frequency = 300 elif (data_frequency == '15m'): frequency = 900 @@ -231,6 +233,9 @@ class Poloniex(Exchange): ) def ohlc_from_candle(candle): + last_traded = pd.Timestamp.utcfromtimestamp(candle['date']) + last_traded = last_traded.replace(tzinfo=pytz.UTC) + ohlc = dict( open=np.float64(candle['open']), high=np.float64(candle['high']), @@ -238,7 
+243,7 @@ class Poloniex(Exchange): close=np.float64(candle['close']), volume=np.float64(candle['volume']), price=np.float64(candle['close']), - last_traded=pd.Timestamp.utcfromtimestamp(candle['date']) + last_traded=last_traded ) return ohlc @@ -608,24 +613,23 @@ class Poloniex(Exchange): return transactions - def get_orderbook(self, asset, type='all'): + def get_orderbook(self, asset, order_type='all'): exchange_symbol = asset.exchange_symbol data = self.api.returnOrderBook(market=exchange_symbol) result = dict() - for exchange_type in data: - if exchange_type == 'bids': - type = 'bid' - elif exchange_type == 'asks': - type = 'ask' - else: + for order_type in data: + # TODO: filter by type + if order_type != 'asks' and order_type != 'bids': continue - result[type] = [] - for entry in data[exchange_type]: + result[order_type] = [] + for entry in data[order_type]: if len(entry) == 2: - result[type].append(dict( - rate=float(entry[0]), - quantity=float(entry[1]) - )) + result[order_type].append( + dict( + rate=float(entry[0]), + quantity=float(entry[1]) + ) + ) return result diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 9130d2af..ea169d87 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -228,8 +228,11 @@ def _run(handle_data, balances = exchange.get_balances() except ExchangeRequestError as e: if attempt_index < 20: + log.warn('exchange error when retrieving balances, {} ' + 'trying again in 5 seconds'.format(e)) sleep(5) - return fetch_capital_base(attempt_index + 1) + return fetch_capital_base(exchange, attempt_index + 1) + else: raise ExchangeRequestErrorTooManyAttempts( attempts=attempt_index, diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index dda5bab9..ded6c8ca 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -69,3 +69,9 @@ class BitfinexTestCase(BaseExchangeTestCase): log.info('testing exchange balances') balances = 
self.exchange.get_balances() pass + + def test_orderbook(self): + log.info('testing order book for bitfinex') + asset = self.exchange.get_asset('eth_btc') + orderbook = self.exchange.get_orderbook(asset) + pass diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index f2158e87..b7fe3999 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -14,7 +14,7 @@ class ExchangeBundleTestCase: # start = pd.to_datetime('2017-09-01', utc=True) start = pd.to_datetime('2017-1-1', utc=True) - end = pd.to_datetime('2017-6-30', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py index 1da3313d..4f2f12a7 100644 --- a/tests/exchange/test_poloniex.py +++ b/tests/exchange/test_poloniex.py @@ -84,7 +84,8 @@ class PoloniexTestCase(BaseExchangeTestCase): pass def test_orderbook(self): - log.info('testing order book for bittrex') + log.info('testing order book for poloniex') asset = self.exchange.get_asset('eth_btc') + orderbook = self.exchange.get_orderbook(asset) pass From 1263fdd99562e969c6200f98a67bd758b741212d Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 16 Oct 2017 15:38:07 -0400 Subject: [PATCH 066/110] Testing related adjustments --- catalyst/exchange/exchange_bundle.py | 46 +++++++++++++++++++++------- catalyst/exchange/exchange_errors.py | 4 +-- tests/exchange/test_bundle.py | 42 ++++++++++++++++++++----- 3 files changed, 71 insertions(+), 21 deletions(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 3f46d348..b04b676c 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,12 +1,9 @@ import calendar import os import shutil - -import pytz from datetime import timedelta, datetime import pandas as pd -import numpy as np from logbook import Logger, INFO from catalyst import get_calendar @@ 
-14,13 +11,11 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarWriter, BcolzMinuteBarReader, BcolzMinuteBarMetadata from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader -from catalyst.exchange.bundle_utils import get_ffill_candles, get_start_dt, \ - get_periods, range_in_bundle, get_bcolz_chunk +from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ + get_bcolz_chunk, get_delta from catalyst.exchange.exchange_errors import EmptyValuesInBundleError -from catalyst.exchange.exchange_utils import get_exchange_folder, \ - get_exchange_bundles_folder +from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress -from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import ensure_directory @@ -387,11 +382,40 @@ class ExchangeBundle: if verify: nan_rows = df[df.isnull().T.any().T].index + if len(nan_rows) > 0: + dates = [] + previous_date = None + for row_date in nan_rows.values: + row_date = pd.to_datetime(row_date) + + if previous_date is None: + dates.append(row_date) + + else: + seq_date = previous_date + get_delta(1, data_frequency) + + if row_date > seq_date: + dates.append(previous_date) + dates.append(row_date) + + previous_date = row_date + + dates.append(pd.to_datetime(nan_rows.values[-1])) + + name = path.split('/')[-1] + log.warn( + '\n{name} with end minute {end_minute} has empty rows ' + 'in ranges: {dates}'.format( + name=name, + end_minute=asset.end_minute, + dates=dates + ) + ) raise EmptyValuesInBundleError( - path=path, - start=nan_rows[0], - end=nan_rows[-1] + name=name, + end_minute=asset.end_minute, + dates=dates ) data = [] diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index aaed1b3a..7ec2f59e 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -171,8 +171,8 @@ class 
BundleNotFoundError(ZiplineError): class EmptyValuesInBundleError(ZiplineError): - msg = ('Found empty values in bundle {path} between ' - '{start} and {end}.').strip() + msg = ('{name} with end minute {end_minute} has empty rows ' + 'in ranges: {dates}').strip() class PricingDataBeforeTradingError(ZiplineError): diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index b7fe3999..0dd86249 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -2,6 +2,8 @@ from logging import Logger import pandas as pd +from catalyst.data.minute_bars import BcolzMinuteBarReader +from catalyst.exchange.bundle_utils import get_bcolz_chunk from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.init_utils import get_exchange @@ -69,6 +71,33 @@ class ExchangeBundleTestCase: pass def test_merge_ctables(self): + exchange_name = 'bitfinex' + data_frequency = 'minute' + + exchange = get_exchange(exchange_name) + # asset = exchange.get_asset('gno_btc') + # + # start = pd.to_datetime('2017-5-1', utc=True) + # end = pd.to_datetime('2017-5-31', utc=True) + + asset = exchange.get_asset('neo_btc') + + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + exchange_bundle = ExchangeBundle(exchange) + + writer = exchange_bundle.get_writer(start, end, data_frequency) + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + period='2017-9', + writer=writer, + verify=True + ) + pass + + def test_minute_bundle(self): exchange_name = 'poloniex' data_frequency = 'minute' @@ -78,14 +107,11 @@ class ExchangeBundleTestCase: start = pd.to_datetime('2017-5-1', utc=True) end = pd.to_datetime('2017-5-31', utc=True) - exchange_bundle = ExchangeBundle(exchange) - - writer = exchange_bundle.get_writer(start, end, data_frequency) - exchange_bundle.ingest_ctable( - asset=asset, + path = get_bcolz_chunk( + exchange_name=exchange_name, + symbol=asset.symbol, 
data_frequency=data_frequency, - period='2017-5', - writer=writer, - verify=True + period='2017-5' ) + reader = BcolzMinuteBarReader(path) pass From e98d10c41b440bd9721e557412473b3cfa3898a8 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 3 Oct 2017 09:11:59 -0600 Subject: [PATCH 067/110] Fix floats for volume in data.history --- catalyst/data/_equities.pyx | 4 +--- catalyst/data/dispatch_bar_reader.py | 5 ++++- catalyst/data/minute_bars.py | 2 +- catalyst/data/us_equity_pricing.py | 16 ++++++---------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/catalyst/data/_equities.pyx b/catalyst/data/_equities.pyx index 563fa56a..c0652b02 100644 --- a/catalyst/data/_equities.pyx +++ b/catalyst/data/_equities.pyx @@ -215,13 +215,11 @@ cpdef _read_bcolz_data(ctable_t table, else: continue - if column_name in ['open', 'high', 'low', 'close']: + if column_name in ['open', 'high', 'low', 'close', 'volume']: where_nan = (outbuf == 0) outbuf_as_float = outbuf.astype(float64) * .000000001 outbuf_as_float[where_nan] = NAN results.append(outbuf_as_float) - elif column_name != 'volume': - results.append(outbuf.astype(uint32)) else: results.append(outbuf) return results diff --git a/catalyst/data/dispatch_bar_reader.py b/catalyst/data/dispatch_bar_reader.py index e72dab7b..7dfd7e95 100644 --- a/catalyst/data/dispatch_bar_reader.py +++ b/catalyst/data/dispatch_bar_reader.py @@ -18,6 +18,7 @@ from numpy import ( full, nan, int64, + float64, zeros ) from six import iteritems, with_metaclass @@ -70,7 +71,9 @@ class AssetDispatchBarReader(with_metaclass(ABCMeta)): return self._dt_window_size(start_dt, end_dt), num_sids def _make_raw_array_out(self, field, shape): - if field != 'volume' and field != 'sid': + if field == 'volume': + out = zeros(shape, dtype=float64) + elif field != 'sid': out = full(shape, nan) else: out = zeros(shape, dtype=int64) diff --git a/catalyst/data/minute_bars.py b/catalyst/data/minute_bars.py index 9cae04af..bd4d5b39 100644 --- 
a/catalyst/data/minute_bars.py +++ b/catalyst/data/minute_bars.py @@ -1248,7 +1248,7 @@ class BcolzMinuteBarReader(MinuteBarReader): if field != 'volume': out = np.full(shape, np.nan) else: - out = np.zeros(shape, dtype=np.uint64) + out = np.zeros(shape, dtype=np.float64) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) diff --git a/catalyst/data/us_equity_pricing.py b/catalyst/data/us_equity_pricing.py index 901a0e60..157d2f7b 100644 --- a/catalyst/data/us_equity_pricing.py +++ b/catalyst/data/us_equity_pricing.py @@ -441,7 +441,7 @@ class BcolzDailyBarWriter(object): dates = raw_data.index.values.astype('datetime64[s]') check_uint32_safe(dates.max().view(np.int64), 'day') processed['day'] = dates.astype('uint32') - processed['volume'] = raw_data.volume.astype('uint64') + processed['volume'] = (raw_data.volume * PRICE_ADJUSTMENT_FACTOR).astype('uint64') return ctable.fromdataframe(processed) @@ -494,9 +494,8 @@ class BcolzDailyBarReader(SessionBarReader): The data in these columns is interpreted as follows: - - Price columns ('open', 'high', 'low', 'close') are interpreted as 1000 * - as-traded dollar value. - - Volume is interpreted as as-traded volume. + - Price columns ('open', 'high', 'low', 'close') and Volume are interpreted + as 10^9 * as-traded dollar value. - Day is interpreted as seconds since midnight UTC, Jan 1, 1970. - Id is the asset id of the row. 
@@ -762,13 +761,10 @@ class BcolzDailyBarReader(SessionBarReader): """ ix = self.sid_day_index(sid, dt) price = self._spot_col(field)[ix] - if field != 'volume': - if price == 0: - return nan - else: - return price / PRICE_ADJUSTMENT_FACTOR + if field != 'volume' and price == 0: + return nan else: - return price + return price / PRICE_ADJUSTMENT_FACTOR class PanelBarReader(SessionBarReader): From 9bdd8aba48cf08eb740598767eb92992e157e253 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 17 Oct 2017 03:00:36 -0400 Subject: [PATCH 068/110] Implemented daily data loader and related fixes --- catalyst/exchange/exchange_algorithm.py | 2 +- catalyst/exchange/exchange_bundle.py | 137 ++++++++++++++++-------- tests/exchange/test_bundle.py | 30 +++--- 3 files changed, 106 insertions(+), 63 deletions(-) diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index 3304fcc1..80f6a312 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -530,7 +530,7 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): self.add_exposure_stats(minute_stats) print_df = pd.DataFrame(list(self.minute_stats)) - log.debug( + log.info( 'statistics for the last {stats_minutes} minutes:\n{stats}'.format( stats_minutes=self.stats_minutes, stats=get_pretty_stats( diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index b04b676c..d85f417e 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -13,7 +13,8 @@ from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ get_bcolz_chunk, get_delta -from catalyst.exchange.exchange_errors import EmptyValuesInBundleError +from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ + InvalidHistoryFrequencyError from catalyst.exchange.exchange_utils import 
get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -106,8 +107,8 @@ class ExchangeBundle: except IOError: log.debug('no reader data found in {}'.format(input_dir)) else: - raise ValueError( - 'invalid frequency {}'.format(data_frequency) + raise InvalidHistoryFrequencyError( + frequency=data_frequency ) return self._readers[data_frequency] @@ -186,8 +187,8 @@ class ExchangeBundle: end_session=end_session ) else: - raise ValueError( - 'invalid frequency {}'.format(data_frequency) + raise InvalidHistoryFrequencyError( + frequency=data_frequency ) return self._writers[key] @@ -326,7 +327,7 @@ class ExchangeBundle: """ def ingest_ctable(self, asset, data_frequency, period, writer, - verify=False, cleanup=False): + empty_rows_behavior='warn', cleanup=False): """ Merge a ctable bundle chunk into the main bundle for the exchange. @@ -334,7 +335,7 @@ class ExchangeBundle: :param data_frequency: str :param period: str :param writer: - :param verify: bool + :param empty_rows_behavior: str Ensure that the bundle does not have any missing data. 
:param cleanup: bool @@ -350,22 +351,41 @@ class ExchangeBundle: period=period ) - reader = BcolzMinuteBarReader(path) - - start = reader.first_trading_day - - # end = reader.last_available_dt - end = reader.last_available_dt - - periods = self.calendar.minutes_in_range(start, end) - sid = asset.sid - arrays = reader.load_raw_arrays( - fields=['open', 'high', 'low', 'close', 'volume'], - start_dt=start, - end_dt=end, - sids=[sid] - ) + if data_frequency == 'minute': + reader = BcolzMinuteBarReader(path) + + start = reader.first_trading_day + end = reader.last_available_dt + + periods = self.calendar.minutes_in_range(start, end) + + arrays = reader.load_raw_arrays( + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start, + end_dt=end, + sids=[sid] + ) + + elif data_frequency == 'daily': + reader = BcolzDailyBarReader(path) + + start = writer._start_session + end = writer._end_session + + periods = self.calendar.sessions_in_range(start, end) + + # Note that the parameters convention is totally different + # from the minute reader. 
+ arrays = reader.load_raw_arrays( + columns=['open', 'high', 'low', 'close', 'volume'], + start_date=start, + end_date=end, + assets=[asset] + ) + + else: + raise InvalidHistoryFrequencyError(frequency=data_frequency) ohlcv = dict( open=arrays[0].flatten(), @@ -380,7 +400,7 @@ class ExchangeBundle: index=periods ) - if verify: + if empty_rows_behavior is not 'ignore': nan_rows = df[df.isnull().T.any().T].index if len(nan_rows) > 0: @@ -404,19 +424,22 @@ class ExchangeBundle: dates.append(pd.to_datetime(nan_rows.values[-1])) name = path.split('/')[-1] - log.warn( - '\n{name} with end minute {end_minute} has empty rows ' - 'in ranges: {dates}'.format( + if empty_rows_behavior == 'warn': + log.warn( + '\n{name} with end minute {end_minute} has empty rows ' + 'in ranges: {dates}'.format( + name=name, + end_minute=asset.end_minute, + dates=dates + ) + ) + + elif empty_rows_behavior == 'raise': + raise EmptyValuesInBundleError( name=name, end_minute=asset.end_minute, dates=dates ) - ) - raise EmptyValuesInBundleError( - name=name, - end_minute=asset.end_minute, - dates=dates - ) data = [] if not df.empty: @@ -458,34 +481,53 @@ class ExchangeBundle: periods = [] dt = sessions[0] while dt <= sessions[-1]: - period = '{}-{}'.format(dt.year, dt.month) + period = '{}-{}'.format(dt.year, dt.month) \ + if data_frequency == 'minute' else '{}'.format(dt.year) if period not in periods: periods.append(period) - month_range = calendar.monthrange(dt.year, dt.month) - month_start = pd.to_datetime( - datetime(dt.year, dt.month, 1, 0, 0, 0, 0), - utc=True) + if data_frequency == 'minute': + month_range = calendar.monthrange(dt.year, dt.month) + period_start = pd.to_datetime( + datetime(dt.year, dt.month, 1, 0, 0, 0, 0), + utc=True) - month_end = pd.to_datetime( - datetime( - dt.year, dt.month, month_range[1], 23, 59, 0, 0), - utc=True - ) + period_end = pd.to_datetime( + datetime( + dt.year, dt.month, month_range[1], 23, 59, 0, + 0), + utc=True + ) - if month_end > asset_end: - 
month_end = asset_end + elif data_frequency == 'daily': + period_start = pd.to_datetime( + datetime(dt.year, 1, 1, 0, 0, 0, 0), + utc=True) + + period_end = pd.to_datetime( + datetime( + dt.year, 12, 31, 23, 59, 0, 0), + utc=True + ) + else: + raise InvalidHistoryFrequencyError( + frequency=data_frequency + ) + + if period_end > asset_end: + period_end = asset_end has_data = \ - range_in_bundle(asset, month_start, month_end, reader) + range_in_bundle(asset, period_start, period_end, + reader) if not has_data: log.debug('adding period: {}'.format(period)) chunks.append( dict( asset=asset, - period_end=month_end, + period_end=period_end, period=period ) ) @@ -532,5 +574,6 @@ class ExchangeBundle: asset=chunk['asset'], data_frequency=data_frequency, period=chunk['period'], - writer=writer + writer=writer, + empty_rows_behavior='ignore' ) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 0dd86249..a0738dab 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -54,15 +54,15 @@ class ExchangeBundleTestCase: def test_ingest_daily(self): exchange_name = 'bitfinex' - start = pd.to_datetime('2017-09-01', utc=True) - end = pd.Timestamp.utcnow() + start = pd.to_datetime('2017-01-01', utc=True) + end = pd.to_datetime('2017-09-30', utc=True) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( data_frequency='daily', - include_symbols='neo_btc', + include_symbols='neo_btc,bch_btc,eth_btc', exclude_symbols=None, start=start, end=end, @@ -71,19 +71,19 @@ class ExchangeBundleTestCase: pass def test_merge_ctables(self): - exchange_name = 'bitfinex' + exchange_name = 'poloniex' data_frequency = 'minute' exchange = get_exchange(exchange_name) - # asset = exchange.get_asset('gno_btc') + asset = exchange.get_asset('gno_btc') + + start = pd.to_datetime('2017-5-1', utc=True) + end = pd.to_datetime('2017-5-31', utc=True) + + # asset = 
exchange.get_asset('neo_btc') # - # start = pd.to_datetime('2017-5-1', utc=True) - # end = pd.to_datetime('2017-5-31', utc=True) - - asset = exchange.get_asset('neo_btc') - - start = pd.to_datetime('2017-9-1', utc=True) - end = pd.to_datetime('2017-9-30', utc=True) + # start = pd.to_datetime('2017-9-1', utc=True) + # end = pd.to_datetime('2017-9-30', utc=True) exchange_bundle = ExchangeBundle(exchange) @@ -91,9 +91,9 @@ class ExchangeBundleTestCase: exchange_bundle.ingest_ctable( asset=asset, data_frequency=data_frequency, - period='2017-9', + period='2017-5', writer=writer, - verify=True + empty_rows_behavior='raise' ) pass @@ -111,7 +111,7 @@ class ExchangeBundleTestCase: exchange_name=exchange_name, symbol=asset.symbol, data_frequency=data_frequency, - period='2017-5' + period='2017-5', ) reader = BcolzMinuteBarReader(path) pass From a4389ffea4e869300be4d5a34b22d29f43ee8f90 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 17 Oct 2017 10:17:35 -0600 Subject: [PATCH 069/110] download symbols.json when older than 1 day --- catalyst/exchange/exchange_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 1e73805a..1335561c 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -8,7 +8,7 @@ import pandas as pd from catalyst.exchange.exchange_errors import ExchangeAuthNotFound, \ ExchangeSymbolsNotFound -from catalyst.utils.paths import data_root, ensure_directory +from catalyst.utils.paths import data_root, ensure_directory, last_modified_time SYMBOLS_URL = 'https://s3.amazonaws.com/enigmaco/catalyst-exchanges/' \ '{exchange}/symbols.json' @@ -40,7 +40,8 @@ def download_exchange_symbols(exchange_name, environ=None): def get_exchange_symbols(exchange_name, environ=None): filename = get_exchange_symbols_filename(exchange_name) - if not os.path.isfile(filename): + if not os.path.isfile(filename) or \ + 
pd.Timedelta(pd.Timestamp('now', tz='UTC') - last_modified_time(filename)).days > 1: download_exchange_symbols(exchange_name, environ) if os.path.isfile(filename): From 105fee0fb95ffa173ee22b644b59d217fb8cf298 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 17 Oct 2017 15:35:49 -0400 Subject: [PATCH 070/110] Fixes to the daily data --- catalyst/exchange/exchange_bundle.py | 22 ++++++++++------------ tests/exchange/test_bundle.py | 19 ++++++++----------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index d85f417e..6da300f7 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -175,17 +175,13 @@ class ExchangeBundle: ) elif data_frequency == 'daily': - if len(os.listdir(output_dir)) > 0: - self._writers[key] = \ - BcolzDailyBarWriter.open(output_dir, end_dt) - else: - end_session = end_dt.floor('1d') - self._writers[key] = BcolzDailyBarWriter( - filename=output_dir, - calendar=self.calendar, - start_session=start_dt, - end_session=end_session - ) + end_session = end_dt.floor('1d') + self._writers[key] = BcolzDailyBarWriter( + filename=output_dir, + calendar=self.calendar, + start_session=start_dt, + end_session=end_session + ) else: raise InvalidHistoryFrequencyError( frequency=data_frequency @@ -327,7 +323,7 @@ class ExchangeBundle: """ def ingest_ctable(self, asset, data_frequency, period, writer, - empty_rows_behavior='warn', cleanup=False): + empty_rows_behavior='strip', cleanup=False): """ Merge a ctable bundle chunk into the main bundle for the exchange. 
@@ -440,6 +436,8 @@ class ExchangeBundle: end_minute=asset.end_minute, dates=dates ) + else: + df.dropna(inplace=True) data = [] if not df.empty: diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index a0738dab..ea3da2f8 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -71,14 +71,14 @@ class ExchangeBundleTestCase: pass def test_merge_ctables(self): - exchange_name = 'poloniex' - data_frequency = 'minute' + exchange_name = 'bitfinex' + data_frequency = 'daily' exchange = get_exchange(exchange_name) - asset = exchange.get_asset('gno_btc') + asset = exchange.get_asset('neo_btc') - start = pd.to_datetime('2017-5-1', utc=True) - end = pd.to_datetime('2017-5-31', utc=True) + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) # asset = exchange.get_asset('neo_btc') # @@ -91,9 +91,9 @@ class ExchangeBundleTestCase: exchange_bundle.ingest_ctable( asset=asset, data_frequency=data_frequency, - period='2017-5', + period='2017', writer=writer, - empty_rows_behavior='raise' + empty_rows_behavior='strip' ) pass @@ -102,10 +102,7 @@ class ExchangeBundleTestCase: data_frequency = 'minute' exchange = get_exchange(exchange_name) - asset = exchange.get_asset('gno_btc') - - start = pd.to_datetime('2017-5-1', utc=True) - end = pd.to_datetime('2017-5-31', utc=True) + asset = exchange.get_asset('neo_btc') path = get_bcolz_chunk( exchange_name=exchange_name, From d21cc36bef3a2003e81e022c59a14e070afb6556 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 17 Oct 2017 16:49:04 -0400 Subject: [PATCH 071/110] Fixes a start date issue --- catalyst/exchange/bundle_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 8689a8d0..2dd7da07 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -92,7 +92,7 @@ def get_periods(start_dt, end_dt, data_frequency): def 
get_start_dt(end_dt, bar_count, data_frequency): - periods = bar_count - 1 + periods = bar_count if periods > 1: delta = get_delta(periods, data_frequency) start_dt = end_dt - delta From ead6769ea2608425a24da763cb1f9b149b57e3be Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 17 Oct 2017 15:39:20 -0600 Subject: [PATCH 072/110] retrieve benchmark from ExchangeBundle --- catalyst/data/loader.py | 42 +++++++++++++++++----------- catalyst/exchange/exchange.py | 2 +- catalyst/exchange/exchange_bundle.py | 4 +-- catalyst/utils/run_algo.py | 2 ++ 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index 48fd2b13..4167bd84 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -96,10 +96,10 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT if trading_day is None: trading_day = get_calendar('OPEN').trading_day - if trading_days is None: - trading_days = get_calendar('OPEN').all_sessions + #if trading_days is None: + # trading_days = get_calendar('OPEN').schedule - first_date = trading_days[1] + first_date = get_calendar('OPEN').first_trading_session now = pd.Timestamp.utcnow() # We expect to have benchmark and treasury data that's current up until @@ -116,6 +116,7 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT # We'll attempt to download new data if the latest entry in our cache is # before this date. 
+ ''' if(bundle_data): # If we are using the bundle to retrieve the cryptobenchmark, find the last # date for which there is trading data in the bundle @@ -124,19 +125,28 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT last_date = pd.to_datetime(bundle_data.daily_bar_reader._spot_col('day')[ix],unit='s') else: last_date = trading_days[trading_days.get_loc(now, method='ffill') - 2] - - br = ensure_crypto_benchmark_data( - bm_symbol, - first_date, - last_date, - now, - # We need the trading_day to figure out the close prior to the first - # date so that we can compute returns for the first date. - trading_day, - bundle, - bundle_data, - environ, - ) + ''' + last_date = trading_days[trading_days.get_loc(now, method='ffill') - 1] + + # This is exceptional, since placing the import at the module scope breaks things + # and it's only needed here + from catalyst.exchange.poloniex.poloniex import Poloniex + + exchange = Poloniex('','','') + btc_usdt = exchange.get_asset('btc_usdt') + + # exchange.get_history_window() already ensures that we have the right data + # for the right dates + br = exchange.get_history_window( + assets = [btc_usdt,], + end_dt = last_date, + bar_count = pd.Timedelta(last_date - first_date).days, + frequency = '1d', + field = 'close', + data_frequency = 'daily') + br.columns = ['close'] + br = br.pct_change(1).iloc[1:] + # Override first_date for treasury data since we have it for many more years # and is independent of crypto data first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC') diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 84e71aba..59b3ba5a 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -597,7 +597,7 @@ class Exchange: chunk['asset'], chunk['period'] )) - self.ingest_ctable( + bundle.ingest_ctable( asset=chunk['asset'], data_frequency=data_frequency, period=chunk['period'], diff --git a/catalyst/exchange/exchange_bundle.py 
b/catalyst/exchange/exchange_bundle.py index 6da300f7..e9f675e6 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -366,8 +366,8 @@ class ExchangeBundle: elif data_frequency == 'daily': reader = BcolzDailyBarReader(path) - start = writer._start_session - end = writer._end_session + start = reader.first_trading_day + end = reader.last_available_dt periods = self.calendar.sessions_in_range(start, end) diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index ea169d87..1cb2bd66 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -26,6 +26,7 @@ from functools import partial from catalyst.finance.trading import TradingEnvironment from catalyst.utils.calendars import get_calendar from catalyst.utils.factory import create_simulation_parameters +from catalyst.data.loader import load_crypto_market_data import catalyst.utils.paths as pth from catalyst.exchange.exchange_algorithm import ExchangeTradingAlgorithmLive, \ @@ -190,6 +191,7 @@ def _run(handle_data, open_calendar = get_calendar('OPEN') env = TradingEnvironment( + load=partial(load_crypto_market_data, environ=environ), environ=environ, exchange_tz='UTC', asset_db_path=None # We don't need an asset db, we have exchanges From 4649b31d89a221168850c6d5974a93045743e948 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 17 Oct 2017 18:29:48 -0400 Subject: [PATCH 073/110] Fixed issues with daily bundles --- catalyst/examples/simple_loop.py | 11 +- catalyst/exchange/data_portal_exchange.py | 29 +++-- catalyst/exchange/exchange.py | 105 ++--------------- catalyst/exchange/exchange_bundle.py | 136 +++++++++++----------- tests/exchange/test_bundle.py | 6 +- 5 files changed, 108 insertions(+), 179 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 84ecc488..51c68426 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -7,7 +7,7 @@ from catalyst.api import 
symbol def initialize(context): print('initializing') - context.asset = symbol('gno_btc') + context.asset = symbol('etc_btc') def handle_data(context, data): @@ -19,18 +19,19 @@ def handle_data(context, data): prices = data.history( context.asset, fields='price', - bar_count=50, - frequency='1m' + bar_count=15, + frequency='1d' ) rsi = talib.RSI(prices.values, timeperiod=14)[-1] print('got rsi: {}'.format(rsi)) + pass run_algorithm( capital_base=250, - start=pd.to_datetime('2017-9-5', utc=True), + start=pd.to_datetime('2017-9-01', utc=True), end=pd.to_datetime('2017-9-30', utc=True), - data_frequency='minute', + data_frequency='daily', initialize=initialize, handle_data=handle_data, analyze=None, diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index fe9acd09..ed2f8ed6 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -299,26 +299,36 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): raise InvalidHistoryFrequencyError(frequency=data_frequency) try: - values = reader.load_raw_arrays( + # values = reader.load_raw_arrays( + # fields=[field], + # start_dt=dts[0], + # end_dt=dts[-1], + # sids=[asset.sid for asset in assets], + # )[0] + values = bundle.get_raw_arrays( + assets=assets, fields=[field], start_dt=dts[0], end_dt=dts[-1], - sids=[asset.sid for asset in assets], + data_frequency=data_frequency )[0] except Exception: + first_trading_day = self._get_first_trading_day(assets) + symbols = [asset.symbol.encode('utf-8') for asset in assets] + + symbol_list = ','.join(symbols) raise PricingDataNotLoadedError( field=field, - first_trading_day=self._get_first_trading_day(assets), + first_trading_day=first_trading_day, exchange=exchange.name, - symbols=[asset.symbol.encode('utf-8') for asset in assets], + symbols=symbols, + symbol_list=symbol_list ) series = dict() for index, asset in enumerate(assets): - asset_values = [] - for value in values: - 
asset_values.append(value[index]) + asset_values = values[:, index] value_series = pd.Series(asset_values, index=dts) series[asset] = value_series @@ -333,7 +343,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): first_trading_day=first_trading_day, exchange=assets[0].exchange, symbols=[asset.symbol.encode('utf-8') for asset in assets], - dt = dt, + dt=dt, ) def get_exchange_spot_value(self, exchange, assets, field, dt, @@ -358,7 +368,8 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): first_trading_day=self._get_first_trading_day(assets), exchange=exchange.name, symbols=[asset.symbol.encode('utf-8') for asset in assets], - symbol_list = ''.join([asset.symbol.encode('utf-8') for asset in assets]) + symbol_list=''.join( + [asset.symbol.encode('utf-8') for asset in assets]) ) return values diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 84e71aba..8785402f 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -419,98 +419,6 @@ class Exchange: return value - @deprecated - def get_history(self, assets, end_dt, bar_count, data_frequency, - fallback_exchange=True): - """ - Retrieve OHLCV bars from the Catalyst and/or exchange API. - - If Catalyst does not have the full data set, retrieve the missing - portion from the exchange API if the exchanges supports - historical data. - - :param assets: list[TradingPair] - The TradingPair asset. - :param data_frequency: str - The bar frequency: daily or minute - :param bar_count: int - The number of bars desired. - :param end: datetime - The last trading date of the last bar. 
- - :return: - """ - candles = dict() - for asset in assets: - candles[asset] = self.get_asset_history( - asset=asset, - end=end_dt, - bar_count=bar_count, - data_frequency=data_frequency, - fallback_exchange=fallback_exchange - ) - return candles - - @deprecated - def get_asset_history(self, asset, end, bar_count, data_frequency, - fallback_exchange=True): - """ - Retrieve the OHLVC bars of a single asset. - - :param asset: TradingPair - The TradingPair asset. - :param data_frequency: str - The bar frequency: daily or minute - :param bar_count: int - The number of bars desired. - :param end: datetime - The last trading date of the last bar. - :return: - """ - start = get_start_dt(end, bar_count, data_frequency) - - exchange_start = None - catalyst_end = None - - if asset.end_minute is not None and start < asset.end_minute: - catalyst_start = start - if end <= asset.end_minute: - catalyst_end = end - else: - catalyst_end = asset.end_minute - - delta = timedelta(minutes=1) \ - if data_frequency == 'minute' else timedelta(days=1) - exchange_start = catalyst_end + delta - - exchange_end = end - - else: - exchange_end = end - exchange_start = start - - data = [] - if catalyst_end is not None: - # TODO: support multiple assets in the Catalyst API. 
- candles = bundle_utils.get_history( - exchange_name=self.name, - data_frequency=data_frequency, - symbol=asset.symbol, # TODO: use Catalyst symbol - start=catalyst_start, - end=catalyst_end - ) - data += candles - - if exchange_start is not None and fallback_exchange: - candles = self.get_candles( - data_frequency=data_frequency, - assets=[asset], - bar_count=bar_count - ) - data += candles[asset] - - return data - def get_history_window(self, assets, end_dt, @@ -565,10 +473,12 @@ class Exchange: raise InvalidHistoryFrequencyError(frequency) if unit.lower() == 'd': - data_frequency = 'daily' + if data_frequency != 'daily': + raise InvalidHistoryFrequencyError(frequency=frequency) elif unit.lower() == 'm': - data_frequency = 'minute' + if data_frequency != 'minute': + raise InvalidHistoryFrequencyError(frequency=frequency) else: raise InvalidHistoryFrequencyError(frequency) @@ -635,12 +545,12 @@ class Exchange: writer=writer ) - reader = bundle.get_reader(data_frequency) - values = reader.load_raw_arrays( + values = bundle.get_raw_arrays( + assets=assets, fields=[field], start_dt=start_dt, end_dt=end_dt, - sids=[asset.sid for asset in assets], + data_frequency=data_frequency )[0] series = dict() @@ -648,6 +558,7 @@ class Exchange: all_dates = [] asset_values = [] + # TODO: use numpy to avoid the loop date = start_dt for value in values: all_dates.append(date) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 6da300f7..090775aa 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -78,40 +78,40 @@ class ExchangeBundle: return start, end - def get_reader(self, data_frequency): + def get_reader(self, data_frequency, path=None): """ Get a data writer object, either a new object or from cache :return: BcolzMinuteBarReader or BcolzDailyBarReader """ - if data_frequency in self._readers \ - and self._readers[data_frequency] is not None: - return self._readers[data_frequency] + if path 
is None: + root = get_exchange_folder(self.exchange.name) + path = BUNDLE_NAME_TEMPLATE.format( + root=root, + frequency=data_frequency + ) - root = get_exchange_folder(self.exchange.name) - input_dir = BUNDLE_NAME_TEMPLATE.format( - root=root, - frequency=data_frequency - ) + if path in self._readers and self._readers[path] is not None: + return self._readers[path] - self._readers[data_frequency] = None + self._readers[path] = None if data_frequency == 'minute': try: - self._readers[data_frequency] = BcolzMinuteBarReader(input_dir) + self._readers[path] = BcolzMinuteBarReader(path) except IOError: - log.debug('no reader data found in {}'.format(input_dir)) + log.debug('no reader data found in {}'.format(path)) elif data_frequency == 'daily': try: - self._readers[data_frequency] = BcolzDailyBarReader(input_dir) + self._readers[path] = BcolzDailyBarReader(path) except IOError: - log.debug('no reader data found in {}'.format(input_dir)) + log.debug('no reader data found in {}'.format(path)) else: raise InvalidHistoryFrequencyError( frequency=data_frequency ) - return self._readers[data_frequency] + return self._readers[path] def update_metadata(self, writer, start_dt, end_dt): pass @@ -122,21 +122,21 @@ class ExchangeBundle: :return: BcolzMinuteBarWriter or BcolzDailyBarWriter """ - key = data_frequency - if key in self._writers: - return self._writers[key] - root = get_exchange_folder(self.exchange.name) - output_dir = BUNDLE_NAME_TEMPLATE.format( + path = BUNDLE_NAME_TEMPLATE.format( root=root, frequency=data_frequency ) - ensure_directory(output_dir) + + if path in self._writers: + return self._writers[path] + + ensure_directory(path) if data_frequency == 'minute': - if len(os.listdir(output_dir)) > 0: + if len(os.listdir(path)) > 0: - metadata = BcolzMinuteBarMetadata.read(output_dir) + metadata = BcolzMinuteBarMetadata.read(path) write_metadata = False if start_dt < metadata.start_session: @@ -152,9 +152,9 @@ class ExchangeBundle: else: end_session = 
metadata.end_session - self._writers[key] = \ + self._writers[path] = \ BcolzMinuteBarWriter( - output_dir, + path, metadata.calendar, start_session, end_session, @@ -164,8 +164,8 @@ class ExchangeBundle: write_metadata=write_metadata ) else: - self._writers[key] = BcolzMinuteBarWriter( - rootdir=output_dir, + self._writers[path] = BcolzMinuteBarWriter( + rootdir=path, calendar=self.calendar, minutes_per_day=self.minutes_per_day, start_session=start_dt, @@ -176,8 +176,8 @@ class ExchangeBundle: elif data_frequency == 'daily': end_session = end_dt.floor('1d') - self._writers[key] = BcolzDailyBarWriter( - filename=output_dir, + self._writers[path] = BcolzDailyBarWriter( + filename=path, calendar=self.calendar, start_session=start_dt, end_session=end_session @@ -187,7 +187,7 @@ class ExchangeBundle: frequency=data_frequency ) - return self._writers[key] + return self._writers[path] def filter_existing_assets(self, assets, start_dt, end_dt, data_frequency): """ @@ -315,6 +315,30 @@ class ExchangeBundle: return data + def get_raw_arrays(self, assets, start_dt, end_dt, fields, data_frequency, + path=None): + reader = self.get_reader(data_frequency, path) + + if data_frequency == 'minute': + values = reader.load_raw_arrays( + fields=fields, + start_dt=start_dt, + end_dt=end_dt, + sids=[asset.sid for asset in assets], + ) + else: + + # Note that the parameters convention is totally different + # from the minute reader. + values = reader.load_raw_arrays( + columns=fields, + start_date=start_dt, + end_date=end_dt, + assets=assets + ) + + return values + def download_bundle(self, name): """ @@ -347,41 +371,23 @@ class ExchangeBundle: period=period ) - sid = asset.sid - if data_frequency == 'minute': - reader = BcolzMinuteBarReader(path) + # TODO: is this the optimal approach? 
+ # Ensures that we read exact range which we want to write + start_dt = writer._start_session + end_dt = writer._end_session - start = reader.first_trading_day - end = reader.last_available_dt + periods = self.calendar.minutes_in_range(start_dt, end_dt) \ + if data_frequency == 'minute' \ + else self.calendar.sessions_in_range(start_dt, end_dt) - periods = self.calendar.minutes_in_range(start, end) - - arrays = reader.load_raw_arrays( - fields=['open', 'high', 'low', 'close', 'volume'], - start_dt=start, - end_dt=end, - sids=[sid] - ) - - elif data_frequency == 'daily': - reader = BcolzDailyBarReader(path) - - start = writer._start_session - end = writer._end_session - - periods = self.calendar.sessions_in_range(start, end) - - # Note that the parameters convention is totally different - # from the minute reader. - arrays = reader.load_raw_arrays( - columns=['open', 'high', 'low', 'close', 'volume'], - start_date=start, - end_date=end, - assets=[asset] - ) - - else: - raise InvalidHistoryFrequencyError(frequency=data_frequency) + arrays = self.get_raw_arrays( + assets=[asset], + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start_dt, + end_dt=end_dt, + data_frequency=data_frequency, + path=path + ) ohlcv = dict( open=arrays[0].flatten(), @@ -442,7 +448,7 @@ class ExchangeBundle: data = [] if not df.empty: df.sort_index(inplace=True) - data.append((sid, df)) + data.append((asset.sid, df)) self._write(data, writer, data_frequency) @@ -573,5 +579,5 @@ class ExchangeBundle: data_frequency=data_frequency, period=chunk['period'], writer=writer, - empty_rows_behavior='ignore' + empty_rows_behavior='strip' ) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index ea3da2f8..c0110476 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -15,15 +15,15 @@ class ExchangeBundleTestCase: exchange_name = 'poloniex' # start = pd.to_datetime('2017-09-01', utc=True) - start = pd.to_datetime('2017-1-1', utc=True) + 
start = pd.to_datetime('2017-9-1', utc=True) end = pd.to_datetime('2017-9-30', utc=True) exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( - data_frequency='minute', - include_symbols='gno_btc', + data_frequency='daily', + include_symbols='etc_btc', # include_symbols=None, exclude_symbols=None, start=start, From 1a4dfe8abbc4fc740c6ed01f5b767b6e1170234c Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 17 Oct 2017 21:01:00 -0400 Subject: [PATCH 074/110] Fixed date range issues and issues retrieving the benchmark data --- catalyst/data/loader.py | 80 ++++++++++------- catalyst/exchange/bundle_utils.py | 82 ++++++++++++++++- catalyst/exchange/data_portal_exchange.py | 10 +-- catalyst/exchange/exchange.py | 7 +- catalyst/exchange/exchange_bundle.py | 104 ++++++++-------------- catalyst/exchange/exchange_errors.py | 1 - tests/exchange/test_bundle.py | 2 + 7 files changed, 173 insertions(+), 113 deletions(-) diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index 4167bd84..fa80317a 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -37,11 +37,11 @@ logger = logbook.Logger('Loader') # Mapping from index symbol to appropriate bond data INDEX_MAPPING = { 'SPY': - (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), + (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), '^GSPTSE': - (treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'), + (treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'), '^FTSE': # use US treasuries until UK bonds implemented - (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), + (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), } ONE_HOUR = pd.Timedelta(hours=1) @@ -89,14 +89,19 @@ def has_data_for_dates(series_or_df, first_date, last_date): if not isinstance(dts, pd.DatetimeIndex): raise TypeError("Expected a DatetimeIndex, but got %s." 
% type(dts)) first, last = dts[[0, -1]].tz_localize(None) - return (first <= first_date.tz_localize(None)) and (last >= last_date.tz_localize(None)) + return (first <= first_date.tz_localize(None)) and ( + last >= last_date.tz_localize(None)) -def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT_BTC', - bundle=None, bundle_data=None, environ=None): +def load_crypto_market_data(trading_day=None, trading_days=None, + bm_symbol=None, bundle=None, bundle_data=None, + environ=None, exchange=None): if trading_day is None: trading_day = get_calendar('OPEN').trading_day - #if trading_days is None: + + # TODO: consider making configurable + bm_symbol = 'btc_usdt' + # if trading_days is None: # trading_days = get_calendar('OPEN').schedule first_date = get_calendar('OPEN').first_trading_session @@ -128,28 +133,29 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT ''' last_date = trading_days[trading_days.get_loc(now, method='ffill') - 1] - # This is exceptional, since placing the import at the module scope breaks things - # and it's only needed here - from catalyst.exchange.poloniex.poloniex import Poloniex + if exchange is None: + # This is exceptional, since placing the import at the module scope + # breaks things and it's only needed here + from catalyst.exchange.poloniex.poloniex import Poloniex + exchange = Poloniex('', '', '') - exchange = Poloniex('','','') - btc_usdt = exchange.get_asset('btc_usdt') + benchmark_asset = exchange.get_asset(bm_symbol) # exchange.get_history_window() already ensures that we have the right data # for the right dates br = exchange.get_history_window( - assets = [btc_usdt,], - end_dt = last_date, - bar_count = pd.Timedelta(last_date - first_date).days, - frequency = '1d', - field = 'close', - data_frequency = 'daily') + assets=[benchmark_asset], + end_dt=last_date, + bar_count=pd.Timedelta(last_date - first_date).days, + frequency='1d', + field='close', + data_frequency='daily') 
br.columns = ['close'] br = br.pct_change(1).iloc[1:] # Override first_date for treasury data since we have it for many more years # and is independent of crypto data - first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC') + first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC') tc = ensure_treasury_data( bm_symbol, first_date_treasury, @@ -158,7 +164,8 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT environ, ) benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] - treasury_curves = tc[tc.index.slice_indexer(first_date_treasury, last_date)] + treasury_curves = tc[ + tc.index.slice_indexer(first_date_treasury, last_date)] return benchmark_returns, treasury_curves @@ -256,12 +263,11 @@ def ensure_crypto_benchmark_data(symbol, bundle, bundle_data, environ=None): - filename = get_benchmark_filename(symbol) logger.info( ('Loading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date @@ -282,7 +288,7 @@ def ensure_crypto_benchmark_data(symbol, # If no cached data was found or it was missing any dates then download the # necessary data. - if(bundle == 'poloniex'): + if (bundle == 'poloniex'): ''' If we're using the Poloniex bundle, we'll get the benchmark from the bundle instead of downloading it from Poloniex every time we need it. 
@@ -290,27 +296,34 @@ def ensure_crypto_benchmark_data(symbol, prevents users abroad from getting Catalyst to work ''' logger.info( - ('Retrieving benchmark data from bundle for {symbol!r} from {first_date} to {last_date}'), + ( + 'Retrieving benchmark data from bundle for {symbol!r} from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date) - asset = bundle_data.asset_finder.lookup_symbol(symbol=symbol,as_of_date=None) + asset = bundle_data.asset_finder.lookup_symbol(symbol=symbol, + as_of_date=None) fields = ['day', 'close'] raw = bundle_data.daily_bar_reader.load_raw_arrays( columns=fields, start_date=first_date - trading_day, end_date=last_date, - assets=[asset,]) - bench_raw = pd.concat([pd.DataFrame(raw[0], columns=['date']),pd.DataFrame(raw[1], columns=['close'])], axis=1) - bench_raw['date'] = pd.to_datetime(bench_raw['date'],unit='s') + assets=[asset, ]) + bench_raw = pd.concat([pd.DataFrame(raw[0], columns=['date']), + pd.DataFrame(raw[1], columns=['close'])], + axis=1) + bench_raw['date'] = pd.to_datetime(bench_raw['date'], unit='s') bench_raw.set_index('date', inplace=True) bench_raw.sort_index(inplace=True) - bench_raw = bench_raw[pd.to_datetime(first_date - trading_day):pd.to_datetime(last_date)] + bench_raw = bench_raw[ + pd.to_datetime(first_date - trading_day):pd.to_datetime( + last_date)] else: # This is how it used to be: downloading the benchmark everytime. # Leaving this code here to be repurposed in the future for other bundles. logger.info( - ('Downloading benchmark data for {symbol!r} from {first_date} to {last_date}'), + ( + 'Downloading benchmark data for {symbol!r} from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date) raise DeprecationWarning('poloniex bundle deprecated') @@ -386,7 +399,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, # necessary data. 
logger.info( ('Downloading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date - trading_day, last_date=last_date @@ -447,7 +460,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, # necessary data. logger.info( ('Downloading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date - trading_day, last_date=last_date @@ -531,7 +544,8 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name, data = pd.DataFrame.from_csv(path) if data.empty: raise ValueError("File is empty.") - data.index = pd.to_datetime(data.index, infer_datetime_format=True, errors='coerce' ).tz_localize('UTC') + data.index = pd.to_datetime(data.index, infer_datetime_format=True, + errors='coerce').tz_localize('UTC') if has_data_for_dates(data, first_date, last_date): return data diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 2dd7da07..e5f970d8 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,8 +1,9 @@ +import calendar import tarfile import shutil import requests -from datetime import timedelta, datetime +from datetime import timedelta, datetime, date import os from logging import Logger import pandas as pd @@ -12,7 +13,8 @@ import pytz from catalyst.data.bundles import from_bundle_ingest_dirname from catalyst.data.bundles.core import download_without_progress -from catalyst.exchange.exchange_errors import ApiCandlesError +from catalyst.exchange.exchange_errors import ApiCandlesError, \ + PricingDataBeforeTradingError from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path @@ -102,6 +104,82 @@ def get_start_dt(end_dt, bar_count, data_frequency): return start_dt +def 
get_adj_dates(start, end, assets, data_frequency): + """ + Contains a date range to the trading availability of the specified pairs. + + :param start: + :param end: + :param assets: + :param data_frequency: + :return: + """ + earliest_trade = None + last_entry = None + for asset in assets: + if earliest_trade is None or earliest_trade > asset.start_date: + earliest_trade = asset.start_date + + end_asset = asset.end_minute if data_frequency == 'minute' else \ + asset.end_daily + if end_asset is not None and \ + (last_entry is None or end_asset > last_entry): + last_entry = end_asset + + if start is None or earliest_trade > start: + log.debug( + 'adjusting start date to earliest trade date found {}'.format( + earliest_trade + )) + start = earliest_trade + + if end is None or (last_entry is not None and end > last_entry): + log.debug('adjusting the end date to now {}'.format(last_entry)) + end = last_entry + + if start >= end: + raise PricingDataBeforeTradingError( + symbols=[asset.symbol], + exchange=asset.exchange, + first_trading_day=earliest_trade, + dt=end + ) + + return start, end + + +def get_month_start_end(dt): + """ + Returns the first and last day of the month for the specified date. + + :param dt: + :return: + """ + month_range = calendar.monthrange(dt.year, dt.month) + month_start = pd.to_datetime(datetime( + dt.year, dt.month, 1, 0, 0, 0, 0 + ), utc=True) + + month_end = pd.to_datetime(datetime( + dt.year, dt.month, month_range[1], 23, 59, 0, 0 + ), utc=True) + + return month_start, month_end + + +def get_year_start_end(dt): + """ + Returns the first and last day of the year for the specified date. 
+ + :param dt: + :return: + """ + year_start = pd.to_datetime(date(dt.year, 1, 1), utc=True) + year_end = pd.to_datetime(date(dt.year, 12, 31), utc=True) + + return year_start, year_end + + def get_ffill_candles(candles, bar_count, end_dt, data_frequency, previous_candle=None): """ diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index ed2f8ed6..d7b163ac 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -281,7 +281,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): ffill=True): bundle = self.exchange_bundles[exchange.name] - reader = bundle.get_reader(data_frequency) if data_frequency == 'minute': dts = self.trading_calendar.minutes_window( end_dt, -bar_count @@ -299,12 +298,6 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): raise InvalidHistoryFrequencyError(frequency=data_frequency) try: - # values = reader.load_raw_arrays( - # fields=[field], - # start_dt=dts[0], - # end_dt=dts[-1], - # sids=[asset.sid for asset in assets], - # )[0] values = bundle.get_raw_arrays( assets=assets, fields=[field], @@ -333,8 +326,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): value_series = pd.Series(asset_values, index=dts) series[asset] = value_series - df = pd.DataFrame(series) - return df + return pd.DataFrame(series) def ensure_after_first_day(self, dt, assets): first_trading_day = self._get_first_trading_day(assets) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 7c6f5d41..93cc128b 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -13,7 +13,7 @@ from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange import bundle_utils from catalyst.exchange.bundle_utils import get_start_dt, \ - get_delta, get_trailing_candles_dt, get_periods + get_delta, get_trailing_candles_dt, get_periods, get_adj_dates from 
catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ @@ -486,6 +486,9 @@ class Exchange: adj_bar_count = candle_size * bar_count start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) + start_dt, end_dt = get_adj_dates(start_dt, end_dt, assets, + data_frequency) + missing_assets = bundle.filter_existing_assets( assets=assets, start_dt=start_dt, @@ -511,6 +514,8 @@ class Exchange: asset=chunk['asset'], data_frequency=data_frequency, period=chunk['period'], + start_dt=chunk['period_start'], + end_dt=chunk['period_end'], writer=writer ) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 090775aa..dbc42c0b 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -5,6 +5,7 @@ from datetime import timedelta, datetime import pandas as pd from logbook import Logger, INFO +from pandas.tseries.offsets import MonthBegin, YearBegin, YearEnd from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ @@ -12,9 +13,10 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ - get_bcolz_chunk, get_delta + get_bcolz_chunk, get_delta, get_adj_dates, get_month_start_end, \ + get_year_start_end from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ - InvalidHistoryFrequencyError + InvalidHistoryFrequencyError, PricingDataBeforeTradingError from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -48,36 +50,6 @@ class ExchangeBundle: else: return self.exchange.get_assets() - def get_adj_dates(self, 
start, end, assets, data_frequency): - - earliest_trade = None - last_entry = None - for asset in assets: - if earliest_trade is None or earliest_trade > asset.start_date: - earliest_trade = asset.start_date - - end_asset = asset.end_minute if data_frequency == 'minute' else \ - asset.end_daily - if end_asset is not None and \ - (last_entry is None or end_asset > last_entry): - last_entry = end_asset - - if start is None or earliest_trade > start: - log.debug( - 'adjusting start date to earliest trade date found {}'.format( - earliest_trade - )) - start = earliest_trade - - if end is None or (last_entry is not None and end > last_entry): - log.debug('adjusting the end date to now {}'.format(last_entry)) - end = last_entry - - if start >= end: - raise ValueError('start date cannot be after end date') - - return start, end - def get_reader(self, data_frequency, path=None): """ Get a data writer object, either a new object or from cache @@ -346,8 +318,8 @@ class ExchangeBundle: :return: """ - def ingest_ctable(self, asset, data_frequency, period, writer, - empty_rows_behavior='strip', cleanup=False): + def ingest_ctable(self, asset, data_frequency, period, start_dt, end_dt, + writer, empty_rows_behavior='strip', cleanup=False): """ Merge a ctable bundle chunk into the main bundle for the exchange. @@ -371,11 +343,6 @@ class ExchangeBundle: period=period ) - # TODO: is this the optimal approach? 
- # Ensures that we read exact range which we want to write - start_dt = writer._start_session - end_dt = writer._end_session - periods = self.calendar.minutes_in_range(start_dt, end_dt) \ if data_frequency == 'minute' \ else self.calendar.sessions_in_range(start_dt, end_dt) @@ -474,10 +441,9 @@ class ExchangeBundle: for asset in assets: try: asset_start, asset_end = \ - self.get_adj_dates(start_dt, end_dt, [asset], - data_frequency) + get_adj_dates(start_dt, end_dt, [asset], data_frequency) - except ValueError: + except PricingDataBeforeTradingError: continue sessions = self.calendar.sessions_in_range(asset_start, asset_end) @@ -491,46 +457,48 @@ class ExchangeBundle: if period not in periods: periods.append(period) + # Adjusting the period dates to match the availability + # of the trading pair if data_frequency == 'minute': - month_range = calendar.monthrange(dt.year, dt.month) - period_start = pd.to_datetime( - datetime(dt.year, dt.month, 1, 0, 0, 0, 0), - utc=True) + period_start, period_end = get_month_start_end(dt) + asset_start_month, _ = get_month_start_end(asset_start) - period_end = pd.to_datetime( - datetime( - dt.year, dt.month, month_range[1], 23, 59, 0, - 0), - utc=True - ) + if asset_start_month == period_start \ + and period_start < asset_start: + period_start = asset_start + + _, asset_end_month = get_month_start_end(asset_end) + if asset_end_month == period_end \ + and period_end > asset_end: + period_end = asset_end elif data_frequency == 'daily': - period_start = pd.to_datetime( - datetime(dt.year, 1, 1, 0, 0, 0, 0), - utc=True) + period_start, period_end = get_year_start_end(dt) + asset_start_year, _ = get_year_start_end(asset_start) - period_end = pd.to_datetime( - datetime( - dt.year, 12, 31, 23, 59, 0, 0), - utc=True - ) + if asset_start_year == period_start \ + and period_start < asset_start: + period_start = asset_start + + _, asset_end_year = get_year_start_end(asset_end) + if asset_end_year == period_end \ + and period_end > asset_end: 
+ period_end = asset_end else: raise InvalidHistoryFrequencyError( frequency=data_frequency ) - if period_end > asset_end: - period_end = asset_end - - has_data = \ - range_in_bundle(asset, period_start, period_end, - reader) + has_data = range_in_bundle( + asset, period_start, period_end, reader + ) if not has_data: log.debug('adding period: {}'.format(period)) chunks.append( dict( asset=asset, + period_start=period_start, period_end=period_end, period=period ) @@ -557,7 +525,7 @@ class ExchangeBundle: :return: """ assets = self.get_assets(include_symbols, exclude_symbols) - start, end = self.get_adj_dates(start, end, assets, data_frequency) + start, end = get_adj_dates(start, end, assets, data_frequency) writer = self.get_writer(start, end, data_frequency) chunks = self.prepare_chunks( @@ -578,6 +546,8 @@ class ExchangeBundle: asset=chunk['asset'], data_frequency=data_frequency, period=chunk['period'], + start_dt=chunk['period_start'], + end_dt=chunk['period_end'], writer=writer, empty_rows_behavior='strip' ) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 7ec2f59e..b6c51155 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -189,6 +189,5 @@ class PricingDataNotLoadedError(ZiplineError): '`catalyst ingest-exchange -x {exchange} -i {symbol_list}`. 
' 'See catalyst documentation for details.').strip() - class ApiCandlesError(ZiplineError): msg = ('Unable to fetch candles from the remote API: {error}.').strip() diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index c0110476..5772f6ea 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -92,6 +92,8 @@ class ExchangeBundleTestCase: asset=asset, data_frequency=data_frequency, period='2017', + start_dt=start, + end_dt=end, writer=writer, empty_rows_behavior='strip' ) From 74fd4a6a0f502e9e2be0112f3294b2da87bd9cf1 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 00:04:55 -0400 Subject: [PATCH 075/110] Trying to fix an issue with merging new candles in get_history() --- catalyst/exchange/bitfinex/bitfinex.py | 3 + catalyst/exchange/bittrex/bittrex.py | 3 + catalyst/exchange/bundle_utils.py | 14 ++- catalyst/exchange/exchange.py | 121 ++++++++++--------------- catalyst/exchange/exchange_bundle.py | 64 ++++++++----- catalyst/exchange/exchange_errors.py | 9 +- catalyst/exchange/poloniex/poloniex.py | 3 + catalyst/utils/run_algo.py | 3 + 8 files changed, 117 insertions(+), 103 deletions(-) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index d866e912..1817ea2d 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -15,6 +15,7 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, InvalidHistoryFrequencyError, @@ -58,6 +59,8 @@ class Bitfinex(Exchange): self.max_requests_per_minute = 20 self.request_cpt = dict() + self.bundle = ExchangeBundle(self) + def _request(self, operation, data, version='v1'): payload_object = { 'request': '/{}/{}'.format(version, operation), diff --git 
a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 2979a71c..dcc9b943 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -7,6 +7,7 @@ from six.moves import urllib from catalyst.exchange.bittrex.bittrex_api import Bittrex_api from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import InvalidHistoryFrequencyError, \ ExchangeRequestError, InvalidOrderStyle, OrderNotFound, OrderCancelError, \ CreateOrderError @@ -41,6 +42,8 @@ class Bittrex(Exchange): self.assets = dict() self.load_assets() + self.bundle = ExchangeBundle(self) + @property def account(self): pass diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index e5f970d8..ebf39d76 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -1,11 +1,9 @@ import calendar import tarfile -import shutil import requests from datetime import timedelta, datetime, date import os -from logging import Logger import pandas as pd import numpy as np @@ -19,7 +17,6 @@ from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path -log = Logger('test_exchange_bundle') EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' @@ -78,6 +75,12 @@ def get_delta(periods, data_frequency): if data_frequency == 'minute' else timedelta(days=periods) +def get_periods_range(start_dt, end_dt, data_frequency): + freq = 'T' if data_frequency == 'minute' else 'D' + + return pd.date_range(start_dt, end_dt, freq=freq) + + def get_periods(start_dt, end_dt, data_frequency): delta = end_dt - start_dt @@ -127,14 +130,9 @@ def get_adj_dates(start, end, assets, data_frequency): last_entry = end_asset if start is None or earliest_trade > start: - log.debug( - 'adjusting start date to 
earliest trade date found {}'.format( - earliest_trade - )) start = earliest_trade if end is None or (last_entry is not None and end > last_entry): - log.debug('adjusting the end date to now {}'.format(last_entry)) end = last_entry if start >= end: diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 93cc128b..34093111 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -11,20 +11,18 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS -from catalyst.exchange import bundle_utils from catalyst.exchange.bundle_utils import get_start_dt, \ get_delta, get_trailing_candles_dt, get_periods, get_adj_dates from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ - InvalidHistoryFrequencyError + InvalidHistoryFrequencyError, MismatchingFrequencyError from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio from catalyst.exchange.exchange_utils import get_exchange_symbols from catalyst.finance.order import ORDER_STATUS from catalyst.finance.transaction import Transaction -from catalyst.utils.deprecate import deprecated log = Logger('Exchange') @@ -43,6 +41,7 @@ class Exchange: self.num_candles_limit = None self.max_requests_per_minute = None self.request_cpt = None + self.bundle = ExchangeBundle(self) @property def positions(self): @@ -367,55 +366,32 @@ class Exchange: ) ) - if field == 'price': - field = 'close' - # Don't use a timezone here dt = pd.Timestamp.utcnow().floor('1 min') - value = None - if self.minute_reader is not None: + ohlc = self.get_candles(data_frequency, asset) + if field not in ohlc: + raise KeyError('Invalid column: %s' % field) + + if 
self.minute_writer is not None: + df = pd.DataFrame( + [ohlc], + index=pd.DatetimeIndex([dt]), + columns=['open', 'high', 'low', 'close', 'volume'] + ) + try: - # Slight delay to minimize the chances that multiple algos - # might try to hit the cache at the exact same time. - sleep_time = random.uniform(0.5, 0.8) - sleep(sleep_time) - # TODO: This does not always! Why is that? Open an issue with zipline. - # See: https://github.com/zipline-live/zipline/issues/26 - value = self.minute_reader.get_value( + # TODO: use victor's modified branch using int64 + self.minute_writer.write_sid( sid=asset.sid, - dt=dt, - field=field + df=df ) + log.debug('wrote minute data: {}'.format(dt)) except Exception as e: - log.warn('minute data not found: {}'.format(e)) - - if value is None or np.isnan(value): - ohlc = self.get_candles(data_frequency, asset) - if field not in ohlc: - raise KeyError('Invalid column: %s' % field) - - if self.minute_writer is not None: - df = pd.DataFrame( - [ohlc], - index=pd.DatetimeIndex([dt]), - columns=['open', 'high', 'low', 'close', 'volume'] - ) - - try: - # TODO: use victor's modified branch using int64 - self.minute_writer.write_sid( - sid=asset.sid, - df=df - ) - log.debug('wrote minute data: {}'.format(dt)) - except Exception as e: - log.warn( - 'unable to write minute data: {} {}'.format(dt, e)) + log.warn( + 'unable to write minute data: {} {}'.format(dt, e)) value = ohlc[field] log.debug('got spot value: {}'.format(value)) - else: - log.debug('got spot value from cache: {}'.format(value)) return value @@ -462,8 +438,6 @@ class Exchange: A dataframe containing the requested data. 
""" - bundle = ExchangeBundle(self) - freq_match = re.match(r'([0-9].*)(m|M|d|D)', frequency, re.M | re.I) if freq_match: candle_size = int(freq_match.group(1)) @@ -474,11 +448,17 @@ class Exchange: if unit.lower() == 'd': if data_frequency != 'daily': - raise InvalidHistoryFrequencyError(frequency=frequency) + raise MismatchingFrequencyError( + frequency=frequency, + data_frequency=data_frequency + ) elif unit.lower() == 'm': if data_frequency != 'minute': - raise InvalidHistoryFrequencyError(frequency=frequency) + raise MismatchingFrequencyError( + frequency=frequency, + data_frequency=data_frequency + ) else: raise InvalidHistoryFrequencyError(frequency) @@ -489,36 +469,30 @@ class Exchange: start_dt, end_dt = get_adj_dates(start_dt, end_dt, assets, data_frequency) - missing_assets = bundle.filter_existing_assets( + missing_assets = self.bundle.filter_existing_assets( assets=assets, start_dt=start_dt, end_dt=end_dt, data_frequency=data_frequency ) - if len(missing_assets) > 0: - writer = bundle.get_writer(start_dt, end_dt, data_frequency) - - chunks = bundle.prepare_chunks( + if missing_assets: + self.bundle.ingest_assets( assets=assets, - data_frequency=data_frequency, start_dt=start_dt, - end_dt=end_dt + end_dt=end_dt, + data_frequency=data_frequency ) - for chunk in chunks: - log.debug('ingesting chunk for pair {}, period {}'.format( - chunk['asset'], - chunk['period'] - )) - bundle.ingest_ctable( - asset=chunk['asset'], - data_frequency=data_frequency, - period=chunk['period'], - start_dt=chunk['period_start'], - end_dt=chunk['period_end'], - writer=writer - ) + # We check again for data which may be too recent for the consolidated + # exchanges service + missing_assets = self.bundle.filter_existing_assets( + assets=assets, + start_dt=start_dt, + end_dt=end_dt, + data_frequency=data_frequency + ) + if missing_assets: # Adding bars too recent to be contained in the consolidated # exchanges bundles. 
We go directly against the exchange # to retrieve the candles. @@ -542,21 +516,22 @@ class Exchange: end_dt=end_dt ) - bundle.ingest_candles( + # TODO: Do I need the previous_candle? + self.bundle.ingest_candles( candles=candles, bar_count=trailing_bar_count, + start_dt=start_dt, end_dt=end_dt, - data_frequency=data_frequency, - writer=writer + data_frequency=data_frequency ) - values = bundle.get_raw_arrays( + values = self.bundle.get_raw_arrays( assets=assets, fields=[field], start_dt=start_dt, end_dt=end_dt, data_frequency=data_frequency - )[0] + ) series = dict() for asset_index, asset in enumerate(assets): @@ -565,7 +540,7 @@ class Exchange: # TODO: use numpy to avoid the loop date = start_dt - for value in values: + for value in values[0]: all_dates.append(date) asset_values.append(value[asset_index]) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index dbc42c0b..6198cdc3 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -14,7 +14,7 @@ from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ BcolzDailyBarReader from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ get_bcolz_chunk, get_delta, get_adj_dates, get_month_start_end, \ - get_year_start_end + get_year_start_end, get_periods, get_periods_range from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ InvalidHistoryFrequencyError, PricingDataBeforeTradingError from catalyst.exchange.exchange_utils import get_exchange_folder @@ -207,7 +207,7 @@ class ExchangeBundle: # This is workaround, there is an issue with empty # session_label when using a newly created writer - del self._writers[data_frequency] + del self._writers[writer._rootdir] writer = self.get_writer(writer._start_session, writer._end_session, data_frequency) @@ -217,8 +217,9 @@ class ExchangeBundle: invalid_data_behavior='raise' ) - def ingest_candles(self, candles, bar_count, end_dt, data_frequency, - 
writer, previous_candle=dict()): + def ingest_candles(self, candles, bar_count, start_dt, end_dt, + data_frequency, + previous_candle=dict()): """ Ingest candles obtained via the get_candles API of an exchange. @@ -235,6 +236,8 @@ class ExchangeBundle: :return: """ + writer = self.get_writer(start_dt, end_dt, data_frequency) + num_candles = 0 data = [] for asset in candles: @@ -291,6 +294,9 @@ class ExchangeBundle: path=None): reader = self.get_reader(data_frequency, path) + if reader.last_available_dt < end_dt: + return [] + if data_frequency == 'minute': values = reader.load_raw_arrays( fields=fields, @@ -356,6 +362,9 @@ class ExchangeBundle: path=path ) + if not arrays: + return path + ohlcv = dict( open=arrays[0].flatten(), high=arrays[1].flatten(), @@ -446,7 +455,7 @@ class ExchangeBundle: except PricingDataBeforeTradingError: continue - sessions = self.calendar.sessions_in_range(asset_start, asset_end) + sessions = get_periods_range(asset_start, asset_end, 'daily') periods = [] dt = sessions[0] @@ -510,29 +519,22 @@ class ExchangeBundle: return chunks - def ingest(self, data_frequency, include_symbols=None, - exclude_symbols=None, start=None, end=None, - show_progress=True, environ=os.environ): + def ingest_assets(self, assets, start_dt, end_dt, data_frequency, + show_progress=False): """ + Determine if data is missing from the bundle and attempt to ingest it. 
- :param data_frequency: - :param include_symbols: - :param exclude_symbols: - :param start: - :param end: - :param show_progress: - :param environ: + :param assets: + :param start_dt: + :param end_dt: :return: """ - assets = self.get_assets(include_symbols, exclude_symbols) - start, end = get_adj_dates(start, end, assets, data_frequency) - - writer = self.get_writer(start, end, data_frequency) + writer = self.get_writer(start_dt, end_dt, data_frequency) chunks = self.prepare_chunks( assets=assets, data_frequency=data_frequency, - start_dt=start, - end_dt=end + start_dt=start_dt, + end_dt=end_dt ) with maybe_show_progress( chunks, @@ -551,3 +553,23 @@ class ExchangeBundle: writer=writer, empty_rows_behavior='strip' ) + + def ingest(self, data_frequency, include_symbols=None, + exclude_symbols=None, start=None, end=None, + show_progress=True, environ=os.environ): + """ + + :param data_frequency: + :param include_symbols: + :param exclude_symbols: + :param start: + :param end: + :param show_progress: + :param environ: + :return: + """ + assets = self.get_assets(include_symbols, exclude_symbols) + start_dt, end_dt = get_adj_dates(start, end, assets, data_frequency) + + self.ingest_assets(assets, start_dt, end_dt, data_frequency, + show_progress) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index b6c51155..3a3b6747 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -78,7 +78,14 @@ class AlgoPickleNotFound(ZiplineError): class InvalidHistoryFrequencyError(ZiplineError): msg = ( - 'History frequency {frequency} not supported by the exchange.' + 'Frequency {frequency} not supported by the exchange.' + ).strip() + + +class MismatchingFrequencyError(ZiplineError): + msg = ( + 'Bar aggregate frequency {frequency} not compatible with ' + 'data frequency {data_frequency}.' 
).strip() diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index ca13e951..e24acc75 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -15,6 +15,7 @@ from six import iteritems from catalyst.assets._assets import TradingPair from logbook import Logger +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.poloniex.poloniex_api import Poloniex_api # from websocket import create_connection @@ -51,6 +52,8 @@ class Poloniex(Exchange): self.max_requests_per_minute = 20 self.request_cpt = dict() + self.bundle = ExchangeBundle(self) + def sanitize_curency_symbol(self, exchange_symbol): """ Helper method used to build the universal pair. diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index 1cb2bd66..bf149642 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -262,6 +262,9 @@ def _run(handle_data, data_frequency='minute' ) + # TODO: use the constructor instead + sim_params._arena = 'live' + algorithm_class = partial( ExchangeTradingAlgorithmLive, exchanges=exchanges, From 733f2c3433217c1ce42a692522dcff69b306fd4c Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 00:18:07 -0400 Subject: [PATCH 076/110] Fixed an issue with writer retry --- catalyst/exchange/exchange_bundle.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 6198cdc3..060cf776 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -207,7 +207,10 @@ class ExchangeBundle: # This is workaround, there is an issue with empty # session_label when using a newly created writer - del self._writers[writer._rootdir] + key = writer._rootdir if data_frequency == 'minute' \ + else writer._filename + + del self._writers[key] writer = self.get_writer(writer._start_session, writer._end_session, 
data_frequency) From fb32e1ce5d647ae21328e05deda0762c48a84576 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 17 Oct 2017 23:16:49 -0600 Subject: [PATCH 077/110] Fixing DailyBarReader for volume to 0 instead of NaN --- catalyst/data/_equities.pyx | 4 +++- catalyst/data/loader.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/catalyst/data/_equities.pyx b/catalyst/data/_equities.pyx index c0652b02..cb14f09a 100644 --- a/catalyst/data/_equities.pyx +++ b/catalyst/data/_equities.pyx @@ -215,11 +215,13 @@ cpdef _read_bcolz_data(ctable_t table, else: continue - if column_name in ['open', 'high', 'low', 'close', 'volume']: + if column_name in ['open', 'high', 'low', 'close']: where_nan = (outbuf == 0) outbuf_as_float = outbuf.astype(float64) * .000000001 outbuf_as_float[where_nan] = NAN results.append(outbuf_as_float) + elif column_name in ['volume']: + results.append(outbuf.astype(float64) * .000000001) else: results.append(outbuf) return results diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index fa80317a..7211938e 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -152,6 +152,8 @@ def load_crypto_market_data(trading_day=None, trading_days=None, data_frequency='daily') br.columns = ['close'] br = br.pct_change(1).iloc[1:] + br.loc[first_date]=0 + br=br.sort_index() # Override first_date for treasury data since we have it for many more years # and is independent of crypto data From 188a4a3f3d05428c2b2796c7098f934ecb3efff1 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 02:11:21 -0400 Subject: [PATCH 078/110] Unit testing an issue with the daily loader --- catalyst/exchange/exchange.py | 6 ++-- tests/exchange/test_bundle.py | 63 ++++++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 34093111..c1c3d24f 100644 --- a/catalyst/exchange/exchange.py +++ 
b/catalyst/exchange/exchange.py @@ -486,17 +486,17 @@ class Exchange: # We check again for data which may be too recent for the consolidated # exchanges service - missing_assets = self.bundle.filter_existing_assets( + trailing_assets = self.bundle.filter_existing_assets( assets=assets, start_dt=start_dt, end_dt=end_dt, data_frequency=data_frequency ) - if missing_assets: + if trailing_assets: # Adding bars too recent to be contained in the consolidated # exchanges bundles. We go directly against the exchange # to retrieve the candles. - for asset in missing_assets: + for asset in trailing_assets: trailing_candles_dt = get_trailing_candles_dt( asset=asset, start_dt=start_dt, diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 5772f6ea..e8af2148 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -71,32 +71,77 @@ class ExchangeBundleTestCase: pass def test_merge_ctables(self): - exchange_name = 'bitfinex' + exchange_name = 'poloniex' + + # Switch between daily and minute for testing data_frequency = 'daily' + # data_frequency = 'minute' exchange = get_exchange(exchange_name) - asset = exchange.get_asset('neo_btc') + assets = [ + exchange.get_asset('eth_btc'), + exchange.get_asset('etc_btc'), + ] start = pd.to_datetime('2017-9-1', utc=True) end = pd.to_datetime('2017-9-30', utc=True) - # asset = exchange.get_asset('neo_btc') - # - # start = pd.to_datetime('2017-9-1', utc=True) - # end = pd.to_datetime('2017-9-30', utc=True) - exchange_bundle = ExchangeBundle(exchange) writer = exchange_bundle.get_writer(start, end, data_frequency) + + # In the interest of avoiding abstractions, this is writing a chunk + # to the ctable. It does not include the logic which creates chunks. 
exchange_bundle.ingest_ctable( - asset=asset, + asset=assets[0], data_frequency=data_frequency, + # period='2017-9', + period='2017', + # Dont't forget to update if you change your dates + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) + exchange_bundle.ingest_ctable( + asset=assets[1], + data_frequency=data_frequency, + # period='2017-9', period='2017', start_dt=start, end_dt=end, writer=writer, empty_rows_behavior='strip' ) + + # Since this pair was loaded last. It should be there in daily mode. + last_asset_array = exchange_bundle.get_raw_arrays( + assets=[assets[1]], + start_dt=start, + end_dt=end, + fields=['close'], + data_frequency=data_frequency + ) + print('found {} rows for last ingestion'.format( + len(last_asset_array[0])) + ) + + # In daily mode, this returns an error. It appears that writing + # a second asset in the same date range removed the first asset. + + # In minute mode, the data is there too. This signals that the minute + # writer / reader is more powerful. This explains why I did not + # encounter these problems as I have been focusing on minute data. 
+ first_asset_array = exchange_bundle.get_raw_arrays( + assets=[assets[0]], + start_dt=start, + end_dt=end, + fields=['close'], + data_frequency=data_frequency + ) + print('found {} rows for first ingestion'.format( + len(first_asset_array[0])) + ) pass def test_minute_bundle(self): @@ -112,5 +157,5 @@ class ExchangeBundleTestCase: data_frequency=data_frequency, period='2017-5', ) - reader = BcolzMinuteBarReader(path) + pass From 86f892eade1dcae9eb363256cd9f3842734c2603 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 04:29:22 -0400 Subject: [PATCH 079/110] Unit tested a daily reader/writer based on the minute bundle --- tests/exchange/test_bundle.py | 116 +++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index e8af2148..99f10c24 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,11 +1,17 @@ from logging import Logger +import numpy as np import pandas as pd -from catalyst.data.minute_bars import BcolzMinuteBarReader -from catalyst.exchange.bundle_utils import get_bcolz_chunk -from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst import get_calendar +from catalyst.data.minute_bars import BcolzMinuteBarReader, \ + BcolzMinuteBarWriter +from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods_range +from catalyst.exchange.exchange_bundle import ExchangeBundle, \ + BUNDLE_NAME_TEMPLATE +from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.exchange.init_utils import get_exchange +from catalyst.utils.paths import ensure_directory log = Logger('test_exchange_bundle') @@ -144,6 +150,110 @@ class ExchangeBundleTestCase: ) pass + def test_daily_data_to_minute_table(self): + exchange_name = 'poloniex' + + # Switch between daily and minute for testing + data_frequency = 'daily' + # data_frequency = 'minute' + + exchange = get_exchange(exchange_name) + 
assets = [ + exchange.get_asset('eth_btc'), + exchange.get_asset('etc_btc'), + ] + + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + # Preparing the bundle folder + root = get_exchange_folder(exchange.name) + path = BUNDLE_NAME_TEMPLATE.format( + root=root, + frequency=data_frequency + ) + ensure_directory(path) + + exchange_bundle = ExchangeBundle(exchange) + calendar = get_calendar('OPEN') + + # We are using a BcolzMinuteBarWriter even though the data is daily + # Each day has a maximum of one bar + + # I tried setting the minutes_per_day to 1 will not create + # unnecessary bars + writer = BcolzMinuteBarWriter( + rootdir=path, + calendar=calendar, + minutes_per_day=1, + start_session=start, + end_session=end, + write_metadata=True, + default_ohlc_ratio=exchange_bundle.default_ohlc_ratio + ) + + # This will read the daily data in a bundle created by + # the daily writer. It will write to the minute writer which + # we are passing. + + # Ingesting a second asset to ensure that multiple chunks + # don't override each other + for asset in assets: + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + period='2017', + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) + + # Simplifying the data reader to play nice with 1 minute per day + class BcolzDayBarReader(BcolzMinuteBarReader): + def load_raw_arrays(self, fields, start_dt, end_dt, sids): + start_idx = self._find_position_of_minute(start_dt) + end_idx = self._find_position_of_minute(end_dt) + + num_days = (end_idx - start_idx + 1) + shape = num_days, len(sids) + + data = [] + for field in fields: + out = np.full(shape, np.nan) + + for i, sid in enumerate(sids): + carray = reader._open_minute_file(field, sid) + a = carray[start_idx:end_idx + 1] + + where = a != 0 + + out[:len(where), i][where] = ( + a[where] * self._ohlc_ratio_inverse_for_sid(sid) + ) + + data.append(out) + + return data + + reader = 
BcolzDayBarReader(path) + + # Reading the two assets to ensure that no data was lost + for asset in assets: + sid = asset.sid + + daily_values = reader.load_raw_arrays( + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start, + end_dt=end, + sids=[sid], + ) + + print('found {} rows for last ingestion'.format( + len(daily_values[0])) + ) + pass + def test_minute_bundle(self): exchange_name = 'poloniex' data_frequency = 'minute' From b357a0656a0072dcb7d6081c38f2f21e9f4adb12 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 13:58:37 -0400 Subject: [PATCH 080/110] Added a modified bcolz writer / reader --- catalyst/exchange/exchange_bcolz.py | 65 +++++++++++++++++++++++++++++ tests/exchange/test_bundle.py | 40 ++++-------------- 2 files changed, 72 insertions(+), 33 deletions(-) create mode 100644 catalyst/exchange/exchange_bcolz.py diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py new file mode 100644 index 00000000..e24a2d41 --- /dev/null +++ b/catalyst/exchange/exchange_bcolz.py @@ -0,0 +1,65 @@ +import numpy as np + +from catalyst import get_calendar +from catalyst.data.minute_bars import BcolzMinuteBarReader, \ + BcolzMinuteBarWriter + + +class BcolzExchangeBarWriter(BcolzMinuteBarWriter): + def __init__(self, *args, **kwargs): + self._data_frequency = kwargs.pop('data_frequency', None) + kwargs.pop('minutes_per_day', None) + kwargs.pop('default_ohlc_ratio', None) + kwargs.pop('calendar', None) + + minutes_per_day = 1440 if self._data_frequency == 'minute' else 1 + default_ohlc_ratio = 1000000 + calendar = get_calendar('OPEN') + + super(BcolzExchangeBarWriter, self) \ + .__init__(*args, **dict(kwargs, + minutes_per_day=minutes_per_day, + default_ohlc_ratio=default_ohlc_ratio, + calendar=calendar + )) + + +class BcolzExchangeBarReader(BcolzMinuteBarReader): + def __init__(self, *args, **kwargs): + self._data_frequency = kwargs.pop('data_frequency', None) + + super(BcolzExchangeBarReader, 
self).__init__(*args, **kwargs) + + def load_raw_arrays(self, fields, start_dt, end_dt, sids): + + if self._data_frequency == 'minute': + return super(BcolzExchangeBarReader, self) \ + .load_raw_arrays(fields, start_dt, end_dt, sids) + + else: + return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) + + def _load_daily_raw_arrays(self, fields, start_dt, end_dt, sids): + start_idx = self._find_position_of_minute(start_dt) + end_idx = self._find_position_of_minute(end_dt) + + num_days = (end_idx - start_idx + 1) + shape = num_days, len(sids) + + data = [] + for field in fields: + out = np.full(shape, np.nan) + + for i, sid in enumerate(sids): + carray = self._open_minute_file(field, sid) + a = carray[start_idx:end_idx + 1] + + where = a != 0 + + out[:len(where), i][where] = ( + a[where] * self._ohlc_ratio_inverse_for_sid(sid) + ) + + data.append(out) + + return data diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 99f10c24..6a460695 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -7,6 +7,8 @@ from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteBarReader, \ BcolzMinuteBarWriter from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods_range +from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ + BcolzExchangeBarWriter from catalyst.exchange.exchange_bundle import ExchangeBundle, \ BUNDLE_NAME_TEMPLATE from catalyst.exchange.exchange_utils import get_exchange_folder @@ -182,14 +184,12 @@ class ExchangeBundleTestCase: # I tried setting the minutes_per_day to 1 will not create # unnecessary bars - writer = BcolzMinuteBarWriter( + writer = BcolzExchangeBarWriter( rootdir=path, - calendar=calendar, - minutes_per_day=1, + data_frequency=data_frequency, start_session=start, end_session=end, - write_metadata=True, - default_ohlc_ratio=exchange_bundle.default_ohlc_ratio + write_metadata=True ) # This will read the daily data in a bundle 
created by @@ -209,34 +209,8 @@ class ExchangeBundleTestCase: empty_rows_behavior='strip' ) - # Simplifying the data reader to play nice with 1 minute per day - class BcolzDayBarReader(BcolzMinuteBarReader): - def load_raw_arrays(self, fields, start_dt, end_dt, sids): - start_idx = self._find_position_of_minute(start_dt) - end_idx = self._find_position_of_minute(end_dt) - - num_days = (end_idx - start_idx + 1) - shape = num_days, len(sids) - - data = [] - for field in fields: - out = np.full(shape, np.nan) - - for i, sid in enumerate(sids): - carray = reader._open_minute_file(field, sid) - a = carray[start_idx:end_idx + 1] - - where = a != 0 - - out[:len(where), i][where] = ( - a[where] * self._ohlc_ratio_inverse_for_sid(sid) - ) - - data.append(out) - - return data - - reader = BcolzDayBarReader(path) + reader = BcolzExchangeBarReader(rootdir=path, + data_frequency=data_frequency) # Reading the two assets to ensure that no data was lost for asset in assets: From 7147bfc51f64a52aa48c41065bd136bf206c0572 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 14:36:25 -0400 Subject: [PATCH 081/110] Refactoring to use the updated bundles --- catalyst/exchange/exchange.py | 8 +- catalyst/exchange/exchange_bcolz.py | 7 +- catalyst/exchange/exchange_bundle.py | 143 ++++++++------------------- tests/exchange/test_bundle.py | 71 +++++-------- 4 files changed, 77 insertions(+), 152 deletions(-) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index c1c3d24f..8e7bfd5d 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -525,12 +525,12 @@ class Exchange: data_frequency=data_frequency ) - values = self.bundle.get_raw_arrays( - assets=assets, + reader = self.bundle.get_reader(data_frequency) + values = reader.load_raw_arrays( + sids=[asset.sid for asset in assets], fields=[field], start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency + end_dt=end_dt ) series = dict() diff --git 
a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index e24a2d41..bdcd43c3 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -12,6 +12,10 @@ class BcolzExchangeBarWriter(BcolzMinuteBarWriter): kwargs.pop('default_ohlc_ratio', None) kwargs.pop('calendar', None) + end_session = kwargs.pop('end_session', None) + if end_session is not None: + end_session = end_session.floor('1d') + minutes_per_day = 1440 if self._data_frequency == 'minute' else 1 default_ohlc_ratio = 1000000 calendar = get_calendar('OPEN') @@ -20,7 +24,8 @@ class BcolzExchangeBarWriter(BcolzMinuteBarWriter): .__init__(*args, **dict(kwargs, minutes_per_day=minutes_per_day, default_ohlc_ratio=default_ohlc_ratio, - calendar=calendar + calendar=calendar, + end_session=end_session )) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 060cf776..6f9313d2 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,20 +1,18 @@ -import calendar import os import shutil -from datetime import timedelta, datetime +from datetime import timedelta import pandas as pd from logbook import Logger, INFO -from pandas.tseries.offsets import MonthBegin, YearBegin, YearEnd from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ - BcolzMinuteBarWriter, BcolzMinuteBarReader, BcolzMinuteBarMetadata -from catalyst.data.us_equity_pricing import BcolzDailyBarWriter, \ - BcolzDailyBarReader + BcolzMinuteBarMetadata from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ get_bcolz_chunk, get_delta, get_adj_dates, get_month_start_end, \ - get_year_start_end, get_periods, get_periods_range + get_year_start_end, get_periods_range +from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ + BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ 
InvalidHistoryFrequencyError, PricingDataBeforeTradingError from catalyst.exchange.exchange_utils import get_exchange_folder @@ -66,22 +64,10 @@ class ExchangeBundle: if path in self._readers and self._readers[path] is not None: return self._readers[path] - self._readers[path] = None - if data_frequency == 'minute': - try: - self._readers[path] = BcolzMinuteBarReader(path) - except IOError: - log.debug('no reader data found in {}'.format(path)) - - elif data_frequency == 'daily': - try: - self._readers[path] = BcolzDailyBarReader(path) - except IOError: - log.debug('no reader data found in {}'.format(path)) - else: - raise InvalidHistoryFrequencyError( - frequency=data_frequency - ) + self._readers[path] = BcolzExchangeBarReader( + rootdir=path, + data_frequency=data_frequency + ) return self._readers[path] @@ -105,58 +91,39 @@ class ExchangeBundle: ensure_directory(path) - if data_frequency == 'minute': - if len(os.listdir(path)) > 0: + if len(os.listdir(path)) > 0: - metadata = BcolzMinuteBarMetadata.read(path) + metadata = BcolzMinuteBarMetadata.read(path) - write_metadata = False - if start_dt < metadata.start_session: - write_metadata = True - start_session = start_dt - else: - start_session = metadata.start_session - - if end_dt > metadata.end_session: - write_metadata = True - - end_session = end_dt - else: - end_session = metadata.end_session - - self._writers[path] = \ - BcolzMinuteBarWriter( - path, - metadata.calendar, - start_session, - end_session, - metadata.minutes_per_day, - metadata.default_ohlc_ratio, - metadata.ohlc_ratios_per_sid, - write_metadata=write_metadata - ) + write_metadata = False + if start_dt < metadata.start_session: + write_metadata = True + start_session = start_dt else: - self._writers[path] = BcolzMinuteBarWriter( - rootdir=path, - calendar=self.calendar, - minutes_per_day=self.minutes_per_day, - start_session=start_dt, - end_session=end_dt, - write_metadata=True, - default_ohlc_ratio=self.default_ohlc_ratio - ) + start_session 
= metadata.start_session - elif data_frequency == 'daily': - end_session = end_dt.floor('1d') - self._writers[path] = BcolzDailyBarWriter( - filename=path, - calendar=self.calendar, - start_session=start_dt, - end_session=end_session - ) + if end_dt > metadata.end_session: + write_metadata = True + + end_session = end_dt + else: + end_session = metadata.end_session + + self._writers[path] = \ + BcolzExchangeBarWriter( + rootdir=path, + start_session=start_session, + end_session=end_session, + write_metadata=write_metadata, + data_frequency=data_frequency + ) else: - raise InvalidHistoryFrequencyError( - frequency=data_frequency + self._writers[path] = BcolzExchangeBarWriter( + rootdir=path, + start_session=start_dt, + end_session=end_dt, + write_metadata=True, + data_frequency=data_frequency ) return self._writers[path] @@ -293,33 +260,6 @@ class ExchangeBundle: return data - def get_raw_arrays(self, assets, start_dt, end_dt, fields, data_frequency, - path=None): - reader = self.get_reader(data_frequency, path) - - if reader.last_available_dt < end_dt: - return [] - - if data_frequency == 'minute': - values = reader.load_raw_arrays( - fields=fields, - start_dt=start_dt, - end_dt=end_dt, - sids=[asset.sid for asset in assets], - ) - else: - - # Note that the parameters convention is totally different - # from the minute reader. 
- values = reader.load_raw_arrays( - columns=fields, - start_date=start_dt, - end_date=end_dt, - assets=assets - ) - - return values - def download_bundle(self, name): """ @@ -356,13 +296,12 @@ class ExchangeBundle: if data_frequency == 'minute' \ else self.calendar.sessions_in_range(start_dt, end_dt) - arrays = self.get_raw_arrays( - assets=[asset], + reader = self.get_reader(data_frequency, path=path) + arrays = reader.load_raw_arrays( + sids=[asset.sid], fields=['open', 'high', 'low', 'close', 'volume'], start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency, - path=path + end_dt=end_dt ) if not arrays: diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 6a460695..8cefe360 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -82,8 +82,8 @@ class ExchangeBundleTestCase: exchange_name = 'poloniex' # Switch between daily and minute for testing - data_frequency = 'daily' - # data_frequency = 'minute' + # data_frequency = 'daily' + data_frequency = 'minute' exchange = get_exchange(exchange_name) assets = [ @@ -100,39 +100,18 @@ class ExchangeBundleTestCase: # In the interest of avoiding abstractions, this is writing a chunk # to the ctable. It does not include the logic which creates chunks. - exchange_bundle.ingest_ctable( - asset=assets[0], - data_frequency=data_frequency, - # period='2017-9', - period='2017', - # Dont't forget to update if you change your dates - start_dt=start, - end_dt=end, - writer=writer, - empty_rows_behavior='strip' - ) - exchange_bundle.ingest_ctable( - asset=assets[1], - data_frequency=data_frequency, - # period='2017-9', - period='2017', - start_dt=start, - end_dt=end, - writer=writer, - empty_rows_behavior='strip' - ) - - # Since this pair was loaded last. It should be there in daily mode. 
- last_asset_array = exchange_bundle.get_raw_arrays( - assets=[assets[1]], - start_dt=start, - end_dt=end, - fields=['close'], - data_frequency=data_frequency - ) - print('found {} rows for last ingestion'.format( - len(last_asset_array[0])) - ) + for asset in assets: + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + # period='2017-9', + period='2017-9', + # Dont't forget to update if you change your dates + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) # In daily mode, this returns an error. It appears that writing # a second asset in the same date range removed the first asset. @@ -140,16 +119,18 @@ class ExchangeBundleTestCase: # In minute mode, the data is there too. This signals that the minute # writer / reader is more powerful. This explains why I did not # encounter these problems as I have been focusing on minute data. - first_asset_array = exchange_bundle.get_raw_arrays( - assets=[assets[0]], - start_dt=start, - end_dt=end, - fields=['close'], - data_frequency=data_frequency - ) - print('found {} rows for first ingestion'.format( - len(first_asset_array[0])) - ) + reader = self.get_reader(data_frequency) + for asset in assets: + # Since this pair was loaded last. It should be there in daily mode. 
+ arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} ingestion'.format( + len(arrays[0]), asset.symbol) + ) pass def test_daily_data_to_minute_table(self): From 521484355ab674f1e1bd8f0b6c4aa34904428b27 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 14:45:56 -0400 Subject: [PATCH 082/110] Minor fix to the bcolz writer --- catalyst/exchange/exchange_bcolz.py | 3 +-- tests/exchange/test_bundle.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index bdcd43c3..a517b4fe 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -9,7 +9,6 @@ class BcolzExchangeBarWriter(BcolzMinuteBarWriter): def __init__(self, *args, **kwargs): self._data_frequency = kwargs.pop('data_frequency', None) kwargs.pop('minutes_per_day', None) - kwargs.pop('default_ohlc_ratio', None) kwargs.pop('calendar', None) end_session = kwargs.pop('end_session', None) @@ -17,7 +16,7 @@ class BcolzExchangeBarWriter(BcolzMinuteBarWriter): end_session = end_session.floor('1d') minutes_per_day = 1440 if self._data_frequency == 'minute' else 1 - default_ohlc_ratio = 1000000 + default_ohlc_ratio = kwargs.pop('default_ohlc_ratio', 1000000) calendar = get_calendar('OPEN') super(BcolzExchangeBarWriter, self) \ diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 8cefe360..3aa96b22 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,12 +1,9 @@ from logging import Logger -import numpy as np import pandas as pd from catalyst import get_calendar -from catalyst.data.minute_bars import BcolzMinuteBarReader, \ - BcolzMinuteBarWriter -from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods_range +from catalyst.exchange.bundle_utils import get_bcolz_chunk from catalyst.exchange.exchange_bcolz import 
BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_bundle import ExchangeBundle, \ From 6f8fbc2b8242a8527d8cd8c75a6467e1c9f4a654 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 15:33:15 -0400 Subject: [PATCH 083/110] Fix issue with retrieving bundles --- catalyst/examples/simple_loop.py | 6 +++--- catalyst/exchange/exchange_bundle.py | 11 +++++++---- tests/exchange/test_bundle.py | 13 +++++++------ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 51c68426..77415429 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -7,7 +7,7 @@ from catalyst.api import symbol def initialize(context): print('initializing') - context.asset = symbol('etc_btc') + context.asset = symbol('wings_eth') def handle_data(context, data): @@ -35,7 +35,7 @@ run_algorithm( initialize=initialize, handle_data=handle_data, analyze=None, - exchange_name='poloniex', + exchange_name='bittrex', algo_namespace='simple_loop', - base_currency='btc' + base_currency='eth' ) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 6f9313d2..f93dfd29 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -64,10 +64,13 @@ class ExchangeBundle: if path in self._readers and self._readers[path] is not None: return self._readers[path] - self._readers[path] = BcolzExchangeBarReader( - rootdir=path, - data_frequency=data_frequency - ) + try: + self._readers[path] = BcolzExchangeBarReader( + rootdir=path, + data_frequency=data_frequency + ) + except IOError: + self.get_readers[path] = None return self._readers[path] diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 3aa96b22..25d1a6fd 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -76,16 +76,17 @@ class ExchangeBundleTestCase: pass def 
test_merge_ctables(self): - exchange_name = 'poloniex' + exchange_name = 'bittrex' # Switch between daily and minute for testing # data_frequency = 'daily' - data_frequency = 'minute' + data_frequency = 'daily' exchange = get_exchange(exchange_name) assets = [ exchange.get_asset('eth_btc'), exchange.get_asset('etc_btc'), + exchange.get_asset('wings_eth'), ] start = pd.to_datetime('2017-9-1', utc=True) @@ -102,7 +103,7 @@ class ExchangeBundleTestCase: asset=asset, data_frequency=data_frequency, # period='2017-9', - period='2017-9', + period='2017', # Dont't forget to update if you change your dates start_dt=start, end_dt=end, @@ -116,7 +117,7 @@ class ExchangeBundleTestCase: # In minute mode, the data is there too. This signals that the minute # writer / reader is more powerful. This explains why I did not # encounter these problems as I have been focusing on minute data. - reader = self.get_reader(data_frequency) + reader = exchange_bundle.get_reader(data_frequency) for asset in assets: # Since this pair was loaded last. It should be there in daily mode. 
arrays = reader.load_raw_arrays( @@ -125,8 +126,8 @@ class ExchangeBundleTestCase: start_dt=start, end_dt=end ) - print('found {} rows for {} ingestion'.format( - len(arrays[0]), asset.symbol) + print('found {} rows for {} ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) ) pass From 6486744c66713231b00b6a7317974c87fd513836 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 18 Oct 2017 13:55:39 -0600 Subject: [PATCH 084/110] fix exchange_bundle: period month padded --- catalyst/exchange/exchange_bundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index f93dfd29..561a5b3a 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -405,7 +405,7 @@ class ExchangeBundle: periods = [] dt = sessions[0] while dt <= sessions[-1]: - period = '{}-{}'.format(dt.year, dt.month) \ + period = '{}-{:02d}'.format(dt.year, dt.month) \ if data_frequency == 'minute' else '{}'.format(dt.year) if period not in periods: From b69e78b27dc379ffd4585d28c7453d3f9f5613f2 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 18 Oct 2017 14:10:03 -0600 Subject: [PATCH 085/110] fixes ingestion of 'minute,daily' parameter --- catalyst/__main__.py | 2 +- catalyst/exchange/exchange_bundle.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index d3ca57c8..4c48b7e0 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -454,7 +454,7 @@ def live(ctx, @click.option( '-f', '--data-frequency', - type=click.Choice({'daily', 'minute', 'daily,minute'}), + type=click.Choice({'daily', 'minute', 'daily,minute', 'minute,daily'}), default='daily', show_default=True, help='The data frequency of the desired OHLCV bars.', diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 561a5b3a..1c5f37f5 100644 --- 
a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -516,5 +516,6 @@ class ExchangeBundle: assets = self.get_assets(include_symbols, exclude_symbols) start_dt, end_dt = get_adj_dates(start, end, assets, data_frequency) - self.ingest_assets(assets, start_dt, end_dt, data_frequency, - show_progress) + for frequency in data_frequency.split(','): + self.ingest_assets(assets, start_dt, end_dt, frequency, + show_progress) From b785c10036de9c6110f3f0ca5351ec459483cc73 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 16:41:29 -0400 Subject: [PATCH 086/110] Fixed misc issues with the bundle refactoring --- catalyst/exchange/exchange.py | 9 ++++++- catalyst/exchange/exchange_bcolz.py | 5 +++- catalyst/exchange/exchange_bundle.py | 24 +++++++++++-------- catalyst/exchange/exchange_errors.py | 13 ++++++---- tests/exchange/test_bundle.py | 36 +++++++++++++++++++++++----- 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 8e7bfd5d..0552bcc1 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -16,7 +16,8 @@ from catalyst.exchange.bundle_utils import get_start_dt, \ from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ - InvalidHistoryFrequencyError, MismatchingFrequencyError + InvalidHistoryFrequencyError, MismatchingFrequencyError, \ + BundleNotFoundError from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -526,6 +527,12 @@ class Exchange: ) reader = self.bundle.get_reader(data_frequency) + if reader is None: + raise BundleNotFoundError( + exchange=self.name, + data_frequency=data_frequency + ) + values = reader.load_raw_arrays( 
sids=[asset.sid for asset in assets], fields=[field], diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index a517b4fe..9fccd9a1 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -52,7 +52,10 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): data = [] for field in fields: - out = np.full(shape, np.nan) + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.float64) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index f93dfd29..dc38cc7e 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -2,6 +2,7 @@ import os import shutil from datetime import timedelta +import numpy as np import pandas as pd from logbook import Logger, INFO @@ -14,7 +15,8 @@ from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ - InvalidHistoryFrequencyError, PricingDataBeforeTradingError + InvalidHistoryFrequencyError, PricingDataBeforeTradingError, \ + TempBundleNotFoundError from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -70,7 +72,7 @@ class ExchangeBundle: data_frequency=data_frequency ) except IOError: - self.get_readers[path] = None + self._readers[path] = None return self._readers[path] @@ -300,6 +302,9 @@ class ExchangeBundle: else self.calendar.sessions_in_range(start_dt, end_dt) reader = self.get_reader(data_frequency, path=path) + if reader is None: + raise TempBundleNotFoundError(path=path) + arrays = reader.load_raw_arrays( sids=[asset.sid], fields=['open', 'high', 'low', 'close', 'volume'], 
@@ -310,13 +315,11 @@ class ExchangeBundle: if not arrays: return path - ohlcv = dict( - open=arrays[0].flatten(), - high=arrays[1].flatten(), - low=arrays[2].flatten(), - close=arrays[3].flatten(), - volume=arrays[4].flatten() - ) + ohlcv = dict() + for index, field in enumerate( + ['open', 'high', 'low', 'close', 'volume']): + ohlcv[field] = arrays[index].flatten() + ohlcv[field] = ohlcv[field][~np.isnan(ohlcv[field])] df = pd.DataFrame( data=ohlcv, @@ -400,7 +403,8 @@ class ExchangeBundle: except PricingDataBeforeTradingError: continue - sessions = get_periods_range(asset_start, asset_end, 'daily') + sessions = get_periods_range(asset_start, asset_end, + data_frequency) periods = [] dt = sessions[0] diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 3a3b6747..b121acc6 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -171,10 +171,14 @@ class SymbolNotFoundOnExchange(ZiplineError): class BundleNotFoundError(ZiplineError): - msg = ('Unable to find bundle data for exchange {exchange}. ' - 'Please ingest data using the command ' - '`catalyst ingest -b exchange_{exchange}`. ' - 'See catalyst documentation for details.').strip() + msg = ('Unable to find bundle data for exchange {exchange} and ' + 'data frequency {data_frequency}.' + 'Please ingest some price data.' + 'See `catalyst ingest-exchange --help` for details.').strip() + + +class TempBundleNotFoundError(ZiplineError): + msg = ('Temporary bundle not found in: {path}.').strip() class EmptyValuesInBundleError(ZiplineError): @@ -196,5 +200,6 @@ class PricingDataNotLoadedError(ZiplineError): '`catalyst ingest-exchange -x {exchange} -i {symbol_list}`. 
' 'See catalyst documentation for details.').strip() + class ApiCandlesError(ZiplineError): msg = ('Unable to fetch candles from the remote API: {error}.').strip() diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 25d1a6fd..1439b2d6 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -57,22 +57,46 @@ class ExchangeBundleTestCase: pass def test_ingest_daily(self): - exchange_name = 'bitfinex' + # exchange_name = 'bitfinex' + # data_frequency = 'daily' + # include_symbols = 'neo_btc,bch_btc,eth_btc' - start = pd.to_datetime('2017-01-01', utc=True) - end = pd.to_datetime('2017-09-30', utc=True) + exchange_name = 'poloniex' + data_frequency = 'daily' + include_symbols = 'btc_usdt' - exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) + start = pd.to_datetime('2015-01-01', utc=True) + end = pd.to_datetime('2015-12-31', utc=True) + + exchange = get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( - data_frequency='daily', - include_symbols='neo_btc,bch_btc,eth_btc', + data_frequency=data_frequency, + include_symbols=include_symbols, exclude_symbols=None, start=start, end=end, show_progress=True ) + + symbols = include_symbols.split(',') + assets = [] + for pair_symbol in symbols: + assets.append(exchange.get_asset(pair_symbol)) + + reader = exchange_bundle.get_reader(data_frequency) + for asset in assets: + arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) + ) pass def test_merge_ctables(self): From 339fa21c3592cbe3003a58b5e23e99b91d6b6b7e Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 17:23:33 -0400 Subject: [PATCH 087/110] Fixed an issue with the backtest get_history_window method. 
--- catalyst/examples/simple_loop.py | 4 ++-- catalyst/exchange/data_portal_exchange.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 77415429..84ed01c5 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -7,7 +7,7 @@ from catalyst.api import symbol def initialize(context): print('initializing') - context.asset = symbol('wings_eth') + context.asset = symbol('etc_eth') def handle_data(context, data): @@ -35,7 +35,7 @@ run_algorithm( initialize=initialize, handle_data=handle_data, analyze=None, - exchange_name='bittrex', + exchange_name='poloniex', algo_namespace='simple_loop', base_currency='eth' ) diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index d7b163ac..b1b13dbc 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -24,7 +24,8 @@ from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeBarDataError, PricingDataBeforeTradingError, - PricingDataNotLoadedError, InvalidHistoryFrequencyError) + PricingDataNotLoadedError, InvalidHistoryFrequencyError, + BundleNotFoundError) log = Logger('DataPortalExchange') @@ -297,13 +298,19 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): else: raise InvalidHistoryFrequencyError(frequency=data_frequency) + reader = bundle.get_reader(data_frequency) + if reader is None: + raise BundleNotFoundError( + exchange=exchange.name, + data_frequency=data_frequency + ) + try: - values = bundle.get_raw_arrays( - assets=assets, + values = reader.load_raw_arrays( + sids=[asset.sid for asset in assets], fields=[field], start_dt=dts[0], - end_dt=dts[-1], - data_frequency=data_frequency + end_dt=dts[-1] )[0] except Exception: From 874a4bb682bd63b2bc70c2d2d541bdf5a2e35a45 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 18:33:37 -0400 Subject: 
[PATCH 088/110] Fixed an issue with reader array size --- catalyst/exchange/bundle_utils.py | 2 +- catalyst/exchange/exchange_bcolz.py | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index ebf39d76..725123a2 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -17,7 +17,6 @@ from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path - EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' @@ -198,6 +197,7 @@ def get_ffill_candles(candles, bar_count, end_dt, data_frequency, start_dt = get_start_dt(end_dt, bar_count, data_frequency) date = start_dt + # TODO: this works well with a small number of candles, consider using numpy as needed while date <= end_dt: candle = next(( candle for candle in candles if candle['last_traded'] == date diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index 9fccd9a1..c91a4157 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -3,6 +3,7 @@ import numpy as np from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteBarReader, \ BcolzMinuteBarWriter +from catalyst.exchange.bundle_utils import get_periods, get_periods_range class BcolzExchangeBarWriter(BcolzMinuteBarWriter): @@ -34,6 +35,10 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): super(BcolzExchangeBarReader, self).__init__(*args, **kwargs) + @property + def data_frequency(self): + return self._data_frequency + def load_raw_arrays(self, fields, start_dt, end_dt, sids): if self._data_frequency == 'minute': @@ -47,24 +52,27 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): start_idx = self._find_position_of_minute(start_dt) end_idx = self._find_position_of_minute(end_dt) - 
num_days = (end_idx - start_idx + 1) + periods = get_periods_range(start_dt, end_dt, self.data_frequency) + num_days = len(periods) shape = num_days, len(sids) + if len(fields) == 1 and fields[0] == 'volume': + fields.insert(0, 'close') + + mask = None data = [] for field in fields: - if field != 'volume': - out = np.full(shape, np.nan) - else: - out = np.zeros(shape, dtype=np.float64) + out = np.full(shape, np.nan) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) a = carray[start_idx:end_idx + 1] - where = a != 0 + if mask is None: + mask = a != 0 - out[:len(where), i][where] = ( - a[where] * self._ohlc_ratio_inverse_for_sid(sid) + out[:len(mask), i][mask] = ( + a[mask] * self._ohlc_ratio_inverse_for_sid(sid) ) data.append(out) From 2e46323a9e70cd889e45fc8dfc321e6b06592e9d Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 20:30:24 -0400 Subject: [PATCH 089/110] Fixed an issue with minute bundles --- catalyst/exchange/exchange_bcolz.py | 17 +++++++++++------ catalyst/exchange/exchange_bundle.py | 1 - tests/exchange/test_bundle.py | 6 +++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index c91a4157..e85e0c4c 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -41,12 +41,14 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): def load_raw_arrays(self, fields, start_dt, end_dt, sids): - if self._data_frequency == 'minute': - return super(BcolzExchangeBarReader, self) \ - .load_raw_arrays(fields, start_dt, end_dt, sids) + # if self._data_frequency == 'minute': + # return super(BcolzExchangeBarReader, self) \ + # .load_raw_arrays(fields, start_dt, end_dt, sids) + # + # else: + # return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) - else: - return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) + return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) def 
_load_daily_raw_arrays(self, fields, start_dt, end_dt, sids): start_idx = self._find_position_of_minute(start_dt) @@ -62,7 +64,10 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): mask = None data = [] for field in fields: - out = np.full(shape, np.nan) + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.float64) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index b8006362..c4128696 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -319,7 +319,6 @@ class ExchangeBundle: for index, field in enumerate( ['open', 'high', 'low', 'close', 'volume']): ohlcv[field] = arrays[index].flatten() - ohlcv[field] = ohlcv[field][~np.isnan(ohlcv[field])] df = pd.DataFrame( data=ohlcv, diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 1439b2d6..1372429d 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -17,7 +17,7 @@ log = Logger('test_exchange_bundle') class ExchangeBundleTestCase: def test_ingest_minute(self): - exchange_name = 'poloniex' + exchange_name = 'bitfinex' # start = pd.to_datetime('2017-09-01', utc=True) start = pd.to_datetime('2017-9-1', utc=True) @@ -27,8 +27,8 @@ class ExchangeBundleTestCase: log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( - data_frequency='daily', - include_symbols='etc_btc', + data_frequency='minute', + include_symbols='neo_eth', # include_symbols=None, exclude_symbols=None, start=start, From e5f7c63ebde70ce7a3e91fa5168834b41f1a4add Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 20:43:26 -0400 Subject: [PATCH 090/110] Fixed an issue with minute bundles --- catalyst/exchange/exchange_bcolz.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/catalyst/exchange/exchange_bcolz.py 
b/catalyst/exchange/exchange_bcolz.py index e85e0c4c..cfd323f6 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -54,7 +54,10 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): start_idx = self._find_position_of_minute(start_dt) end_idx = self._find_position_of_minute(end_dt) - periods = get_periods_range(start_dt, end_dt, self.data_frequency) + periods = self.calendar.minutes_in_range(start_dt, end_dt) \ + if self.data_frequency == 'minute' \ + else self.calendar.sessions_in_range(start_dt, end_dt) + num_days = len(periods) shape = num_days, len(sids) From 8a6d0d7ca0b315c5a7e733b5ce556058aaccf4f8 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Wed, 18 Oct 2017 21:25:27 -0600 Subject: [PATCH 091/110] Catch NoData on Exchange + formatting of errors --- catalyst/exchange/bundle_utils.py | 15 +++++++++++---- catalyst/exchange/data_portal_exchange.py | 14 ++++++++------ catalyst/exchange/exchange.py | 14 +++++++------- catalyst/exchange/exchange_errors.py | 9 +++++++-- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 725123a2..0aa05661 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -12,7 +12,7 @@ import pytz from catalyst.data.bundles import from_bundle_ingest_dirname from catalyst.data.bundles.core import download_without_progress from catalyst.exchange.exchange_errors import ApiCandlesError, \ - PricingDataBeforeTradingError + PricingDataBeforeTradingError, NoDataAvailableOnExchange from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path @@ -134,10 +134,17 @@ def get_adj_dates(start, end, assets, data_frequency): if end is None or (last_entry is not None and end > last_entry): end = last_entry - if start >= end: + if end is None: + raise NoDataAvailableOnExchange( + 
exchange=asset.exchange.title(), + symbol=[asset.symbol.encode('utf-8')], + data_frequency=data_frequency, + ) + + if end is None or start >= end: raise PricingDataBeforeTradingError( - symbols=[asset.symbol], - exchange=asset.exchange, + symbols=[asset.symbol.encode('utf-8')], + exchange=asset.exchange.title(), first_trading_day=earliest_trade, dt=end ) diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index b1b13dbc..ce8ab3ab 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -301,7 +301,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): reader = bundle.get_reader(data_frequency) if reader is None: raise BundleNotFoundError( - exchange=exchange.name, + exchange=exchange.name.title(), data_frequency=data_frequency ) @@ -321,9 +321,10 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): raise PricingDataNotLoadedError( field=field, first_trading_day=first_trading_day, - exchange=exchange.name, + exchange=exchange.name.title(), symbols=symbols, - symbol_list=symbol_list + symbol_list=symbol_list, + data_frequency=data_frequency ) series = dict() @@ -340,7 +341,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): if dt < first_trading_day: raise PricingDataBeforeTradingError( first_trading_day=first_trading_day, - exchange=assets[0].exchange, + exchange=assets[0].exchange.title(), symbols=[asset.symbol.encode('utf-8') for asset in assets], dt=dt, ) @@ -365,10 +366,11 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): raise PricingDataNotLoadedError( field=field, first_trading_day=self._get_first_trading_day(assets), - exchange=exchange.name, + exchange=exchange.name.title(), symbols=[asset.symbol.encode('utf-8') for asset in assets], symbol_list=''.join( - [asset.symbol.encode('utf-8') for asset in assets]) + [asset.symbol.encode('utf-8') for asset in assets]), + data_frequency=data_frequency ) return values diff --git 
a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 0552bcc1..d21cace4 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -130,7 +130,7 @@ class Exchange: if not symbol: raise ValueError('Currency %s not supported by exchange %s' % - (asset['symbol'], self.name)) + (asset['symbol'], self.name.title())) return symbol @@ -174,10 +174,10 @@ class Exchange: asset = self.assets[key] if not asset: - supported_symbols = [pair.symbol for pair in self.assets.values()] + supported_symbols = [pair.symbol.encode('utf-8') for pair in self.assets.values()] raise SymbolNotFoundOnExchange( symbol=symbol, - exchange=self.name, + exchange=self.name.title(), supported_symbols=supported_symbols ) @@ -529,7 +529,7 @@ class Exchange: reader = self.bundle.get_reader(data_frequency) if reader is None: raise BundleNotFoundError( - exchange=self.name, + exchange=self.name.title(), data_frequency=data_frequency ) @@ -570,7 +570,7 @@ class Exchange: elif field == 'volume': agg = 'sum' else: - raise ValueError('invalid field') + raise ValueError('Invalid field.') df = df.resample('{}T'.format(candle_size)).agg(agg) @@ -592,7 +592,7 @@ class Exchange: if base_position_available is None: raise BaseCurrencyNotFoundError( base_currency=self.base_currency, - exchange=self.name + exchange=self.name.title() ) portfolio = self._portfolio @@ -682,7 +682,7 @@ class Exchange: style = ExchangeStopOrder(stop_price, exchange=self.name) elif style is not None: - raise InvalidOrderStyle(exchange=self.name, + raise InvalidOrderStyle(exchange=self.name.title(), style=style.__class__.__name__) else: raise ValueError('Incomplete order data.') diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index b121acc6..602582d2 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -4,7 +4,7 @@ from catalyst.errors import ZiplineError def silent_except_hook(exctype, excvalue, exctraceback): if 
exctype in [PricingDataBeforeTradingError, PricingDataNotLoadedError, - SymbolNotFoundOnExchange, ]: + SymbolNotFoundOnExchange, NoDataAvailableOnExchange, ]: fn = traceback.extract_tb(exctraceback)[-1][0] ln = traceback.extract_tb(exctraceback)[-1][1] print "Error traceback: {1} (line {2})\n" \ @@ -197,9 +197,14 @@ class PricingDataNotLoadedError(ZiplineError): 'exchange {exchange} since {first_trading_day} is unavailable. ' 'The bundle data is either out-of-date or has not been loaded yet. ' 'Please ingest data using the command ' - '`catalyst ingest-exchange -x {exchange} -i {symbol_list}`. ' + '`catalyst ingest-exchange -x {exchange} -f {data_frequency} -i {symbol_list}`. ' 'See catalyst documentation for details.').strip() class ApiCandlesError(ZiplineError): msg = ('Unable to fetch candles from the remote API: {error}.').strip() + +class NoDataAvailableOnExchange(ZiplineError): + msg = ('Requested data for trading pair {symbol} is not available on exchange {exchange} ' + 'in `{data_frequency}` frequency at this time. 
' + 'Check `http://enigma.co/catalyst/status` for market coverage.').strip() From b3dcb7a9ad5e5700092168ca3c07c81e7b82b824 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 23:26:24 -0400 Subject: [PATCH 092/110] Fixed issue with overlapping chunks --- catalyst/examples/simple_loop.py | 4 +-- catalyst/exchange/exchange_bundle.py | 43 ++++++++++++++++------------ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 84ed01c5..7081178f 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -7,7 +7,7 @@ from catalyst.api import symbol def initialize(context): print('initializing') - context.asset = symbol('etc_eth') + context.asset = symbol('btc_usdt') def handle_data(context, data): @@ -29,7 +29,7 @@ def handle_data(context, data): run_algorithm( capital_base=250, - start=pd.to_datetime('2017-9-01', utc=True), + start=pd.to_datetime('2015-2-19', utc=True), end=pd.to_datetime('2017-9-30', utc=True), data_frequency='daily', initialize=initialize, diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index c4128696..b6e16ce0 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -265,12 +265,10 @@ class ExchangeBundle: return data - def download_bundle(self, name): - """ - - :param name: - :return: - """ + def get_calendar_periods_range(self, start_dt, end_dt, data_frequency): + return self.calendar.minutes_in_range(start_dt, end_dt) \ + if data_frequency == 'minute' \ + else self.calendar.sessions_in_range(start_dt, end_dt) def ingest_ctable(self, asset, data_frequency, period, start_dt, end_dt, writer, empty_rows_behavior='strip', cleanup=False): @@ -297,10 +295,9 @@ class ExchangeBundle: period=period ) - periods = self.calendar.minutes_in_range(start_dt, end_dt) \ - if data_frequency == 'minute' \ - else self.calendar.sessions_in_range(start_dt, end_dt) - + 
periods = self.get_calendar_periods_range( + start_dt, end_dt, data_frequency + ) reader = self.get_reader(data_frequency, path=path) if reader is None: raise TempBundleNotFoundError(path=path) @@ -384,6 +381,8 @@ class ExchangeBundle: def prepare_chunks(self, assets, data_frequency, start_dt, end_dt): """ + Split a price data request into chunks corresponding to individual + bundles. :param assets: :param data_frequency: @@ -402,17 +401,25 @@ class ExchangeBundle: except PricingDataBeforeTradingError: continue - sessions = get_periods_range(asset_start, asset_end, - data_frequency) + # Aligning start / end dates with the daily calendar + sessions = get_periods_range(start_dt, end_dt, data_frequency) \ + if data_frequency == 'minute' \ + else self.calendar.sessions_in_range(start_dt, end_dt) - periods = [] + if asset_start < sessions[0]: + asset_start = sessions[0] + + if asset_end > sessions[-1]: + asset_end = sessions[-1] + + chunk_labels = [] dt = sessions[0] while dt <= sessions[-1]: - period = '{}-{:02d}'.format(dt.year, dt.month) \ + label = '{}-{:02d}'.format(dt.year, dt.month) \ if data_frequency == 'minute' else '{}'.format(dt.year) - if period not in periods: - periods.append(period) + if label not in chunk_labels: + chunk_labels.append(label) # Adjusting the period dates to match the availability # of the trading pair @@ -451,13 +458,13 @@ class ExchangeBundle: ) if not has_data: - log.debug('adding period: {}'.format(period)) + log.debug('adding period: {}'.format(label)) chunks.append( dict( asset=asset, period_start=period_start, period_end=period_end, - period=period + period=label ) ) From 6097128d5c2f069f205e6a9ded57c74a5a66ccdd Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 18 Oct 2017 23:45:42 -0400 Subject: [PATCH 093/110] Fixed small issue with minute ingestion --- catalyst/exchange/exchange_bundle.py | 6 +++++- tests/exchange/test_bundle.py | 29 ++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git 
a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index b6e16ce0..db4bb613 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -453,8 +453,12 @@ class ExchangeBundle: frequency=data_frequency ) + # Currencies don't always start trading at midnight. + # Checking the last minute of the day instead. + range_start = period_start.replace(hour=23, minute=59) \ + if data_frequency == 'minute' else period_start has_data = range_in_bundle( - asset, period_start, period_end, reader + asset, range_start, period_end, reader ) if not has_data: diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 1372429d..f1a50335 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -17,24 +17,41 @@ log = Logger('test_exchange_bundle') class ExchangeBundleTestCase: def test_ingest_minute(self): + data_frequency = 'minute' exchange_name = 'bitfinex' - # start = pd.to_datetime('2017-09-01', utc=True) - start = pd.to_datetime('2017-9-1', utc=True) - end = pd.to_datetime('2017-9-30', utc=True) + exchange = get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) + assets = [ + exchange.get_asset('neo_eth') + ] - exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) + # start = pd.to_datetime('2017-09-01', utc=True) + start = pd.to_datetime('2017-9-15', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) log.info('ingesting exchange bundle {}'.format(exchange_name)) exchange_bundle.ingest( - data_frequency='minute', - include_symbols='neo_eth', + data_frequency=data_frequency, + include_symbols=','.join([asset.symbol for asset in assets]), # include_symbols=None, exclude_symbols=None, start=start, end=end, show_progress=True ) + + reader = exchange_bundle.get_reader(data_frequency) + for asset in assets: + arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} 
ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) + ) pass def test_ingest_minute_all(self): From 51172759d3b9cdde031fe642deb7b601af3b8082 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 19 Oct 2017 05:19:01 -0400 Subject: [PATCH 094/110] Fixed some issues and optimized data.history() in live mode --- catalyst/examples/simple_loop.py | 20 +- catalyst/exchange/bundle_utils.py | 21 ++- catalyst/exchange/data_portal_exchange.py | 14 ++ catalyst/exchange/exchange.py | 219 +++++++++++----------- catalyst/exchange/exchange_bcolz.py | 10 +- catalyst/exchange/exchange_bundle.py | 94 +--------- 6 files changed, 172 insertions(+), 206 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 7081178f..5b7fcd26 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -27,15 +27,25 @@ def handle_data(context, data): pass +# run_algorithm( +# capital_base=250, +# start=pd.to_datetime('2017-08-01', utc=True), +# end=pd.to_datetime('2017-9-30', utc=True), +# data_frequency='daily', +# initialize=initialize, +# handle_data=handle_data, +# analyze=None, +# exchange_name='poloniex', +# algo_namespace='simple_loop', +# base_currency='eth' +# ) run_algorithm( - capital_base=250, - start=pd.to_datetime('2015-2-19', utc=True), - end=pd.to_datetime('2017-9-30', utc=True), - data_frequency='daily', initialize=initialize, handle_data=handle_data, analyze=None, exchange_name='poloniex', + live=True, algo_namespace='simple_loop', - base_currency='eth' + base_currency='eth', + live_graph=False ) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 0aa05661..d4ace25d 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -135,11 +135,11 @@ def get_adj_dates(start, end, assets, data_frequency): end = last_entry if end is None: - raise NoDataAvailableOnExchange( + raise NoDataAvailableOnExchange( exchange=asset.exchange.title(), 
symbol=[asset.symbol.encode('utf-8')], data_frequency=data_frequency, - ) + ) if end is None or start >= end: raise PricingDataBeforeTradingError( @@ -184,8 +184,21 @@ def get_year_start_end(dt): return year_start, year_end -def get_ffill_candles(candles, bar_count, end_dt, data_frequency, - previous_candle=None): +def get_df_from_arrays(arrays, periods): + ohlcv = dict() + for index, field in enumerate( + ['open', 'high', 'low', 'close', 'volume']): + ohlcv[field] = arrays[index].flatten() + + df = pd.DataFrame( + data=ohlcv, + index=periods + ) + return df + + +def get_df_from_candles(candles, bar_count, end_dt, data_frequency, + previous_candle=None): """ Create candles for each period of the specified range, forward-filling missing candles with the previous value. diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index ce8ab3ab..6d0d3fc6 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -12,6 +12,7 @@ # limitations under the License. 
import abc +from datetime import timedelta from time import sleep import pandas as pd @@ -19,6 +20,7 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import DataPortal +from catalyst.errors import HistoryWindowStartsBeforeData from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, @@ -293,6 +295,18 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): session = self.trading_calendar.minute_to_session_label(end_dt) dts = self._get_days_for_window(session, bar_count) + if len(dts) == 0: + symbols = [asset.symbol for asset in assets] + raise PricingDataNotLoadedError( + field=field, + symbols=symbols, + exchange=exchange.name, + first_trading_day= \ + min([asset.start_date for asset in assets]), + data_frequency=data_frequency, + symbol_list=','.join(symbols) + ) + self.ensure_after_first_day(dts[0], assets) else: diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index d21cace4..083ad1a0 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -1,5 +1,4 @@ import abc -import random import re from abc import ABCMeta, abstractmethod, abstractproperty from datetime import timedelta @@ -12,7 +11,8 @@ from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange.bundle_utils import get_start_dt, \ - get_delta, get_trailing_candles_dt, get_periods, get_adj_dates + get_delta, get_trailing_candles_dt, get_periods, get_adj_dates, \ + get_df_from_candles from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ @@ -100,12 +100,6 @@ class Exchange: delta = now - cpt_date sleep_period = 60 - delta.total_seconds() - - # log.debug( - # 'max requests {} reached, sleeping for {} seconds'.format( - # 
self.max_requests_per_minute, - # sleep_period - # )) sleep(sleep_period) now = pd.Timestamp.utcnow() @@ -174,7 +168,8 @@ class Exchange: asset = self.assets[key] if not asset: - supported_symbols = [pair.symbol.encode('utf-8') for pair in self.assets.values()] + supported_symbols = [pair.symbol.encode('utf-8') for pair in + self.assets.values()] raise SymbolNotFoundOnExchange( symbol=symbol, exchange=self.name.title(), @@ -367,35 +362,76 @@ class Exchange: ) ) - # Don't use a timezone here - dt = pd.Timestamp.utcnow().floor('1 min') ohlc = self.get_candles(data_frequency, asset) if field not in ohlc: raise KeyError('Invalid column: %s' % field) - if self.minute_writer is not None: - df = pd.DataFrame( - [ohlc], - index=pd.DatetimeIndex([dt]), - columns=['open', 'high', 'low', 'close', 'volume'] - ) - - try: - # TODO: use victor's modified branch using int64 - self.minute_writer.write_sid( - sid=asset.sid, - df=df - ) - log.debug('wrote minute data: {}'.format(dt)) - except Exception as e: - log.warn( - 'unable to write minute data: {} {}'.format(dt, e)) - - value = ohlc[field] - log.debug('got spot value: {}'.format(value)) + value = ohlc[field] + log.debug('got spot value: {}'.format(value)) return value + def get_series_from_bundle(self, assets, start_dt, end_dt, data_frequency, + field): + """ + + :return: + """ + reader = self.bundle.get_reader(data_frequency) + + if reader is None: + raise BundleNotFoundError( + exchange=self.name.title(), + data_frequency=data_frequency + ) + + series = dict() + try: + arrays = reader.load_raw_arrays( + sids=[asset.sid for asset in assets], + fields=[field], + start_dt=start_dt, + end_dt=end_dt + ) + + periods = self.bundle.get_calendar_periods_range( + start_dt, end_dt, data_frequency + ) + + for asset_index, asset in enumerate(assets): + asset_values = arrays[asset_index] + + value_series = pd.Series(asset_values[0], index=periods) + series[asset] = value_series + + except Exception as e: + log.debug('unable to retreive 
from bundle: {}'.format(e)) + + return series + + def get_series_from_candles(self, candles, start_dt, end_dt, + field, previous_value=None): + """ + Get a series of field data for the specified candles. + + :param candles: + :param start_dt: + :param end_dt: + :param field: + :param previous_value: + :return: + """ + + dates = [candle['last_traded'] for candle in candles] + values = [candle[field] for candle in candles] + + periods = pd.date_range(start_dt, end_dt) + series = pd.Series(values, index=dates) + + series.reindex(periods, method='ffill', fill_value=previous_value) + + return series + def get_history_window(self, assets, end_dt, @@ -448,11 +484,8 @@ class Exchange: raise InvalidHistoryFrequencyError(frequency) if unit.lower() == 'd': - if data_frequency != 'daily': - raise MismatchingFrequencyError( - frequency=frequency, - data_frequency=data_frequency - ) + if data_frequency == 'minute': + data_frequency = 'daily' elif unit.lower() == 'm': if data_frequency != 'minute': @@ -467,94 +500,70 @@ class Exchange: adj_bar_count = candle_size * bar_count start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) - start_dt, end_dt = get_adj_dates(start_dt, end_dt, assets, - data_frequency) + adj_start_dt, adj_end_dt = get_adj_dates( + start_dt, end_dt, assets, data_frequency + ) missing_assets = self.bundle.filter_existing_assets( assets=assets, - start_dt=start_dt, - end_dt=end_dt, + start_dt=adj_start_dt, + end_dt=adj_end_dt, data_frequency=data_frequency ) if missing_assets: self.bundle.ingest_assets( assets=assets, - start_dt=start_dt, - end_dt=end_dt, + start_dt=adj_start_dt, + end_dt=adj_end_dt, data_frequency=data_frequency ) - # We check again for data which may be too recent for the consolidated - # exchanges service - trailing_assets = self.bundle.filter_existing_assets( + series = self.get_series_from_bundle( assets=assets, - start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency + start_dt=adj_start_dt, + end_dt=adj_end_dt, + 
data_frequency=data_frequency, + field=field ) - if trailing_assets: - # Adding bars too recent to be contained in the consolidated - # exchanges bundles. We go directly against the exchange - # to retrieve the candles. - for asset in trailing_assets: - trailing_candles_dt = get_trailing_candles_dt( - asset=asset, - start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency + + for asset in assets: + if asset not in series or series[asset].index[-1] < end_dt: + # Adding bars too recent to be contained in the consolidated + # exchanges bundles. We go directly against the exchange + # to retrieve the candles. + + trailing_dt = \ + series[asset].index[-1] + get_delta(1, data_frequency) \ + if asset in series else start_dt + + trailing_bar_count = \ + get_periods(trailing_dt, end_dt, data_frequency) + + # The get_history method supports multiple asset + candles = self.get_candles( + data_frequency=data_frequency, + assets=asset, + bar_count=trailing_bar_count, + end_dt=end_dt ) - if trailing_candles_dt is not None: - trailing_bar_count = \ - get_periods(start_dt, end_dt, data_frequency) + last_value = series[asset].iloc(0) if asset in series \ + else np.nan - # The get_history method supports multiple asset - candles = self.get_candles( - data_frequency=data_frequency, - assets=[asset], - bar_count=trailing_bar_count, - end_dt=end_dt - ) + candle_series = self.get_series_from_candles( + candles=candles, + start_dt=trailing_dt, + end_dt=end_dt, + field=field, + previous_value=last_value + ) - # TODO: Do I need the previous_candle? 
- self.bundle.ingest_candles( - candles=candles, - bar_count=trailing_bar_count, - start_dt=start_dt, - end_dt=end_dt, - data_frequency=data_frequency - ) + if asset in series: + series[asset].append(candle_series) - reader = self.bundle.get_reader(data_frequency) - if reader is None: - raise BundleNotFoundError( - exchange=self.name.title(), - data_frequency=data_frequency - ) - - values = reader.load_raw_arrays( - sids=[asset.sid for asset in assets], - fields=[field], - start_dt=start_dt, - end_dt=end_dt - ) - - series = dict() - for asset_index, asset in enumerate(assets): - all_dates = [] - asset_values = [] - - # TODO: use numpy to avoid the loop - date = start_dt - for value in values[0]: - all_dates.append(date) - asset_values.append(value[asset_index]) - - date += get_delta(1, data_frequency) - - value_series = pd.Series(asset_values, index=all_dates) - series[asset] = value_series + else: + series[asset] = candle_series df = pd.DataFrame(series) diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index cfd323f6..523eae28 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -61,12 +61,13 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): num_days = len(periods) shape = num_days, len(sids) - if len(fields) == 1 and fields[0] == 'volume': - fields.insert(0, 'close') + all_fields = fields[:] + if len(all_fields) == 1 and all_fields[0] == 'volume': + all_fields.insert(0, 'close') mask = None data = [] - for field in fields: + for field in all_fields: if field != 'volume': out = np.full(shape, np.nan) else: @@ -83,6 +84,7 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): a[mask] * self._ohlc_ratio_inverse_for_sid(sid) ) - data.append(out) + if field in fields: + data.append(out) return data diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index db4bb613..9fc5b9c9 100644 --- a/catalyst/exchange/exchange_bundle.py +++ 
b/catalyst/exchange/exchange_bundle.py @@ -2,16 +2,15 @@ import os import shutil from datetime import timedelta -import numpy as np import pandas as pd from logbook import Logger, INFO from catalyst import get_calendar from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarMetadata -from catalyst.exchange.bundle_utils import get_ffill_candles, range_in_bundle, \ +from catalyst.exchange.bundle_utils import range_in_bundle, \ get_bcolz_chunk, get_delta, get_adj_dates, get_month_start_end, \ - get_year_start_end, get_periods_range + get_year_start_end, get_periods_range, get_df_from_arrays from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ @@ -192,79 +191,6 @@ class ExchangeBundle: invalid_data_behavior='raise' ) - def ingest_candles(self, candles, bar_count, start_dt, end_dt, - data_frequency, - previous_candle=dict()): - """ - Ingest candles obtained via the get_candles API of an exchange. - - Since exchange APIs generally only do not return candles when there - are no transactions in the period, we ffill values using the - previous candle to ensure that each period has a candle. 
- - :param bar_count: - :param end_dt: - :param data_frequency: - :param asset: - :param writer: - :param previous_candle - :return: - """ - - writer = self.get_writer(start_dt, end_dt, data_frequency) - - num_candles = 0 - data = [] - for asset in candles: - asset_candles = candles[asset] - if not asset_candles: - log.debug( - 'no data: {symbols} on {exchange}, date {end}'.format( - symbols=asset, - exchange=self.exchange.name, - end=end_dt - ) - ) - continue - - previous = previous_candle[asset] \ - if asset in previous_candle else None - - all_dates, all_candles = get_ffill_candles( - candles=asset_candles, - bar_count=bar_count, - end_dt=end_dt, - data_frequency=data_frequency, - previous_candle=previous - ) - previous_candle[asset] = all_candles[-1] - - df = pd.DataFrame( - data=all_candles, - index=all_dates, - columns=['open', 'high', 'low', 'close', 'volume'] - ) - - if not df.empty: - df.sort_index(inplace=True) - - sid = asset.sid - num_candles += len(df.values) - - data.append((sid, df)) - - log.debug( - 'writing {num_candles} candles for {bar_count} bars' - 'ending {end}'.format( - num_candles=num_candles, - bar_count=bar_count, - end=end_dt - ) - ) - self._write(data, writer, data_frequency) - - return data - def get_calendar_periods_range(self, start_dt, end_dt, data_frequency): return self.calendar.minutes_in_range(start_dt, end_dt) \ if data_frequency == 'minute' \ @@ -295,9 +221,6 @@ class ExchangeBundle: period=period ) - periods = self.get_calendar_periods_range( - start_dt, end_dt, data_frequency - ) reader = self.get_reader(data_frequency, path=path) if reader is None: raise TempBundleNotFoundError(path=path) @@ -312,16 +235,12 @@ class ExchangeBundle: if not arrays: return path - ohlcv = dict() - for index, field in enumerate( - ['open', 'high', 'low', 'close', 'volume']): - ohlcv[field] = arrays[index].flatten() - - df = pd.DataFrame( - data=ohlcv, - index=periods + periods = self.get_calendar_periods_range( + start_dt, end_dt, data_frequency 
) + df = get_df_from_arrays(arrays, periods) + if empty_rows_behavior is not 'ignore': nan_rows = df[df.isnull().T.any().T].index @@ -369,7 +288,6 @@ class ExchangeBundle: if not df.empty: df.sort_index(inplace=True) data.append((asset.sid, df)) - self._write(data, writer, data_frequency) if cleanup: From 675957b197f07d844f9a0c588979d04d0c7d3478 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 10:04:17 -0600 Subject: [PATCH 095/110] Open calendar starts on 2015-02-19 --- catalyst/utils/calendars/exchange_calendar_open.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/utils/calendars/exchange_calendar_open.py b/catalyst/utils/calendars/exchange_calendar_open.py index 1dbc8cd3..68c385f5 100644 --- a/catalyst/utils/calendars/exchange_calendar_open.py +++ b/catalyst/utils/calendars/exchange_calendar_open.py @@ -31,4 +31,4 @@ class OpenExchangeCalendar(TradingCalendar): return DateOffset(days=1) def __init__(self, *args, **kwargs): - super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-03-01', tz='UTC'), **kwargs) + super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-02-19', tz='UTC'), **kwargs) From 2502c9a2bb149af2f784fdc76d5988e495735989 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 19 Oct 2017 14:49:32 -0400 Subject: [PATCH 096/110] Minor fixes --- catalyst/examples/simple_loop.py | 6 +++--- catalyst/exchange/exchange.py | 5 ++--- tests/exchange/test_bundle.py | 8 ++++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 5b7fcd26..dc130b3a 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -7,7 +7,7 @@ from catalyst.api import symbol def initialize(context): print('initializing') - context.asset = symbol('btc_usdt') + context.asset = symbol('xrp_btc') def handle_data(context, data): @@ -29,7 +29,7 @@ def handle_data(context, data): # run_algorithm( # 
capital_base=250, -# start=pd.to_datetime('2017-08-01', utc=True), +# start=pd.to_datetime('2015-08-01', utc=True), # end=pd.to_datetime('2017-9-30', utc=True), # data_frequency='daily', # initialize=initialize, @@ -43,7 +43,7 @@ run_algorithm( initialize=initialize, handle_data=handle_data, analyze=None, - exchange_name='poloniex', + exchange_name='bitfinex', live=True, algo_namespace='simple_loop', base_currency='eth', diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index 083ad1a0..b88739da 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -11,8 +11,7 @@ from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS from catalyst.exchange.bundle_utils import get_start_dt, \ - get_delta, get_trailing_candles_dt, get_periods, get_adj_dates, \ - get_df_from_candles + get_delta, get_periods, get_adj_dates from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ @@ -405,7 +404,7 @@ class Exchange: series[asset] = value_series except Exception as e: - log.debug('unable to retreive from bundle: {}'.format(e)) + log.debug('unable to retrieve from bundle: {}'.format(e)) return series diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index f1a50335..f7a3901d 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -78,12 +78,12 @@ class ExchangeBundleTestCase: # data_frequency = 'daily' # include_symbols = 'neo_btc,bch_btc,eth_btc' - exchange_name = 'poloniex' + exchange_name = 'bitfinex' data_frequency = 'daily' - include_symbols = 'btc_usdt' + include_symbols = 'etc_btc' - start = pd.to_datetime('2015-01-01', utc=True) - end = pd.to_datetime('2015-12-31', utc=True) + start = pd.to_datetime('2016-11-01', utc=True) + end = pd.to_datetime('2017-10-16', utc=True) exchange = get_exchange(exchange_name) 
exchange_bundle = ExchangeBundle(exchange) From 6f57660944dadf344ea24728f6304aea04b9afb7 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 12:55:42 -0600 Subject: [PATCH 097/110] WIP: docs build --- docs/CNAME | 2 +- docs/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/CNAME b/docs/CNAME index 604be5df..79f02e82 100644 --- a/docs/CNAME +++ b/docs/CNAME @@ -1 +1 @@ -www.zipline.io +enigma-catalyst.readthedocs.io \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 62d0bd16..0be5eb29 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ import sys import os -from zipline import __version__ as version +from catalyst import __version__ as version # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the From 331a31b25a1c6600822c67fa5bdf14ef480ba836 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 13:18:21 -0600 Subject: [PATCH 098/110] WIP: docs build --- etc/requirements_docs.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/requirements_docs.txt b/etc/requirements_docs.txt index 362d4725..400b3108 100644 --- a/etc/requirements_docs.txt +++ b/etc/requirements_docs.txt @@ -1,3 +1,4 @@ Sphinx>=1.3.2 numpydoc>=0.5.0 sphinx-autobuild==0.6.0 +enigma-catalyst # readthedocs.org From 619eb3cfa48179afcfa0fe03e4eb42f4e9348834 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 19 Oct 2017 15:24:00 -0400 Subject: [PATCH 099/110] Fixed an issue with data.history more recent than the server --- catalyst/examples/buy_low_sell_high_live.py | 156 ++++++++++++++++++++ catalyst/exchange/bundle_utils.py | 10 +- catalyst/exchange/exchange.py | 59 +++++--- catalyst/exchange/exchange_bundle.py | 4 +- 4 files changed, 194 insertions(+), 35 deletions(-) create mode 100644 catalyst/examples/buy_low_sell_high_live.py diff --git 
a/catalyst/examples/buy_low_sell_high_live.py b/catalyst/examples/buy_low_sell_high_live.py new file mode 100644 index 00000000..a6b1ea98 --- /dev/null +++ b/catalyst/examples/buy_low_sell_high_live.py @@ -0,0 +1,156 @@ +import talib +from logbook import Logger + +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'buy_the_dip_live' +log = Logger('buy low sell high') + + +def initialize(context): + log.info('initializing algo') + context.ASSET_NAME = 'XRP_BTC' + context.asset = symbol(context.ASSET_NAME) + + context.TARGET_POSITIONS = 300 + context.PROFIT_TARGET = 0.1 + context.SLIPPAGE_ALLOWED = 0.02 + + context.retry_check_open_orders = 10 + context.retry_update_portfolio = 10 + context.retry_order = 5 + + context.errors = [] + pass + + +def _handle_data(context, data): + prices = data.history( + context.asset, + fields='price', + bar_count=20, + frequency='15m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + log.info('got rsi: {}'.format(rsi)) + + # Buying more when RSI is low, this should lower our cost basis + if rsi <= 30: + buy_increment = 50 + elif rsi <= 40: + buy_increment = 20 + # elif rsi <= 70: + # buy_increment = 5 + else: + buy_increment = None + + cash = context.portfolio.cash + log.info('base currency available: {cash}'.format(cash=cash)) + + price = data.current(context.asset, 'price') + log.info('got price {price}'.format(price=price)) + + record( + price=price, + rsi=rsi, + ) + + orders = get_open_orders(context.asset) + if orders: + log.info('skipping bar until all open orders execute') + return + + is_buy = False + cost_basis = None + if context.asset in context.portfolio.positions: + position = context.portfolio.positions[context.asset] + + cost_basis = position.cost_basis + log.info( + 'found {amount} positions with cost basis {cost_basis}'.format( + 
amount=position.amount, + cost_basis=cost_basis + ) + ) + + if position.amount >= context.TARGET_POSITIONS: + log.info('reached positions target: {}'.format(position.amount)) + return + + if price < cost_basis: + is_buy = True + elif position.amount > 0 and \ + price > cost_basis * (1 + context.PROFIT_TARGET): + profit = (price * position.amount) - (cost_basis * position.amount) + log.info('closing position, taking profit: {}'.format(profit)) + order_target_percent( + asset=context.asset, + target=0, + limit_price=price * (1 - context.SLIPPAGE_ALLOWED), + ) + else: + log.info('no buy or sell opportunity found') + else: + is_buy = True + + if is_buy: + if buy_increment is None: + log.info('the rsi is too high to consider buying {}'.format(rsi)) + return + + if price * buy_increment > cash: + log.info('not enough base currency to consider buying') + return + + log.info( + 'buying position cheaper than cost basis {} < {}'.format( + price, + cost_basis + ) + ) + order( + asset=context.asset, + amount=buy_increment, + limit_price=price * (1 + context.SLIPPAGE_ALLOWED) + ) + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + # try: + _handle_data(context, data) + # except Exception as e: + # log.warn('aborting the bar on error {}'.format(e)) + # context.errors.append(e) + + log.info('completed bar {}, total execution errors {}'.format( + data.current_dt, + len(context.errors) + )) + + if len(context.errors) > 0: + log.info('the errors:\n{}'.format(context.errors)) + + +def analyze(context, stats): + log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass + + +run_algorithm( + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + live=True, + algo_namespace=algo_namespace, + base_currency='btc' +) diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index d4ace25d..536baa28 100644 --- a/catalyst/exchange/bundle_utils.py +++ 
b/catalyst/exchange/bundle_utils.py @@ -134,21 +134,13 @@ def get_adj_dates(start, end, assets, data_frequency): if end is None or (last_entry is not None and end > last_entry): end = last_entry - if end is None: + if end is None or start >= end: raise NoDataAvailableOnExchange( exchange=asset.exchange.title(), symbol=[asset.symbol.encode('utf-8')], data_frequency=data_frequency, ) - if end is None or start >= end: - raise PricingDataBeforeTradingError( - symbols=[asset.symbol.encode('utf-8')], - exchange=asset.exchange.title(), - first_trading_day=earliest_trade, - dt=end - ) - return start, end diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index b88739da..c5884afe 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -16,7 +16,7 @@ from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ InvalidHistoryFrequencyError, MismatchingFrequencyError, \ - BundleNotFoundError + BundleNotFoundError, NoDataAvailableOnExchange from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -487,11 +487,13 @@ class Exchange: data_frequency = 'daily' elif unit.lower() == 'm': - if data_frequency != 'minute': - raise MismatchingFrequencyError( - frequency=frequency, - data_frequency=data_frequency - ) + # if data_frequency != 'minute': + # raise MismatchingFrequencyError( + # frequency=frequency, + # data_frequency=data_frequency + # ) + if data_frequency == 'daily': + data_frequency = 'minute' else: raise InvalidHistoryFrequencyError(frequency) @@ -499,32 +501,41 @@ class Exchange: adj_bar_count = candle_size * bar_count start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) - adj_start_dt, adj_end_dt = get_adj_dates( - start_dt, end_dt, assets, 
data_frequency - ) + try: + adj_start_dt, adj_end_dt = get_adj_dates( + start_dt, end_dt, assets, data_frequency + ) + in_bundle = True - missing_assets = self.bundle.filter_existing_assets( - assets=assets, - start_dt=adj_start_dt, - end_dt=adj_end_dt, - data_frequency=data_frequency - ) + except NoDataAvailableOnExchange: + in_bundle = False - if missing_assets: - self.bundle.ingest_assets( + if in_bundle: + missing_assets = self.bundle.filter_existing_assets( assets=assets, start_dt=adj_start_dt, end_dt=adj_end_dt, data_frequency=data_frequency ) - series = self.get_series_from_bundle( - assets=assets, - start_dt=adj_start_dt, - end_dt=adj_end_dt, - data_frequency=data_frequency, - field=field - ) + if missing_assets: + self.bundle.ingest_assets( + assets=assets, + start_dt=adj_start_dt, + end_dt=adj_end_dt, + data_frequency=data_frequency + ) + + series = self.get_series_from_bundle( + assets=assets, + start_dt=adj_start_dt, + end_dt=adj_end_dt, + data_frequency=data_frequency, + field=field + ) + + else: + series = dict() for asset in assets: if asset not in series or series[asset].index[-1] < end_dt: diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 9fc5b9c9..b8f4276a 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -15,7 +15,7 @@ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ InvalidHistoryFrequencyError, PricingDataBeforeTradingError, \ - TempBundleNotFoundError + TempBundleNotFoundError, NoDataAvailableOnExchange from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -316,7 +316,7 @@ class ExchangeBundle: asset_start, asset_end = \ get_adj_dates(start_dt, end_dt, [asset], data_frequency) - except PricingDataBeforeTradingError: + except 
NoDataAvailableOnExchange: continue # Aligning start / end dates with the daily calendar From 2dd8f5414856e7473b0da14b6a1f4ce7ba60f98a Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 14:35:25 -0600 Subject: [PATCH 100/110] WIP: build docs --- docs/source/conf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0be5eb29..467af2d5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ import sys import os -from catalyst import __version__ as version +#from catalyst import __version__ as version # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -44,7 +44,8 @@ project = u'Zipline' copyright = u'2016, Quantopian Inc.' # The full version, including alpha/beta/rc tags, but excluding the commit hash -release = version.split('+', 1)[0] +#release = version.split('+', 1)[0] +release = '0.3' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
From 09f27e58802202148c900f06e5590c56265f9db2 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 14:45:32 -0600 Subject: [PATCH 101/110] WIP: build docs --- docs/source/conf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 467af2d5..2ea7ca72 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,14 +21,14 @@ extensions = [ extlinks = { - 'issue': ('https://github.com/quantopian/zipline/issues/%s', '#'), - 'commit': ('https://github.com/quantopian/zipline/commit/%s', ''), + 'issue': ('https://github.com/enigmampc/catalyst/issues/%s', '#'), + 'commit': ('https://github.com/enigmampc/catalyst/commit/%s', ''), } # -- Docstrings --------------------------------------------------------------- -extensions += ['numpydoc'] -numpydoc_show_class_members = False +#extensions += ['numpydoc'] +#numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates'] @@ -40,8 +40,8 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'Zipline' -copyright = u'2016, Quantopian Inc.' +project = u'Catalyst' +copyright = u'2017, Enigma MPC' # The full version, including alpha/beta/rc tags, but excluding the commit hash #release = version.split('+', 1)[0] @@ -85,7 +85,7 @@ html_show_sphinx = True html_show_copyright = True # Output file base name for HTML help builder. 
-htmlhelp_basename = 'ziplinedoc' +htmlhelp_basename = 'catalystdoc' intersphinx_mapping = { 'http://docs.python.org/dev': None, From 2c91decc1b875ea6f4127cb8c38b21b094dfd4cf Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 15:31:43 -0600 Subject: [PATCH 102/110] WIP: docs build --- docs/source/conf.py | 2 +- docs/source/install.rst | 16 +++++++++++----- setup.py | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 2ea7ca72..3e90f91c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -41,7 +41,7 @@ master_doc = 'index' # General information about the project. project = u'Catalyst' -copyright = u'2017, Enigma MPC' +copyright = u'2017, Enigma MPC, Inc.' # The full version, including alpha/beta/rc tags, but excluding the commit hash #release = version.split('+', 1)[0] diff --git a/docs/source/install.rst b/docs/source/install.rst index 71e3d21f..b13e8dfd 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -4,12 +4,12 @@ Install Installing with ``pip`` ----------------------- -Installing Zipline via ``pip`` is slightly more involved than the average +Installing Catalyst via ``pip`` is slightly more involved than the average Python package. There are two reasons for the additional complexity: -1. Zipline ships several C extensions that require access to the CPython C API. +1. Catalyst ships several C extensions that require access to the CPython C API. In order to build the C extensions, ``pip`` needs access to the CPython header files for your Python installation. @@ -28,13 +28,19 @@ your particular platform), you should be able to simply run .. code-block:: bash - $ pip install zipline + $ pip install enigma-catalyst -If you use Python for anything other than Zipline, we **strongly** recommend +If you use Python for anything other than Catalyst, we **strongly** recommend that you install in a `virtualenv `_. 
The `Hitchhiker's Guide to Python`_ provides an `excellent tutorial on virtualenv -`_. +`_. Here's a summarized +version: + +.. code-block:: bash + $ virtualenv catalyst-venv + $ source ./catalyst-venv/bin/activate + $ pip install enigma-catalyst GNU/Linux ~~~~~~~~~ diff --git a/setup.py b/setup.py index a3243545..c358948b 100644 --- a/setup.py +++ b/setup.py @@ -304,7 +304,7 @@ setup( if '__pycache__' not in root}, license='Apache 2.0', classifiers=[ - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', 'Programming Language :: Python', From b1a247df6acbfa26bd46c3ff8616496d180a9801 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 18:03:13 -0600 Subject: [PATCH 103/110] gh-pages initial build: Installation (WIP) --- docs/source/conf.py | 2 +- docs/source/install.rst | 25 ++++++++++++++----------- etc/requirements_docs.txt | 1 - 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 3e90f91c..dd0304d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -94,6 +94,6 @@ intersphinx_mapping = { 'pandas': ('http://pandas.pydata.org/pandas-docs/stable/', None), } -doctest_global_setup = "import zipline" +doctest_global_setup = "import catalyst" todo_include_todos = True diff --git a/docs/source/install.rst b/docs/source/install.rst index b13e8dfd..57fb711f 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -38,6 +38,7 @@ Python`_ provides an `excellent tutorial on virtualenv version: .. 
code-block:: bash + $ virtualenv catalyst-venv $ source ./catalyst-venv/bin/activate $ pip install enigma-catalyst @@ -66,15 +67,17 @@ On `Arch Linux`_, you can acquire the additional dependencies via ``pacman``: $ pacman -S lapack gcc gcc-fortran pkg-config -There are also AUR packages available for installing `Python 3.4 -`_ (Arch's default python is now -3.5, but Zipline only currently supports 3.4), and `ta-lib -`_, an optional Zipline dependency. -Python 2 is also installable via: +.. Commenting it out until Catalyst fully supports Python 3.X +.. +.. There are also AUR packages available for installing `Python 3.4 +.. `_ (Arch's default python is now +.. 3.5, but Zipline only currently supports 3.4), and `ta-lib +.. `_, an optional Zipline dependency. +.. Python 2 is also installable via: -.. code-block:: bash +.. - $ pacman -S python2 +.. $ pacman -S python2 OSX ~~~ @@ -104,20 +107,20 @@ For windows, the easiest and best supported way to install zipline is to use Installing with ``conda`` ------------------------- -Another way to install Zipline is via the ``conda`` package manager, which +Another way to install Catalyst is via the ``conda`` package manager, which comes as part of Continuum Analytics' `Anaconda `_ distribution. The primary advantage of using Conda over ``pip`` is that conda natively understands the complex binary dependencies of packages like ``numpy`` and -``scipy``. This means that ``conda`` can install Zipline and its dependencies -without requiring the use of a second tool to acquire Zipline's non-Python +``scipy``. This means that ``conda`` can install Catalyst and its dependencies +without requiring the use of a second tool to acquire Catalyst's non-Python dependencies. For instructions on how to install ``conda``, see the `Conda Installation Documentation `_ -Once conda has been set up you can install Zipline from our ``Quantopian`` +Once conda has been set up you can install Catalyst from our ``Quantopian`` channel: .. 
code-block:: bash diff --git a/etc/requirements_docs.txt b/etc/requirements_docs.txt index 400b3108..362d4725 100644 --- a/etc/requirements_docs.txt +++ b/etc/requirements_docs.txt @@ -1,4 +1,3 @@ Sphinx>=1.3.2 numpydoc>=0.5.0 sphinx-autobuild==0.6.0 -enigma-catalyst # readthedocs.org From 946d24bd7a355f09e0371b716e41e6ce76295b63 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 19 Oct 2017 23:23:37 -0400 Subject: [PATCH 104/110] Refactoring related to auto-ingestion --- catalyst/data/loader.py | 22 ++- catalyst/data/minute_bars.py | 12 +- catalyst/examples/buy_low_sell_high_live.py | 36 ++-- .../buy_low_sell_high_neo_with_interface.py | 2 - catalyst/exchange/bundle_utils.py | 148 +---------------- catalyst/exchange/data_portal_exchange.py | 153 ++++++----------- catalyst/exchange/exchange.py | 82 +--------- catalyst/exchange/exchange_bcolz.py | 4 +- catalyst/exchange/exchange_bundle.py | 154 +++++++++++++++++- .../utils/calendars/exchange_calendar_open.py | 2 +- catalyst/utils/run_algo.py | 12 +- tests/exchange/test_bundle.py | 10 +- 12 files changed, 279 insertions(+), 358 deletions(-) diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index 7211938e..ce293e9b 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -95,7 +95,8 @@ def has_data_for_dates(series_or_df, first_date, last_date): def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol=None, bundle=None, bundle_data=None, - environ=None, exchange=None): + environ=None, exchange=None, start_dt=None, + end_dt=None): if trading_day is None: trading_day = get_calendar('OPEN').trading_day @@ -104,8 +105,11 @@ def load_crypto_market_data(trading_day=None, trading_days=None, # if trading_days is None: # trading_days = get_calendar('OPEN').schedule - first_date = get_calendar('OPEN').first_trading_session - now = pd.Timestamp.utcnow() + if start_dt is None: + start_dt = get_calendar('OPEN').first_trading_session + + if end_dt is None: + end_dt = 
pd.Timestamp.utcnow() # We expect to have benchmark and treasury data that's current up until # **two** full trading days prior to the most recently completed trading @@ -131,7 +135,7 @@ def load_crypto_market_data(trading_day=None, trading_days=None, else: last_date = trading_days[trading_days.get_loc(now, method='ffill') - 2] ''' - last_date = trading_days[trading_days.get_loc(now, method='ffill') - 1] + last_date = trading_days[trading_days.get_loc(end_dt, method='ffill') - 1] if exchange is None: # This is exceptional, since placing the import at the module scope @@ -146,14 +150,14 @@ def load_crypto_market_data(trading_day=None, trading_days=None, br = exchange.get_history_window( assets=[benchmark_asset], end_dt=last_date, - bar_count=pd.Timedelta(last_date - first_date).days, + bar_count=pd.Timedelta(last_date - start_dt).days, frequency='1d', field='close', data_frequency='daily') br.columns = ['close'] br = br.pct_change(1).iloc[1:] - br.loc[first_date]=0 - br=br.sort_index() + br.loc[start_dt] = 0 + br = br.sort_index() # Override first_date for treasury data since we have it for many more years # and is independent of crypto data @@ -162,10 +166,10 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol, first_date_treasury, last_date, - now, + end_dt, environ, ) - benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] + benchmark_returns = br[br.index.slice_indexer(start_dt, last_date)] treasury_curves = tc[ tc.index.slice_indexer(first_date_treasury, last_date)] return benchmark_returns, treasury_curves diff --git a/catalyst/data/minute_bars.py b/catalyst/data/minute_bars.py index bd4d5b39..83dafa1c 100644 --- a/catalyst/data/minute_bars.py +++ b/catalyst/data/minute_bars.py @@ -44,7 +44,6 @@ from catalyst.utils.calendars import get_calendar from catalyst.utils.cli import maybe_show_progress from catalyst.utils.memoize import lazyval - logger = logbook.Logger('MinuteBars') US_EQUITIES_MINUTES_PER_DAY = 390 @@ 
-1125,7 +1124,7 @@ class BcolzMinuteBarReader(MinuteBarReader): else: return np.nan - #if field != 'volume': + # if field != 'volume': value *= self._ohlc_ratio_inverse_for_sid(sid) return value @@ -1206,7 +1205,7 @@ class BcolzMinuteBarReader(MinuteBarReader): minute_dt.value / NANOS_IN_MINUTE, self._minutes_per_day, False, - ) + ) def load_raw_arrays(self, fields, start_dt, end_dt, sids): """ @@ -1262,10 +1261,10 @@ class BcolzMinuteBarReader(MinuteBarReader): where = values != 0 # first slice down to len(where) because we might not have # written data for all the minutes requested - #if field != 'volume': + # if field != 'volume': out[:len(where), i][where] = ( values[where] * self._ohlc_ratio_inverse_for_sid(sid)) - #else: + # else: # out[:len(where), i][where] = values[where] results.append(out) @@ -1353,9 +1352,10 @@ class H5MinuteBarUpdateReader(MinuteBarUpdateReader): path : str The path of the HDF5 file from which to source data. """ + def __init__(self, path): self._panel = pd.read_hdf(path) def read(self, dts, sids): panel = self._panel[sids, dts, :] - return panel.iteritems() \ No newline at end of file + return panel.iteritems() diff --git a/catalyst/examples/buy_low_sell_high_live.py b/catalyst/examples/buy_low_sell_high_live.py index a6b1ea98..6742896f 100644 --- a/catalyst/examples/buy_low_sell_high_live.py +++ b/catalyst/examples/buy_low_sell_high_live.py @@ -1,6 +1,7 @@ import talib from logbook import Logger +import pandas as pd from catalyst.api import ( order, order_target_percent, @@ -17,10 +18,10 @@ log = Logger('buy low sell high') def initialize(context): log.info('initializing algo') - context.ASSET_NAME = 'XRP_BTC' + context.ASSET_NAME = 'etc_btc' context.asset = symbol(context.ASSET_NAME) - context.TARGET_POSITIONS = 300 + context.TARGET_POSITIONS = 3 context.PROFIT_TARGET = 0.1 context.SLIPPAGE_ALLOWED = 0.02 @@ -33,6 +34,9 @@ def initialize(context): def _handle_data(context, data): + price = data.current(context.asset, 'price') + 
log.info('got price {price}'.format(price=price)) + prices = data.history( context.asset, fields='price', @@ -44,20 +48,17 @@ def _handle_data(context, data): # Buying more when RSI is low, this should lower our cost basis if rsi <= 30: - buy_increment = 50 + buy_increment = 1 elif rsi <= 40: - buy_increment = 20 - # elif rsi <= 70: - # buy_increment = 5 + buy_increment = 0.5 + elif rsi <= 70: + buy_increment = 0.2 else: buy_increment = None cash = context.portfolio.cash log.info('base currency available: {cash}'.format(cash=cash)) - price = data.current(context.asset, 'price') - log.info('got price {price}'.format(price=price)) - record( price=price, rsi=rsi, @@ -146,11 +147,22 @@ def analyze(context, stats): run_algorithm( + capital_base=1, initialize=initialize, handle_data=handle_data, analyze=analyze, exchange_name='bitfinex', - live=True, - algo_namespace=algo_namespace, - base_currency='btc' + start=pd.to_datetime('2017-5-01', utc=True), + end=pd.to_datetime('2017-10-01', utc=True), + base_currency='btc', + data_frequency='daily' ) +# run_algorithm( +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='poloniex', +# live=True, +# algo_namespace=algo_namespace, +# base_currency='btc' +# ) diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py index e02b50de..a7630ef2 100644 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -163,8 +163,6 @@ def analyze(context, stats): # Backtest run_algorithm( capital_base=250, - start=pd.to_datetime('2017-10-01', utc=True), - end=pd.to_datetime('2017-10-15', utc=True), data_frequency='minute', initialize=initialize, handle_data=handle_data, diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 536baa28..fe622b52 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py 
@@ -1,18 +1,15 @@ import calendar -import tarfile - -import requests -from datetime import timedelta, datetime, date import os -import pandas as pd -import numpy as np +import tarfile +from datetime import timedelta, datetime, date +import numpy as np +import pandas as pd import pytz from catalyst.data.bundles import from_bundle_ingest_dirname from catalyst.data.bundles.core import download_without_progress -from catalyst.exchange.exchange_errors import ApiCandlesError, \ - PricingDataBeforeTradingError, NoDataAvailableOnExchange +from catalyst.exchange.exchange_errors import NoDataAvailableOnExchange from catalyst.exchange.exchange_utils import get_exchange_bundles_folder from catalyst.utils.deprecate import deprecated from catalyst.utils.paths import data_path @@ -189,60 +186,6 @@ def get_df_from_arrays(arrays, periods): return df -def get_df_from_candles(candles, bar_count, end_dt, data_frequency, - previous_candle=None): - """ - Create candles for each period of the specified range, forward-filling - missing candles with the previous value. 
- - :param candles: - :param bar_count: - :param end_dt: - :param data_frequency: - :param previous_candle: - - :return: - """ - all_dates = [] - all_candles = [] - - start_dt = get_start_dt(end_dt, bar_count, data_frequency) - date = start_dt - - # TODO: this works well with a small number of candles, consider using numpy as needed - while date <= end_dt: - candle = next(( - candle for candle in candles if candle['last_traded'] == date - ), previous_candle) - - if candle is None: - candle = candles[0] - - all_dates.append(date) - all_candles.append(candle) - - previous_candle = candle - - date += get_delta(1, data_frequency) - - return all_dates, all_candles - - -def get_trailing_candles_dt(asset, start_dt, end_dt, data_frequency): - missing_start = None - - if asset.end_minute is not None and start_dt < asset.end_minute: - if asset.end_minute < end_dt: - delta = get_delta(1, data_frequency) - - missing_start = asset.end_minute + delta - - else: - missing_start = start_dt - - return missing_start - - def range_in_bundle(asset, start_dt, end_dt, reader): """ Evaluate whether price data of an asset is included has been ingested in @@ -278,6 +221,7 @@ def range_in_bundle(asset, start_dt, end_dt, reader): return has_data +@deprecated def find_most_recent_time(bundle_name): """ Find most recent "time folder" for a given bundle. @@ -308,83 +252,3 @@ def find_most_recent_time(bundle_name): else: return None - -@deprecated -def get_history(exchange_name, data_frequency, symbol, start=None, end=None): - """ - History API provides OHLCV data for any of the supported exchanges up to yesterday. - - :param exchange_name: string - Required: The name identifier of the exchange (e.g. bitfinex, bittrex, poloniex). - :param data_frequency: string - Required: The bar frequency (minute or daily) - :param symbol: string - Required: The trading pair symbol, using Catalyst naming convention - :param start: datetime - Optional: The start date. 
- :param end: datetime - Optional: The end date. - - :return ohlcv: list[dict[string, float]] - Each row contains the following dictionary for the resulting bars: - 'ts' : int, the timestamp in seconds - 'open' : float - 'high' : float - 'low' : float - 'close' : float - 'volume' : float - - Notes - ===== - Using seconds for the start and end dates for ease of use in the - function query parameters. - - Sometimes, one minute goes by without completing a trade of the given - trading pair on the given exchange. To minimize the payload size, we - don't return identical sequential bars. Post-processing code will - forward fill missing bars outside of this function. - """ - - start_seconds = get_seconds_from_date(start) if start else None - end_seconds = get_seconds_from_date(end) if end else None - - if exchange_name not in EXCHANGE_NAMES: - raise ValueError( - 'get_history function only supports the following exchanges: {}'.format( - list(EXCHANGE_NAMES))) - - if data_frequency != 'daily' and data_frequency != 'minute': - raise ValueError( - 'get_history currently only supports daily and minute data.' 
- ) - - url = '{api_url}/candles?exchange={exchange}&market={symbol}&freq={data_frequency}'.format( - api_url=API_URL, - exchange=exchange_name, - symbol=symbol, - data_frequency=data_frequency, - ) - - if start_seconds: - url += '&start={}'.format(start_seconds) - - if end_seconds: - url += '&end={}'.format(end_seconds) - - try: - response = requests.get(url) - except Exception as e: - raise ValueError(e) - - data = response.json() - - if 'error' in data: - raise ApiCandlesError(error=data['error']) - - for candle in data: - last_traded = pd.Timestamp.utcfromtimestamp(candle['ts']) - last_traded = last_traded.replace(tzinfo=pytz.UTC) - - candle['last_traded'] = last_traded - - return data diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 6d0d3fc6..468fb8a1 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -12,15 +12,15 @@ # limitations under the License. import abc -from datetime import timedelta from time import sleep +import numpy as np import pandas as pd from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import DataPortal -from catalyst.errors import HistoryWindowStartsBeforeData +from catalyst.exchange.bundle_utils import get_start_dt from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, @@ -153,6 +153,10 @@ class DataPortalExchangeBase(DataPortal): exchange = self.exchanges[assets.exchange] spot_values = self.get_exchange_spot_value( exchange, [assets], field, dt, data_frequency) + + if not spot_values: + return np.nan + return spot_values[0] else: @@ -282,109 +286,60 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field, data_frequency, ffill=True): + """ + Fetching price history window from the exchange bundle. + + Using a try... except approach to minimize reads most of the time, + when the data exists. 
+ + :param exchange: + :param assets: + :param end_dt: + :param bar_count: + :param frequency: + :param field: + :param data_frequency: + :param ffill: + :return: + """ + bundle = self.exchange_bundles[exchange.name] - - if data_frequency == 'minute': - dts = self.trading_calendar.minutes_window( - end_dt, -bar_count - ) - - self.ensure_after_first_day(dts[0], assets) - - elif data_frequency == 'daily': - session = self.trading_calendar.minute_to_session_label(end_dt) - dts = self._get_days_for_window(session, bar_count) - - if len(dts) == 0: - symbols = [asset.symbol for asset in assets] - raise PricingDataNotLoadedError( - field=field, - symbols=symbols, - exchange=exchange.name, - first_trading_day= \ - min([asset.start_date for asset in assets]), - data_frequency=data_frequency, - symbol_list=','.join(symbols) - ) - - self.ensure_after_first_day(dts[0], assets) - - else: - raise InvalidHistoryFrequencyError(frequency=data_frequency) - - reader = bundle.get_reader(data_frequency) - if reader is None: - raise BundleNotFoundError( - exchange=exchange.name.title(), - data_frequency=data_frequency - ) - - try: - values = reader.load_raw_arrays( - sids=[asset.sid for asset in assets], - fields=[field], - start_dt=dts[0], - end_dt=dts[-1] - )[0] - - except Exception: - first_trading_day = self._get_first_trading_day(assets) - symbols = [asset.symbol.encode('utf-8') for asset in assets] - - symbol_list = ','.join(symbols) - raise PricingDataNotLoadedError( - field=field, - first_trading_day=first_trading_day, - exchange=exchange.name.title(), - symbols=symbols, - symbol_list=symbol_list, - data_frequency=data_frequency - ) - - series = dict() - for index, asset in enumerate(assets): - asset_values = values[:, index] - - value_series = pd.Series(asset_values, index=dts) - series[asset] = value_series - + series = bundle.get_history_window_series_and_load( + assets=assets, + end_dt=end_dt, + bar_count=bar_count, + field=field, + data_frequency=data_frequency + ) return 
pd.DataFrame(series) - def ensure_after_first_day(self, dt, assets): - first_trading_day = self._get_first_trading_day(assets) - if dt < first_trading_day: - raise PricingDataBeforeTradingError( - first_trading_day=first_trading_day, - exchange=assets[0].exchange.title(), - symbols=[asset.symbol.encode('utf-8') for asset in assets], - dt=dt, - ) - def get_exchange_spot_value(self, exchange, assets, field, dt, data_frequency): bundle = self.exchange_bundles[exchange.name] - reader = bundle.get_reader(data_frequency) - self.ensure_after_first_day(dt, assets) + if data_frequency == 'daily': + dt = dt.floor('1D') + else: + dt = dt.floor('1 min') - values = [] - for asset in assets: - try: - value = reader.get_value( - sid=asset.sid, - dt=dt, - field=field + try: + return bundle.get_spot_values(assets, field, dt, data_frequency) + + except PricingDataNotLoadedError: + log.info( + 'pricing data for {symbol} not found on {dt}' + ', updating the bundles.'.format( + symbol=[asset.symbol for asset in assets], + dt=dt ) - values.append(value) - except Exception: - raise PricingDataNotLoadedError( - field=field, - first_trading_day=self._get_first_trading_day(assets), - exchange=exchange.name.title(), - symbols=[asset.symbol.encode('utf-8') for asset in assets], - symbol_list=''.join( - [asset.symbol.encode('utf-8') for asset in assets]), - data_frequency=data_frequency - ) - - return values + ) + bundle.ingest_assets( + assets=assets, + start_dt=self._first_trading_day, + end_dt=self._last_available_session, + data_frequency=data_frequency, + show_progress=True + ) + return bundle.get_spot_values( + assets, field, dt, data_frequency, True + ) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index c5884afe..f8c902a8 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -16,7 +16,7 @@ from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ 
InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ InvalidHistoryFrequencyError, MismatchingFrequencyError, \ - BundleNotFoundError, NoDataAvailableOnExchange + BundleNotFoundError, NoDataAvailableOnExchange, PricingDataNotLoadedError from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -370,44 +370,6 @@ class Exchange: return value - def get_series_from_bundle(self, assets, start_dt, end_dt, data_frequency, - field): - """ - - :return: - """ - reader = self.bundle.get_reader(data_frequency) - - if reader is None: - raise BundleNotFoundError( - exchange=self.name.title(), - data_frequency=data_frequency - ) - - series = dict() - try: - arrays = reader.load_raw_arrays( - sids=[asset.sid for asset in assets], - fields=[field], - start_dt=start_dt, - end_dt=end_dt - ) - - periods = self.bundle.get_calendar_periods_range( - start_dt, end_dt, data_frequency - ) - - for asset_index, asset in enumerate(assets): - asset_values = arrays[asset_index] - - value_series = pd.Series(asset_values[0], index=periods) - series[asset] = value_series - - except Exception as e: - log.debug('unable to retrieve from bundle: {}'.format(e)) - - return series - def get_series_from_candles(self, candles, start_dt, end_dt, field, previous_value=None): """ @@ -487,11 +449,6 @@ class Exchange: data_frequency = 'daily' elif unit.lower() == 'm': - # if data_frequency != 'minute': - # raise MismatchingFrequencyError( - # frequency=frequency, - # data_frequency=data_frequency - # ) if data_frequency == 'daily': data_frequency = 'minute' @@ -499,42 +456,15 @@ class Exchange: raise InvalidHistoryFrequencyError(frequency) adj_bar_count = candle_size * bar_count - start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) - try: - adj_start_dt, adj_end_dt = get_adj_dates( - start_dt, end_dt, assets, data_frequency - ) - in_bundle = True - - except 
NoDataAvailableOnExchange: - in_bundle = False - - if in_bundle: - missing_assets = self.bundle.filter_existing_assets( + series = self.bundle.get_history_window_series_and_load( assets=assets, - start_dt=adj_start_dt, - end_dt=adj_end_dt, + end_dt=end_dt, + bar_count=adj_bar_count, + field=field, data_frequency=data_frequency ) - - if missing_assets: - self.bundle.ingest_assets( - assets=assets, - start_dt=adj_start_dt, - end_dt=adj_end_dt, - data_frequency=data_frequency - ) - - series = self.get_series_from_bundle( - assets=assets, - start_dt=adj_start_dt, - end_dt=adj_end_dt, - data_frequency=data_frequency, - field=field - ) - - else: + except PricingDataNotLoadedError: series = dict() for asset in assets: diff --git a/catalyst/exchange/exchange_bcolz.py b/catalyst/exchange/exchange_bcolz.py index 523eae28..0d19d04f 100644 --- a/catalyst/exchange/exchange_bcolz.py +++ b/catalyst/exchange/exchange_bcolz.py @@ -48,9 +48,9 @@ class BcolzExchangeBarReader(BcolzMinuteBarReader): # else: # return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) - return self._load_daily_raw_arrays(fields, start_dt, end_dt, sids) + return self._load_raw_arrays(fields, start_dt, end_dt, sids) - def _load_daily_raw_arrays(self, fields, start_dt, end_dt, sids): + def _load_raw_arrays(self, fields, start_dt, end_dt, sids): start_idx = self._find_position_of_minute(start_dt) end_idx = self._find_position_of_minute(end_dt) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index b8f4276a..ad014b8d 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -10,12 +10,13 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ BcolzMinuteBarMetadata from catalyst.exchange.bundle_utils import range_in_bundle, \ get_bcolz_chunk, get_delta, get_adj_dates, get_month_start_end, \ - get_year_start_end, get_periods_range, get_df_from_arrays + get_year_start_end, get_periods_range, get_df_from_arrays, 
get_start_dt from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ InvalidHistoryFrequencyError, PricingDataBeforeTradingError, \ - TempBundleNotFoundError, NoDataAvailableOnExchange + TempBundleNotFoundError, NoDataAvailableOnExchange, \ + PricingDataNotLoadedError from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -451,3 +452,152 @@ class ExchangeBundle: for frequency in data_frequency.split(','): self.ingest_assets(assets, start_dt, end_dt, frequency, show_progress) + + def get_history_window_series_and_load(self, + assets, + end_dt, + bar_count, + field, + data_frequency): + try: + series = self.get_history_window_series( + assets=assets, + end_dt=end_dt, + bar_count=bar_count, + field=field, + data_frequency=data_frequency + ) + return pd.DataFrame(series) + + except PricingDataNotLoadedError: + start_dt = get_start_dt(end_dt, bar_count, data_frequency) + log.info( + 'pricing data for {symbol} not found in range ' + '{start} to {end}, updating the bundles.'.format( + symbol=[asset.symbol for asset in assets], + start=start_dt, + end=end_dt + ) + ) + self.ingest_assets( + assets=assets, + start_dt=start_dt, + end_dt=end_dt, + data_frequency=data_frequency, + show_progress=True + ) + series = self.get_history_window_series( + assets=assets, + end_dt=end_dt, + bar_count=bar_count, + field=field, + data_frequency=data_frequency, + reset_reader=True + ) + return series + + def get_spot_values(self, assets, field, dt, data_frequency, + reset_reader=False): + values = [] + try: + reader = self.get_reader(data_frequency) + if reset_reader: + del self._readers[reader._rootdir] + reader = self.get_reader(data_frequency) + + for asset in assets: + value = reader.get_value( + sid=asset.sid, + dt=dt, + field=field + ) + values.append(value) + + 
return values + + except Exception: + symbols = [asset.symbol.encode('utf-8') for asset in assets] + raise PricingDataNotLoadedError( + field=field, + first_trading_day=min([asset.start_date for asset in assets]), + exchange=self.exchange.name, + symbols=symbols, + symbol_list=','.join(symbols), + data_frequency=data_frequency + ) + + def get_history_window_series(self, + assets, + end_dt, + bar_count, + field, + data_frequency, + reset_reader=False): + start_dt = get_start_dt(end_dt, bar_count, data_frequency) + start_dt, end_dt = \ + get_adj_dates(start_dt, end_dt, assets, data_frequency) + + reader = self.get_reader(data_frequency) + if reset_reader: + del self._readers[reader._rootdir] + reader = self.get_reader(data_frequency) + + if reader is None: + symbols = [asset.symbol.encode('utf-8') for asset in assets] + raise PricingDataNotLoadedError( + field=field, + first_trading_day=min([asset.start_date for asset in assets]), + exchange=self.exchange.name, + symbols=symbols, + symbol_list=','.join(symbols), + data_frequency=data_frequency + ) + + for asset in assets: + asset_start_dt, asset_end_dt = \ + get_adj_dates(start_dt, end_dt, assets, data_frequency) + + in_bundle = range_in_bundle( + asset, asset_start_dt, asset_end_dt, reader + ) + if not in_bundle: + raise PricingDataNotLoadedError( + field=field, + first_trading_day=asset.start_date, + exchange=self.exchange.name, + symbols=asset.symbol, + symbol_list=asset.symbol, + data_frequency=data_frequency + ) + + series = dict() + try: + arrays = reader.load_raw_arrays( + sids=[asset.sid for asset in assets], + fields=[field], + start_dt=start_dt, + end_dt=end_dt + ) + + except Exception: + symbols = [asset.symbol.encode('utf-8') for asset in assets] + raise PricingDataNotLoadedError( + field=field, + first_trading_day=min([asset.start_date for asset in assets]), + exchange=self.exchange.name, + symbols=symbols, + symbol_list=','.join(symbols), + data_frequency=data_frequency + ) + + periods = 
self.get_calendar_periods_range( + start_dt, end_dt, data_frequency + ) + + for asset_index, asset in enumerate(assets): + asset_values = arrays[asset_index] + + value_series = pd.Series(asset_values[0], index=periods) + series[asset] = value_series + + return series diff --git a/catalyst/utils/calendars/exchange_calendar_open.py b/catalyst/utils/calendars/exchange_calendar_open.py index 68c385f5..54be460f 100644 --- a/catalyst/utils/calendars/exchange_calendar_open.py +++ b/catalyst/utils/calendars/exchange_calendar_open.py @@ -31,4 +31,4 @@ class OpenExchangeCalendar(TradingCalendar): return DateOffset(days=1) def __init__(self, *args, **kwargs): - super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-02-19', tz='UTC'), **kwargs) + super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-3-1', tz='UTC'), **kwargs) diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index bf149642..3a318748 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -191,7 +191,12 @@ def _run(handle_data, open_calendar = get_calendar('OPEN') env = TradingEnvironment( - load=partial(load_crypto_market_data, environ=environ), + load=partial( + load_crypto_market_data, + environ=environ, + start_dt=start, + end_dt=end + ), environ=environ, exchange_tz='UTC', asset_db_path=None # We don't need an asset db, we have exchanges @@ -263,7 +268,7 @@ def _run(handle_data, ) # TODO: use the constructor instead - sim_params._arena = 'live' + # sim_params._arena = 'live' algorithm_class = partial( ExchangeTradingAlgorithmLive, @@ -284,7 +289,8 @@ def _run(handle_data, exchanges=exchanges, asset_finder=None, trading_calendar=open_calendar, - first_trading_day=None, + first_trading_day=start, + last_available_session=end ) sim_params = create_simulation_parameters( diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index f7a3901d..301f71bb 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ 
-3,7 +3,8 @@ from logging import Logger import pandas as pd from catalyst import get_calendar -from catalyst.exchange.bundle_utils import get_bcolz_chunk +from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods, \ + get_periods_range from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_bundle import ExchangeBundle, \ @@ -78,12 +79,13 @@ class ExchangeBundleTestCase: # data_frequency = 'daily' # include_symbols = 'neo_btc,bch_btc,eth_btc' - exchange_name = 'bitfinex' + exchange_name = 'poloniex' data_frequency = 'daily' - include_symbols = 'etc_btc' + include_symbols = 'btc_usdt' - start = pd.to_datetime('2016-11-01', utc=True) + start = pd.to_datetime('2016-1-1', utc=True) end = pd.to_datetime('2017-10-16', utc=True) + periods = get_periods_range(start, end, data_frequency) exchange = get_exchange(exchange_name) exchange_bundle = ExchangeBundle(exchange) From cb6432c395fc7d7b51346d3416f97b0fb536d6f6 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 23:32:55 -0600 Subject: [PATCH 105/110] docs: Catalyst Install --- docs/source/index.rst | 12 +-- docs/source/install.rst | 230 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 226 insertions(+), 16 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index ee713eb5..d61bce1a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -4,9 +4,9 @@ :maxdepth: 1 install - beginner-tutorial - bundles - development-guidelines - appendix - release-process - releases +.. beginner-tutorial +.. bundles +.. development-guidelines +.. appendix +.. release-process +.. 
releases diff --git a/docs/source/install.rst b/docs/source/install.rst index 57fb711f..028f9295 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -13,7 +13,7 @@ There are two reasons for the additional complexity: In order to build the C extensions, ``pip`` needs access to the CPython header files for your Python installation. -2. Zipline depends on `numpy `_, the core library for +2. Catalyst depends on `numpy `_, the core library for numerical array computing in Python. Numpy depends on having the `LAPACK `_ linear algebra routines available. @@ -41,7 +41,15 @@ version: $ virtualenv catalyst-venv $ source ./catalyst-venv/bin/activate - $ pip install enigma-catalyst + $ pip install enigma-catalyst + +Though not required by Catalyst directly, our example algorithms use matplotlib +to visually display the results of the trading algorithms. If you wish to run +any examples or use matplotlib during development, it can be installed using: + +.. code-block:: bash + + $ pip install matplotlib GNU/Linux ~~~~~~~~~ @@ -71,8 +79,8 @@ On `Arch Linux`_, you can acquire the additional dependencies via ``pacman``: .. .. There are also AUR packages available for installing `Python 3.4 .. `_ (Arch's default python is now -.. 3.5, but Zipline only currently supports 3.4), and `ta-lib -.. `_, an optional Zipline dependency. +.. 3.5, but Catalyst only currently supports 3.4), and `ta-lib +.. `_, an optional Catalyst dependency. .. Python 2 is also installable via: .. @@ -96,12 +104,132 @@ following brew packages: $ brew install freetype pkg-config gcc openssl +OSX + virtualenv + matplotlib +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A note about using matplotlib in virtual environments on OSX: it may be necessary to run + +.. code-block:: bash + + echo "backend: TkAgg" > ~/.matplotlib/matplotlibrc + +in order to override the default ``macosx`` backend for your system, which may not +be accessible from inside the virtual environment.
This will allow Catalyst to open +matplotlib charts from within a virtual environment, which is useful for displaying +the performance of your backtests. To learn more about matplotlib backends, please refer to the +`matplotlib backend documentation `_. + + Windows ~~~~~~~ -For windows, the easiest and best supported way to install zipline is to use +On Windows, you will need the `Microsoft Visual C++ Compiler for Python 2.7 +`_. This package +contains the compiler and the set of system headers necessary for producing +binary wheels for Python 2.7 packages. If it's not already on your system, download +it and install it before proceeding to the next step. + +For Windows, the easiest and best supported way to install Catalyst is to use :ref:`Conda `. +Amazon Linux AMI +~~~~~~~~~~~~~~~~ + +The packages ``pip`` and ``setuptools`` that come shipped by default are very outdated. +Thus, you first need to run: + +.. code-block:: bash + + pip install --upgrade pip setuptools + +The default installation is also missing the C and C++ compilers, which you install by: + +.. code-block:: bash + + sudo yum install gcc gcc-c++ + +Then you should follow the regular installation instructions outlined at the beginning +of this page. + + +Troubleshooting ``pip`` Install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Issue**: + Package enigma-catalyst cannot be found + +**Solution**: + Make sure you have the most up-to-date version of pip installed, by running: + + .. code-block:: bash + + pip install --upgrade pip + + On Windows, the recommended command is: + + .. code-block:: bash + + python -m pip install --upgrade pip + +---- + +**Issue**: + Package enigma-catalyst still cannot be found, even after upgrading pip (see above), with an error similar to: + + ..
code-block:: bash + + Downloading/unpacking enigma-catalyst + Could not find a version that satisfies the requirement enigma-catalyst (from versions: 0.1.dev9, 0.2.dev2, 0.1.dev4, 0.1.dev5, 0.1.dev3, 0.2.dev1, 0.1.dev8, 0.1.dev6) + Cleaning up... + No distributions matching the version for enigma-catalyst + +**Solution**: + In some systems (this error has been reported in Ubuntu), pip is configured to only find stable versions by default. Since Catalyst is in alpha version, pip cannot find a matching version that satisfies the installation requirements. The solution is to include the `--pre` flag to include pre-release and development versions: + + .. code-block:: bash + + pip install --pre enigma-catalyst + +---- + +**Issue**: + Package enigma-catalyst fails to install because of outdated setuptools + +**Solution**: + Upgrade to the most up-to-date setuptools package by running: + + .. code-block:: bash + + pip install --upgrade pip setuptools + +---- + +**Issue**: + Missing required packages + +**Solution**: + Download `requirements.txt + `_ + (click on the *Raw* button and Right click -> Save As...) and use it to + install all the required dependencies by running: + + .. code-block:: bash + + pip install -r requirements.txt + +---- + +**Issue**: + Installation fails with error: ``fatal error: Python.h: No such file or directory`` + +**Solution**: + Some systems (this issue has been reported in Ubuntu) require `python-dev` for the proper build and installation of package dependencies. The solution is to install python-dev, which is independent of the virtual environment. In Ubuntu, you would need to run: + + .. code-block:: bash + + sudo apt-get install python-dev + + .. _conda: Installing with ``conda`` @@ -118,14 +246,96 @@ without requiring the use of a second tool to acquire Catalyst's non-Python dependencies. For instructions on how to install ``conda``, see the `Conda Installation -Documentation `_ +Documentation `_. 
Alternatively, you +can install MiniConda, which is a smaller footprint (fewer packages and smaller +size) than its big brother Anaconda, but it still contains all the main packages +needed. To install MiniConda, you can follow these steps: -Once conda has been set up you can install Catalyst from our ``Quantopian`` -channel: +1. Download `MiniConda `_. Select Python 2.7 for + your Operating System. +2. Install MiniConda. See the `Installation Instructions `_ + if you need help. +3. Ensure the correct installation by running ``conda list`` in a Terminal window, + which should print the list of packages installed with Conda. -.. code-block:: bash +Once either Conda or MiniConda has been set up you can install Catalyst: + +1. Download the file `python2.7-environment.yml `_. +2. Open a Terminal window and enter [``cd/dir``] into the directory where you saved + the above ``python2.7-environment.yml`` file. +3. Install using this file. This step can take about 5-10 minutes to install. + + .. code-block:: bash + + conda env create -f python2.7-environment.yml + +4. Activate the environment (which you need to do every time you start a new session + to run Catalyst): + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +Congratulations! You now have Catalyst installed. + +Troubleshooting ``conda`` Install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the command ``conda env create -f python2.7-environment.yml`` in step 3 above failed +for any reason, you can try setting up the environment manually with the following steps: + +1. Create the environment: + + .. code-block:: bash + + conda create --name catalyst python=2.7 scipy + +2. Activate the environment: + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +3. Install the Catalyst inside the environment: + + .. 
code-block:: bash + + pip install enigma-catalyst matplotlib + +Getting Help +------------ + +If after following the instructions above, and going through the *Troubleshooting* sections, +you still experience problems installing Catalyst, you can seek additional help through the +following channels: + +- Join our `Discord community `_, and head over to the #catalyst_dev + channel where many other users (as well as the project developers) hang out, and can assist + you with your particular issue. The more descriptive you are and the more information you can provide, + the easier it will be for others to help you out. + +- Report the problem you are experiencing on our + `GitHub repository `_ following the guidelines + provided therein. Before you do so, take a moment to browse through all `previously reported issues + `_ in the likely case + that someone else experienced that same issue before, and you get a hint on how to solve it. - conda install -c Quantopian zipline .. _`Debian-derived`: https://www.debian.org/misc/children-distros .. _`RHEL-derived`: https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives From f56abcfc3e8bab154cea7e376d73ecf4699c0729 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Thu, 19 Oct 2017 23:54:02 -0600 Subject: [PATCH 106/110] DOC: welcome page --- docs/source/index.rst | 2 +- docs/source/welcome.rst | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 docs/source/welcome.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index d61bce1a..8fc3387a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,4 +1,4 @@ -.. include:: ../../README.rst +.. include:: welcome.rst .. toctree:: :maxdepth: 1 diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst new file mode 100644 index 00000000..e7f3e92d --- /dev/null +++ b/docs/source/welcome.rst @@ -0,0 +1,7 @@ +.. image:: https://www.enigma.co/images/enigma-catalyst.png +| +| +Catalyst is a data-driven crypto investment platform.
It supports both +backtesting and live-trading in a number of different crypto-exchanges. +Catalyst empowers users to share and curate data and build profitable, +data-driven investment strategies. \ No newline at end of file From 3804af381340121cbf212e365e4568ea600e19a4 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 20 Oct 2017 00:13:23 -0600 Subject: [PATCH 107/110] DOC: welcome page w/ logo --- docs/source/welcome.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst index e7f3e92d..4a711af1 100644 --- a/docs/source/welcome.rst +++ b/docs/source/welcome.rst @@ -1,7 +1,9 @@ -.. image:: https://www.enigma.co/images/enigma-catalyst.png -| +.. image:: https://s3.amazonaws.com/enigmaco-docs/enigma-catalyst.jpg | Catalyst is a data-driven crypto investment platform. It supports both backtesting and live-trading in a number of different crypto-exchanges. Catalyst empowers users to share and curate data and build profitable, -data-driven investment strategies. \ No newline at end of file +data-driven investment strategies. + +Features +======== \ No newline at end of file From 653f4c2a5a4a9ed6531ed3fc1c7e7ae9f73e710e Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 20 Oct 2017 08:27:36 -0600 Subject: [PATCH 108/110] DOC: Features --- docs/source/index.rst | 4 ++++ docs/source/welcome.rst | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 8fc3387a..d3ffbfec 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,4 +1,8 @@ .. include:: welcome.rst +| +| +Table of Contents +----------------- .. 
toctree:: :maxdepth: 1 diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst index 4a711af1..5cfa0473 100644 --- a/docs/source/welcome.rst +++ b/docs/source/welcome.rst @@ -6,4 +6,17 @@ Catalyst empowers users to share and curate data and build profitable, data-driven investment strategies. Features -======== \ No newline at end of file +======== + +- Ease of use: Catalyst tries to get out of your way so that you can + focus on algorithm development. See examples provided. +- Support for several of the top crypto-exchanges by trading volume. +- Input of historical pricing data of all crypto-assets by exchange, + with daily and minute resolution. +- Backtesting and live-trading functionality, with a seamless transition + between the two modes. +- Output of performance statistics are based on Pandas DataFrames to + integrate nicely into the existing PyData eco-system. +- Statistic and machine learning libraries like matplotlib, scipy, + statsmodels, and sklearn support development, analysis, and + visualization of state-of-the-art trading systems. 
\ No newline at end of file From 5fd4ca33d33535dfb440109a4395d9e0490b33d0 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 20 Oct 2017 10:14:31 -0600 Subject: [PATCH 109/110] DOC: beginner tutorial --- catalyst/examples/buy_btc_simple.py | 8 + docs/source/beginner-tutorial.rst | 760 +++++++--------------------- docs/source/index.rst | 2 +- docs/source/welcome.rst | 9 +- 4 files changed, 207 insertions(+), 572 deletions(-) create mode 100644 catalyst/examples/buy_btc_simple.py diff --git a/catalyst/examples/buy_btc_simple.py b/catalyst/examples/buy_btc_simple.py new file mode 100644 index 00000000..f7eb8aa0 --- /dev/null +++ b/catalyst/examples/buy_btc_simple.py @@ -0,0 +1,8 @@ +from catalyst.api import order, record, symbol + +def initialize(context): + context.asset = symbol('btc_usd') + +def handle_data(context, data): + order(asset, 1) + record(btc=data.current(context.asset, 'price')) \ No newline at end of file diff --git a/docs/source/beginner-tutorial.rst b/docs/source/beginner-tutorial.rst index c2f9a07d..b3b1b133 100644 --- a/docs/source/beginner-tutorial.rst +++ b/docs/source/beginner-tutorial.rst @@ -1,608 +1,281 @@ -Zipline Beginner Tutorial -------------------------- +Catalyst Beginner Tutorial +-------------------------- Basics ~~~~~~ -Zipline is an open-source algorithmic trading simulator written in -Python. +Catalyst is an open-source algorithmic trading simulator for crypto +assets written in Python. -The source can be found at: https://github.com/quantopian/zipline +The source can be found at: https://github.com/enigmampc/catalyst Some benefits include: +- Support for several of the top crypto-exchanges by trading volume. - Realistic: slippage, transaction costs, order delays. - Stream-based: Process each event individually, avoids look-ahead bias. - Batteries included: Common transforms (moving average) as well as common risk calculations (Sharpe). 
- Developed and continuously updated by - `Quantopian `__ which provides an - easy-to-use web-interface to Zipline, 10 years of minute-resolution - historical US stock data, and live-trading capabilities. This - tutorial is directed at users wishing to use Zipline without using - Quantopian. If you instead want to get started on Quantopian, see - `here `__. + `Enigma MPC `__ which is building the Enigma + data marketplace protocol as well as Catalyst, the first application + that will run on our protocol. Powered by our financial data + marketplace, Catalyst empowers users to share and curate data and + build profitable, data-driven investment strategies. -This tutorial assumes that you have zipline correctly installed, see the -`installation -instructions `__ if -you haven't set up zipline yet. +This tutorial assumes that you have Catalyst correctly installed, see the +:doc:`installation instructions ` if you haven't set up +Catalyst yet. -Every ``zipline`` algorithm consists of two functions you have to +Every ``catalyst`` algorithm consists of at least two functions you have to define: * ``initialize(context)`` * ``handle_data(context, data)`` -Before the start of the algorithm, ``zipline`` calls the +Before the start of the algorithm, ``catalyst`` calls the ``initialize()`` function and passes in a ``context`` variable. ``context`` is a persistent namespace for you to store variables you need to access from one algorithm iteration to the next. -After the algorithm has been initialized, ``zipline`` calls the +After the algorithm has been initialized, ``catalyst`` calls the ``handle_data()`` function once for each event. At every call, it passes the same ``context`` variable and an event-frame called ``data`` containing the current trading bar with open, high, low, and close -(OHLC) prices as well as volume for each stock in your universe. For -more information on these functions, see the `relevant part of the -Quantopian docs `__. 
+(OHLC) prices as well as volume for each crypto asset in your universe. + +.. For more information on these functions, see the `relevant part of the +.. Quantopian docs `. My first algorithm ~~~~~~~~~~~~~~~~~~ Lets take a look at a very simple algorithm from the ``examples`` -directory, ``buyapple.py``: +directory, ``buy_btc.py``: .. code-block:: python - from zipline.examples import buyapple - buyapple?? - - -.. code-block:: python - - from zipline.api import order, record, symbol + from catalyst.api import order, record, symbol def initialize(context): - pass + context.asset = symbol('btc_usd') def handle_data(context, data): - order(symbol('AAPL'), 10) - record(AAPL=data.current(symbol('AAPL'), 'price')) + order(context.asset, 1) + record(btc = data.current(context.asset, 'price')) As you can see, we first have to import some functions we would like to use. All functions commonly used in your algorithm can be found in -``zipline.api``. Here we are using :func:`~zipline.api.order()` which takes two -arguments: a security object, and a number specifying how many stocks you would -like to order (if negative, :func:`~zipline.api.order()` will sell/short -stocks). In this case we want to order 10 shares of Apple at each iteration. For -more documentation on ``order()``, see the `Quantopian docs -`__. +``catalyst.api``. Here we are using :func:`~catalyst.api.order()` which takes two +arguments: a cryptoasset object, and a number specifying how many assets you would +like to order (if negative, :func:`~catalyst.api.order()` will sell/short +assets). In this case we want to order 1 bitcoin at each iteration. -Finally, the :func:`~zipline.api.record` function allows you to save the value +.. For more documentation on ``order()``, see the `Quantopian docs +.. `__. + +Finally, the :func:`~catalyst.api.record` function allows you to save the value of a variable at each iteration. 
You provide it with a name for the variable together with the variable itself: ``varname=var``. After the algorithm finished running you will have access to each variable value you tracked -with :func:`~zipline.api.record` under the name you provided (we will see this -further below). You also see how we can access the current price data of the -AAPL stock in the ``data`` event frame (for more information see -`here `__. +with :func:`~catalyst.api.record` under the name you provided (we will see this +further below). You also see how we can access the current price data of +a bitcoin in the ``data`` event frame. + +.. (for more information see `here `__. Running the algorithm ~~~~~~~~~~~~~~~~~~~~~ -To now test this algorithm on financial data, ``zipline`` provides three -interfaces: A command-line interface, ``IPython Notebook`` magic, and -:func:`~zipline.run_algorithm`. +To now test this algorithm on crypto data, ``catalyst`` provides three +interfaces: -Ingesting Data +- A command-line interface, +- ``IPython Notebook`` magic, +- and :func:`~catalyst.run_algorithm`. + +Ingesting data ^^^^^^^^^^^^^^ -If you haven't ingested the data, run: -.. code-block:: bash +In previous versions of Catalyst you needed to manually ingest data before running +your algorithm to make it available at runtime. Starting with version 0.3, the +algorithm will automagically ingest the data it needs the first time it encounters +a data request for data that it doesn't have. - $ zipline ingest [-b ] +Still, we believe it is important for you to have a high-level understanding +of how data is managed: -where ```` is the name of the bundle to ingest, defaulting to -:ref:`quantopian-quandl `. +- Pricing data is split and packaged into ``bundles``: chunks of data organized + as time series that are kept up to date daily on Enigma's servers. Catalyst + downloads the bundles that it needs at any given time, and reconstructs the whole + dataset on your hard drive.
-you can check out the :ref:`ingesting data ` section for -more detail. +- Pricing data is provided in ``daily`` and ``minute`` resolution. Those are different + bundle datasets, and are managed separately. + +- Bundles are exchange-specific, as the pricing data is specific to the trades that + happen in each exchange. You can optionally specify which exchange you want pricing + data from. + +- Catalyst keeps track of all the downloaded bundles, so that it only has to download + them once, and will do incremental updates as needed. + +- When running in ``live trading`` mode, Catalyst will first look for historical + pricing data in the locally stored bundles. If there is anything missing, Catalyst will + hit the exchange for the most recent data, and merge it with the local bundle to make + it available for future iterations. + +If you want to learn more, check out the :ref:`ingesting data ` section +for more detail. Command line interface ^^^^^^^^^^^^^^^^^^^^^^ -After you installed zipline you should be able to execute the following +After you installed Catalyst you should be able to execute the following from your command line (e.g. ``cmd.exe`` on Windows, or the Terminal app -on OSX): +on OSX). A simplified output is shown here for educational purposes: .. code-block:: bash - $ zipline run --help + $ catalyst --help .. parsed-literal:: - Usage: zipline run [OPTIONS] + Usage: catalyst [OPTIONS] COMMAND [ARGS]... - Run a backtest for the given algorithm. + Top level catalyst entry point. + + Options: + --version Show the version and exit. + --help Show this message and exit. + + Commands: + ingest-exchange Ingest data for the given exchange. + live Trade live with the given algorithm. + run Run a backtest for the given algorithm. + +There are three main modes you can run on Catalyst. The first is ``ingest-exchange`` +for data ingestion, which we have summarized in the previous section.
The second +is ``live`` to use your algorithm to trade live against a given exchange, and the +third mode ``run`` is to backtest your algorithm before trading live with it. + +Let's start with backtesting, so run this other command to learn more about +the available options: + +.. code-block:: bash + + $ catalyst run --help + +.. parsed-literal:: + + Usage: catalyst run [OPTIONS] + + Run a backtest for the given algorithm. + + Options: + -f, --algofile FILENAME The file that contains the algorithm to run. + -t, --algotext TEXT The algorithm script to run. + -D, --define TEXT Define a name to be bound in the namespace + before executing the algotext. For example + '-Dname=value'. The value may be any python + expression. These are evaluated in order so + they may refer to previously defined names. + --data-frequency [daily|minute] + The data frequency of the simulation. + [default: daily] + --capital-base FLOAT The starting capital for the simulation. + [default: 10000000.0] + -b, --bundle BUNDLE-NAME The data bundle to use for the simulation. + [default: poloniex] + --bundle-timestamp TIMESTAMP The date to lookup data on or before. + [default: ] + -s, --start DATE The start date of the simulation. + -e, --end DATE The end date of the simulation. + -o, --output FILENAME The location to write the perf data. If this + is '-' the perf will be written to stdout. + [default: -] + --print-algo / --no-print-algo Print the algorithm to stdout. + -x, --exchange-name [poloniex|bitfinex|bittrex] + The name of the targeted exchange + (supported: bitfinex, bittrex, poloniex). + -n, --algo-namespace TEXT A label assigned to the algorithm for data + storage purposes. + -c, --base-currency TEXT The base currency used to calculate + statistics (e.g. usd, btc, eth). + --help Show this message and exit. - Options: - -f, --algofile FILENAME The file that contains the algorithm to run. - -t, --algotext TEXT The algorithm script to run. 
- -D, --define TEXT Define a name to be bound in the namespace - before executing the algotext. For example - '-Dname=value'. The value may be any python - expression. These are evaluated in order so - they may refer to previously defined names. - --data-frequency [minute|daily] - The data frequency of the simulation. - [default: daily] - --capital-base FLOAT The starting capital for the simulation. - [default: 10000000.0] - -b, --bundle BUNDLE-NAME The data bundle to use for the simulation. - [default: quantopian-quandl] - --bundle-timestamp TIMESTAMP The date to lookup data on or before. - [default: ] - -s, --start DATE The start date of the simulation. - -e, --end DATE The end date of the simulation. - -o, --output FILENAME The location to write the perf data. If this - is '-' the perf will be written to stdout. - [default: -] - --print-algo / --no-print-algo Print the algorithm to stdout. - --help Show this message and exit. As you can see there are a couple of flags that specify where to find your -algorithm (``-f``) as well as parameters specifying which data to use, -defaulting to the :ref:`quantopian-quandl-mirror`. There are also arguments for -the date range to run the algorithm over (``--start`` and ``--end``). Finally, -you'll want to save the performance metrics of your algorithm so that you can -analyze how it performed. This is done via the ``--output`` flag and will cause -it to write the performance ``DataFrame`` in the pickle Python file format. -Note that you can also define a configuration file with these parameters that -you can then conveniently pass to the ``-c`` option so that you don't have to -supply the command line args all the time (see the .conf files in the examples -directory). +algorithm (``-f``) as well as a parameter to specify which exchange to use. +There are also arguments for the date range to run the algorithm over +(``--start`` and ``--end``). 
Finally, you'll want to save the performance +metrics of your algorithm so that you can analyze how it performed. This is +done via the ``--output`` flag and will cause it to write the performance +``DataFrame`` in the pickle Python file format. Note that you can also define +a configuration file with these parameters that you can then conveniently pass +to the ``-c`` option so that you don't have to supply the command line args +all the time (see the .conf files in the examples directory). Thus, to execute our algorithm from above and save the results to -``buyapple_out.pickle`` we would call ``zipline run`` as follows: +``buy_btc_simple_out.pickle`` we would call ``catalyst run`` as follows: .. code-block:: python - zipline run -f ../../zipline/examples/buyapple.py --start 2000-1-1 --end 2014-1-1 -o buyapple_out.pickle + catalyst run -f buy_btc_simple.py -x bitfinex --start 2016-1-1 --end 2016-9-29 -o buy_btc_simple_out.pickle -.. parsed-literal:: +.. +.. parsed-literal - AAPL - [2015-11-04 22:45:32.820166] INFO: Performance: Simulated 3521 trading days out of 3521. - [2015-11-04 22:45:32.820314] INFO: Performance: first open: 2000-01-03 14:31:00+00:00 - [2015-11-04 22:45:32.820401] INFO: Performance: last close: 2013-12-31 21:00:00+00:00 +.. AAPL +.. [2015-11-04 22:45:32.820166] INFO: Performance: Simulated 3521 trading days out of 3521. +.. [2015-11-04 22:45:32.820314] INFO: Performance: first open: 2000-01-03 14:31:00+00:00 +.. [2015-11-04 22:45:32.820401] INFO: Performance: last close: 2013-12-31 21:00:00+00:00 ``run`` first calls the ``initialize()`` function, and then -streams the historical stock price day-by-day through ``handle_data()``. -After each call to ``handle_data()`` we instruct ``zipline`` to order 10 -stocks of AAPL. After the call of the ``order()`` function, ``zipline`` +streams the historical asset price day-by-day through ``handle_data()``. +After each call to ``handle_data()`` we instruct ``catalyst`` to order 1 +bitcoin.
After the call of the ``order()`` function, ``catalyst`` enters the ordered stock and amount in the order book. After the -``handle_data()`` function has finished, ``zipline`` looks for any open +``handle_data()`` function has finished, ``catalyst`` looks for any open orders and tries to fill them. If the trading volume is high enough for -this stock, the order is executed after adding the commission and +this asset, the order is executed after adding the commission and applying the slippage model which models the influence of your order on the stock price, so your algorithm will be charged more than just the -stock price \* 10. (Note, that you can also change the commission and -slippage model that ``zipline`` uses, see the `Quantopian -docs `__ for more -information). +asset price. (Note, that you can also change the commission and +slippage model that ``catalyst`` uses). -Lets take a quick look at the performance ``DataFrame``. For this, we +.. see the `Quantopian docs `__ +.. for more information). + +Let's take a quick look at the performance ``DataFrame``. For this, we use ``pandas`` from inside the IPython Notebook and print the first ten -rows. Note that ``zipline`` makes heavy usage of ``pandas``, especially -for data input and outputting so it's worth spending some time to learn -it. +rows. Note that ``catalyst`` makes heavy usage of +`pandas `_, especially for data input and +outputting so it's worth spending some time to learn it. .. code-block:: python import pandas as pd - perf = pd.read_pickle('buyapple_out.pickle') # read in perf DataFrame + perf = pd.read_pickle('buy_btc_simple_out.pickle') # read in perf DataFrame perf.head() -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AAPLalgo_volatilityalgorithm_period_returnalphabenchmark_period_returnbenchmark_volatilitybetacapital_usedending_cashending_exposure...short_exposureshort_valueshorts_countsortinostarting_cashstarting_exposurestarting_valuetrading_daystransactionstreasury_period_return
2000-01-03 21:00:003.7383140.000000e+000.000000e+00-0.065800-0.0095490.0000000.0000000.0000010000000.000000.00000...0000.00000010000000.000000.000000.000001[]0.0658
2000-01-04 21:00:003.4231353.367492e-07-3.000000e-08-0.064897-0.0475280.3232290.000001-34.531359999965.4686534.23135...0000.00000010000000.000000.000000.000002[{u'order_id': u'513357725cb64a539e3dd02b47da7...0.0649
2000-01-05 21:00:003.4732294.001918e-07-9.906000e-09-0.066196-0.0456970.3293210.000001-35.032299999930.4363669.46458...0000.0000009999965.4686534.2313534.231353[{u'order_id': u'd7d4ad03cfec4d578c0d817dc3829...0.0662
2000-01-06 21:00:003.1726614.993979e-06-6.410420e-07-0.065758-0.0447850.298325-0.000006-32.026619999898.4097595.17983...000-12731.7805169999930.4363669.4645869.464584[{u'order_id': u'1fbf5e9bfd7c4d9cb2e8383e1085e...0.0657
2000-01-07 21:00:003.3229455.977002e-06-2.201900e-07-0.065206-0.0189080.3753010.000005-33.529459999864.88030132.91780...000-12629.2745839999898.4097595.1798395.179835[{u'order_id': u'9ea6b142ff09466b9113331a37437...0.0652
-

5 rows × 39 columns

-
- - - -As you can see, there is a row for each trading day, starting on the -first business day of 2000. In the columns you can find various +There is a row for each trading day, starting on the first day of our +simulation Jan 1st, 2016. In the columns you can find various information about the state of your algorithm. The very first column -``AAPL`` was placed there by the ``record()`` function mentioned earlier -and allows us to plot the price of apple. For example, we could easily +``btc`` was placed there by the ``record()`` function mentioned earlier +and allows us to plot the price of bitcoin. For example, we could easily examine now how our portfolio value changed over time compared to the -AAPL stock price. +bitcoin price. -.. code-block:: python - - %pylab inline - figsize(12, 12) - import matplotlib.pyplot as plt - - ax1 = plt.subplot(211) - perf.portfolio_value.plot(ax=ax1) - ax1.set_ylabel('portfolio value') - ax2 = plt.subplot(212, sharex=ax1) - perf.AAPL.plot(ax=ax2) - ax2.set_ylabel('AAPL stock price') - -.. parsed-literal:: - - Populating the interactive namespace from numpy and matplotlib - -.. parsed-literal:: - - - -.. image:: tutorial_files/tutorial_11_2.png - - -As you can see, our algorithm performance as assessed by the -``portfolio_value`` closely matches that of the AAPL stock price. This -is not surprising as our algorithm only bought AAPL every chance it got. - -IPython Notebook -~~~~~~~~~~~~~~~~ - -The `IPython Notebook `__ is a very -powerful browser-based interface to a Python interpreter (this tutorial -was written in it). As it is already the de-facto interface for most -quantitative researchers ``zipline`` provides an easy way to run your -algorithm inside the Notebook without requiring you to use the CLI. - -To use it you have to write your algorithm in a cell and let ``zipline`` -know that it is supposed to run this algorithm. 
This is done via the -``%%zipline`` IPython magic command that is available after you -``import zipline`` from within the IPython Notebook. This magic takes -the same arguments as the command line interface described above. Thus -to run the algorithm from above with the same parameters we just have to -execute the following cell after importing ``zipline`` to register the -magic. - -.. code-block:: python - - %load_ext zipline - -.. code-block:: python - - %%zipline --start 2000-1-1 --end 2014-1-1 - from zipline.api import symbol, order, record - - def initialize(context): - pass - - def handle_data(context, data): - order(symbol('AAPL'), 10) - record(AAPL=data[symbol('AAPL')].price) - -Note that we did not have to specify an input file as above since the -magic will use the contents of the cell and look for your algorithm -functions there. Also, instead of defining an output file we are -specifying a variable name with ``-o`` that will be created in the name -space and contain the performance ``DataFrame`` we looked at above. - -.. code-block:: python - - _.head() - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AAPLalgo_volatilityalgorithm_period_returnalphabenchmark_period_returnbenchmark_volatilitybetacapital_usedending_cashending_exposure...short_exposureshort_valueshorts_countsortinostarting_cashstarting_exposurestarting_valuetrading_daystransactionstreasury_period_return
2000-01-03 21:00:003.7383140.000000e+000.000000e+00-0.065800-0.0095490.0000000.0000000.0000010000000.000000.00000...0000.00000010000000.000000.000000.000001[]0.0658
2000-01-04 21:00:003.4231353.367492e-07-3.000000e-08-0.064897-0.0475280.3232290.000001-34.531359999965.4686534.23135...0000.00000010000000.000000.000000.000002[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0649
2000-01-05 21:00:003.4732294.001918e-07-9.906000e-09-0.066196-0.0456970.3293210.000001-35.032299999930.4363669.46458...0000.0000009999965.4686534.2313534.231353[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0662
2000-01-06 21:00:003.1726614.993979e-06-6.410420e-07-0.065758-0.0447850.298325-0.000006-32.026619999898.4097595.17983...000-12731.7805169999930.4363669.4645869.464584[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0657
2000-01-07 21:00:003.3229455.977002e-06-2.201900e-07-0.065206-0.0189080.3753010.000005-33.529459999864.88030132.91780...000-12629.2745839999898.4097595.1798395.179835[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0652
-

5 rows × 39 columns

-
+Our algorithm performance as assessed by the +``portfolio_value`` closely matches that of the bitcoin price. This +is not surprising as our algorithm only bought bitcoin every chance it got. Access to previous prices using ``history`` @@ -627,22 +300,16 @@ we need a new concept: History ``data.history()`` is a convenience function that keeps a rolling window of data for you. The first argument is the number of bars you want to collect, the second argument is the unit (either ``'1d'`` for ``'1m'`` -but note that you need to have minute-level data for using ``1m``). For -a more detailed description ``history()``'s features, see the -`Quantopian docs `__. -Let's look at the strategy which should make this clear: +but note that you need to have minute-level data for using ``1m``). This is +a function we use in the ``handle_data()`` section: .. code-block:: python - %%zipline --start 2000-1-1 --end 2012-1-1 -o dma.pickle + from catalyst.api import order_target, record, symbol - - from zipline.api import order_target, record, symbol - - def initialize(context): + def initialize(context): context.i = 0 - context.asset = symbol('AAPL') - + context.asset = symbol('btc_usd') def handle_data(context, data): # Skip first 300 days to get full windows @@ -665,67 +332,22 @@ Let's look at the strategy which should make this clear: order_target(context.asset, 0) # Save values for later inspection - record(AAPL=data.current(context.asset, 'price'), + record(btc=data.current(context.asset, 'price'), short_mavg=short_mavg, long_mavg=long_mavg) - def analyze(context, perf): - fig = plt.figure() - ax1 = fig.add_subplot(211) - perf.portfolio_value.plot(ax=ax1) - ax1.set_ylabel('portfolio value in $') - - ax2 = fig.add_subplot(212) - perf['AAPL'].plot(ax=ax2) - perf[['short_mavg', 'long_mavg']].plot(ax=ax2) - - perf_trans = perf.ix[[t != [] for t in perf.transactions]] - buys = perf_trans.ix[[t[0]['amount'] > 0 for t in perf_trans.transactions]] - sells = perf_trans.ix[ - [t[0]['amount'] < 0
for t in perf_trans.transactions]] - ax2.plot(buys.index, perf.short_mavg.ix[buys.index], - '^', markersize=10, color='m') - ax2.plot(sells.index, perf.short_mavg.ix[sells.index], - 'v', markersize=10, color='k') - ax2.set_ylabel('price in $') - plt.legend(loc=0) - plt.show() - -.. image:: tutorial_files/tutorial_22_1.png - -Here we are explicitly defining an ``analyze()`` function that gets -automatically called once the backtest is done (this is not possible on -Quantopian currently). - -Although it might not be directly apparent, the power of ``history()`` -(pun intended) can not be under-estimated as most algorithms make use of -prior market developments in one form or another. You could easily -devise a strategy that trains a classifier with -`scikit-learn `__ which tries to -predict future market movements based on past prices (note, that most of -the ``scikit-learn`` functions require ``numpy.ndarray``\ s rather than -``pandas.DataFrame``\ s, so you can simply pass the underlying -``ndarray`` of a ``DataFrame`` via ``.values``). - -We also used the ``order_target()`` function above. This and other -functions like it can make order management and portfolio rebalancing -much easier. See the `Quantopian documentation on order -functions `__ fore -more details. - Conclusions ~~~~~~~~~~~ We hope that this tutorial gave you a little insight into the -architecture, API, and features of ``zipline``. For next steps, check +architecture, API, and features of ``catalyst``. For next steps, check out some of the -`examples `__. +`examples `__. +The natural next step would be to look into the +`buy_and_hodl `_ +example, which is a more elaborate and realistic version of the ``buy_btc_simple`` example presented in this tutorial. -Feel free to ask questions on `our mailing -list `__, report -problems on our `GitHub issue -tracker `__, -`get -involved `__, -and `checkout Quantopian `__.
+Feel free to ask questions on the ``#catalyst_dev`` channel of our +`Discord group `__ and report +problems on our `GitHub issue tracker `__. diff --git a/docs/source/index.rst b/docs/source/index.rst index d3ffbfec..7d9dc349 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Table of Contents :maxdepth: 1 install -.. beginner-tutorial + beginner-tutorial .. bundles .. development-guidelines .. appendix diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst index 5cfa0473..edca29dd 100644 --- a/docs/source/welcome.rst +++ b/docs/source/welcome.rst @@ -9,8 +9,13 @@ Features ======== - Ease of use: Catalyst tries to get out of your way so that you can - focus on algorithm development. See examples provided. -- Support for several of the top crypto-exchanges by trading volume. + focus on algorithm development. See + `examples `_ + provided. +- Support for several of the top crypto-exchanges by trading volume: + `Bitfinex `_, `Bittrex `_, + and `Poloniex `_. +- Secure: You and only you have access to each exchange API keys for your accounts. - Input of historical pricing data of all crypto-assets by exchange, with daily and minute resolution. 
- Backtesting and live-trading functionality, with a seamless transition From fadd4abe5af9bc149a248a4ad48730993d01ada0 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 20 Oct 2017 10:55:35 -0600 Subject: [PATCH 110/110] DOC: naming convention --- catalyst/__main__.py | 2 +- docs/source/index.rst | 1 + docs/source/naming-convention.rst | 66 +++++++++++++++++++++++++++++++ docs/source/welcome.rst | 5 ++- 4 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 docs/source/naming-convention.rst diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 4c48b7e0..1edb53f4 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -498,7 +498,7 @@ def ingest_exchange(exchange_name, data_frequency, start, end, exchange = get_exchange(exchange_name) exchange_bundle = ExchangeBundle(exchange) - click.echo('ingesting exchange bundle {}'.format(exchange_name)) + click.echo('Ingesting exchange bundle {}...'.format(exchange_name)) exchange_bundle.ingest( data_frequency=data_frequency, include_symbols=include_symbols, diff --git a/docs/source/index.rst b/docs/source/index.rst index 7d9dc349..0efccaeb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,6 +9,7 @@ Table of Contents install beginner-tutorial + naming-convention .. bundles .. development-guidelines .. appendix diff --git a/docs/source/naming-convention.rst b/docs/source/naming-convention.rst new file mode 100644 index 00000000..2a9e0398 --- /dev/null +++ b/docs/source/naming-convention.rst @@ -0,0 +1,66 @@ +Naming Convention +================= + +Catalyst introduces a standardized naming convention for all asset pairs +trading on any exchange in the following form: + + + **{market_currency}_{base_currency}** + +Where {market_currency} is the asset to be traded using {base_currency} as +the reference, both written in lowercase and separated with an underscore. 
+ +This standardization is needed to overcome the lack of consistency in the +naming of assets across different exchanges, and to make it easier for the user +to refer to the asset pairs they want to trade. + +Catalyst maintains a `Market Coverage Overview `_ +where you can check the mapping between Catalyst naming pairs and that of each +exchange. Catalyst will always expect in all its functions that you will refer to +the asset pairs by using the Catalyst naming convention. + +If at any point you input the wrong name for an asset pair, you will get an error +saying that the pair was not found on the given exchange, along with a list of pairs available on that exchange: + +.. code-block:: bash + + $ catalyst ingest-exchange -x poloniex -i btc_usd + +.. parsed-literal:: + + Ingesting exchange bundle poloniex... + Error traceback: /Volumes/Data/Users/victoris/Desktop/Enigma/user-install/catalyst-dev/catalyst/exchange/exchange.py (line 175) + SymbolNotFoundOnExchange: Symbol btc_usd not found on exchange Poloniex.
+ Choose from: ['rep_usdt', 'gno_btc', 'xvc_btc', 'pink_btc', 'sys_btc', + 'emc2_btc', 'rads_btc', 'note_btc', 'maid_btc', 'bch_btc', 'gnt_btc', + 'bcn_btc', 'rep_btc', 'bcy_btc', 'cvc_btc', 'nxt_xmr', 'zec_usdt', + 'fct_btc', 'gas_btc', 'pot_btc', 'eth_usdt', 'btc_usdt', 'lbc_btc', + 'dcr_btc', 'etc_usdt', 'omg_eth', 'amp_btc', 'xpm_btc', 'nxt_btc', + 'vtc_btc', 'steem_eth', 'blk_xmr', 'pasc_btc', 'zec_xmr', 'grc_btc', + 'nxc_btc', 'btcd_btc', 'ltc_btc', 'dash_btc', 'naut_btc', 'zec_eth', + 'zec_btc', 'burst_btc', 'zrx_eth', 'bela_btc', 'steem_btc', 'etc_btc', + 'eth_btc', 'huc_btc', 'strat_btc', 'lsk_btc', 'exp_btc', 'clam_btc', + 'rep_eth', 'dash_xmr', 'cvc_eth', 'bch_usdt', 'zrx_btc', 'dash_usdt', + 'blk_btc', 'xrp_btc', 'nxt_usdt', 'neos_btc', 'omg_btc', 'bts_btc', + 'doge_btc', 'gnt_eth', 'sbd_btc', 'gno_eth', 'xcp_btc', 'ltc_usdt', + 'btm_btc', 'xmr_usdt', 'lsk_eth', 'omni_btc', 'nav_btc', 'fldc_btc', + 'ppc_btc', 'xbc_btc', 'dgb_btc', 'sc_btc', 'btcd_xmr', 'vrc_btc', + 'ric_btc', 'str_btc', 'maid_xmr', 'xmr_btc', 'sjcx_btc', 'via_btc', + 'xem_btc', 'nmc_btc', 'etc_eth', 'ltc_xmr', 'ardr_btc', 'gas_eth', + 'flo_btc', 'xrp_usdt', 'game_btc', 'bch_eth', 'bcn_xmr', 'str_usdt'] + +In the example above, the Poloniex exchange does not use USD, but instead uses the +USDT cryptocurrency asset that is issued on the Bitcoin blockchain via the Omni +Layer Protocol. Each USDT unit is backed by a U.S. Dollar held in the reserves of +Tether Limited. USDT can be transferred, stored, and spent, just like bitcoins +or any other cryptocurrency. Given its 1:1 mapping to the USD, it is a viable alternative. + +.. code-block:: bash + + $ catalyst ingest-exchange -x poloniex -i btc_usdt + +.. parsed-literal:: + + Ingesting exchange bundle poloniex...
+ [====================================] Fetching poloniex daily candles: : 100% + diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst index edca29dd..410d8c75 100644 --- a/docs/source/welcome.rst +++ b/docs/source/welcome.rst @@ -10,14 +10,15 @@ Features - Ease of use: Catalyst tries to get out of your way so that you can focus on algorithm development. See - `examples `_ + `examples of trading strategies `_ provided. - Support for several of the top crypto-exchanges by trading volume: `Bitfinex `_, `Bittrex `_, and `Poloniex `_. - Secure: You and only you have access to each exchange API keys for your accounts. - Input of historical pricing data of all crypto-assets by exchange, - with daily and minute resolution. + with daily and minute resolution. See + `Catalyst Market Coverage Overview `_. - Backtesting and live-trading functionality, with a seamless transition between the two modes. - Output of performance statistics are based on Pandas DataFrames to