From cb870422c30caaed8af082dab6cd2028c1e6404f Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Wed, 25 Oct 2017 12:11:40 -0700 Subject: [PATCH 01/37] Create simple_universe.py This example aims to help users get familiar with catalyst API's to collect and handle data. --- catalyst/examples/simple_universe.py | 107 +++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 catalyst/examples/simple_universe.py diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py new file mode 100644 index 00000000..4909fee2 --- /dev/null +++ b/catalyst/examples/simple_universe.py @@ -0,0 +1,107 @@ +""" +Requires Catalyst version 0.3.0 or above +Tested on Catalyst version 0.3.2 + +These example aims to provide and easy way for users to learn how to collect data from the different exchanges. +You simply need to specify the exchange and the market that you want to focus on. +You will all see how to create a universe and filter it base on the exchange and the market you desire. + +The example prints out the closing price of all the pairs for a given market-exchange every 30 minutes. +The example also contains the ohlcv minute data for the past seven days which could be used to create indicators +Use this as the backbone to create your own trading strategies. 
+""" + +import pandas as pd +from catalyst import run_algorithm +from catalyst.exchange.exchange_utils import get_exchange_symbols + +from catalyst.api import ( + symbols, +) + + +def initialize(context): + context.i = -1 # counts the minutes + context.exchange = 'poloniex' # must match the exchange specified in run_algorithm + context.base_currency = 'btc' # must match the base currency specified in run_algorithm + + +def handle_data(context, data): + lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history + context.i += 1 + + # we must first wait until enough minutes, hours or days have passed for data history to work + if context.i < lookback: + return + + # current date formatted into a string + today = context.blotter.current_dt.strftime('%Y-%m-%d %H:%M:%S') + date, time = str(today).split(' ') + + # update universe everyday + new_day = 60 * 24 + if not context.i % new_day: + context.universe = universe(context, date) + + # get data every 30 minutes + minutes = 30 + if not context.i % minutes and context.universe: + # we iterate for every pair in the current universe + for coin in context.coins: + pair = str(coin.symbol) + + # ohlcv data (the standard data required for candlestick or indicators/signals) + open = data.history(coin, 'open', bar_count=lookback, frequency='1m').ffill().bfill() + high = data.history(coin, 'high', bar_count=lookback, frequency='1m').ffill().bfill() + low = data.history(coin, 'low', bar_count=lookback, frequency='1m').ffill().bfill() + close = data.history(coin, 'price', bar_count=lookback, frequency='1m').ffill().bfill() + volume = data.history(coin, 'volume', bar_count=lookback, frequency='1m').ffill().bfill() + + # close[-1] is the equivalent to current price + # displays the minute price for each pair every 30 minutes + print(today, pair, close[-1]) + + +def analyze(context=None, results=None): + pass + + +def universe(context, date): + # Get the universe for a given exchange and a given 
base_currency market + # Example: Poloniex BTC Market + json_symbols = get_exchange_symbols(context.exchange) # get all the pairs for the exchange + poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe + poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], + axis=1) + poloniex_universe_df['market_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[0], + axis=1) + # Filter all the exchange pairs to only the ones for a give base currency + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] + + # Filter all the pairs to ensure that pair existed in the current date + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < date] + context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols + return poloniex_universe_df.symbol.tolist() + + +if __name__ == '__main__': + start_date = pd.to_datetime('2017-01-01', utc=True) + end_date = pd.to_datetime('2017-10-15', utc=True) + + performance = run_algorithm(start=start_date, end=end_date, + capital_base=10000.0, + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex', + bundle='poloniex', + data_frequency='minute', + base_currency='btc', + live=False, + live_graph=False, + algo_namespace='simple_universe') + +""" +Run in Terminal (inside catalyst environment): +python simple_universe.py +""" From c8cc2edd36e2d069b7e0be94a1ecc7ab18880661 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 15:16:13 -0700 Subject: [PATCH 02/37] Convert to 30 minutes ohlcv data --- catalyst/examples/simple_universe.py | 56 ++++++++++++++++++---------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index 4909fee2..172837bb 100644 --- 
a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -9,9 +9,13 @@ You will all see how to create a universe and filter it base on the exchange and The example prints out the closing price of all the pairs for a given market-exchange every 30 minutes. The example also contains the ohlcv minute data for the past seven days which could be used to create indicators Use this as the backbone to create your own trading strategies. + +Variables lookback date and date are used to ensure data for a coin existed on the lookback period specified. """ +import numpy as np import pandas as pd +from datetime import timedelta from catalyst import run_algorithm from catalyst.exchange.exchange_utils import get_exchange_symbols @@ -30,18 +34,16 @@ def handle_data(context, data): lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history context.i += 1 - # we must first wait until enough minutes, hours or days have passed for data history to work - if context.i < lookback: - return - # current date formatted into a string - today = context.blotter.current_dt.strftime('%Y-%m-%d %H:%M:%S') - date, time = str(today).split(' ') + today = context.blotter.current_dt + date, time = today.strftime('%Y-%m-%d %H:%M:%S').split(' ') + lookback_date = today - timedelta(days=(lookback / (60 * 24))) # subtract the amount of days specified in lookback + lookback_date = lookback_date.strftime('%Y-%m-%d %H:%M:%S').split(' ')[0] # get only the date as a string # update universe everyday new_day = 60 * 24 if not context.i % new_day: - context.universe = universe(context, date) + context.universe = universe(context, lookback_date, date) # get data every 30 minutes minutes = 30 @@ -50,25 +52,30 @@ def handle_data(context, data): for coin in context.coins: pair = str(coin.symbol) - # ohlcv data (the standard data required for candlestick or indicators/signals) - open = data.history(coin, 'open', bar_count=lookback, 
frequency='1m').ffill().bfill() - high = data.history(coin, 'high', bar_count=lookback, frequency='1m').ffill().bfill() - low = data.history(coin, 'low', bar_count=lookback, frequency='1m').ffill().bfill() - close = data.history(coin, 'price', bar_count=lookback, frequency='1m').ffill().bfill() - volume = data.history(coin, 'volume', bar_count=lookback, frequency='1m').ffill().bfill() + # 30 minute interval ohlcv data (the standard data required for candlestick or indicators/signals) + # 30T means 30 minutes re-sampling of one minute data. change to your desire time interval. + open = fill(data.history(coin, 'open', bar_count=lookback, frequency='1m')).resample('30T').first() + high = fill(data.history(coin, 'high', bar_count=lookback, frequency='1m')).resample('30T').max() + low = fill(data.history(coin, 'low', bar_count=lookback, frequency='1m')).resample('30T').min() + close = fill(data.history(coin, 'price', bar_count=lookback, frequency='1m')).resample('30T').last() + volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='1m')).resample('30T').sum() # close[-1] is the equivalent to current price # displays the minute price for each pair every 30 minutes - print(today, pair, close[-1]) + print(today, pair, open[-1], high[-1], low[-1], close[-1], volume[-1]) + + # ---------------------------------------------------------------------------------------------------------- + # -------------------------------------- Insert Your Strategy Here ----------------------------------------- + # ---------------------------------------------------------------------------------------------------------- def analyze(context=None, results=None): pass -def universe(context, date): - # Get the universe for a given exchange and a given base_currency market - # Example: Poloniex BTC Market +# Get the universe for a given exchange and a given base_currency market +# Example: Poloniex BTC Market +def universe(context, lookback_date, current_date): json_symbols = 
get_exchange_symbols(context.exchange) # get all the pairs for the exchange poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], @@ -78,12 +85,23 @@ def universe(context, date): # Filter all the exchange pairs to only the ones for a give base currency poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] - # Filter all the pairs to ensure that pair existed in the current date - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < date] + # Filter all the pairs to ensure that pair existed in the current date range + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < lookback_date] + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.end_daily >= current_date] context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols return poloniex_universe_df.symbol.tolist() +# Replace all NA, NAN or infinite values with its nearest value +def fill(series): + if isinstance(series, pd.Series): + return series.replace([np.inf, -np.inf], np.nan).ffill().bfill() + elif isinstance(series, np.ndarray): + return pd.Series(series).replace([np.inf, -np.inf], np.nan).ffill().bfill().values + else: + return series + + if __name__ == '__main__': start_date = pd.to_datetime('2017-01-01', utc=True) end_date = pd.to_datetime('2017-10-15', utc=True) From 76a8362e3d390a196bc421c7af958de0cf467452 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 15:17:02 -0700 Subject: [PATCH 03/37] Update simple_universe.py --- catalyst/examples/simple_universe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index 172837bb..f5c0a1d2 100644 --- a/catalyst/examples/simple_universe.py +++ 
b/catalyst/examples/simple_universe.py @@ -1,6 +1,6 @@ """ Requires Catalyst version 0.3.0 or above -Tested on Catalyst version 0.3.2 +Tested on Catalyst version 0.3.3 These example aims to provide and easy way for users to learn how to collect data from the different exchanges. You simply need to specify the exchange and the market that you want to focus on. From cd0157347fa16735df06ba0b9cb085c334b25a46 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 18:10:12 -0700 Subject: [PATCH 04/37] Refactoring --- catalyst/examples/simple_universe.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index f5c0a1d2..c4e24182 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -77,19 +77,19 @@ def analyze(context=None, results=None): # Example: Poloniex BTC Market def universe(context, lookback_date, current_date): json_symbols = get_exchange_symbols(context.exchange) # get all the pairs for the exchange - poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe - poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], + universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe + universe_df['base_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[1], axis=1) - poloniex_universe_df['market_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[0], + universe_df['market_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[0], axis=1) # Filter all the exchange pairs to only the ones for a give base currency - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] + universe_df = universe_df[universe_df['base_currency'] == context.base_currency] # Filter all the 
pairs to ensure that pair existed in the current date range - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < lookback_date] - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.end_daily >= current_date] - context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols - return poloniex_universe_df.symbol.tolist() + universe_df = universe_df[universe_df.start_date < lookback_date] + universe_df = universe_df[universe_df.end_daily >= current_date] + context.coins = symbols(*universe_df.symbol) # convert all the pairs to symbols + return universe_df.symbol.tolist() # Replace all NA, NAN or infinite values with its nearest value @@ -112,7 +112,6 @@ if __name__ == '__main__': handle_data=handle_data, analyze=analyze, exchange_name='poloniex', - bundle='poloniex', data_frequency='minute', base_currency='btc', live=False, From 12695474e304187fdbb5760a4e02f6cf2e6d4e14 Mon Sep 17 00:00:00 2001 From: damo1884 Date: Sun, 5 Nov 2017 01:14:09 -0700 Subject: [PATCH 05/37] Add TALib Simple Example --- catalyst/examples/talib_simple.py | 319 ++++++++++++++++++++++++++++++ 1 file changed, 319 insertions(+) create mode 100644 catalyst/examples/talib_simple.py diff --git a/catalyst/examples/talib_simple.py b/catalyst/examples/talib_simple.py new file mode 100644 index 00000000..2c332552 --- /dev/null +++ b/catalyst/examples/talib_simple.py @@ -0,0 +1,319 @@ +# Run Command +# catalyst run --start 2017-1-1 --end 2017-11-1 -o talib_simple.pickle -f talib_simple.py -x poloniex +# +# Description +# Simple TALib Example showing how to use various indicators in you strategy +# Based loosly on https://github.com/mellertson/talib-macd-example/blob/master/talib-macd-matplotlib-example.py + +import pandas as pd +import numpy as np +import talib as ta +import datetime +import os +from os.path import basename +import matplotlib.pyplot as plt +from matplotlib.dates import date2num +from matplotlib.finance import 
candlestick_ohlc +from logbook import Logger +from catalyst.exchange.stats_utils import get_pretty_stats + +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) + +algo_namespace = 'talib_sample' +log = Logger(algo_namespace) + +def initialize(context): + + log.info('Starting TALib Simple Example') + + context.ASSET_NAME = 'BTC_USDT' + context.asset = symbol(context.ASSET_NAME) + + context.ORDER_SIZE = 10 + context.SLIPPAGE_ALLOWED = 0.05 + + context.swallow_errors = True + context.errors = [] + + # Bars to look at per iteration should be bigger than SMA_SLOW + context.BARS = 100 + context.COUNT = 0 + + # Technical Analysis Settings + context.SMA_FAST = 50 + context.SMA_SLOW = 100 + context.RSI_PERIOD = 14 + context.RSI_OVER_BOUGHT = 80 + context.RSI_OVER_SOLD = 20 + context.RSI_AVG_PERIOD = 15 + context.MACD_FAST = 12 + context.MACD_SLOW = 26 + context.MACD_SIGNAL = 9 + context.STOCH_K = 14 + context.STOCH_D = 3 + context.STOCH_OVER_BOUGHT = 80 + context.STOCH_OVER_SOLD = 20 + + pass + +def _handle_data(context, data): + + # Get price, open, high, low, close + prices = data.history( + context.asset, + bar_count=context.BARS, + fields=['price','open','high','low','close'], + frequency='1d') + + # Create a analysis data frame + analysis = pd.DataFrame(index = prices.index) + + # SMA FAST + analysis['sma_f'] = ta.SMA(prices.close.as_matrix(), context.SMA_FAST) + # SMA SLOW + analysis['sma_s'] = ta.SMA(prices.close.as_matrix(), context.SMA_SLOW) + + # Relative Strength Index + analysis['rsi'] = ta.RSI(prices.close.as_matrix(), context.RSI_PERIOD) + # RSI SMA + analysis['sma_r'] = ta.SMA(analysis.rsi.as_matrix(), context.RSI_AVG_PERIOD) + + # MACD, MACD Signal, MACD Histogram + analysis['macd'], analysis['macdSignal'], analysis['macdHist'] = ta.MACD(prices.close.as_matrix(), fastperiod=context.MACD_FAST, slowperiod=context.MACD_SLOW, signalperiod=context.MACD_SIGNAL) + + # Stochastics %K %D + # %K = (Current Close - 
Lowest Low)/(Highest High - Lowest Low) * 100 + # %D = 3-day SMA of %K + analysis['stoch_k'], analysis['stoch_d'] = ta.STOCH(prices.high.as_matrix(), prices.low.as_matrix(), prices.close.as_matrix(), slowk_period=context.STOCH_K, slowd_period=context.STOCH_D) + + # SMA FAST over SLOW Crossover + analysis['sma_test'] = np.where(analysis.sma_f > analysis.sma_s, 1, 0) + + # MACD over Signal Crossover + analysis['macd_test'] = np.where((analysis.macd > analysis.macdSignal), 1, 0) + + # Stochastics OVER BOUGHT & Decreasing + analysis['stoch_over_bought'] = np.where((analysis.stoch_k > context.STOCH_OVER_BOUGHT) & (analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) + + # Stochastics OVER SOLD & Increasing + analysis['stoch_over_sold'] = np.where((analysis.stoch_k < context.STOCH_OVER_SOLD) & (analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) + + # RSI OVER BOUGHT & Decreasing + analysis['rsi_over_bought'] = np.where((analysis.rsi > context.RSI_OVER_BOUGHT) & (analysis.rsi < analysis.rsi.shift(1)), 1, 0) + + # RSI OVER SOLD & Increasing + analysis['rsi_over_sold'] = np.where((analysis.rsi < context.RSI_OVER_SOLD) & (analysis.rsi > analysis.rsi.shift(1)), 1, 0) + + # Save the prices and analysis to send to analyze + context.prices=prices + context.analysis=analysis + context.price = data.current(context.asset, 'price') + + makeOrders(context, analysis) + + # Log the values of this bar + logAnalysis(analysis) + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + try: + _handle_data(context, data) + except Exception as e: + log.warn('aborting the bar on error {}'.format(e)) + context.errors.append(e) + + log.info('completed bar {}, total execution errors {}'.format( + data.current_dt, + len(context.errors) + )) + + if len(context.errors) > 0: + log.info('the errors:\n{}'.format(context.errors)) + + +def analyze(context, results): + # Save results in CSV file + filename = os.path.splitext(os.path.basename('talib_simple'))[0] + 
results.to_csv(filename + '.csv') + + log.info('the daily stats:\n{}'.format(get_pretty_stats(results))) + chart(context, context.prices, context.analysis) + pass + +def makeOrders(context, analysis): + + if context.asset in context.portfolio.positions: + + # Current position + position = context.portfolio.positions[context.asset] + + if(position == 0): + log.info('Position Zero') + return + + # Cost Basis + cost_basis = position.cost_basis + + log.info( + 'Holdings: {amount} @ {cost_basis}'.format( + amount=position.amount, + cost_basis=cost_basis + ) + ) + + # Sell when holding and got sell singnal + if isSell(context, analysis): + profit = (context.price * position.amount) - (cost_basis * position.amount) + order_target_percent( + asset=context.asset, + target=0, + limit_price=context.price * (1 - context.SLIPPAGE_ALLOWED), + ) + log.info( + 'Sold {amount} @ {price} Profit: {profit}'.format( + amount=position.amount, + price=context.price, + profit=profit + ) + ) + else: + log.info('no buy or sell opportunity found') + else: + # Buy when not holding and got buy signal + if isBuy(context, analysis): + order( + asset=context.asset, + amount=context.ORDER_SIZE, + limit_price=context.price * (1 + context.SLIPPAGE_ALLOWED) + ) + log.info( + 'Bought {amount} @ {price}'.format( + amount=context.ORDER_SIZE, + price=context.price + ) + ) + + +def isBuy(context, analysis): + # Bullish SMA Crossover + if(getLast(analysis, 'sma_test') == 1): + # Bullish MACD + if(getLast(analysis, 'macd_test') == 1): + return True + + # # Bullish Stochastics + # if(getLast(analysis, 'stoch_over_sold') == 1): + # return True + + # # Bullish RSI + # if(getLast(analysis, 'rsi_over_sold') == 1): + # return True + + return False + +def isSell(context, analysis): + # Bearish SMA Crossover + if(getLast(analysis, 'sma_test') == 0): + # Bearish MACD + if(getLast(analysis, 'macd_test') == 0): + return True + + # # Bearish Stochastics + # if(getLast(analysis, 'stoch_over_bought') == 0): + # return 
True + + # # Bearish RSI + # if(getLast(analysis, 'rsi_over_bought') == 0): + # return True + + return False + +def chart(context, prices, analysis): + + # Data for matplotlib finance plot + dates = date2num(prices.index.to_pydatetime()) + prices_ochl = [tuple([dates[i], + prices.open[i], + prices.close[i], + prices.high[i], + prices.low[i]]) for i in range(len(dates))] #_1 + + # Prepare plot + fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True) + ax1.set_ylabel(context.ASSET_NAME, size=20) + + #size plot + fig.set_size_inches(15,30) + + # Plot candles + candlestick_ohlc(ax1, prices_ochl, width=0.5, colorup='g', colordown='r') + + # Draw Moving Averages + analysis.sma_f.plot(ax=ax1, c='r') + analysis.sma_s.plot(ax=ax1, c='g') + + #RSI + ax2.set_ylabel('RSI', size=12) + analysis.rsi.plot(ax = ax2, c='g', label = 'Period: ' + str(context.RSI_PERIOD)) + analysis.sma_r.plot(ax = ax2, c='r', label = 'MA: ' + str(context.RSI_AVG_PERIOD)) + ax2.axhline(y=30, c='b') + ax2.axhline(y=50, c='black') + ax2.axhline(y=70, c='b') + ax2.set_ylim([0,100]) + handles, labels = ax2.get_legend_handles_labels() + ax2.legend(handles, labels) + + # Draw MACD computed with Talib + ax3.set_ylabel('MACD: '+ str(context.MACD_FAST) + ', ' + str(context.MACD_SLOW) + ', ' + str(context.MACD_SIGNAL), size=12) + analysis.macd.plot(ax=ax3, color='b', label='Macd') + analysis.macdSignal.plot(ax=ax3, color='g', label='Signal') + analysis.macdHist.plot(ax=ax3, color='r', label='Hist') + ax3.axhline(0, lw=2, color='0') + handles, labels = ax3.get_legend_handles_labels() + ax3.legend(handles, labels) + + # Stochastic plot + ax4.set_ylabel('Stoch (k,d)', size=12) + analysis.stoch_k.plot(ax=ax4, label='stoch_k:'+ str(context.STOCH_K), color='r') + analysis.stoch_d.plot(ax=ax4, label='stoch_d:'+ str(context.STOCH_D), color='g') + handles, labels = ax4.get_legend_handles_labels() + ax4.legend(handles, labels) + ax4.axhline(y=20, c='b') + ax4.axhline(y=50, c='black') + ax4.axhline(y=80, c='b') + + 
plt.show() + +def logAnalysis(analysis): + # Log only the last value in the array + log.info('- sma_f: {:.2f}'.format(getLast(analysis, 'sma_f'))) + log.info('- sma_s: {:.2f}'.format(getLast(analysis, 'sma_s'))) + + log.info('- rsi: {:.2f}'.format(getLast(analysis, 'rsi'))) + log.info('- sma_r: {:.2f}'.format(getLast(analysis, 'sma_r'))) + + log.info('- macd: {:.2f}'.format(getLast(analysis, 'macd'))) + log.info('- macdSignal: {:.2f}'.format(getLast(analysis, 'macdSignal'))) + log.info('- macdHist: {:.2f}'.format(getLast(analysis, 'macdHist'))) + + log.info('- stoch_k: {:.2f}'.format(getLast(analysis, 'stoch_k'))) + log.info('- stoch_d: {:.2f}'.format(getLast(analysis, 'stoch_d'))) + + log.info('- sma_test: {}'.format(getLast(analysis, 'sma_test'))) + log.info('- macd_test: {}'.format(getLast(analysis, 'macd_test'))) + + log.info('- stoch_over_bought: {}'.format(getLast(analysis, 'stoch_over_bought'))) + log.info('- stoch_over_sold: {}'.format(getLast(analysis, 'stoch_over_sold'))) + + log.info('- rsi_over_bought: {}'.format(getLast(analysis, 'rsi_over_bought'))) + log.info('- rsi_over_sold: {}'.format(getLast(analysis, 'rsi_over_sold'))) + +def getLast(arr, name): + return arr[name][arr[name].index[-1]] From 061de3c12fe140a9c9fe42889368be93aed2987b Mon Sep 17 00:00:00 2001 From: damo1884 Date: Wed, 8 Nov 2017 19:28:44 -0800 Subject: [PATCH 06/37] fix issue with candlestick chart --- catalyst/examples/talib_simple.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/catalyst/examples/talib_simple.py b/catalyst/examples/talib_simple.py index 2c332552..d45795e5 100644 --- a/catalyst/examples/talib_simple.py +++ b/catalyst/examples/talib_simple.py @@ -42,7 +42,7 @@ def initialize(context): context.errors = [] # Bars to look at per iteration should be bigger than SMA_SLOW - context.BARS = 100 + context.BARS = 365 context.COUNT = 0 # Technical Analysis Settings @@ -144,7 +144,7 @@ def analyze(context, results): 
results.to_csv(filename + '.csv') log.info('the daily stats:\n{}'.format(get_pretty_stats(results))) - chart(context, context.prices, context.analysis) + chart(context, context.prices, context.analysis, results) pass def makeOrders(context, analysis): @@ -235,31 +235,32 @@ def isSell(context, analysis): return False -def chart(context, prices, analysis): +def chart(context, prices, analysis, results): + results.portfolio_value.plot() # Data for matplotlib finance plot dates = date2num(prices.index.to_pydatetime()) - prices_ochl = [tuple([dates[i], + + # Create the Open High Low Close Tuple + prices_ohlc = [tuple([dates[i], prices.open[i], - prices.close[i], prices.high[i], - prices.low[i]]) for i in range(len(dates))] #_1 + prices.low[i], + prices.close[i]]) for i in range(len(dates))] - # Prepare plot - fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True) + fig = plt.figure(figsize=(14,18)) + + # Draw the candle sticks + ax1 = fig.add_subplot(411) ax1.set_ylabel(context.ASSET_NAME, size=20) - - #size plot - fig.set_size_inches(15,30) - - # Plot candles - candlestick_ohlc(ax1, prices_ochl, width=0.5, colorup='g', colordown='r') + candlestick_ohlc(ax1, prices_ohlc, width=0.4 ,colorup='g', colordown='r') # Draw Moving Averages analysis.sma_f.plot(ax=ax1, c='r') analysis.sma_s.plot(ax=ax1, c='g') #RSI + ax2 = fig.add_subplot(412) ax2.set_ylabel('RSI', size=12) analysis.rsi.plot(ax = ax2, c='g', label = 'Period: ' + str(context.RSI_PERIOD)) analysis.sma_r.plot(ax = ax2, c='r', label = 'MA: ' + str(context.RSI_AVG_PERIOD)) @@ -271,6 +272,7 @@ def chart(context, prices, analysis): ax2.legend(handles, labels) # Draw MACD computed with Talib + ax3 = fig.add_subplot(413) ax3.set_ylabel('MACD: '+ str(context.MACD_FAST) + ', ' + str(context.MACD_SLOW) + ', ' + str(context.MACD_SIGNAL), size=12) analysis.macd.plot(ax=ax3, color='b', label='Macd') analysis.macdSignal.plot(ax=ax3, color='g', label='Signal') @@ -280,6 +282,7 @@ def chart(context, prices, analysis): 
ax3.legend(handles, labels) # Stochastic plot + ax4 = fig.add_subplot(414) ax4.set_ylabel('Stoch (k,d)', size=12) analysis.stoch_k.plot(ax=ax4, label='stoch_k:'+ str(context.STOCH_K), color='r') analysis.stoch_d.plot(ax=ax4, label='stoch_d:'+ str(context.STOCH_D), color='g') From a2bb231424759bc8ee71c6cec46df3e2773fc8a7 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 14 Nov 2017 12:14:38 -0700 Subject: [PATCH 07/37] DOC: improving Win/Conda install instructions --- docs/source/install.rst | 62 +++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index bf958cc9..f9229635 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -136,14 +136,17 @@ about matplotlib backends, please refer to the Windows ~~~~~~~ -In Windows, you will need the `Microsoft Visual C++ Compiler for Python 2.7 +In Windows, you will first need to install the `Microsoft Visual C++ Compiler +for Python 2.7 `_. This package contains the compiler and the set of system headers necessary for producing binary wheels for Python 2.7 packages. If it's not already in your system, download it and install it before proceeding to the next step. -For windows, the easiest and best supported way to install Catalyst is to use -:ref:`Conda `. +Once you have the above compiler installed, the easiest and best supported way +to install Catalyst in Windows is to use :ref:`Conda `. If you didn't +any problems installing the compiler, jump to the :ref:`Conda ` section, +otherwise keep on reading to troubleshoot the C++ compiler installtion. 
Some problems we have encountered installing the **Visual C++ Compiler** mentioned above are as follows: @@ -158,6 +161,8 @@ mentioned above are as follows: ``Registry Editor`` - Navigate to the following folder: ``HKEY_LOCAL_MACHINE\SOFTWARE\Policies\Microsoft\Windows\Installer`` + - If the last folder does not exist, create it by right-clicking on the + parent folder and choosing -> ``New`` -> ``Key`` and typing ``Installer`` - If there is an entry for ``DisableMSI``, set the Value data to 0. - If there is no such entry, click on the ``Edit`` menu -> ``New`` -> ``DWORD (32-bit) Value`` and enter ``DisableMSI`` as the Name (and by @@ -302,9 +307,9 @@ understands the complex binary dependencies of packages like ``numpy`` and dependencies without requiring the use of a second tool to acquire Catalyst's non-Python dependencies. - For Windows, you will need the *Microsoft Visual C++ Compiler for Python - 2.7*. Follow the instructions on the :ref:`Windows` section and come back - here. + For Windows, you will first need to install the *Microsoft Visual C++ + Compiler for Python 2.7*. Follow the instructions on the :ref:`Windows` + section and come back here. For instructions on how to install ``conda``, see the `Conda Installation Documentation `_. Alternatively, @@ -319,10 +324,23 @@ main packages needed. To install MiniConda, you can follow these steps: 3. Ensure the correct installation by running ``conda list`` in a Terminal window, which should print the list of packages installed with Conda. + For Windows, if you accepted the default installation options, you didn't + check an option to add Conda to the PATH, so trying to run ``conda`` from + a regular ``Command Prompt`` will result in the following error: ``'conda' + is no recognized as an internal or external command, operatble program or + batch file``. That's to be expected. 
You will nee to launch an ``Anaconda + Prompt`` that was added at installation time to your list of programs + available from the Start menu. + Once either Conda or MiniConda has been set up you can install Catalyst: 1. Download the file `python2.7-environment.yml `_. + + To download, simply click on the 'Raw' button and save the file locally to + a folder you can remember. Make sure that the file gets saved with the ``.yml`` + extension, and nothing like a ``.txt`` file or anything else. + 2. Open a Terminal window and enter [``cd/dir``] into the directory where you saved the above ``python2.7-environment.yml`` file. 3. Install using this file. This step can take about 5-10 minutes to install. @@ -346,6 +364,14 @@ Once either Conda or MiniConda has been set up you can install Catalyst: activate catalyst +5. Verify that Catalyst is install correctly: + + .. code-block:: bash + + catalyst --version + + which should display the current version. + Congratulations! You now have Catalyst installed. Troubleshooting ``conda`` Install @@ -355,13 +381,21 @@ If the command ``conda env create -f python2.7-environment.yml`` in step 3 above failed for any reason, you can try setting up the environment manually with the following steps: -1. Create the environment: +1. If the above installation failed, and you have a partially set up catalyst + environment, remove it first. If you are starting from scratch, proceed to + step #2: + + .. code-block:: bash + + conda env remove --name catalyst + +2. Create the environment: .. code-block:: bash conda create --name catalyst python=2.7 scipy zlib -2. Activate the environment: +3. Activate the environment: **Linux or OSX:** @@ -375,12 +409,22 @@ with the following steps: activate catalyst -3. Install the Catalyst inside the environment: +4. Install the Catalyst inside the environment: .. code-block:: bash pip install enigma-catalyst matplotlib +5. Verify that Catalyst is installed correctly: + + .. 
code-block:: bash + + catalyst --version + + which should display the current version. + +Congratulations! You now have Catalyst properly installed. + Getting Help ------------ From e087e48088eae47fdf8699ad4bfef693bc2fc240 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 14 Nov 2017 17:04:38 -0500 Subject: [PATCH 08/37] BLD: polishing a sample algorithm --- catalyst/examples/mean_reversion.py | 18 +++++++++--------- catalyst/examples/mean_reversion_simple.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/catalyst/examples/mean_reversion.py b/catalyst/examples/mean_reversion.py index 2e891593..3e0f2ecf 100644 --- a/catalyst/examples/mean_reversion.py +++ b/catalyst/examples/mean_reversion.py @@ -19,7 +19,7 @@ from catalyst.api import symbol, record, order_target_percent, \ # state using the files included in the folder. from catalyst.exchange.stats_utils import extract_transactions, trend_direction -algo_namespace = 'momentum' +algo_namespace = 'mean_reversion' log = Logger(algo_namespace) @@ -30,7 +30,7 @@ def initialize(context): # parameters or values you're going to use. # In our example, we're looking at Ether in USD Tether. 
- context.eth_btc = symbol('etc_usdt') + context.eth_btc = symbol('neo_usd') context.base_price = None context.current_day = None context.trigger = None @@ -256,18 +256,18 @@ if __name__ == '__main__': MODE = 'backtest' if MODE == 'backtest': + # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-7-1 -e 2017-7-31 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 run_algorithm( capital_base=1, data_frequency='minute', initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='poloniex', + exchange_name='bitfinex', algo_namespace=algo_namespace, - base_currency='usdt', - start=pd.to_datetime('2017-7-1', utc=True), - # end=pd.to_datetime('2017-9-30', utc=True), - end=pd.to_datetime('2017-10-31', utc=True), + base_currency='usd', + start=pd.to_datetime('2017-10-1', utc=True), + end=pd.to_datetime('2017-11-13', utc=True), ) elif MODE == 'live': @@ -275,9 +275,9 @@ if __name__ == '__main__': initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='poloniex', + exchange_name='bitfinex', live=True, algo_namespace=algo_namespace, - base_currency='usdt', + base_currency='usd', live_graph=True ) diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index 7780e3ac..58e15006 100644 --- a/catalyst/examples/mean_reversion_simple.py +++ b/catalyst/examples/mean_reversion_simple.py @@ -19,7 +19,7 @@ from catalyst.api import symbol, record, order_target_percent, \ # state using the files included in the folder. from catalyst.exchange.stats_utils import extract_transactions, trend_direction -algo_namespace = 'momentum' +algo_namespace = 'mean_reversion_simple' log = Logger(algo_namespace) @@ -30,7 +30,7 @@ def initialize(context): # parameters or values you're going to use. # In our example, we're looking at Ether in USD Tether. 
- context.eth_btc = symbol('etc_usdt') + context.eth_btc = symbol('neo_usd') context.base_price = None context.current_day = None @@ -228,11 +228,11 @@ if __name__ == '__main__': initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='poloniex', + exchange_name='bitfinex', algo_namespace=algo_namespace, - base_currency='usdt', - start=pd.to_datetime('2017-7-1', utc=True), - end=pd.to_datetime('2017-7-31', utc=True), + base_currency='usd', + start=pd.to_datetime('2017-10-1', utc=True), + end=pd.to_datetime('2017-11-10', utc=True), ) elif MODE == 'live': @@ -240,9 +240,9 @@ if __name__ == '__main__': initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='poloniex', + exchange_name='bitfinex', live=True, algo_namespace=algo_namespace, - base_currency='usdt', + base_currency='usd', live_graph=True ) From df14a949189a5e6b27875005f9ade8d211186fbf Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 16 Nov 2017 11:22:35 -0800 Subject: [PATCH 09/37] Modified for examples consistency. 
Fully tested on v0.3.8 --- catalyst/examples/simple_universe.py | 33 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index c4e24182..20a4fc78 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -26,27 +26,29 @@ from catalyst.api import ( def initialize(context): context.i = -1 # counts the minutes - context.exchange = 'poloniex' # must match the exchange specified in run_algorithm - context.base_currency = 'btc' # must match the base currency specified in run_algorithm + context.exchange = context.exchanges.values()[0].name.lower() # exchange name + context.base_currency = context.exchanges.values()[0].base_currency.lower() # market base currency def handle_data(context, data): - lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history context.i += 1 + lookback_days = 7 # 7 days # current date formatted into a string - today = context.blotter.current_dt + today = data.current_dt date, time = today.strftime('%Y-%m-%d %H:%M:%S').split(' ') - lookback_date = today - timedelta(days=(lookback / (60 * 24))) # subtract the amount of days specified in lookback + lookback_date = today - timedelta(days=lookback_days) # subtract the amount of days specified in lookback lookback_date = lookback_date.strftime('%Y-%m-%d %H:%M:%S').split(' ')[0] # get only the date as a string # update universe everyday - new_day = 60 * 24 + new_day = 60 * 24 # assuming data_frequency='minute' if not context.i % new_day: context.universe = universe(context, lookback_date, date) # get data every 30 minutes minutes = 30 + one_day_in_minutes = 1440 # 1440 assumes data_frequency='minute' + lookback = one_day_in_minutes / minutes * lookback_days # get N lookback_days of history data if not context.i % minutes and context.universe: # we iterate for every pair in the current universe for coin in 
context.coins: @@ -54,15 +56,15 @@ def handle_data(context, data): # 30 minute interval ohlcv data (the standard data required for candlestick or indicators/signals) # 30T means 30 minutes re-sampling of one minute data. change to your desire time interval. - open = fill(data.history(coin, 'open', bar_count=lookback, frequency='1m')).resample('30T').first() - high = fill(data.history(coin, 'high', bar_count=lookback, frequency='1m')).resample('30T').max() - low = fill(data.history(coin, 'low', bar_count=lookback, frequency='1m')).resample('30T').min() - close = fill(data.history(coin, 'price', bar_count=lookback, frequency='1m')).resample('30T').last() - volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='1m')).resample('30T').sum() + opened = fill(data.history(coin, 'open', bar_count=lookback, frequency='30T')).values + high = fill(data.history(coin, 'high', bar_count=lookback, frequency='30T')).values + low = fill(data.history(coin, 'low', bar_count=lookback, frequency='30T')).values + close = fill(data.history(coin, 'price', bar_count=lookback, frequency='30T')).values + volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='30T')).values # close[-1] is the equivalent to current price # displays the minute price for each pair every 30 minutes - print(today, pair, open[-1], high[-1], low[-1], close[-1], volume[-1]) + print(today, pair, opened[-1], high[-1], low[-1], close[-1], volume[-1]) # ---------------------------------------------------------------------------------------------------------- # -------------------------------------- Insert Your Strategy Here ----------------------------------------- @@ -82,6 +84,7 @@ def universe(context, lookback_date, current_date): axis=1) universe_df['market_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[0], axis=1) + # Filter all the exchange pairs to only the ones for a give base currency universe_df = universe_df[universe_df['base_currency'] == 
context.base_currency] @@ -89,6 +92,8 @@ def universe(context, lookback_date, current_date): universe_df = universe_df[universe_df.start_date < lookback_date] universe_df = universe_df[universe_df.end_daily >= current_date] context.coins = symbols(*universe_df.symbol) # convert all the pairs to symbols + + # print(universe_df.symbol.tolist()) return universe_df.symbol.tolist() @@ -104,10 +109,10 @@ def fill(series): if __name__ == '__main__': start_date = pd.to_datetime('2017-01-01', utc=True) - end_date = pd.to_datetime('2017-10-15', utc=True) + end_date = pd.to_datetime('2017-11-13', utc=True) performance = run_algorithm(start=start_date, end=end_date, - capital_base=10000.0, + capital_base=100.0, # amount of base_currency, not always in dollars unless usd initialize=initialize, handle_data=handle_data, analyze=analyze, From 64532c3d085aff481ac493faba2ba7c3be4c0c4f Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 16 Nov 2017 16:54:32 -0500 Subject: [PATCH 10/37] BLD: minor adjustments to the talib sample algo --- catalyst/examples/talib_simple.py | 156 +++++++++++++++++++----------- 1 file changed, 99 insertions(+), 57 deletions(-) diff --git a/catalyst/examples/talib_simple.py b/catalyst/examples/talib_simple.py index d45795e5..d4a7d807 100644 --- a/catalyst/examples/talib_simple.py +++ b/catalyst/examples/talib_simple.py @@ -5,31 +5,29 @@ # Simple TALib Example showing how to use various indicators in you strategy # Based loosly on https://github.com/mellertson/talib-macd-example/blob/master/talib-macd-matplotlib-example.py -import pandas as pd -import numpy as np -import talib as ta -import datetime import os -from os.path import basename + import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import talib as ta +from logbook import Logger from matplotlib.dates import date2num from matplotlib.finance import candlestick_ohlc -from logbook import Logger -from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst import 
run_algorithm from catalyst.api import ( order, order_target_percent, symbol, - record, - get_open_orders, ) +from catalyst.exchange.stats_utils import get_pretty_stats algo_namespace = 'talib_sample' log = Logger(algo_namespace) -def initialize(context): +def initialize(context): log.info('Starting TALib Simple Example') context.ASSET_NAME = 'BTC_USDT' @@ -62,17 +60,17 @@ def initialize(context): pass -def _handle_data(context, data): +def _handle_data(context, data): # Get price, open, high, low, close prices = data.history( context.asset, bar_count=context.BARS, - fields=['price','open','high','low','close'], + fields=['price', 'open', 'high', 'low', 'close'], frequency='1d') # Create a analysis data frame - analysis = pd.DataFrame(index = prices.index) + analysis = pd.DataFrame(index=prices.index) # SMA FAST analysis['sma_f'] = ta.SMA(prices.close.as_matrix(), context.SMA_FAST) @@ -82,44 +80,59 @@ def _handle_data(context, data): # Relative Strength Index analysis['rsi'] = ta.RSI(prices.close.as_matrix(), context.RSI_PERIOD) # RSI SMA - analysis['sma_r'] = ta.SMA(analysis.rsi.as_matrix(), context.RSI_AVG_PERIOD) + analysis['sma_r'] = ta.SMA(analysis.rsi.as_matrix(), + context.RSI_AVG_PERIOD) # MACD, MACD Signal, MACD Histogram - analysis['macd'], analysis['macdSignal'], analysis['macdHist'] = ta.MACD(prices.close.as_matrix(), fastperiod=context.MACD_FAST, slowperiod=context.MACD_SLOW, signalperiod=context.MACD_SIGNAL) - + analysis['macd'], analysis['macdSignal'], analysis['macdHist'] = ta.MACD( + prices.close.as_matrix(), fastperiod=context.MACD_FAST, + slowperiod=context.MACD_SLOW, signalperiod=context.MACD_SIGNAL) + # Stochastics %K %D # %K = (Current Close - Lowest Low)/(Highest High - Lowest Low) * 100 # %D = 3-day SMA of %K - analysis['stoch_k'], analysis['stoch_d'] = ta.STOCH(prices.high.as_matrix(), prices.low.as_matrix(), prices.close.as_matrix(), slowk_period=context.STOCH_K, slowd_period=context.STOCH_D) + analysis['stoch_k'], analysis['stoch_d'] = 
ta.STOCH( + prices.high.as_matrix(), prices.low.as_matrix(), + prices.close.as_matrix(), slowk_period=context.STOCH_K, + slowd_period=context.STOCH_D) # SMA FAST over SLOW Crossover analysis['sma_test'] = np.where(analysis.sma_f > analysis.sma_s, 1, 0) # MACD over Signal Crossover - analysis['macd_test'] = np.where((analysis.macd > analysis.macdSignal), 1, 0) + analysis['macd_test'] = np.where((analysis.macd > analysis.macdSignal), 1, + 0) # Stochastics OVER BOUGHT & Decreasing - analysis['stoch_over_bought'] = np.where((analysis.stoch_k > context.STOCH_OVER_BOUGHT) & (analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) + analysis['stoch_over_bought'] = np.where( + (analysis.stoch_k > context.STOCH_OVER_BOUGHT) & ( + analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) # Stochastics OVER SOLD & Increasing - analysis['stoch_over_sold'] = np.where((analysis.stoch_k < context.STOCH_OVER_SOLD) & (analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) + analysis['stoch_over_sold'] = np.where( + (analysis.stoch_k < context.STOCH_OVER_SOLD) & ( + analysis.stoch_k > analysis.stoch_k.shift(1)), 1, 0) # RSI OVER BOUGHT & Decreasing - analysis['rsi_over_bought'] = np.where((analysis.rsi > context.RSI_OVER_BOUGHT) & (analysis.rsi < analysis.rsi.shift(1)), 1, 0) + analysis['rsi_over_bought'] = np.where( + (analysis.rsi > context.RSI_OVER_BOUGHT) & ( + analysis.rsi < analysis.rsi.shift(1)), 1, 0) # RSI OVER SOLD & Increasing - analysis['rsi_over_sold'] = np.where((analysis.rsi < context.RSI_OVER_SOLD) & (analysis.rsi > analysis.rsi.shift(1)), 1, 0) + analysis['rsi_over_sold'] = np.where( + (analysis.rsi < context.RSI_OVER_SOLD) & ( + analysis.rsi > analysis.rsi.shift(1)), 1, 0) # Save the prices and analysis to send to analyze - context.prices=prices - context.analysis=analysis + context.prices = prices + context.analysis = analysis context.price = data.current(context.asset, 'price') makeOrders(context, analysis) # Log the values of this bar logAnalysis(analysis) - + def 
handle_data(context, data): log.info('handling bar {}'.format(data.current_dt)) @@ -147,14 +160,14 @@ def analyze(context, results): chart(context, context.prices, context.analysis, results) pass -def makeOrders(context, analysis): +def makeOrders(context, analysis): if context.asset in context.portfolio.positions: # Current position position = context.portfolio.positions[context.asset] - - if(position == 0): + + if (position == 0): log.info('Position Zero') return @@ -170,7 +183,8 @@ def makeOrders(context, analysis): # Sell when holding and got sell singnal if isSell(context, analysis): - profit = (context.price * position.amount) - (cost_basis * position.amount) + profit = (context.price * position.amount) - ( + cost_basis * position.amount) order_target_percent( asset=context.asset, target=0, @@ -178,16 +192,16 @@ def makeOrders(context, analysis): ) log.info( 'Sold {amount} @ {price} Profit: {profit}'.format( - amount=position.amount, - price=context.price, - profit=profit + amount=position.amount, + price=context.price, + profit=profit ) ) else: log.info('no buy or sell opportunity found') else: # Buy when not holding and got buy signal - if isBuy(context, analysis): + if isBuy(context, analysis): order( asset=context.asset, amount=context.ORDER_SIZE, @@ -195,17 +209,17 @@ def makeOrders(context, analysis): ) log.info( 'Bought {amount} @ {price}'.format( - amount=context.ORDER_SIZE, - price=context.price + amount=context.ORDER_SIZE, + price=context.price ) ) def isBuy(context, analysis): # Bullish SMA Crossover - if(getLast(analysis, 'sma_test') == 1): + if (getLast(analysis, 'sma_test') == 1): # Bullish MACD - if(getLast(analysis, 'macd_test') == 1): + if (getLast(analysis, 'macd_test') == 1): return True # # Bullish Stochastics @@ -218,11 +232,12 @@ def isBuy(context, analysis): return False + def isSell(context, analysis): # Bearish SMA Crossover - if(getLast(analysis, 'sma_test') == 0): + if (getLast(analysis, 'sma_test') == 0): # Bearish MACD - 
if(getLast(analysis, 'macd_test') == 0): + if (getLast(analysis, 'macd_test') == 0): return True # # Bearish Stochastics @@ -235,6 +250,7 @@ def isSell(context, analysis): return False + def chart(context, prices, analysis, results): results.portfolio_value.plot() @@ -243,37 +259,40 @@ def chart(context, prices, analysis, results): # Create the Open High Low Close Tuple prices_ohlc = [tuple([dates[i], - prices.open[i], - prices.high[i], - prices.low[i], - prices.close[i]]) for i in range(len(dates))] + prices.open[i], + prices.high[i], + prices.low[i], + prices.close[i]]) for i in range(len(dates))] - fig = plt.figure(figsize=(14,18)) + fig = plt.figure(figsize=(14, 18)) # Draw the candle sticks ax1 = fig.add_subplot(411) ax1.set_ylabel(context.ASSET_NAME, size=20) - candlestick_ohlc(ax1, prices_ohlc, width=0.4 ,colorup='g', colordown='r') + candlestick_ohlc(ax1, prices_ohlc, width=0.4, colorup='g', colordown='r') # Draw Moving Averages analysis.sma_f.plot(ax=ax1, c='r') analysis.sma_s.plot(ax=ax1, c='g') - #RSI + # RSI ax2 = fig.add_subplot(412) ax2.set_ylabel('RSI', size=12) - analysis.rsi.plot(ax = ax2, c='g', label = 'Period: ' + str(context.RSI_PERIOD)) - analysis.sma_r.plot(ax = ax2, c='r', label = 'MA: ' + str(context.RSI_AVG_PERIOD)) + analysis.rsi.plot(ax=ax2, c='g', + label='Period: ' + str(context.RSI_PERIOD)) + analysis.sma_r.plot(ax=ax2, c='r', + label='MA: ' + str(context.RSI_AVG_PERIOD)) ax2.axhline(y=30, c='b') ax2.axhline(y=50, c='black') ax2.axhline(y=70, c='b') - ax2.set_ylim([0,100]) + ax2.set_ylim([0, 100]) handles, labels = ax2.get_legend_handles_labels() ax2.legend(handles, labels) # Draw MACD computed with Talib ax3 = fig.add_subplot(413) - ax3.set_ylabel('MACD: '+ str(context.MACD_FAST) + ', ' + str(context.MACD_SLOW) + ', ' + str(context.MACD_SIGNAL), size=12) + ax3.set_ylabel('MACD: ' + str(context.MACD_FAST) + ', ' + str( + context.MACD_SLOW) + ', ' + str(context.MACD_SIGNAL), size=12) analysis.macd.plot(ax=ax3, color='b', label='Macd') 
analysis.macdSignal.plot(ax=ax3, color='g', label='Signal') analysis.macdHist.plot(ax=ax3, color='r', label='Hist') @@ -284,8 +303,10 @@ def chart(context, prices, analysis, results): # Stochastic plot ax4 = fig.add_subplot(414) ax4.set_ylabel('Stoch (k,d)', size=12) - analysis.stoch_k.plot(ax=ax4, label='stoch_k:'+ str(context.STOCH_K), color='r') - analysis.stoch_d.plot(ax=ax4, label='stoch_d:'+ str(context.STOCH_D), color='g') + analysis.stoch_k.plot(ax=ax4, label='stoch_k:' + str(context.STOCH_K), + color='r') + analysis.stoch_d.plot(ax=ax4, label='stoch_d:' + str(context.STOCH_D), + color='g') handles, labels = ax4.get_legend_handles_labels() ax4.legend(handles, labels) ax4.axhline(y=20, c='b') @@ -294,6 +315,7 @@ def chart(context, prices, analysis, results): plt.show() + def logAnalysis(analysis): # Log only the last value in the array log.info('- sma_f: {:.2f}'.format(getLast(analysis, 'sma_f'))) @@ -303,7 +325,8 @@ def logAnalysis(analysis): log.info('- sma_r: {:.2f}'.format(getLast(analysis, 'sma_r'))) log.info('- macd: {:.2f}'.format(getLast(analysis, 'macd'))) - log.info('- macdSignal: {:.2f}'.format(getLast(analysis, 'macdSignal'))) + log.info( + '- macdSignal: {:.2f}'.format(getLast(analysis, 'macdSignal'))) log.info('- macdHist: {:.2f}'.format(getLast(analysis, 'macdHist'))) log.info('- stoch_k: {:.2f}'.format(getLast(analysis, 'stoch_k'))) @@ -312,11 +335,30 @@ def logAnalysis(analysis): log.info('- sma_test: {}'.format(getLast(analysis, 'sma_test'))) log.info('- macd_test: {}'.format(getLast(analysis, 'macd_test'))) - log.info('- stoch_over_bought: {}'.format(getLast(analysis, 'stoch_over_bought'))) - log.info('- stoch_over_sold: {}'.format(getLast(analysis, 'stoch_over_sold'))) + log.info('- stoch_over_bought: {}'.format( + getLast(analysis, 'stoch_over_bought'))) + log.info( + '- stoch_over_sold: {}'.format(getLast(analysis, 'stoch_over_sold'))) + + log.info('- rsi_over_bought: {}'.format( + getLast(analysis, 'rsi_over_bought'))) + log.info( + '- 
rsi_over_sold: {}'.format(getLast(analysis, 'rsi_over_sold'))) - log.info('- rsi_over_bought: {}'.format(getLast(analysis, 'rsi_over_bought'))) - log.info('- rsi_over_sold: {}'.format(getLast(analysis, 'rsi_over_sold'))) def getLast(arr, name): return arr[name][arr[name].index[-1]] + + +if __name__ == '__main__': + run_algorithm( + capital_base=10000, + data_frequency='daily', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex', + base_currency='usdt', + start=pd.to_datetime('2016-11-1', utc=True), + end=pd.to_datetime('2017-11-10', utc=True), + ) From 3fa88a3e564acd6bfa7cd91955fdc4804da99e3a Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 16 Nov 2017 16:55:40 -0500 Subject: [PATCH 11/37] BLD: misc housekeeping --- catalyst/examples/buy_and_hodl.py | 24 +++++++++-- catalyst/examples/mean_reversion_simple.py | 36 ++++++++--------- catalyst/examples/rsi_profit_target.py | 34 ++++++++-------- catalyst/exchange/stats_utils.py | 23 ++++++++--- catalyst/support/issue_47.py | 27 ++++++------- catalyst/utils/run_algo.py | 47 +++++++++++++++++++--- tests/exchange/test_bundle.py | 4 +- 7 files changed, 129 insertions(+), 66 deletions(-) diff --git a/catalyst/examples/buy_and_hodl.py b/catalyst/examples/buy_and_hodl.py index b2b6a7ec..e4146b3c 100644 --- a/catalyst/examples/buy_and_hodl.py +++ b/catalyst/examples/buy_and_hodl.py @@ -14,7 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import pandas as pd +from catalyst import run_algorithm from catalyst.api import ( order_target_value, symbol, @@ -23,6 +25,7 @@ from catalyst.api import ( get_open_orders, ) + def initialize(context): context.ASSET_NAME = 'BTC_USDT' context.TARGET_HODL_RATIO = 0.8 @@ -38,6 +41,7 @@ def initialize(context): context.i = 0 + def handle_data(context, data): context.i += 1 @@ -64,8 +68,8 @@ def handle_data(context, data): order_target_value( context.asset, target_hodl_value, - limit_price=price*1.1, - stop_price=price*0.9, + limit_price=price * 1.1, + stop_price=price * 0.9, ) record( @@ -76,6 +80,7 @@ def handle_data(context, data): leverage=context.account.leverage, ) + def analyze(context=None, results=None): import matplotlib.pyplot as plt @@ -134,4 +139,17 @@ def analyze(context=None, results=None): # Show the plot. plt.gcf().set_size_inches(18, 8) - plt.show() \ No newline at end of file + plt.show() + + +run_algorithm( + capital_base=10000, + data_frequency='minute', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex', + base_currency='usd', + start=pd.to_datetime('2017-10-1', utc=True), + end=pd.to_datetime('2017-11-10', utc=True), +) diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index 58e15006..9aa5ada3 100644 --- a/catalyst/examples/mean_reversion_simple.py +++ b/catalyst/examples/mean_reversion_simple.py @@ -1,14 +1,12 @@ # For this example, we're going to write a simple momentum script. When the # stock goes up quickly, we're going to buy; when it goes down quickly, we're # going to sell. Hopefully we'll ride the waves. -from datetime import timedelta import pandas as pd import talib # To run an algorithm in Catalyst, you need two functions: initialize and # handle_data. 
from logbook import Logger -from talib.common import MA_Type from catalyst import run_algorithm from catalyst.api import symbol, record, order_target_percent, \ @@ -17,10 +15,10 @@ from catalyst.api import symbol, record, order_target_percent, \ # In this example, Catalyst will create the `.catalyst/data/live_algos` # directory. If we stop and start the algorithm, Catalyst will resume its # state using the files included in the folder. -from catalyst.exchange.stats_utils import extract_transactions, trend_direction +from catalyst.exchange.stats_utils import extract_transactions -algo_namespace = 'mean_reversion_simple' -log = Logger(algo_namespace) +NAMESPACE = 'mean_reversion_simple' +log = Logger(NAMESPACE) def initialize(context): @@ -30,7 +28,7 @@ def initialize(context): # parameters or values you're going to use. # In our example, we're looking at Ether in USD Tether. - context.eth_btc = symbol('neo_usd') + context.neo_usd = symbol('neo_usd') context.base_price = None context.current_day = None @@ -50,14 +48,14 @@ def handle_data(context, data): context.current_day = today # We're computing the volume-weighted-average-price of the security - # defined above, in the context.eth_btc variable. For this example, we're + # defined above, in the context.neo_usd variable. For this example, we're # using three bars on the 15 min bars. # The frequency attribute determine the bar size. We use this convention # for the frequency alias: # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases prices = data.history( - context.eth_btc, + context.neo_usd, fields='close', bar_count=50, frequency='15T' @@ -72,7 +70,7 @@ def handle_data(context, data): # We need a variable for the current price of the security to compare to # the average. 
Since we are requesting two fields, data.current() # returns a DataFrame with - current = data.current(context.eth_btc, fields=['close', 'volume']) + current = data.current(context.neo_usd, fields=['close', 'volume']) price = current['close'] # If base_price is not set, we use the current value. This is the @@ -101,19 +99,19 @@ def handle_data(context, data): # Since we are using limit orders, some orders may not execute immediately # we wait until all orders are executed before considering more trades. - orders = get_open_orders(context.eth_btc) + orders = get_open_orders(context.neo_usd) if len(orders) > 0: return # Exit if we cannot trade - if not data.can_trade(context.eth_btc): + if not data.can_trade(context.neo_usd): return # Another powerful built-in feature of the Catalyst backtester is the # portfolio object. The portfolio object tracks your positions, cash, # cost basis of specific holdings, and more. In this line, we calculate # how long or short our position is at this minute. 
- pos_amount = context.portfolio.positions[context.eth_btc].amount + pos_amount = context.portfolio.positions[context.neo_usd].amount if rsi[-1] <= 30 and pos_amount == 0: log.info( @@ -121,7 +119,7 @@ def handle_data(context, data): data.current_dt, price, rsi[-1] ) ) - order_target_percent(context.eth_btc, 1) + order_target_percent(context.neo_usd, 1) context.traded_today = True elif rsi[-1] >= 80 and pos_amount > 0: @@ -130,7 +128,7 @@ def handle_data(context, data): data.current_dt, price, rsi[-1] ) ) - order_target_percent(context.eth_btc, 0) + order_target_percent(context.neo_usd, 0) context.traded_today = True @@ -150,7 +148,7 @@ def analyze(context=None, perf=None): perf.loc[:, 'price'].plot(ax=ax2, label='Price') ax2.set_ylabel('{asset} ({base})'.format( - asset=context.eth_btc.symbol, base=base_currency + asset=context.neo_usd.symbol, base=base_currency )) transaction_df = extract_transactions(perf) @@ -218,10 +216,10 @@ def analyze(context=None, perf=None): if __name__ == '__main__': # The execution mode: backtest or live - MODE = 'backtest' + MODE = 'live' if MODE == 'backtest': - # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-7-1 -e 2017-7-31 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 + # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-10-1 -e 2017-11-10 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 run_algorithm( capital_base=10000, data_frequency='minute', @@ -229,7 +227,7 @@ if __name__ == '__main__': handle_data=handle_data, analyze=analyze, exchange_name='bitfinex', - algo_namespace=algo_namespace, + algo_namespace=NAMESPACE, base_currency='usd', start=pd.to_datetime('2017-10-1', utc=True), end=pd.to_datetime('2017-11-10', utc=True), @@ -242,7 +240,7 @@ if __name__ == '__main__': analyze=analyze, exchange_name='bitfinex', live=True, - algo_namespace=algo_namespace, + algo_namespace=NAMESPACE, base_currency='usd', live_graph=True ) 
diff --git a/catalyst/examples/rsi_profit_target.py b/catalyst/examples/rsi_profit_target.py index a426c402..5e24f122 100644 --- a/catalyst/examples/rsi_profit_target.py +++ b/catalyst/examples/rsi_profit_target.py @@ -250,27 +250,27 @@ def analyze(context=None, results=None): pass -run_algorithm( - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bittrex', - live=True, - algo_namespace=algo_namespace, - base_currency='btc', - live_graph=False -) - -# Backtest # run_algorithm( -# capital_base=0.5, -# data_frequency='minute', # initialize=initialize, # handle_data=handle_data, # analyze=analyze, -# exchange_name='poloniex', +# exchange_name='bittrex', +# live=True, # algo_namespace=algo_namespace, # base_currency='btc', -# start=pd.to_datetime('2017-9-1', utc=True), -# end=pd.to_datetime('2017-10-1', utc=True), +# live_graph=False # ) + +# Backtest +run_algorithm( + capital_base=0.5, + data_frequency='minute', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex', + algo_namespace=algo_namespace, + base_currency='btc', + start=pd.to_datetime('2017-9-1', utc=True), + end=pd.to_datetime('2017-10-1', utc=True), +) diff --git a/catalyst/exchange/stats_utils.py b/catalyst/exchange/stats_utils.py index 1290f71f..e982de2a 100644 --- a/catalyst/exchange/stats_utils.py +++ b/catalyst/exchange/stats_utils.py @@ -30,14 +30,25 @@ def crossover(source, target): bool """ - if source[-1] is np.nan or source[-2] is np.nan \ - or target[-1] is np.nan or target[-2] is np.nan: - return False + if isinstance(target, numbers.Number): + if source[-1] is np.nan or source[-2] is np.nan \ + or target is np.nan: + return False + + if source[-1] >= target > source[-2]: + return True + else: + return False - if source[-1] > target[-1] and source[-2] < target[-2]: - return True else: - return False + if source[-1] is np.nan or source[-2] is np.nan \ + or target[-1] is np.nan or target[-2] is np.nan: + return False 
+ + if source[-1] > target[-1] and source[-2] < target[-2]: + return True + else: + return False def crossunder(source, target): diff --git a/catalyst/support/issue_47.py b/catalyst/support/issue_47.py index 5341dd03..3ebc0bc7 100644 --- a/catalyst/support/issue_47.py +++ b/catalyst/support/issue_47.py @@ -1,6 +1,6 @@ """ Requires Catalyst version 0.3.0 or above -Tested on Catalyst version 0.3.2 +Tested on Catalyst version 0.3.3 These example aims to provide and easy way for users to learn how to collect data from the different exchanges. You simply need to specify the exchange and the market that you want to focus on. @@ -27,7 +27,7 @@ from catalyst.api import ( def initialize(context): context.i = -1 # counts the minutes context.exchange = 'poloniex' # must match the exchange specified in run_algorithm - context.base_currency = 'eth' # must match the base currency specified in run_algorithm + context.base_currency = 'btc' # must match the base currency specified in run_algorithm def handle_data(context, data): @@ -56,21 +56,21 @@ def handle_data(context, data): # 30 minute interval ohlcv data (the standard data required for candlestick or indicators/signals) # 30T means 30 minutes re-sampling of one minute data. change to your desire time interval. 
- open = fill(data.history(coin, 'open', bar_count=lookback, - frequency='1m')).resample('30T').first() + opened = fill(data.history(coin, 'open', bar_count=lookback, + frequency='30T')).values high = fill(data.history(coin, 'high', bar_count=lookback, - frequency='1m')).resample('30T').max() + frequency='30T')).values low = fill(data.history(coin, 'low', bar_count=lookback, - frequency='1m')).resample('30T').min() + frequency='30T')).values close = fill(data.history(coin, 'price', bar_count=lookback, - frequency='1m')).resample('30T').last() + frequency='30T')).values volume = fill(data.history(coin, 'volume', bar_count=lookback, - frequency='1m')).resample('30T').sum() + frequency='30T')).values # close[-1] is the equivalent to current price # displays the minute price for each pair every 30 minutes print( - today, pair, open[-1], high[-1], low[-1], close[-1], volume[-1]) + today, pair, opened[-1], high[-1], low[-1], close[-1], volume[-1]) # ---------------------------------------------------------------------------------------------------------- # -------------------------------------- Insert Your Strategy Here ----------------------------------------- @@ -82,7 +82,7 @@ def analyze(context=None, results=None): # Get the universe for a given exchange and a given base_currency market -# Example: Poloniex BTC Market +# Example: Poloniex btc Market def universe(context, lookback_date, current_date): json_symbols = get_exchange_symbols( context.exchange) # get all the pairs for the exchange @@ -103,7 +103,6 @@ def universe(context, lookback_date, current_date): universe_df = universe_df[universe_df.end_daily >= current_date] context.coins = symbols( *universe_df.symbol) # convert all the pairs to symbols - print(universe_df.head(), len(universe_df)) return universe_df.symbol.tolist() @@ -119,8 +118,8 @@ def fill(series): if __name__ == '__main__': - start_date = pd.to_datetime('2017-01-01', utc=True) - end_date = pd.to_datetime('2017-10-15', utc=True) + start_date = 
pd.to_datetime('2017-01-08', utc=True) + end_date = pd.to_datetime('2017-11-13', utc=True) performance = run_algorithm(start=start_date, end=end_date, capital_base=10000.0, @@ -129,7 +128,7 @@ if __name__ == '__main__': analyze=analyze, exchange_name='poloniex', data_frequency='minute', - base_currency='eth', + base_currency='btc', live=False, live_graph=False, algo_namespace='simple_universe') diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index c12dedc9..b4335311 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -1,4 +1,5 @@ import os +import re import sys import warnings from datetime import timedelta @@ -8,6 +9,8 @@ from time import sleep import click import pandas as pd +from catalyst.data.bundles import load +from catalyst.data.data_portal import DataPortal from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.poloniex.poloniex import Poloniex @@ -167,10 +170,12 @@ def _run(handle_data, # This corresponds to the json file containing api token info exchange_auth = get_exchange_auth(exchange_name) - if live and (exchange_auth['key'] == '' or exchange_auth['secret'] == ''): + if live and ( + exchange_auth['key'] == '' or exchange_auth['secret'] == ''): raise ExchangeAuthEmpty( - exchange=exchange_name.title(), - filename=os.path.join(get_exchange_folder(exchange_name, environ), 'auth.json') ) + exchange=exchange_name.title(), + filename=os.path.join( + get_exchange_folder(exchange_name, environ), 'auth.json')) if exchange_name == 'bitfinex': exchanges[exchange_name] = Bitfinex( @@ -287,7 +292,7 @@ def _run(handle_data, algo_namespace=algo_namespace, live_graph=live_graph ) - else: + elif exchanges: # Removed the existing Poloniex fork to keep things simple # We can add back the complexity if required. 
@@ -317,6 +322,36 @@ def _run(handle_data, exchanges=exchanges ) + elif bundle is not None: + bundle_data = load( + bundle, + environ, + bundle_timestamp, + ) + + prefix, connstr = re.split( + r'sqlite:///', + str(bundle_data.asset_finder.engine.url), + maxsplit=1, + ) + if prefix: + raise ValueError( + "invalid url %r, must begin with 'sqlite:///'" % + str(bundle_data.asset_finder.engine.url), + ) + + env = TradingEnvironment(asset_db_path=connstr, environ=environ) + first_trading_day = \ + bundle_data.equity_minute_bar_reader.first_trading_day + + data = DataPortal( + env.asset_finder, open_calendar, + first_trading_day=first_trading_day, + equity_minute_reader=bundle_data.equity_minute_bar_reader, + equity_daily_reader=bundle_data.equity_daily_bar_reader, + adjustment_reader=bundle_data.adjustment_reader, + ) + perf = algorithm_class( namespace=namespace, env=env, @@ -486,7 +521,9 @@ def run_algorithm(initialize, -------- catalyst.data.bundles.bundles : The available data bundles. """ - load_extensions(default_extension, extensions, strict_extensions, environ) + load_extensions( + default_extension, extensions, strict_extensions, environ + ) # I'm not sure that we need this since the modified DataPortal # does not require extensions to be explicitly loaded. 
diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 9492438e..3966a1e8 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -460,8 +460,8 @@ class TestExchangeBundle: def bundle_to_csv(self): exchange_name = 'poloniex' data_frequency = 'minute' - period = '2017-09' - symbol = 'eth_btc' + period = '2017-02' + symbol = 'lsk_eth' exchange = get_exchange(exchange_name) asset = exchange.get_asset(symbol) From c260e188b0c220f79173892299ffb0f63145ec1f Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 16 Nov 2017 18:14:24 -0500 Subject: [PATCH 12/37] BUG: looking a potential resampling issue --- tests/exchange/test_data_portal.py | 37 ++++++++++++++++++++++++++++++ tests/exchange/test_utils.py | 15 +++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 31c67295..efd2ffe6 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -113,3 +113,40 @@ class TestExchangeDataPortal: ) log.info('found history window: {}'.format(data)) + + def test_validate_resample(self): + symbol = ['eth_btc'] + exchange_name = 'poloniex' + exchange = get_exchange(exchange_name, base_currency=symbol) + + assets = exchange.get_assets(symbols=symbol) + + date = rnd_history_date_days( + max_days=10, + last_dt=pd.to_datetime('2017-11-1', utc=True) + ) + bar_count = rnd_bar_count(max_bars=10) + sample_minutes = 15 + sample_data = self.data_portal_backtest.get_history_window( + assets=assets, + end_dt=date, + bar_count=bar_count, + frequency='{}T'.format(sample_minutes), + field='close', + data_frequency='daily' + ) + minute_data = self.data_portal_backtest.get_history_window( + assets=assets, + end_dt=date, + bar_count=bar_count * sample_minutes, + frequency='1T', + field='close', + data_frequency='daily' + ) + resampled_minute_data = minute_data.resample( + '{}T'.format(sample_minutes)) + + 
print(sample_data.tail(10)) + print(resampled_minute_data.tail(10)) + print(minute_data.tail(10)) + pass diff --git a/tests/exchange/test_utils.py b/tests/exchange/test_utils.py index eb53bff5..d7f1df87 100644 --- a/tests/exchange/test_utils.py +++ b/tests/exchange/test_utils.py @@ -4,11 +4,20 @@ from random import randint import pandas as pd -def rnd_history_date_days(max_days=30): - now = pd.Timestamp.utcnow() +def rnd_history_date_days(max_days=30, last_dt=None): + if last_dt is None: + last_dt = pd.Timestamp.utcnow() + days = randint(0, max_days) - return now - timedelta(days=days) + return last_dt - timedelta(days=days) + + +def rnd_history_date_minutes(max_minutes=1440): + now = pd.Timestamp.utcnow() + days = randint(0, max_minutes) + + return now - timedelta(minutes=days) def rnd_bar_count(max_bars=21): From 8a89c0c53fec8826016c61ca217b3f1771246d93 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 17 Nov 2017 09:51:39 -0700 Subject: [PATCH 13/37] BUG: enforced --base_currency in backtesting. Fixes #67. --- catalyst/__main__.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index c1872d3f..d44eab4a 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -30,16 +30,17 @@ except NameError: @click.option( '--strict-extensions/--non-strict-extensions', is_flag=True, - help='If --strict-extensions is passed then catalyst will not run if it' - ' cannot load all of the specified extensions. If this is not passed or' - ' --non-strict-extensions is passed then the failure will be logged but' - ' execution will continue.', + help='If --strict-extensions is passed then catalyst will not run ' + 'if it cannot load all of the specified extensions. 
If this is ' + 'not passed or --non-strict-extensions is passed then the ' + 'failure will be logged but execution will continue.', ) @click.option( '--default-extension/--no-default-extension', is_flag=True, default=True, - help="Don't load the default catalyst extension.py file in $CATALYST_HOME.", + help="Don't load the default catalyst extension.py file " + "in $CATALYST_HOME.", ) @click.version_option() def main(extension, strict_extensions, default_extension): @@ -124,9 +125,9 @@ def ipython_only(option): '--define', multiple=True, help="Define a name to be bound in the namespace before executing" - " the algotext. For example '-Dname=value'. The value may be any python" - " expression. These are evaluated in order so they may refer to previously" - " defined names.", + " the algotext. For example '-Dname=value'. The value may be" + " any python expression. These are evaluated in order so they" + " may refer to previously defined names.", ) @click.option( '--data-frequency', @@ -176,8 +177,8 @@ def ipython_only(option): default='-', metavar='FILENAME', show_default=True, - help="The location to write the perf data. If this is '-' the perf will" - " be written to stdout.", + help="The location to write the perf data. 
If this is '-' the perf" + " will be written to stdout.", ) @click.option( '--print-algo/--no-print-algo', @@ -195,7 +196,8 @@ def ipython_only(option): '-x', '--exchange-name', type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), - help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', + help='The name of the targeted exchange (supported: bitfinex,' + ' bittrex, poloniex).', ) @click.option( '-n', @@ -250,6 +252,9 @@ def run(ctx, if exchange_name is None: ctx.fail("must specify an exchange name '-x'") + if base_currency is None: + ctx.fail("must specify a base currency with '-c'") + perf = _run( initialize=None, handle_data=None, @@ -364,7 +369,8 @@ def catalyst_magic(line, cell=None): '-x', '--exchange-name', type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), - help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', + help='The name of the targeted exchange (supported: bitfinex,' + ' bittrex, poloniex).', ) @click.option( '-n', From d57b79427b4a52dc9cd87cf95c8bcb20ef63bf66 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Fri, 17 Nov 2017 10:43:20 -0700 Subject: [PATCH 14/37] BUG: enforced --capital base in backtesting --- catalyst/__main__.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index d44eab4a..77cfb164 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -242,18 +242,25 @@ def run(ctx, # does not pass either of these and then passes the first only # to be told they need to pass the second argument also ctx.fail( - "must specify dates with '-s' / '--start' and '-e' / '--end'", + "must specify dates with '-s' / '--start' and '-e' / '--end'" + " in backtest mode", ) if start is None: - ctx.fail("must specify a start date with '-s' / '--start'") + ctx.fail("must specify a start date with '-s' / '--start'" + " in backtest mode") if end is None: - ctx.fail("must specify an end date with '-e' 
/ '--end'") + ctx.fail("must specify an end date with '-e' / '--end'" + " in backtest mode") if exchange_name is None: ctx.fail("must specify an exchange name '-x'") if base_currency is None: - ctx.fail("must specify a base currency with '-c'") + ctx.fail("must specify a base currency with '-c' in backtest mode") + + if capital_base is None: + ctx.fail("must specify a capital base with '--capital-base'" + " in backtest mode") perf = _run( initialize=None, @@ -340,9 +347,9 @@ def catalyst_magic(line, cell=None): '--define', multiple=True, help="Define a name to be bound in the namespace before executing" - " the algotext. For example '-Dname=value'. The value may be any python" - " expression. These are evaluated in order so they may refer to previously" - " defined names.", + " the algotext. For example '-Dname=value'. The value may be" + " any python expression. These are evaluated in order so they" + " may refer to previously defined names.", ) @click.option( '-o', From f3dca74e87dae8e9c144b1d31369956e3eb702d6 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 17 Nov 2017 19:40:49 -0500 Subject: [PATCH 15/37] BUG: fixed a get_candles issue with the Poloniex exchange --- catalyst/exchange/poloniex/poloniex.py | 5 ++--- tests/exchange/test_poloniex.py | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 62845845..0d283d8b 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -226,10 +226,9 @@ class Poloniex(Exchange): ohlc_map = dict() for asset in asset_list: + delta = end_dt - pd.to_datetime('1970-1-1', utc=True) + end = int(delta.total_seconds()) - # TODO: what's wrong with this? 
- # end = int(time.mktime(end_dt.timetuple())) - end = int(time.time()) if bar_count is None: start = end - 2 * frequency else: diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py index b2ad56c3..60ff1e65 100644 --- a/tests/exchange/test_poloniex.py +++ b/tests/exchange/test_poloniex.py @@ -1,9 +1,10 @@ -from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.poloniex.poloniex import Poloniex from catalyst.finance.order import Order from base import BaseExchangeTestCase from logbook import Logger from catalyst.exchange.exchange_utils import get_exchange_auth +import pandas as pd +from test_utils import output_df log = Logger('test_poloniex') @@ -51,18 +52,19 @@ class TestPoloniex(BaseExchangeTestCase): def test_get_candles(self): log.info('retrieving candles') - ohlcv_neo = self.exchange.get_candles( - freq='5T', - assets=self.exchange.get_asset('eth_btc') - ) - ohlcv_neo_ubq = self.exchange.get_candles( - freq='5T', - assets=[ - self.exchange.get_asset('neos_btc'), - self.exchange.get_asset('via_btc') - ], - bar_count=14 + assets = self.exchange.get_asset('eth_btc') + ohlcv = self.exchange.get_candles( + end_dt=pd.to_datetime('2017-11-01', utc=True), + freq='30T', + assets=assets, + bar_count=200 ) + df = pd.DataFrame(ohlcv) + df.set_index('last_traded', drop=True, inplace=True) + log.info(df.tail(25)) + + path = output_df(df, assets, 'candles') + log.info('saved candles: {}'.format(path)) pass def test_tickers(self): From cfb3f1ca421527abea7da2394b51dcc427520597 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Sun, 19 Nov 2017 22:04:11 -0700 Subject: [PATCH 16/37] DOC: restructured install page --- docs/source/install.rst | 508 +++++++++++++++++++++------------------- 1 file changed, 261 insertions(+), 247 deletions(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index f9229635..53ccbf29 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -6,7 +6,154 @@ Like any other piece 
of software, Catalyst has a number of dependencies (other software on which it depends to run) that you will need to install, as well. We recommend using a software named ``Conda`` that will manage all these dependencies for you, and set up the environment needed to get you up -and running as easily as possible. See :ref:`Installing with Conda `. +and running as easily as possible. This is the recommended installation method +for Windows, MacOS and Linux. See :ref:`Installing with Conda `. + +What conda does is create a pre-configured environment, and inside that +environment install Catalyst using ``pip``, Python's package manager. Thus, +as an alternative installation method for MacOS and Linux, you can install +Catalyst directly with ``pip`` (we recommend in combination with a virtual +environment). See :ref:`Installing with pip `. + +Regardless of the method, each operating system (OS) has its own +prerequisites; make sure to review the corresponding sections for your system: +:ref:`Linux `, :ref:`MacOS ` and :ref:`Windows `. + +.. _conda: + +Installing with ``conda`` +------------------------- + +The preferred method to install Catalyst is via the ``conda`` package manager, +which comes as part of Continuum Analytics' `Anaconda +`_ distribution. + +The primary advantage of using Conda over ``pip`` is that conda natively +understands the complex binary dependencies of packages like ``numpy`` and +``scipy``. This means that ``conda`` can install Catalyst and its +dependencies without requiring the use of a second tool to acquire Catalyst's +non-Python dependencies. + + For Windows, you will first need to install the *Microsoft Visual C++ + Compiler for Python 2.7*. Follow the instructions on the :ref:`Windows + ` section and come back here. + +For instructions on how to install ``conda``, see the `Conda Installation +Documentation `_.
Alternatively, +you can install MiniConda, which has a smaller footprint (fewer packages and +smaller size) than its big brother Anaconda, but it still contains all the +main packages needed. To install MiniConda, you can follow these steps: + +1. Download `MiniConda `_. Select Python 2.7 + for your Operating System. +2. Install MiniConda. See the `Installation Instructions + `_ if you need help. +3. Ensure the correct installation by running ``conda list`` in a Terminal + window, which should print the list of packages installed with Conda. + + For Windows, if you accepted the default installation options, you didn't + check an option to add Conda to the PATH, so trying to run ``conda`` from + a regular ``Command Prompt`` will result in the following error: ``'conda' + is not recognized as an internal or external command, operable program or + batch file``. That's to be expected. You will need to launch an ``Anaconda + Prompt`` that was added at installation time to your list of programs + available from the Start menu. + +Once either Conda or MiniConda has been set up you can install Catalyst: + +1. Download the file `python2.7-environment.yml + `_. + + To download, simply click on the 'Raw' button and save the file locally + to a folder you can remember. Make sure that the file gets saved with the + ``.yml`` extension, and nothing like a ``.txt`` file or anything else. + +2. Open a Terminal window and enter [``cd/dir``] into the directory where you + saved the above ``python2.7-environment.yml`` file. + +3. Install using this file. This step can take about 5-10 minutes to install. + + .. code-block:: bash + + conda env create -f python2.7-environment.yml + +4. Activate the environment (which you need to do every time you start a new + session to run Catalyst): + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +5. Verify that Catalyst is installed correctly: + + ..
code-block:: bash + + catalyst --version + + which should display the current version. + +Congratulations! You now have Catalyst installed. + +Troubleshooting ``conda`` Install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the command ``conda env create -f python2.7-environment.yml`` in step 3 +above failed for any reason, you can try setting up the environment manually +with the following steps: + +1. If the above installation failed, and you have a partially set up catalyst + environment, remove it first. If you are starting from scratch, proceed to + step #2: + + .. code-block:: bash + + conda env remove --name catalyst + +2. Create the environment: + + .. code-block:: bash + + conda create --name catalyst python=2.7 scipy zlib + +3. Activate the environment: + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +4. Install the Catalyst inside the environment: + + .. code-block:: bash + + pip install enigma-catalyst matplotlib + +5. Verify that Catalyst is installed correctly: + + .. code-block:: bash + + catalyst --version + + which should display the current version. + +Congratulations! You now have Catalyst properly installed. + +.. _pip: Installing with ``pip`` ----------------------- @@ -28,15 +175,21 @@ Because LAPACK and the CPython headers are non-Python dependencies, the correctway to install them varies from platform to platform. If you'd rather use a single tool to install Python and non-Python dependencies, or if you're already using `Anaconda `_ as your Python -distribution, you can skip to the :ref:`Installing with Conda ` -section. +distribution, refer to the :ref:`Installing with Conda ` section. 
-Once you've installed the necessary additional dependencies (see below for -your particular platform), you should be able to simply run +Once you've installed the necessary additional dependencies for your system +(see below for your particular platform: :ref:`Linux`, :ref:`MacOS` or +:ref:`Windows`), you should be able to simply run .. code-block:: bash - $ pip install enigma-catalyst + $ pip install enigma-catalyst matplotlib + +Note that in the command above we install two different packages. The second +one, ``matplotlib`` is a visualization library. While it's not strictly +required to run catalyst simulations or live trading, it comes in very handy +to visualize the performance of your algorithms, and for this reason we +recommend you install it, as well. If you use Python for anything other than Catalyst, we **strongly** recommend that you install in a `virtualenv @@ -50,158 +203,7 @@ summarized version: $ pip install virtualenv $ virtualenv catalyst-venv $ source ./catalyst-venv/bin/activate - $ pip install enigma-catalyst - -Though not required by Catalyst directly, our example algorithms use -matplotlib to visually display the results of the trading algorithms. If you -wish to run any examples or use matplotlib during development, it can be -installed using: - -.. code-block:: bash - - $ pip install matplotlib - -GNU/Linux -~~~~~~~~~ - -On `Debian-derived`_ Linux distributions, you can acquire all the necessary -binary dependencies from ``apt`` by running: - -.. code-block:: bash - - $ sudo apt-get install libatlas-base-dev python-dev gfortran pkg-config libfreetype6-dev - -On recent `RHEL-derived`_ derived Linux distributions (e.g. Fedora), the -following should be sufficient to acquire the necessary additional -dependencies: - -.. code-block:: bash - - $ sudo dnf install atlas-devel gcc-c++ gcc-gfortran libgfortran python-devel redhat-rep-config - -On `Arch Linux`_, you can acquire the additional dependencies via ``pacman``: - -.. 
code-block:: bash - - $ pacman -S lapack gcc gcc-fortran pkg-config - -.. Commenting it out until Catalyst fully supports Python 3.X -.. -.. There are also AUR packages available for installing `Python 3.4 -.. `_ (Arch's default python is now -.. 3.5, but Catalyst only currently supports 3.4), and `ta-lib -.. `_, an optional Catalyst dependency. -.. Python 2 is also installable via: - -.. - -.. $ pacman -S python2 - -OSX -~~~ - -The version of Python shipped with OSX by default is generally out of date, -and has a number of quirks because it's used directly by the operating system. -For these reasons, many developers choose to install and use a separate Python -installation. The `Hitchhiker's Guide to Python`_ provides an excellent guide -to `Installing Python on OSX `_, -which explains how to install Python with the `Homebrew`_ manager. - -Assuming you've installed Python with Homebrew, you'll also likely need the -following brew packages: - -.. code-block:: bash - - $ brew install freetype pkg-config gcc openssl - -OSX + virtualenv + matplotlib -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A note about using matplotlib in virtual enviroments on OSX: it may be -necessary to run - -.. code-block:: bash - - echo "backend: TkAgg" > ~/.matplotlib/matplotlibrc - -in order to override the default ``macosx`` backend for your system, which -may not be accessible from inside the virtual environment. This will allow -Catalyst to open matplotlib charts from within a virtual environment, which -is useful for displaying the performance of your backtests. To learn more -about matplotlib backends, please refer to the -`matplotlib backend documentation `_. - -.. _windows: - -Windows -~~~~~~~ - -In Windows, you will first need to install the `Microsoft Visual C++ Compiler -for Python 2.7 -`_. This -package contains the compiler and the set of system headers necessary for -producing binary wheels for Python 2.7 packages. 
If it's not already in your -system, download it and install it before proceeding to the next step. - -Once you have the above compiler installed, the easiest and best supported way -to install Catalyst in Windows is to use :ref:`Conda `. If you didn't -any problems installing the compiler, jump to the :ref:`Conda ` section, -otherwise keep on reading to troubleshoot the C++ compiler installtion. - -Some problems we have encountered installing the **Visual C++ Compiler** -mentioned above are as follows: - -- **The system administrator has set policies to prevent this installation**. - - In some systems, there is a default *Windows Software Restriction* policy - that prevents the installation of some software packages like this one. - You'll have to change the Registry to circumvent this: - - - Click ``Start``, and search for ``regedit`` and launch the - ``Registry Editor`` - - Navigate to the following folder: - ``HKEY_LOCAL_MACHINE\SOFTWARE\Policies\Microsoft\Windows\Installer`` - - If the last folder does not exist, create it by right-clicking on the - parent folder and choosing -> ``New`` -> ``Key`` and typing ``Installer`` - - If there is an entry for ``DisableMSI``, set the Value data to 0. - - If there is no such entry, click on the ``Edit`` menu -> ``New`` -> - ``DWORD (32-bit) Value`` and enter ``DisableMSI`` as the Name (and by - default you get 0 as the Value Data) - -| -- **The installer has encountered an unexpected error installing this package. - This may indicate a problem with this package. The error code is 2503.** - - We have observed this when trying to install a package without enough - administrator permissions. 
Even when you are logged in as an Administrator, - you have to explictily install this package with administrator privileges: - - - Click ``Start`` and find ``CMD`` or ``Command Prompt`` - - Right click on it and choose ``Run as administrator`` - - ``cd`` into the folder where you downloaded ``VCForPython27.msi`` - - Run ``msiexec /i VCForPython27.msi`` - - -Amazon Linux AMI -~~~~~~~~~~~~~~~~ - -The packages ``pip`` and ``setuptools`` that come shipped by default are very -outdated. Thus, you first need to run: - -.. code-block:: bash - - pip install --upgrade pip setuptools - -The default installation is also missing the C and C++ compilers, which you -install by: - -.. code-block:: bash - - sudo yum install gcc gcc-c++ - -Then you should follow the regular installation instructions outlined at the -beginning of this page. - + $ pip install enigma-catalyst matplotlib Troubleshooting ``pip`` Install ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -292,138 +294,150 @@ Troubleshooting ``pip`` Install sudo apt-get install python-dev -.. _conda: +.. _linux: -Installing with ``conda`` -------------------------- +GNU/Linux Requirements +---------------------- -Another way to install Catalyst is via the ``conda`` package manager, which -comes as part of Continuum Analytics' `Anaconda -`_ distribution. +On `Debian-derived`_ Linux distributions, you can acquire all the necessary +binary dependencies from ``apt`` by running: -The primary advantage of using Conda over ``pip`` is that conda natively -understands the complex binary dependencies of packages like ``numpy`` and -``scipy``. This means that ``conda`` can install Catalyst and its -dependencies without requiring the use of a second tool to acquire Catalyst's -non-Python dependencies. +.. code-block:: bash - For Windows, you will first need to install the *Microsoft Visual C++ - Compiler for Python 2.7*. Follow the instructions on the :ref:`Windows` - section and come back here. 
+ $ sudo apt-get install libatlas-base-dev python-dev gfortran pkg-config libfreetype6-dev -For instructions on how to install ``conda``, see the `Conda Installation -Documentation `_. Alternatively, -you can install MiniConda, which is a smaller footprint (fewer packages and -smaller size) than its big brother Anaconda, but it still contains all the -main packages needed. To install MiniConda, you can follow these steps: +On recent `RHEL-derived`_ Linux distributions (e.g. Fedora), the +following should be sufficient to acquire the necessary additional +dependencies: -1. Download `MiniConda `_. Select Python 2.7 - for your Operating System. -2. Install MiniConda. See the `Installation Instructions - `_ if you need help. -3. Ensure the correct installation by running ``conda list`` in a Terminal - window, which should print the list of packages installed with Conda. +.. code-block:: bash - For Windows, if you accepted the default installation options, you didn't - check an option to add Conda to the PATH, so trying to run ``conda`` from - a regular ``Command Prompt`` will result in the following error: ``'conda' - is no recognized as an internal or external command, operatble program or - batch file``. That's to be expected. You will nee to launch an ``Anaconda - Prompt`` that was added at installation time to your list of programs - available from the Start menu. + $ sudo dnf install atlas-devel gcc-c++ gcc-gfortran libgfortran python-devel redhat-rpm-config -Once either Conda or MiniConda has been set up you can install Catalyst: +On `Arch Linux`_, you can acquire the additional dependencies via ``pacman``: -1. Download the file `python2.7-environment.yml - `_. +.. code-block:: bash - To download, simply click on the 'Raw' button and save the file locally to - a folder you can remember. Make sure that the file gets saved with the ``.yml`` - extension, and nothing like a ``.txt`` file or anything else. + $ pacman -S lapack gcc gcc-fortran pkg-config -2.
Open a Terminal window and enter [``cd/dir``] into the directory where you - saved the above ``python2.7-environment.yml`` file. -3. Install using this file. This step can take about 5-10 minutes to install. +.. Commenting it out until Catalyst fully supports Python 3.X +.. +.. There are also AUR packages available for installing `Python 3.4 +.. `_ (Arch's default python is now +.. 3.5, but Catalyst only currently supports 3.4), and `ta-lib +.. `_, an optional Catalyst dependency. +.. Python 2 is also installable via: - .. code-block:: bash +.. - conda env create -f python2.7-environment.yml +.. $ pacman -S python2 -4. Activate the environment (which you need to do every time you start a new - session to run Catalyst): +Amazon Linux AMI Notes +~~~~~~~~~~~~~~~~~~~~~~ - **Linux or OSX:** +The packages ``pip`` and ``setuptools`` that come shipped by default are very +outdated. Thus, you first need to run: - .. code-block:: bash +.. code-block:: bash - source activate catalyst + pip install --upgrade pip setuptools - **Windows:** +The default installation is also missing the C and C++ compilers, which you +install by: - .. code-block:: bash +.. code-block:: bash - activate catalyst + sudo yum install gcc gcc-c++ -5. Verify that Catalyst is install correctly: +Then you should follow the regular installation instructions outlined at the +beginning of this page. - .. code-block:: bash - catalyst --version +.. _MacOS: - which should display the current version. +MacOS Requirements +------------------ -Congratulations! You now have Catalyst installed. +The version of Python shipped with OSX by default is generally out of date, +and has a number of quirks because it's used directly by the operating system. +For these reasons, many developers choose to install and use a separate Python +installation. The `Hitchhiker's Guide to Python`_ provides an excellent guide +to `Installing Python on OSX `_, +which explains how to install Python with the `Homebrew`_ manager. 
-Troubleshooting ``conda`` Install -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Assuming you've installed Python with Homebrew, you'll also likely need the +following brew packages: -If the command ``conda env create -f python2.7-environment.yml`` in step 3 -above failed for any reason, you can try setting up the environment manually -with the following steps: +.. code-block:: bash -1. If the above installation failed, and you have a partially set up catalyst - environment, remove it first. If you are starting from scratch, proceed to - step #2: + $ brew install freetype pkg-config gcc openssl - .. code-block:: bash +OSX + virtualenv + matplotlib +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - conda env remove --name catalyst +A note about using matplotlib in virtual environments on OSX: it may be +necessary to run -2. Create the environment: +.. code-block:: bash - .. code-block:: bash + echo "backend: TkAgg" > ~/.matplotlib/matplotlibrc - conda create --name catalyst python=2.7 scipy zlib +in order to override the default ``macosx`` backend for your system, which +may not be accessible from inside the virtual environment. This will allow +Catalyst to open matplotlib charts from within a virtual environment, which +is useful for displaying the performance of your backtests. To learn more +about matplotlib backends, please refer to the +`matplotlib backend documentation `_. -3. Activate the environment: +.. _windows: - **Linux or OSX:** +Windows Requirements +-------------------- - .. code-block:: bash +In Windows, you will first need to install the `Microsoft Visual C++ Compiler +for Python 2.7 +`_. This +package contains the compiler and the set of system headers necessary for +producing binary wheels for Python 2.7 packages. If it's not already in your +system, download it and install it before proceeding to the next step. - source activate catalyst +Once you have the above compiler installed, the easiest and best supported way +to install Catalyst in Windows is to use :ref:`Conda `.
If you didn't +have any problems installing the compiler, jump to the :ref:`Conda ` section, +otherwise keep on reading to troubleshoot the C++ compiler installation. - **Windows:** +Some problems we have encountered installing the **Visual C++ Compiler** +mentioned above are as follows: - .. code-block:: bash +- **The system administrator has set policies to prevent this installation**. + + In some systems, there is a default *Windows Software Restriction* policy + that prevents the installation of some software packages like this one. + You'll have to change the Registry to circumvent this: - activate catalyst + - Click ``Start``, and search for ``regedit`` and launch the + ``Registry Editor`` + - Navigate to the following folder: + ``HKEY_LOCAL_MACHINE\SOFTWARE\Policies\Microsoft\Windows\Installer`` + - If the last folder does not exist, create it by right-clicking on the + parent folder and choosing -> ``New`` -> ``Key`` and typing ``Installer`` + - If there is an entry for ``DisableMSI``, set the Value data to 0. + - If there is no such entry, click on the ``Edit`` menu -> ``New`` -> + ``DWORD (32-bit) Value`` and enter ``DisableMSI`` as the Name (and by + default you get 0 as the Value Data) -4. Install the Catalyst inside the environment: +| +- **The installer has encountered an unexpected error installing this package. + This may indicate a problem with this package. The error code is 2503.** - .. code-block:: bash + We have observed this when trying to install a package without enough + administrator permissions. Even when you are logged in as an Administrator, + you have to explicitly install this package with administrator privileges: - pip install enigma-catalyst matplotlib - -5. Verify that Catalyst is installed correctly: - - .. code-block:: bash - - catalyst --version - - which should display the current version. - -Congratulations! You now have Catalyst properly installed.
+ - Click ``Start`` and find ``CMD`` or ``Command Prompt`` + - Right click on it and choose ``Run as administrator`` + - ``cd`` into the folder where you downloaded ``VCForPython27.msi`` + - Run ``msiexec /i VCForPython27.msi`` Getting Help ------------ From 698b19c8fa2488bbd4a2b41b0960bd0eca17fc9f Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Sun, 19 Nov 2017 23:51:57 -0700 Subject: [PATCH 17/37] DOC: updated examples/buy_and_hodl.py. Added Example Algos and Utilities pages to the documentation --- catalyst/examples/buy_and_hodl.py | 24 +---- docs/source/example-algos.rst | 172 ++++++++++++++++++++++++++++++ docs/source/index.rst | 2 + docs/source/utilities.rst | 149 ++++++++++++++++++++++++++ 4 files changed, 326 insertions(+), 21 deletions(-) create mode 100644 docs/source/example-algos.rst create mode 100644 docs/source/utilities.rst diff --git a/catalyst/examples/buy_and_hodl.py b/catalyst/examples/buy_and_hodl.py index e4146b3c..ef5b2a6f 100644 --- a/catalyst/examples/buy_and_hodl.py +++ b/catalyst/examples/buy_and_hodl.py @@ -16,7 +16,6 @@ # limitations under the License. import pandas as pd -from catalyst import run_algorithm from catalyst.api import ( order_target_value, symbol, @@ -27,15 +26,10 @@ from catalyst.api import ( def initialize(context): - context.ASSET_NAME = 'BTC_USDT' + context.ASSET_NAME = 'btc_usdt' context.TARGET_HODL_RATIO = 0.8 context.RESERVE_RATIO = 1.0 - context.TARGET_HODL_RATIO - # For all trading pairs in the poloniex bundle, the default denomination - # currently supported by Catalyst is 1/1000th of a full coin. Use this - # constant to scale the price of up to that of a full coin if desired. 
- context.TICK_SIZE = 1000.0 - context.is_buying = True context.asset = symbol(context.ASSET_NAME) @@ -91,7 +85,7 @@ def analyze(context=None, results=None): ax2 = plt.subplot(612, sharex=ax1) ax2.set_ylabel('{asset} (USD)'.format(asset=context.ASSET_NAME)) - (context.TICK_SIZE * results[['price']]).plot(ax=ax2) + results[['price']].plot(ax=ax2) trans = results.ix[[t != [] for t in results.transactions]] buys = trans.ix[ @@ -99,7 +93,7 @@ def analyze(context=None, results=None): ] ax2.plot( buys.index, - context.TICK_SIZE * results.price[buys.index], + results.price[buys.index], '^', markersize=10, color='g', @@ -141,15 +135,3 @@ def analyze(context=None, results=None): plt.gcf().set_size_inches(18, 8) plt.show() - -run_algorithm( - capital_base=10000, - data_frequency='minute', - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='poloniex', - base_currency='usd', - start=pd.to_datetime('2017-10-1', utc=True), - end=pd.to_datetime('2017-11-10', utc=True), -) diff --git a/docs/source/example-algos.rst b/docs/source/example-algos.rst new file mode 100644 index 00000000..7882208d --- /dev/null +++ b/docs/source/example-algos.rst @@ -0,0 +1,172 @@ +| +Example Algorithms +================== + +This section documents a small number of example algorithms to complement the +beginner tutorial, and show how other trading algorithms can be implemented +using Catalyst: + +Buy and Hodl +~~~~~~~~~~~~ + +source: `examples/buy_and_hodl.py `_ + +First ingest the historical pricing data needed to run this algorithm: + +.. code-block:: bash + + catalyst ingest-exchange -x poloniex -f daily -i btc_usdt + +Then, you can run the code below with the following command: + +.. code-block:: bash + + catalyst run -f buy_and_hodl.py --start 2015-3-1 --end 2017-10-31 --capital-base 100000 -x poloniex -c btc -o bah.pickle + +This command will run the trading algorithm in the specified time range and +plot the resulting performance using the matplotlib library. 
You can choose any +date interval with the ``--start`` and ``--end`` parameters, but bear in mind +that 2015-3-1 is the earliest date that Catalyst supports (if you choose an +earlier date, you'll get an error), and the most recent date you can choose is +one day prior to the current date. + + +.. code-block:: python + + #!/usr/bin/env python + # + # Copyright 2017 Enigma MPC, Inc. + # Copyright 2015 Quantopian, Inc. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + import pandas as pd + + from catalyst.api import ( + order_target_value, + symbol, + record, + cancel_order, + get_open_orders, + ) + + + def initialize(context): + context.ASSET_NAME = 'btc_usdt' + context.TARGET_HODL_RATIO = 0.8 + context.RESERVE_RATIO = 1.0 - context.TARGET_HODL_RATIO + + context.is_buying = True + context.asset = symbol(context.ASSET_NAME) + + context.i = 0 + + + def handle_data(context, data): + context.i += 1 + + starting_cash = context.portfolio.starting_cash + target_hodl_value = context.TARGET_HODL_RATIO * starting_cash + reserve_value = context.RESERVE_RATIO * starting_cash + + # Cancel any outstanding orders + orders = get_open_orders(context.asset) or [] + for order in orders: + cancel_order(order) + + # Stop buying after passing the reserve threshold + cash = context.portfolio.cash + if cash <= reserve_value: + context.is_buying = False + + # Retrieve current asset price from pricing data + price = data.current(context.asset, 'price') + + # Check if still buying and 
could (approximately) afford another purchase + if context.is_buying and cash > price: + # Place order to make position in asset equal to target_hodl_value + order_target_value( + context.asset, + target_hodl_value, + limit_price=price * 1.1, + stop_price=price * 0.9, + ) + + record( + price=price, + volume=data.current(context.asset, 'volume'), + cash=cash, + starting_cash=context.portfolio.starting_cash, + leverage=context.account.leverage, + ) + + + def analyze(context=None, results=None): + import matplotlib.pyplot as plt + + # Plot the portfolio and asset data. + ax1 = plt.subplot(611) + results[['portfolio_value']].plot(ax=ax1) + ax1.set_ylabel('Portfolio Value (USD)') + + ax2 = plt.subplot(612, sharex=ax1) + ax2.set_ylabel('{asset} (USD)'.format(asset=context.ASSET_NAME)) + results[['price']].plot(ax=ax2) + + trans = results.ix[[t != [] for t in results.transactions]] + buys = trans.ix[ + [t[0]['amount'] > 0 for t in trans.transactions] + ] + ax2.plot( + buys.index, + results.price[buys.index], + '^', + markersize=10, + color='g', + ) + + ax3 = plt.subplot(613, sharex=ax1) + results[['leverage', 'alpha', 'beta']].plot(ax=ax3) + ax3.set_ylabel('Leverage ') + + ax4 = plt.subplot(614, sharex=ax1) + results[['starting_cash', 'cash']].plot(ax=ax4) + ax4.set_ylabel('Cash (USD)') + + results[[ + 'treasury', + 'algorithm', + 'benchmark', + ]] = results[[ + 'treasury_period_return', + 'algorithm_period_return', + 'benchmark_period_return', + ]] + + ax5 = plt.subplot(615, sharex=ax1) + results[[ + 'treasury', + 'algorithm', + 'benchmark', + ]].plot(ax=ax5) + ax5.set_ylabel('Percent Change') + + ax6 = plt.subplot(616, sharex=ax1) + results[['volume']].plot(ax=ax6) + ax6.set_ylabel('Volume (mCoins/5min)') + + plt.legend(loc=3) + + # Show the plot. 
+ plt.gcf().set_size_inches(18, 8) + plt.show() + diff --git a/docs/source/index.rst b/docs/source/index.rst index 82bbbd37..a0e8f097 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,6 +12,8 @@ Table of Contents jupyter live-trading naming-convention + example-algos + utilities videos resources development-guidelines diff --git a/docs/source/utilities.rst b/docs/source/utilities.rst new file mode 100644 index 00000000..8c89aa52 --- /dev/null +++ b/docs/source/utilities.rst @@ -0,0 +1,149 @@ +Utilities +========= + +This section covers a variety of utilities that provide complementary +functionality to your trading algorithms. These are code snippets that you can +add to any algorithm to add the desired functionality. + +If you are looking for example trading algorithms, see the corresponding section. + +Output to CSV file +~~~~~~~~~~~~~~~~~~ + +Add this script to the analyze method to create and save a CSV file with the +results from the trading algorithm. This file will include the default +parameters of the results DataFrame plus any recorded variables and will be +saved in the same location where your trading algorithm is saved. The exact +script that you need to use depends on the interface that you are using to run +your trading algorithm, which could be the CLI or a Python Interpreter. + +1. Script to use with CLI: + + .. code-block:: python + + def analyze(context=None, results=None): + import sys + import os + from os.path import basename + + # Save results in CSV file + filename = os.path.splitext(basename(sys.argv[3]))[0] + results.to_csv(filename + '.csv') + +2. Script to use with Python Interpreter: + + ..
code-block:: python + + def analyze(context=None, results=None): + import os + from os.path import basename + + # Save results in CSV file + filename = os.path.splitext(os.path.basename(__file__))[0] + results.to_csv(filename + '.csv') + +Extracting market data +~~~~~~~~~~~~~~~~~~~~~~ + +Use this script to save the price and volume data of one cryptoasset in a CSV +file, which will be saved in the same location and with the same name as your +Python file. To get custom data, simply modify the asset's symbol and the dates. +Run this script directly from your development environment: python scriptname.py, +where the contents of 'scriptname.py' are as follows. Two different versions are +provided as an example for daily- and minute-resolution data respectively: + +Simpler case for daily data + +.. code-block:: python + + import os + import pytz + from datetime import datetime + + from catalyst.api import record, symbol, symbols + from catalyst.utils.run_algo import run_algorithm + + def initialize(context): + # Portfolio assets list + context.asset = symbol('btc_usdt') # Bitcoin on Poloniex + + def handle_data(context, data): + # Variables to record for a given asset: price and volume + price = data.current(context.asset, 'price') + volume = data.current(context.asset, 'volume') + record(price=price, volume=volume) + + def analyze(context=None, results=None): + + # Generate DataFrame with Price and Volume only + data = results[['price','volume']] + + # Save results in CSV file + filename = os.path.splitext(os.path.basename(__file__))[0] + data.to_csv(filename + '.csv') + + ''' Bitcoin data is available on Poloniex since 2015-3-1. + Dates vary for other tokens. In the example below, we choose the + full month of July of 2017.
+ ''' + start = datetime(2017, 1, 1, 0, 0, 0, 0, pytz.utc) + end = datetime(2017, 7, 31, 0, 0, 0, 0, pytz.utc) + results = run_algorithm(initialize=initialize, + handle_data=handle_data, + analyze=analyze, + start=start, + end=end, + exchange_name='poloniex', + capital_base=10000, + base_currency = 'usdt') + +More versatile case for minute data + +.. code-block:: python + + import os + import csv + import pytz + from datetime import datetime + + from catalyst.api import record, symbol, symbols + from catalyst.utils.run_algo import run_algorithm + + + def initialize(context): + # Portfolio assets list + context.asset = symbol('btc_usdt') # Bitcoin on Poloniex + + # Creates a .CSV file with the same name as this script to store results + context.csvfile = open(os.path.splitext( + os.path.basename(__file__))[0]+'.csv', 'w+') + context.csvwriter = csv.writer(context.csvfile) + + def handle_data(context, data): + # Variables to record for a given asset: price and volume + # Other options include 'open', 'high', 'open', 'close' + # Please note that 'price' equals 'close' + date = context.blotter.current_dt # current time in each iteration + price = data.current(context.asset, 'price') + volume = data.current(context.asset, 'volume') + + # Writes one line to CSV on each iteration with the chosen variables + context.csvwriter.writerow([date,price,volume]) + + def analyze(context=None, results=None): + # Close open file properly at the end + context.csvfile.close() + + + # Bitcoin data is available from 2015-3-2. Dates vary for other tokens. 
+ start = datetime(2017, 7, 30, 0, 0, 0, 0, pytz.utc) + end = datetime(2017, 7, 31, 0, 0, 0, 0, pytz.utc) + results = run_algorithm(initialize=initialize, + handle_data=handle_data, + analyze=analyze, + start=start, + end=end, + exchange_name='poloniex', + data_frequency='minute', + base_currency ='usdt', + capital_base=10000 ) \ No newline at end of file From 9cfd50dc4fbb14f3afef392f3ebd9649b9873595 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 20 Nov 2017 09:12:43 -0700 Subject: [PATCH 18/37] DOC: mean_reversion_simple.py minor edits, and added to doc website --- catalyst/examples/mean_reversion_simple.py | 13 +- docs/source/example-algos.rst | 276 ++++++++++++++++++++- 2 files changed, 280 insertions(+), 9 deletions(-) diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index 9aa5ada3..d5efbea8 100644 --- a/catalyst/examples/mean_reversion_simple.py +++ b/catalyst/examples/mean_reversion_simple.py @@ -4,22 +4,21 @@ import pandas as pd import talib -# To run an algorithm in Catalyst, you need two functions: initialize and -# handle_data. from logbook import Logger from catalyst import run_algorithm -from catalyst.api import symbol, record, order_target_percent, \ - get_open_orders +from catalyst.api import symbol, record, order_target_percent, get_open_orders +from catalyst.exchange.stats_utils import extract_transactions + # We give a name to the algorithm which Catalyst will use to persist its state. # In this example, Catalyst will create the `.catalyst/data/live_algos` # directory. If we stop and start the algorithm, Catalyst will resume its # state using the files included in the folder. -from catalyst.exchange.stats_utils import extract_transactions - NAMESPACE = 'mean_reversion_simple' log = Logger(NAMESPACE) +# To run an algorithm in Catalyst, you need two functions: initialize and +# handle_data. 
def initialize(context): # This initialize function sets any data or variables that you'll use in @@ -216,7 +215,7 @@ def analyze(context=None, perf=None): if __name__ == '__main__': # The execution mode: backtest or live - MODE = 'live' + MODE = 'backtest' if MODE == 'backtest': # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-10-1 -e 2017-11-10 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 diff --git a/docs/source/example-algos.rst b/docs/source/example-algos.rst index 7882208d..e341f331 100644 --- a/docs/source/example-algos.rst +++ b/docs/source/example-algos.rst @@ -6,8 +6,10 @@ This section documents a small number of example algorithms to complement the beginner tutorial, and show how other trading algorithms can be implemented using Catalyst: -Buy and Hodl -~~~~~~~~~~~~ +.. _buy_and_hodl: + +Buy and Hodl Algorithm +~~~~~~~~~~~~~~~~~~~~~~ source: `examples/buy_and_hodl.py `_ @@ -170,3 +172,273 @@ one day prior to the current date. plt.gcf().set_size_inches(18, 8) plt.show() +.. _mean_reversion: + +Mean Reversion Algorithm +~~~~~~~~~~~~~~~~~~~~~~~~ + +source: `examples/mean_reversion_simple.py `_ + +This algorithm is based on a simple momentum strategy. When the cryptoasset goes +up quickly, we're going to buy; when it goes down quickly, we're going to sell. +Hopefully, we'll ride the waves. + +We are choosing to run this trading algorithm with the ``neo_usd`` currency pair +on the ``Bitfinex`` exchange. Thus, first ingest the historical pricing data +that we need, with minute resolution: + +.. code-block:: bash + + catalyst ingest-exchange -x bitfinex -f minute -i neo_usd + +To run this algorithm, we are opting for the Python interpreter, instead of the +command line (CLI). All of the parameters for the simulation are specified in +lines 218-245, so in order to run the algorithm we just type: + +.. code-block:: bash + + python mean_reversion_simple.py + +.. 
code-block:: python + + import pandas as pd + import talib + from logbook import Logger + + from catalyst import run_algorithm + from catalyst.api import symbol, record, order_target_percent, get_open_orders + from catalyst.exchange.stats_utils import extract_transactions + + # We give a name to the algorithm which Catalyst will use to persist its state. + # In this example, Catalyst will create the `.catalyst/data/live_algos` + # directory. If we stop and start the algorithm, Catalyst will resume its + # state using the files included in the folder. + NAMESPACE = 'mean_reversion_simple' + log = Logger(NAMESPACE) + + # To run an algorithm in Catalyst, you need two functions: initialize and + # handle_data. + + def initialize(context): + # This initialize function sets any data or variables that you'll use in + # your algorithm. For instance, you'll want to define the trading pair (or + # trading pairs) you want to backtest. You'll also want to define any + # parameters or values you're going to use. + + # In our example, we're looking at Ether in USD Tether. + context.neo_usd = symbol('neo_usd') + context.base_price = None + context.current_day = None + + + def handle_data(context, data): + # This handle_data function is where the real work is done. Our data is + # minute-level tick data, and each minute is called a frame. This function + # runs on each frame of the data. + + # We flag the first period of each day. + # Since cryptocurrencies trade 24/7 the `before_trading_starts` handle + # would only execute once. This method works with minute and daily + # frequencies. + today = data.current_dt.floor('1D') + if today != context.current_day: + context.traded_today = False + context.current_day = today + + # We're computing the volume-weighted-average-price of the security + # defined above, in the context.neo_usd variable. For this example, we're + # using three bars on the 15 min bars. + + # The frequency attribute determine the bar size. 
We use this convention + # for the frequency alias: + # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases + prices = data.history( + context.neo_usd, + fields='close', + bar_count=50, + frequency='15T' + ) + + # Ta-lib calculates various technical indicator based on price and + # volume arrays. + + # In this example, we are comp + rsi = talib.RSI(prices.values, timeperiod=14) + + # We need a variable for the current price of the security to compare to + # the average. Since we are requesting two fields, data.current() + # returns a DataFrame with + current = data.current(context.neo_usd, fields=['close', 'volume']) + price = current['close'] + + # If base_price is not set, we use the current value. This is the + # price at the first bar which we reference to calculate price_change. + if context.base_price is None: + context.base_price = price + + price_change = (price - context.base_price) / context.base_price + cash = context.portfolio.cash + + # Now that we've collected all current data for this frame, we use + # the record() method to save it. This data will be available as + # a parameter of the analyze() function for further analysis. + record( + price=price, + volume=current['volume'], + price_change=price_change, + rsi=rsi[-1], + cash=cash + ) + + # We are trying to avoid over-trading by limiting our trades to + # one per day. + if context.traded_today: + return + + # Since we are using limit orders, some orders may not execute immediately + # we wait until all orders are executed before considering more trades. + orders = get_open_orders(context.neo_usd) + if len(orders) > 0: + return + + # Exit if we cannot trade + if not data.can_trade(context.neo_usd): + return + + # Another powerful built-in feature of the Catalyst backtester is the + # portfolio object. The portfolio object tracks your positions, cash, + # cost basis of specific holdings, and more. In this line, we calculate + # how long or short our position is at this minute. 
+ pos_amount = context.portfolio.positions[context.neo_usd].amount + + if rsi[-1] <= 30 and pos_amount == 0: + log.info( + '{}: buying - price: {}, rsi: {}'.format( + data.current_dt, price, rsi[-1] + ) + ) + order_target_percent(context.neo_usd, 1) + context.traded_today = True + + elif rsi[-1] >= 80 and pos_amount > 0: + log.info( + '{}: selling - price: {}, rsi: {}'.format( + data.current_dt, price, rsi[-1] + ) + ) + order_target_percent(context.neo_usd, 0) + context.traded_today = True + + + def analyze(context=None, perf=None): + import matplotlib.pyplot as plt + + # The base currency of the algo exchange + base_currency = context.exchanges.values()[0].base_currency.upper() + + # Plot the portfolio value over time. + ax1 = plt.subplot(611) + perf.loc[:, 'portfolio_value'].plot(ax=ax1) + ax1.set_ylabel('Portfolio Value ({})'.format(base_currency)) + + # Plot the price increase or decrease over time. + ax2 = plt.subplot(612, sharex=ax1) + perf.loc[:, 'price'].plot(ax=ax2, label='Price') + + ax2.set_ylabel('{asset} ({base})'.format( + asset=context.neo_usd.symbol, base=base_currency + )) + + transaction_df = extract_transactions(perf) + if not transaction_df.empty: + buy_df = transaction_df[transaction_df['amount'] > 0] + sell_df = transaction_df[transaction_df['amount'] < 0] + ax2.scatter( + buy_df.index.to_pydatetime(), + perf.loc[buy_df.index, 'price'], + marker='^', + s=100, + c='green', + label='' + ) + ax2.scatter( + sell_df.index.to_pydatetime(), + perf.loc[sell_df.index, 'price'], + marker='v', + s=100, + c='red', + label='' + ) + + ax4 = plt.subplot(613, sharex=ax1) + perf.loc[:, 'cash'].plot( + ax=ax4, label='Base Currency ({})'.format(base_currency) + ) + ax4.set_ylabel('Cash ({})'.format(base_currency)) + + perf['algorithm'] = perf.loc[:, 'algorithm_period_return'] + + ax5 = plt.subplot(614, sharex=ax1) + perf.loc[:, ['algorithm', 'price_change']].plot(ax=ax5) + ax5.set_ylabel('Percent Change') + + ax6 = plt.subplot(615, sharex=ax1) + perf.loc[:, 
'rsi'].plot(ax=ax6, label='RSI') + ax6.axhline(70, color='darkgoldenrod') + ax6.axhline(30, color='darkgoldenrod') + + if not transaction_df.empty: + ax6.scatter( + buy_df.index.to_pydatetime(), + perf.loc[buy_df.index, 'rsi'], + marker='^', + s=100, + c='green', + label='' + ) + ax6.scatter( + sell_df.index.to_pydatetime(), + perf.loc[sell_df.index, 'rsi'], + marker='v', + s=100, + c='red', + label='' + ) + plt.legend(loc=3) + + # Show the plot. + plt.gcf().set_size_inches(18, 8) + plt.show() + pass + + + if __name__ == '__main__': + # The execution mode: backtest or live + MODE = 'backtest' + + if MODE == 'backtest': + # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-10-1 -e 2017-11-10 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 + run_algorithm( + capital_base=10000, + data_frequency='minute', + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + algo_namespace=NAMESPACE, + base_currency='usd', + start=pd.to_datetime('2017-10-1', utc=True), + end=pd.to_datetime('2017-11-10', utc=True), + ) + + elif MODE == 'live': + run_algorithm( + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + live=True, + algo_namespace=NAMESPACE, + base_currency='usd', + live_graph=True + ) From 86b2a5c772c58512beaeec5e50cfbba606ad41f2 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 20 Nov 2017 09:31:47 -0700 Subject: [PATCH 19/37] DOC: videos: +3rd_install, +backtest --- docs/source/videos.rst | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/source/videos.rst b/docs/source/videos.rst index 8fdfae2a..d93c09c5 100644 --- a/docs/source/videos.rst +++ b/docs/source/videos.rst @@ -22,5 +22,21 @@ Where things go smoothly: | Where things don't: + +.. raw:: html - Coming up next! 
\ No newline at end of file + + +| +| +Backtesting a Strategy +---------------------- + +This algorithm is based on a simple momentum strategy. When the cryptoasset +goes up quickly, we’re going to buy; when it goes down quickly, we’re going to +sell. Hopefully, we’ll ride the waves. + +.. raw:: html + + + From 0af592a5f47f98cb7753203210f7ec72899ecb32 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 20 Nov 2017 17:52:29 -0500 Subject: [PATCH 20/37] BUG: fixed issue #71 with the last candle of a resampled set --- catalyst/constants.py | 2 +- catalyst/exchange/exchange_bundle.py | 24 ++-- catalyst/exchange/exchange_data_portal.py | 2 + catalyst/exchange/exchange_utils.py | 4 +- catalyst/exchange/validator.py | 145 ++++++++++++++++++++++ tests/exchange/test_bundle.py | 6 +- tests/exchange/test_data_portal.py | 45 +------ tests/exchange/test_poloniex.py | 7 +- tests/exchange/test_utils.py | 40 ++++++ 9 files changed, 219 insertions(+), 56 deletions(-) create mode 100644 catalyst/exchange/validator.py diff --git a/catalyst/constants.py b/catalyst/constants.py index cde29914..bbdae343 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -2,7 +2,7 @@ import logbook -LOG_LEVEL = logbook.INFO +LOG_LEVEL = logbook.DEBUG DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 48d3294f..6c36fa9e 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,5 +1,6 @@ import os import shutil +from datetime import datetime, timedelta from functools import partial from itertools import chain from operator import is_not @@ -7,7 +8,6 @@ from operator import is_not import numpy as np import pandas as pd from catalyst.assets._assets import TradingPair -from datetime import datetime, timedelta from logbook import Logger from pytz import UTC from six import itervalues @@ -19,7 +19,8 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ 
BcolzMinuteBarMetadata from catalyst.exchange.bundle_utils import range_in_bundle, \ get_bcolz_chunk, get_month_start_end, \ - get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label + get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label, \ + get_delta from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ @@ -682,7 +683,8 @@ class ExchangeBundle: bar_count, field, data_frequency, - algo_end_dt=None + algo_end_dt=None, + trailing_bar_count=None ): """ Retrieve price data history, ingest missing data. @@ -708,7 +710,8 @@ class ExchangeBundle: end_dt=end_dt, bar_count=bar_count, field=field, - data_frequency=data_frequency + data_frequency=data_frequency, + trailing_bar_count=trailing_bar_count ) return pd.DataFrame(series) @@ -725,7 +728,7 @@ class ExchangeBundle: self.ingest_assets( assets=assets, start_dt=start_dt, - end_dt=algo_end_dt, + end_dt=algo_end_dt, # TODO: apply trailing bars data_frequency=data_frequency, show_progress=True, show_breakdown=True @@ -736,7 +739,8 @@ class ExchangeBundle: bar_count=bar_count, field=field, data_frequency=data_frequency, - reset_reader=True + reset_reader=True, + trailing_bar_count=trailing_bar_count ) return series @@ -746,7 +750,8 @@ class ExchangeBundle: end_dt=end_dt, bar_count=bar_count, field=field, - data_frequency=data_frequency + data_frequency=data_frequency, + trailing_bar_count=trailing_bar_count ) return pd.DataFrame(series) @@ -810,12 +815,17 @@ class ExchangeBundle: bar_count, field, data_frequency, + trailing_bar_count=None, reset_reader=False): start_dt = get_start_dt(end_dt, bar_count, data_frequency, False) start_dt, end_dt = self.get_adj_dates( start_dt, end_dt, assets, data_frequency ) + if trailing_bar_count: + delta = get_delta(trailing_bar_count, data_frequency) + end_dt += delta + reader = self.get_reader(data_frequency) if reset_reader: del 
self._readers[reader._rootdir] diff --git a/catalyst/exchange/exchange_data_portal.py b/catalyst/exchange/exchange_data_portal.py index 43feaa9c..c5d7ac85 100644 --- a/catalyst/exchange/exchange_data_portal.py +++ b/catalyst/exchange/exchange_data_portal.py @@ -332,6 +332,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): frequency, data_frequency ) adj_bar_count = candle_size * bar_count + trailing_bar_count = candle_size - 1 if data_frequency == 'minute' and adj_data_frequency == 'daily': end_dt = end_dt.floor('1D') @@ -343,6 +344,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): field=field, data_frequency=adj_data_frequency, algo_end_dt=self._last_available_session, + trailing_bar_count=trailing_bar_count ) df = resample_history_df(pd.DataFrame(series), freq, field) diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 554825b4..43144a0d 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -487,6 +487,7 @@ def resample_history_df(df, freq, field): DataFrame """ + print(df.tail(30)) if field == 'open': agg = 'first' elif field == 'high': @@ -500,4 +501,5 @@ def resample_history_df(df, freq, field): else: raise ValueError('Invalid field.') - return df.resample(freq).agg(agg) + resampled_df = df.resample(freq).agg(agg) + return resampled_df diff --git a/catalyst/exchange/validator.py b/catalyst/exchange/validator.py new file mode 100644 index 00000000..2037fd59 --- /dev/null +++ b/catalyst/exchange/validator.py @@ -0,0 +1,145 @@ +import os +import tempfile + +import six +from catalyst.assets._assets import TradingPair, get_calendar +from logbook import Logger + +import pandas as pd +from pandas.util.testing import assert_frame_equal + +from catalyst.constants import LOG_LEVEL +from catalyst.exchange.asset_finder_exchange import AssetFinderExchange +from catalyst.exchange.bundle_utils import get_start_dt +from catalyst.exchange.exchange_data_portal import 
DataPortalExchangeBacktest +from catalyst.exchange.factory import get_exchange, get_exchanges +from catalyst.utils.paths import ensure_directory +from catalyst.exchange.exchange import Exchange + +log = Logger('Validator', level=LOG_LEVEL) + + +def output_df(df, assets, name=None): + """ + Outputs a price DataFrame to a temp folder. + + Parameters + ---------- + df: pd.DataFrame + assets + name + + Returns + ------- + + """ + if isinstance(assets, TradingPair): + exchange_folder = assets.exchange + asset_folder = assets.symbol + else: + exchange_folder = ','.join([asset.exchange for asset in assets]) + asset_folder = ','.join([asset.symbol for asset in assets]) + + folder = os.path.join( + tempfile.gettempdir(), 'catalyst', exchange_folder, asset_folder + ) + ensure_directory(folder) + + if name is None: + name = 'output' + + path = os.path.join(folder, '{}.csv'.format(name)) + df.to_csv(path) + + return path + + +class Validator(object): + def __init__(self, data_portal): + self.data_portal = data_portal + + def compare_bundle_with_exchange(self, exchange, assets, end_dt, bar_count, + sample_minutes): + """ + Creates DataFrames from the bundle and exchange for the specified + data set. 
+ + Parameters + ---------- + exchange: Exchange + assets + end_dt + bar_count + sample_minutes + + Returns + ------- + + """ + freq = '{}T'.format(sample_minutes) + + log.info('creating data sample from bundle') + df1 = self.data_portal.get_history_window( + assets=assets, + end_dt=end_dt, + bar_count=bar_count, + frequency=freq, + field='close', + data_frequency='minute' + ) + path = output_df(df1, assets, '{}_resampled'.format(freq)) + log.info('saved resampled bundle candles: {}\n{}'.format( + path, df1.tail(10)) + ) + + log.info('creating data sample from exchange api') + candles = exchange.get_candles( + end_dt=end_dt, + freq='{}T'.format(sample_minutes), + assets=assets, + bar_count=bar_count + ) + + series = dict() + for asset in assets: + series[asset] = pd.Series( + data=[candle['close'] for candle in candles[asset]], + index=[candle['last_traded'] for candle in candles[asset]] + ) + + df2 = pd.DataFrame(series) + path = output_df(df2, assets, '{}_api'.format(freq)) + log.info('saved exchange api candles: {}\n{}'.format( + path, df2.tail(10)) + ) + + try: + assert_frame_equal(df1, df2) + return True + except: + log.warn('differences found in dataframes') + return False + + +if __name__ == '__main__': + exchanges = get_exchanges(['poloniex']) + exchange = six.next(six.itervalues(exchanges)) + assets = exchange.get_assets(symbols=['eth_btc']) + + open_calendar = get_calendar('OPEN') + asset_finder = AssetFinderExchange() + data_portal = DataPortalExchangeBacktest( + exchanges=exchanges, + asset_finder=asset_finder, + trading_calendar=open_calendar, + first_trading_day=None # will set dynamically based on assets + ) + validator = Validator(data_portal=data_portal) + + validator.compare_bundle_with_exchange( + exchange=exchange, + assets=assets, + end_dt=pd.to_datetime('2017-11-10 1:00', utc=True), + bar_count=200, + sample_minutes=30 + ) diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index 3966a1e8..cd5f2bde 100644 --- 
a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -438,7 +438,7 @@ class TestExchangeBundle: pass def main_bundle_to_csv(self): - exchange_name = 'bitfinex' + exchange_name = 'poloniex' data_frequency = 'minute' exchange = get_exchange(exchange_name) @@ -460,8 +460,8 @@ class TestExchangeBundle: def bundle_to_csv(self): exchange_name = 'poloniex' data_frequency = 'minute' - period = '2017-02' - symbol = 'lsk_eth' + period = '2017-01' + symbol = 'eth_btc' exchange = get_exchange(exchange_name) asset = exchange.get_asset(symbol) diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index efd2ffe6..7b2b4720 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -1,16 +1,13 @@ import pandas as pd -from catalyst.exchange.exchange_data_portal import DataPortalExchangeBacktest, \ - DataPortalExchangeLive from logbook import Logger -from test_utils import rnd_history_date_days, rnd_bar_count from catalyst import get_calendar from catalyst.exchange.asset_finder_exchange import AssetFinderExchange -from catalyst.exchange.bitfinex.bitfinex import Bitfinex -from catalyst.exchange.bittrex.bittrex import Bittrex -from catalyst.exchange.exchange_utils import get_exchange_auth, \ - get_common_assets +from catalyst.exchange.exchange_data_portal import DataPortalExchangeBacktest, \ + DataPortalExchangeLive +from catalyst.exchange.exchange_utils import get_common_assets from catalyst.exchange.factory import get_exchange, get_exchanges +from test_utils import rnd_history_date_days, rnd_bar_count, output_df log = Logger('test_bitfinex') @@ -115,38 +112,4 @@ class TestExchangeDataPortal: log.info('found history window: {}'.format(data)) def test_validate_resample(self): - symbol = ['eth_btc'] - exchange_name = 'poloniex' - exchange = get_exchange(exchange_name, base_currency=symbol) - - assets = exchange.get_assets(symbols=symbol) - - date = rnd_history_date_days( - max_days=10, - 
last_dt=pd.to_datetime('2017-11-1', utc=True) - ) - bar_count = rnd_bar_count(max_bars=10) - sample_minutes = 15 - sample_data = self.data_portal_backtest.get_history_window( - assets=assets, - end_dt=date, - bar_count=bar_count, - frequency='{}T'.format(sample_minutes), - field='close', - data_frequency='daily' - ) - minute_data = self.data_portal_backtest.get_history_window( - assets=assets, - end_dt=date, - bar_count=bar_count * sample_minutes, - frequency='1T', - field='close', - data_frequency='daily' - ) - resampled_minute_data = minute_data.resample( - '{}T'.format(sample_minutes)) - - print(sample_data.tail(10)) - print(resampled_minute_data.tail(10)) - print(minute_data.tail(10)) pass diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py index 60ff1e65..d84d1010 100644 --- a/tests/exchange/test_poloniex.py +++ b/tests/exchange/test_poloniex.py @@ -54,8 +54,9 @@ class TestPoloniex(BaseExchangeTestCase): log.info('retrieving candles') assets = self.exchange.get_asset('eth_btc') ohlcv = self.exchange.get_candles( - end_dt=pd.to_datetime('2017-11-01', utc=True), - freq='30T', + # end_dt=pd.to_datetime('2017-11-01', utc=True), + end_dt=None, + freq='5T', assets=assets, bar_count=200 ) @@ -63,7 +64,7 @@ class TestPoloniex(BaseExchangeTestCase): df.set_index('last_traded', drop=True, inplace=True) log.info(df.tail(25)) - path = output_df(df, assets, 'candles') + path = output_df(df, assets, '5min_candles') log.info('saved candles: {}'.format(path)) pass diff --git a/tests/exchange/test_utils.py b/tests/exchange/test_utils.py index d7f1df87..ddf76215 100644 --- a/tests/exchange/test_utils.py +++ b/tests/exchange/test_utils.py @@ -1,7 +1,12 @@ +import os +import tempfile from datetime import timedelta from random import randint import pandas as pd +from catalyst.assets._assets import TradingPair + +from catalyst.utils.paths import ensure_directory def rnd_history_date_days(max_days=30, last_dt=None): @@ -24,3 +29,38 @@ def 
rnd_bar_count(max_bars=21): now = pd.Timestamp.utcnow() return randint(0, max_bars) + + +def output_df(df, assets, name=None): + """ + Outputs a price DataFrame to a temp folder. + + Parameters + ---------- + df: pd.DataFrame + assets + name + + Returns + ------- + + """ + if isinstance(assets, TradingPair): + exchange_folder = assets.exchange + asset_folder = assets.symbol + else: + exchange_folder = ','.join([asset.exchange for asset in assets]) + asset_folder = ','.join([asset.symbol for asset in assets]) + + folder = os.path.join( + tempfile.gettempdir(), 'catalyst', exchange_folder, asset_folder + ) + ensure_directory(folder) + + if name is None: + name = 'output' + + path = os.path.join(folder, '{}.csv'.format(name)) + df.to_csv(path) + + return path From 1e8b0c36a1be2dfa38b81b92c7150b676b7977bc Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 20 Nov 2017 20:00:48 -0500 Subject: [PATCH 21/37] BUG: fixed #74, a problematic scenario when retrieving the history of multiple assets. --- catalyst/exchange/exchange_bundle.py | 101 ++++++++++----------- catalyst/support/issue_74.py | 127 +++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 55 deletions(-) create mode 100644 catalyst/support/issue_74.py diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 6c36fa9e..d6e1d558 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -826,6 +826,9 @@ class ExchangeBundle: delta = get_delta(trailing_bar_count, data_frequency) end_dt += delta + # This is an attempt to resolve some caching with the reader + # when auto-ingesting data. 
+ # TODO: needs more work reader = self.get_reader(data_frequency) if reset_reader: del self._readers[reader._rootdir] @@ -844,11 +847,11 @@ class ExchangeBundle: end_dt=end_dt ) + series = dict() for asset in assets: asset_start_dt, asset_end_dt = self.get_adj_dates( start_dt, end_dt, assets, data_frequency ) - in_bundle = range_in_bundle( asset, asset_start_dt, asset_end_dt, reader ) @@ -864,75 +867,63 @@ class ExchangeBundle: end_dt=asset_end_dt ) - series = dict() - try: + periods = self.get_calendar_periods_range( + asset_start_dt, asset_end_dt, data_frequency + ) + # This does not behave well when requesting multiple assets + # when the start or end date of one asset is outside of the range + # looking at the logic in load_raw_arrays(), we are not achieving + # any performance gain by requesting multiple sids at once. It's + # looping through the sids and making separate requests anyway. arrays = reader.load_raw_arrays( - sids=[asset.sid for asset in assets], + sids=[asset.sid], fields=[field], start_dt=start_dt, end_dt=end_dt ) + field_values = arrays[0][:, 0] - except Exception: - symbols = [asset.symbol.encode('utf-8') for asset in assets] - raise PricingDataNotLoadedError( - field=field, - first_trading_day=min([asset.start_date for asset in assets]), - exchange=self.exchange.name, - symbols=symbols, - symbol_list=','.join(symbols), - data_frequency=data_frequency, - start_dt=start_dt, - end_dt=end_dt - ) - - periods = self.get_calendar_periods_range( - start_dt, end_dt, data_frequency - ) - - for asset_index, asset in enumerate(assets): - asset_values = arrays[asset_index] - - value_series = pd.Series(asset_values.flatten(), index=periods) + value_series = pd.Series(field_values, index=periods) series[asset] = value_series return series - def clean(self, data_frequency): - """ - Removing the bundle data from the catalyst folder. 
- Parameters - ---------- - data_frequency: str +def clean(self, data_frequency): + """ + Removing the bundle data from the catalyst folder. - """ - log.debug('cleaning exchange {}, frequency {}'.format( - self.exchange.name, data_frequency - )) - root = get_exchange_folder(self.exchange.name) + Parameters + ---------- + data_frequency: str - symbols = os.path.join(root, 'symbols.json') - if os.path.isfile(symbols): - os.remove(symbols) + """ + log.debug('cleaning exchange {}, frequency {}'.format( + self.exchange.name, data_frequency + )) + root = get_exchange_folder(self.exchange.name) - temp_bundles = os.path.join(root, 'temp_bundles') + symbols = os.path.join(root, 'symbols.json') + if os.path.isfile(symbols): + os.remove(symbols) - if os.path.isdir(temp_bundles): - log.debug('removing folder and content: {}'.format(temp_bundles)) - shutil.rmtree(temp_bundles) - log.debug('{} removed'.format(temp_bundles)) + temp_bundles = os.path.join(root, 'temp_bundles') - frequencies = ['daily', 'minute'] if data_frequency is None \ - else [data_frequency] + if os.path.isdir(temp_bundles): + log.debug('removing folder and content: {}'.format(temp_bundles)) + shutil.rmtree(temp_bundles) + log.debug('{} removed'.format(temp_bundles)) - for frequency in frequencies: - label = '{}_bundle'.format(frequency) - frequency_bundle = os.path.join(root, label) + frequencies = ['daily', 'minute'] if data_frequency is None \ + else [data_frequency] - if os.path.isdir(frequency_bundle): - log.debug( - 'removing folder and content: {}'.format(frequency_bundle) - ) - shutil.rmtree(frequency_bundle) - log.debug('{} removed'.format(frequency_bundle)) + for frequency in frequencies: + label = '{}_bundle'.format(frequency) + frequency_bundle = os.path.join(root, label) + + if os.path.isdir(frequency_bundle): + log.debug( + 'removing folder and content: {}'.format(frequency_bundle) + ) + shutil.rmtree(frequency_bundle) + log.debug('{} removed'.format(frequency_bundle)) diff --git 
a/catalyst/support/issue_74.py b/catalyst/support/issue_74.py new file mode 100644 index 00000000..ad6d6fee --- /dev/null +++ b/catalyst/support/issue_74.py @@ -0,0 +1,127 @@ +from __future__ import division +import os +import pytz +import numpy as np +import pandas as pd +from scipy.optimize import minimize +import matplotlib.pyplot as plt +from datetime import datetime + +from catalyst.api import record, symbol, symbols, order_target_percent +from catalyst.utils.run_algo import run_algorithm + +np.set_printoptions(threshold='nan', suppress=True) + + +def initialize(context): + # Portfolio assets list + context.assets = symbols('btc_usdt', 'eth_usdt', 'ltc_usdt', 'dash_usdt', + 'xmr_usdt') + context.nassets = len(context.assets) + # Set the time window that will be used to compute expected return + # and asset correlations + context.window = 180 + # Set the number of days between each portfolio rebalancing + context.rebalance_period = 30 + context.i = 0 + + +def handle_data(context, data): + # Only rebalance at the beggining of the algorithm execution and + # every multiple of the rebalance period + if context.i == 0 or context.i % context.rebalance_period == 0: + n = context.window + prices = data.history(context.assets, fields='price', + bar_count=n + 1, frequency='daily') + pr = np.asmatrix(prices) + t_prices = prices.iloc[1:n + 1] + t_val = t_prices.values + tminus_prices = prices.iloc[0:n] + tminus_val = tminus_prices.values + # Compute daily returns (r) + r = np.asmatrix(t_val / tminus_val - 1) + # Compute the expected returns of each asset with the average + # daily return for the selected time window + m = np.asmatrix(np.mean(r, axis=0)) + # ### + stds = np.std(r, axis=0) + # Compute excess returns matrix (xr) + xr = r - m + # Matrix algebra to get variance-covariance matrix + cov_m = np.dot(np.transpose(xr), xr) / n + # Compute asset correlation matrix (informative only) + corr_m = cov_m / np.dot(np.transpose(stds), stds) + + # Define portfolio 
optimization parameters + n_portfolios = 50000 + results_array = np.zeros((3 + context.nassets, n_portfolios)) + for p in xrange(n_portfolios): + weights = np.random.random(context.nassets) + weights /= np.sum(weights) + w = np.asmatrix(weights) + p_r = np.sum(np.dot(w, np.transpose(m))) * 365 + p_std = np.sqrt( + np.dot(np.dot(w, cov_m), np.transpose(w))) * np.sqrt(365) + + # store results in results array + results_array[0, p] = p_r + results_array[1, p] = p_std + # store Sharpe Ratio (return / volatility) - risk free rate element + # excluded for simplicity + results_array[2, p] = results_array[0, p] / results_array[1, p] + i = 0 + for iw in weights: + results_array[3 + i, p] = weights[i] + i += 1 + + # convert results array to Pandas DataFrame + results_frame = pd.DataFrame(np.transpose(results_array), + columns=['r', 'stdev', + 'sharpe'] + context.assets) + # locate position of portfolio with highest Sharpe Ratio + max_sharpe_port = results_frame.iloc[results_frame['sharpe'].idxmax()] + # locate positon of portfolio with minimum standard deviation + min_vol_port = results_frame.iloc[results_frame['stdev'].idxmin()] + + # order optimal weights for each asset + for asset in context.assets: + if data.can_trade(asset): + order_target_percent(asset, max_sharpe_port[asset]) + + # create scatter plot coloured by Sharpe Ratio + plt.scatter(results_frame.stdev, results_frame.r, + c=results_frame.sharpe, cmap='RdYlGn') + plt.xlabel('Volatility') + plt.ylabel('Returns') + plt.colorbar() + # plot red star to highlight position of portfolio with highest Sharpe Ratio + plt.scatter(max_sharpe_port[1], max_sharpe_port[0], marker='o', + color='b', s=200) + # plot green star to highlight position of minimum variance portfolio + plt.show() + print(max_sharpe_port) + record(pr=pr, r=r, m=m, stds=stds, max_sharpe_port=max_sharpe_port, + corr_m=corr_m) + context.i += 1 + + +def analyze(context=None, results=None): + # Form DataFrame with selected data + data = results[['pr', 'r', 
'm', 'stds', 'max_sharpe_port', 'corr_m', + 'portfolio_value']] + + # Save results in CSV file + filename = os.path.splitext(os.path.basename(__file__))[0] + data.to_csv(filename + '.csv') + + +# Bitcoin data is available from 2015-3-2. Dates vary for other tokens. +start = datetime(2017, 1, 1, 0, 0, 0, 0, pytz.utc) +end = datetime(2017, 8, 16, 0, 0, 0, 0, pytz.utc) +results = run_algorithm(initialize=initialize, + handle_data=handle_data, + analyze=analyze, + start=start, + end=end, + exchange_name='poloniex', + capital_base=100000, ) From 0d366a350d9037dc30d94294e60bfcc496703e24 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 20 Nov 2017 21:53:48 -0500 Subject: [PATCH 22/37] BUG: fixed #75, adjusted the ruturn value of `run_algorithm` to support minute stats. --- catalyst/examples/simple_loop.py | 99 ++++++++++++++++++++++--- catalyst/exchange/exchange_algorithm.py | 31 +++++++- catalyst/exchange/exchange_bundle.py | 10 ++- catalyst/exchange/exchange_errors.py | 9 +++ 4 files changed, 135 insertions(+), 14 deletions(-) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index 8e6ce22e..bfd2d4f0 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -2,12 +2,15 @@ import talib import pandas as pd from catalyst import run_algorithm -from catalyst.api import symbol +from catalyst.api import symbol, record +from catalyst.exchange.stats_utils import get_pretty_stats, \ + extract_transactions def initialize(context): print('initializing') - context.asset = symbol('swift_btc') + context.asset = symbol('neo_usd') + context.base_price = None def handle_data(context, data): @@ -20,26 +23,104 @@ def handle_data(context, data): prices = data.history( context.asset, fields='price', - bar_count=15, - frequency='1D' + bar_count=14, + frequency='15T' ) rsi = talib.RSI(prices.values, timeperiod=14)[-1] print('got rsi: {}'.format(rsi)) except Exception as e: print(e) + # If base_price is not set, we use the 
current value. This is the + # price at the first bar which we reference to calculate price_change. + if context.base_price is None: + context.base_price = price + + price_change = (price - context.base_price) / context.base_price + cash = context.portfolio.cash + + # Now that we've collected all current data for this frame, we use + # the record() method to save it. This data will be available as + # a parameter of the analyze() function for further analysis. + record( + price=price, + price_change=price_change, + cash=cash + ) + + +def analyze(context, perf): + import matplotlib.pyplot as plt + print('the stats: {}'.format(get_pretty_stats(perf))) + + # The base currency of the algo exchange + base_currency = context.exchanges.values()[0].base_currency.upper() + + # Plot the portfolio value over time. + ax1 = plt.subplot(611) + perf.loc[:, 'portfolio_value'].plot(ax=ax1) + ax1.set_ylabel('Portfolio Value ({})'.format(base_currency)) + + # Plot the price increase or decrease over time. 
+ ax2 = plt.subplot(612, sharex=ax1) + perf.loc[:, 'price'].plot(ax=ax2, label='Price') + + ax2.set_ylabel('{asset} ({base})'.format( + asset=context.asset.symbol, base=base_currency + )) + + transaction_df = extract_transactions(perf) + if not transaction_df.empty: + buy_df = transaction_df[transaction_df['amount'] > 0] + sell_df = transaction_df[transaction_df['amount'] < 0] + ax2.scatter( + buy_df.index.to_pydatetime(), + perf.loc[buy_df.index, 'price'], + marker='^', + s=100, + c='green', + label='' + ) + ax2.scatter( + sell_df.index.to_pydatetime(), + perf.loc[sell_df.index, 'price'], + marker='v', + s=100, + c='red', + label='' + ) + + ax4 = plt.subplot(613, sharex=ax1) + perf.loc[:, 'cash'].plot( + ax=ax4, label='Base Currency ({})'.format(base_currency) + ) + ax4.set_ylabel('Cash ({})'.format(base_currency)) + + perf['algorithm'] = perf.loc[:, 'algorithm_period_return'] + + ax5 = plt.subplot(614, sharex=ax1) + perf.loc[:, ['algorithm', 'price_change']].plot(ax=ax5) + ax5.set_ylabel('Percent Change') + + plt.legend(loc=3) + + # Show the plot. 
+ plt.gcf().set_size_inches(18, 8) + plt.show() + pass + run_algorithm( capital_base=250, - start=pd.to_datetime('2015-4-1', utc=True), - end=pd.to_datetime('2017-11-1', utc=True), + start=pd.to_datetime('2017-11-1 0:00', utc=True), + end=pd.to_datetime('2017-11-10 23:59', utc=True), data_frequency='daily', initialize=initialize, handle_data=handle_data, - analyze=None, - exchange_name='bittrex', + analyze=analyze, + exchange_name='bitfinex', algo_namespace='simple_loop', - base_currency='btc' + base_currency='usd' ) # run_algorithm( # initialize=initialize, diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index f05bee7e..bb965b58 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -245,19 +245,42 @@ class ExchangeTradingAlgorithmBacktest(ExchangeTradingAlgorithmBase): else: return MarketOrder() + def is_last_frame_of_day(self, data): + # TODO: adjust here to support more intervals + next_frame_dt = data.current_dt + timedelta(minutes=1) + if next_frame_dt.date() > data.current_dt.date(): + return True + else: + return False + def handle_data(self, data): super(ExchangeTradingAlgorithmBacktest, self).handle_data(data) - minute_stats = self.prepare_period_stats( - data.current_dt, data.current_dt + timedelta(minutes=1)) - self.frame_stats.append(minute_stats) + if self.data_frequency == 'minute': + frame_stats = self.prepare_period_stats( + data.current_dt, data.current_dt + timedelta(minutes=1) + ) + self.frame_stats.append(frame_stats) - def analyze(self, perf): + def _create_stats_df(self): stats = pd.DataFrame(self.frame_stats) stats.set_index('period_close', inplace=True, drop=False) + return stats + def analyze(self, perf): + stats = self._create_stats_df() if self.data_frequency == 'minute' \ + else perf super(ExchangeTradingAlgorithmBacktest, self).analyze(stats) + def run(self, data=None, overwrite_sim_params=True): + perf = 
super(ExchangeTradingAlgorithmBacktest, self).run( + data, overwrite_sim_params + ) + # Rebuilding the stats to support minute data + stats = self._create_stats_df() if self.data_frequency == 'minute' \ + else perf + return stats + class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): def __init__(self, *args, **kwargs): diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index d6e1d558..d4ffffa0 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -26,7 +26,7 @@ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ TempBundleNotFoundError, \ NoDataAvailableOnExchange, \ - PricingDataNotLoadedError + PricingDataNotLoadedError, DataCorruptionError from catalyst.exchange.exchange_utils import get_exchange_folder from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -881,6 +881,14 @@ class ExchangeBundle: start_dt=start_dt, end_dt=end_dt ) + if len(arrays) == 0: + raise DataCorruptionError( + exchange=self.exchange.name, + symbols=asset.symbol, + start_dt=asset_start_dt, + end_dt=asset_end_dt + ) + field_values = arrays[0][:, 0] value_series = pd.Series(field_values, index=periods) diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index 35f320d8..a36bb23a 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -218,6 +218,15 @@ class PricingDataNotLoadedError(ZiplineError): 'for details.').strip() +class DataCorruptionError(ZiplineError): + msg = ('Unable to validate data for {exchange} {symbols} in date range ' + '[{start_dt} - {end_dt}]. The data is either corrupted or ' + 'unavailable. Please try deleting this bundle:' + '\n`catalyst clean-exchange -x {exchange}\n' + 'Then, ingest the data again. 
Please contact the Catalyst team if ' + 'the issue persists.').strip() + + class ApiCandlesError(ZiplineError): msg = ('Unable to fetch candles from the remote API: {error}.').strip() From 02dc4d6a301e792987d17b45c1af60335dffd064 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 21 Nov 2017 13:56:05 -0500 Subject: [PATCH 23/37] BUG: made some live-trading adjustments related to issue #71 --- catalyst/data/loader.py | 5 +- catalyst/examples/mean_reversion_simple.py | 12 ++-- catalyst/exchange/exchange.py | 67 +++++++++++++--------- catalyst/exchange/exchange_bundle.py | 11 ++-- catalyst/exchange/exchange_utils.py | 1 - 5 files changed, 54 insertions(+), 42 deletions(-) diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index 53409d23..ff3f34a8 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -149,13 +149,14 @@ def load_crypto_market_data(trading_day=None, trading_days=None, # exchange.get_history_window() already ensures that we have the right data # for the right dates - br = exchange.get_history_window( + br = exchange.get_history_window_with_bundle( assets=[benchmark_asset], end_dt=last_date, bar_count=pd.Timedelta(last_date - start_dt).days, frequency='1d', field='close', - data_frequency='daily') + data_frequency='daily', + force_auto_ingest=True) br.columns = ['close'] br = br.pct_change(1).iloc[1:] br.loc[start_dt] = 0 diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index d5efbea8..34d9c60c 100644 --- a/catalyst/examples/mean_reversion_simple.py +++ b/catalyst/examples/mean_reversion_simple.py @@ -27,7 +27,7 @@ def initialize(context): # parameters or values you're going to use. # In our example, we're looking at Ether in USD Tether. 
- context.neo_usd = symbol('neo_usd') + context.neo_usd = symbol('neo_btc') context.base_price = None context.current_day = None @@ -57,7 +57,7 @@ def handle_data(context, data): context.neo_usd, fields='close', bar_count=50, - frequency='15T' + frequency='30T' ) # Ta-lib calculates various technical indicator based on price and @@ -215,7 +215,7 @@ def analyze(context=None, perf=None): if __name__ == '__main__': # The execution mode: backtest or live - MODE = 'backtest' + MODE = 'live' if MODE == 'backtest': # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-10-1 -e 2017-11-10 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 @@ -237,9 +237,9 @@ if __name__ == '__main__': initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='bitfinex', + exchange_name='bittrex', live=True, algo_namespace=NAMESPACE, - base_currency='usd', - live_graph=True + base_currency='btc', + live_graph=False ) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index f10bbaf0..bd9cca9d 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -468,15 +468,14 @@ class Exchange: return series - @deprecated - def get_history_window_direct(self, - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency=None, - ffill=True): + def get_history_window(self, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency=None, + ffill=True): """ Public API method that returns a dataframe containing the requested @@ -514,35 +513,46 @@ class Exchange: A dataframe containing the requested data. 
""" - start_dt = get_start_dt(end_dt, bar_count, data_frequency) + freq, candle_size, unit, data_frequency = get_frequency( + frequency, data_frequency + ) + adj_bar_count = candle_size * bar_count + start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency) # The get_history method supports multiple asset candles = self.get_candles( - data_frequency=frequency, + freq=freq, assets=assets, bar_count=bar_count, start_dt=start_dt, end_dt=end_dt ) - candle_series = self.get_series_from_candles( - candles=candles, - start_dt=start_dt, - end_dt=end_dt, - data_frequency=frequency, - field=field, - ) - df = pd.DataFrame(candle_series) + series = dict() + for asset in candles: + asset_series = self.get_series_from_candles( + candles=candles[asset], + start_dt=start_dt, + end_dt=end_dt, + data_frequency=frequency, + field=field, + ) + series[asset] = asset_series + + df = pd.DataFrame(series) + df.dropna(inplace=True) + return df - def get_history_window(self, - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency=None, - ffill=True): + def get_history_window_with_bundle(self, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency=None, + ffill=True, + force_auto_ingest=False): """ Public API method that returns a dataframe containing the requested @@ -590,7 +600,8 @@ class Exchange: end_dt=end_dt, bar_count=adj_bar_count, field=field, - data_frequency=data_frequency + data_frequency=data_frequency, + force_auto_ingest=force_auto_ingest ) except (PricingDataNotLoadedError, NoDataAvailableOnExchange): series = dict() diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index d4ffffa0..23fa1737 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -684,7 +684,8 @@ class ExchangeBundle: field, data_frequency, algo_end_dt=None, - trailing_bar_count=None + trailing_bar_count=None, + force_auto_ingest=False ): """ Retrieve price data history, ingest missing data. 
@@ -703,7 +704,7 @@ class ExchangeBundle: Series """ - if AUTO_INGEST: + if AUTO_INGEST or force_auto_ingest: try: series = self.get_history_window_series( assets=assets, @@ -711,7 +712,7 @@ class ExchangeBundle: bar_count=bar_count, field=field, data_frequency=data_frequency, - trailing_bar_count=trailing_bar_count + trailing_bar_count=trailing_bar_count, ) return pd.DataFrame(series) @@ -740,7 +741,7 @@ class ExchangeBundle: field=field, data_frequency=data_frequency, reset_reader=True, - trailing_bar_count=trailing_bar_count + trailing_bar_count=trailing_bar_count, ) return series @@ -751,7 +752,7 @@ class ExchangeBundle: bar_count=bar_count, field=field, data_frequency=data_frequency, - trailing_bar_count=trailing_bar_count + trailing_bar_count=trailing_bar_count, ) return pd.DataFrame(series) diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 43144a0d..d4119491 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -487,7 +487,6 @@ def resample_history_df(df, freq, field): DataFrame """ - print(df.tail(30)) if field == 'open': agg = 'first' elif field == 'high': From c1d140a8319d8732c8a5fdf22ca11b6025c37a29 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 21 Nov 2017 15:55:56 -0500 Subject: [PATCH 24/37] BUG: fixed issue #77, a sortino warning prevents analyze() from completing --- catalyst/finance/risk/cumulative.py | 8 +++++--- catalyst/finance/risk/period.py | 21 ++++++++++++++------- catalyst/utils/run_algo.py | 5 +++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/catalyst/finance/risk/cumulative.py b/catalyst/finance/risk/cumulative.py index 82dd5c0b..20798f30 100644 --- a/catalyst/finance/risk/cumulative.py +++ b/catalyst/finance/risk/cumulative.py @@ -274,12 +274,14 @@ algorithm_returns ({algo_count}) in range {start} : {end} on {dt}" ) try: + risk = self.downside_risk[dt_loc] self.sortino[dt_loc] = sortino_ratio( self.algorithm_returns, - 
_downside_risk=self.downside_risk[dt_loc] + _downside_risk=risk ) - except Exception as e: - log.debug('sortino ratio error: {}'.format(e)) + except Exception: + # TODO: what causes it to error out? + self.sortino[dt_loc] = 0 self.information[dt_loc] = information_ratio( self.algorithm_returns, diff --git a/catalyst/finance/risk/period.py b/catalyst/finance/risk/period.py index ddef2def..83d34ba1 100644 --- a/catalyst/finance/risk/period.py +++ b/catalyst/finance/risk/period.py @@ -23,7 +23,7 @@ import numpy as np import pandas as pd from . import risk -from . risk import check_entry +from .risk import check_entry from empyrical import ( alpha_beta_aligned, @@ -85,7 +85,7 @@ class RiskMetricsPeriod(object): cum_returns(self.algorithm_returns).iloc[-1] if not self.algorithm_returns.index.equals( - self.benchmark_returns.index + self.benchmark_returns.index ): message = "Mismatch between benchmark_returns ({bm_count}) and \ algorithm_returns ({algo_count}) in range {start} : {end}" @@ -128,10 +128,17 @@ class RiskMetricsPeriod(object): self.downside_risk = downside_risk( self.algorithm_returns.values ) - self.sortino = sortino_ratio( - self.algorithm_returns.values, - _downside_risk=self.downside_risk, - ) + + try: + risk = self.downside_risk + self.sortino = sortino_ratio( + self.algorithm_returns.values, + _downside_risk=risk, + ) + except Exception: + # TODO: what causes it to error out? 
+ self.sortino = 0 + self.information = information_ratio( self.algorithm_returns.values, self.benchmark_returns.values, @@ -141,7 +148,7 @@ class RiskMetricsPeriod(object): self.benchmark_returns.values, ) self.excess_return = self.algorithm_period_returns - \ - self.treasury_period_return + self.treasury_period_return self.max_drawdown = max_drawdown(self.algorithm_returns.values) self.max_leverage = self.calculate_max_leverage() diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index b4335311..dfb97c3e 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -451,7 +451,8 @@ def run_algorithm(initialize, exchange_name=None, base_currency=None, algo_namespace=None, - live_graph=False): + live_graph=False, + output=os.devnull): """Run a trading algorithm. Parameters @@ -564,7 +565,7 @@ def run_algorithm(initialize, bundle_timestamp=bundle_timestamp, start=start, end=end, - output=os.devnull, + output=output, print_algo=False, local_namespace=False, environ=environ, From 3ec9853b75426dd1f63247632b7421d2c1fc632e Mon Sep 17 00:00:00 2001 From: fredfortier Date: Tue, 21 Nov 2017 20:42:44 -0500 Subject: [PATCH 25/37] BUG: in relation to issue #77, catching the remaining warnings --- catalyst/exchange/asset_finder_exchange.py | 10 +++++----- catalyst/finance/risk/cumulative.py | 4 ++-- catalyst/finance/risk/period.py | 13 +++++++++++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/catalyst/exchange/asset_finder_exchange.py b/catalyst/exchange/asset_finder_exchange.py index 61a0d2c2..ceaa5d38 100644 --- a/catalyst/exchange/asset_finder_exchange.py +++ b/catalyst/exchange/asset_finder_exchange.py @@ -41,11 +41,11 @@ class AssetFinderExchange(object): SidsNotFound When a requested sid is not found and default_none=False. 
""" - for sid in sids: - if sid in self._asset_cache: - log.debug('got asset from cache: {}'.format(sid)) - else: - log.debug('fetching asset: {}'.format(sid)) + # for sid in sids: + # if sid in self._asset_cache: + # log.debug('got asset from cache: {}'.format(sid)) + # else: + # log.debug('fetching asset: {}'.format(sid)) return list() def lookup_symbol(self, symbol, exchange, as_of_date=None, fuzzy=False): diff --git a/catalyst/finance/risk/cumulative.py b/catalyst/finance/risk/cumulative.py index 20798f30..37bd349b 100644 --- a/catalyst/finance/risk/cumulative.py +++ b/catalyst/finance/risk/cumulative.py @@ -196,8 +196,8 @@ class RiskMetricsCumulative(object): self.benchmark_cumulative_returns[dt_loc] = cum_returns( self.benchmark_returns )[-1] - except Exception as e: - log.debug('cumulative returns error: {}'.format(e)) + except Exception: + self.benchmark_cumulative_returns[dt_loc] = 0 benchmark_cumulative_returns_to_date = \ self.benchmark_cumulative_returns[:dt_loc + 1] diff --git a/catalyst/finance/risk/period.py b/catalyst/finance/risk/period.py index 83d34ba1..283bd3c5 100644 --- a/catalyst/finance/risk/period.py +++ b/catalyst/finance/risk/period.py @@ -14,6 +14,7 @@ # limitations under the License. 
import functools +import warnings import logbook @@ -78,8 +79,14 @@ class RiskMetricsPeriod(object): self.calculate_metrics() def calculate_metrics(self): - self.benchmark_period_returns = \ - cum_returns(self.benchmark_returns).iloc[-1] + warnings.filterwarnings('error') + + try: + self.benchmark_period_returns = \ + cum_returns(self.benchmark_returns).iloc[-1] + except Exception: + # TODO: why is there an error + self.benchmark_period_returns = 0 self.algorithm_period_returns = \ cum_returns(self.algorithm_returns).iloc[-1] @@ -152,6 +159,8 @@ class RiskMetricsPeriod(object): self.max_drawdown = max_drawdown(self.algorithm_returns.values) self.max_leverage = self.calculate_max_leverage() + warnings.resetwarnings() + def to_dict(self): """ Creates a dictionary representing the state of the risk report. From b4ab1a5375066390d45e1ddd16d5b1aeaf9866b5 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 21 Nov 2017 22:53:50 -0700 Subject: [PATCH 26/37] DOC: remake of beginner tutorial --- docs/source/beginner-tutorial.rst | 250 ++++++++++++++++++++---------- docs/source/install.rst | 14 +- 2 files changed, 173 insertions(+), 91 deletions(-) diff --git a/docs/source/beginner-tutorial.rst b/docs/source/beginner-tutorial.rst index 6848a806..f79c2c7e 100644 --- a/docs/source/beginner-tutorial.rst +++ b/docs/source/beginner-tutorial.rst @@ -5,9 +5,8 @@ Basics ~~~~~~ Catalyst is an open-source algorithmic trading simulator for crypto -assets written in Python. - -The source can be found at: https://github.com/enigmampc/catalyst +assets written in Python. The source code can be found at: +https://github.com/enigmampc/catalyst Some benefits include: @@ -25,8 +24,7 @@ Some benefits include: build profitable, data-driven investment strategies. This tutorial assumes that you have Catalyst correctly installed, see the -:doc:`installation instructions ` if you haven't set up -Catalyst yet. +:doc:`Install` section if you haven't set up Catalyst yet. 
Every ``catalyst`` algorithm consists of at least two functions you have to define: @@ -40,10 +38,12 @@ Before the start of the algorithm, ``catalyst`` calls the need to access from one algorithm iteration to the next. After the algorithm has been initialized, ``catalyst`` calls the -``handle_data()`` function once for each event. At every call, it passes -the same ``context`` variable and an event-frame called ``data`` -containing the current trading bar with open, high, low, and close -(OHLC) prices as well as volume for each crypto asset in your universe. +``handle_data()`` function on each iteration, that is, once per day (daily) or +once every minute (minute), depending on the frequency we choose to run our +simulation. On every iteration, ``handle_data()`` receives the same ``context`` +variable and an event-frame called ``data`` containing the current trading bar +with open, high, low, and close (OHLC) prices as well as volume for each +crypto asset in your universe. .. For more information on these functions, see the `relevant part of the .. Quantopian docs `. @@ -51,8 +51,8 @@ containing the current trading bar with open, high, low, and close My first algorithm ~~~~~~~~~~~~~~~~~~ -Lets take a look at a very simple algorithm from the ``examples`` -directory: `buy_btc_simple.py `_: +Let's take a look at a very simple algorithm from the ``examples`` directory: +`buy_btc_simple.py `_: .. code-block:: python @@ -70,9 +70,9 @@ directory: `buy_btc_simple.py `__. -Running the algorithm -~~~~~~~~~~~~~~~~~~~~~ - -To can now test this algorithm on crypto data, ``catalyst`` provides three -interfaces: - -- A command-line interface, -- ``IPython Notebook`` magic, -- and :func:`~catalyst.run_algorithm`. - Ingesting data -^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~ -In previous versions of Catalyst you needed to manually ingest data before running -your algorithm to make it available at runtime.
Starting with version 0.3, the -algorithm will automagically ingest the data it needs the first time that encounters -a data request for data that it doesn't have. +Before you can backtest your algorithm, you first need to load the historical +pricing data that Catalyst needs to run your simulation through a process called +``ingestion``. When you ingest data, Catalyst downloads that data in compressed +form from the Enigma servers (which eventually will migrate to the Enigma Data +Marketplace), and stores it locally to make it available at runtime. -Still, we believe it is important for you to have a high-level understanding -of how data is managed: +In order to ingest data, you need to run a command like the following: + +.. code-block:: bash + + catalyst ingest-exchange -x bitfinex -i btc_usd + +This instructs Catalyst to download pricing data from the ``Bitfinex`` exchange +for the ``btc_usd`` currency pair (this follows from the simple algorithm +presented above where we want to trade ``btc_usd``), and we're choosing to test +our algorithm using historical pricing data from the Bitfinex exchange. By +default, Catalyst assumes that you want data with ``daily`` frequency (one candle +bar per day). If you want instead ``minute`` frequency (one candle bar for every +minute), you would need to specify it as follows: + +.. code-block:: bash + + catalyst ingest-exchange -x bitfinex -i btc_usd -f minute + +.. parsed-literal:: + + Ingesting exchange bundle bitfinex... + [====================================] Ingesting daily price data on bitfinex: 100% + +We believe it is important for you to have a high-level understanding of how +data is managed, hence the following overview: - Pricing data is split and packaged into ``bundles``: chunks of data organized as time series that are kept up to date daily on Enigma's servers. Catalyst - downloads the bundles that needs at any given time, and reconstructs the whole - dataset in your hard drive. 
+ downloads the requested bundles and reconstructs the full dataset in your + hard drive. -- Pricing data is provided in ``daily`` and ``minute`` resolution. Those are different - bundle datasets, and are managed separately. +- Pricing data is provided in ``daily`` and ``minute`` resolution. Those are + different bundle datasets, and are managed separately. -- Bundles are exchange-specific, as the pricing data is specific to the trades that - happen in each exchange. You can optionally specify which exchange you want pricing - data from. +- Bundles are exchange-specific, as the pricing data is specific to the trades + that happen in each exchange. As a result, you must specify which + exchange you want pricing data from when ingesting data. -- Catalyst keeps track of all the downloaded bundles, so that it only has to download - them once, and will do incremental updates as needed. +- Catalyst keeps track of all the downloaded bundles, so that it only has to + download them once, and will do incremental updates as needed. -- When running in ``live trading`` mode, Catalyst will first look for historical - pricing data in the locally stored bundles. If there is anything missing, Catalyst will - hit the exchange for the most recent data, and merge it with the local bundle to make - it available for future iterations. +- When running in ``live trading`` mode, Catalyst will first look for + historical pricing data in the locally stored bundles. If there is anything + missing, Catalyst will hit the exchange for the most recent data, and merge + it with the local bundle to optimize the number of requests it needs to make + to the exchange. -If you want to learn more, check out the :ref:`ingesting data ` section -for more detail. +The ``ingest-exchange`` command in catalyst offers additional parameters to +further tweak the data ingestion process. You can learn more by running the +following from the command line: + +..
code-block:: bash + + catalyst ingest-exchange --help + +Running the algorithm +~~~~~~~~~~~~~~~~~~~~~ + +You can now test your algorithm using cryptoassets' historical pricing data. +``catalyst`` provides three interfaces: + +- A command-line interface (CLI), +- the ``IPython Notebook`` magic, +- and a :func:`~catalyst.run_algorithm` that you can call from other + Python scripts. + +We'll start with the CLI, and introduce the ``IPython Notebook`` below. Some of +the :doc:`example algorithms ` provide instructions on how to run +them both from the CLI, and using the :func:`~catalyst.run_algorithm` function. Command line interface ^^^^^^^^^^^^^^^^^^^^^^ -After you installed Catalyst you should be able to execute the following -from your command line (e.g. ``cmd.exe`` on Windows, or the Terminal app -on OSX). Displaying here a simplified output for eductional purposes: +After you installed Catalyst, you should be able to execute the following +from your command line (e.g. ``cmd.exe`` or the ``Anaconda Prompt`` on Windows, +or the Terminal application on MacOS). .. code-block:: bash $ catalyst --help +This is the resulting output, simplified for educational purposes: + .. parsed-literal:: Usage: catalyst [OPTIONS] COMMAND [ARGS]... @@ -158,10 +195,11 @@ on OSX). Displaying here a simplified output for eductional purposes: live Trade live with the given algorithm. run Run a backtest for the given algorithm. -There are three main modes you can run on Catalyst. The first being ``ingest-exchange`` -for data ingestion, which we have summarized in the previous section. The second -is ``live`` to use your algorithm to trade live against a given exchange, and the -third mode ``run`` is to backtest your algorithm before trading live with it. +There are three main modes you can run on Catalyst. The first being +``ingest-exchange`` for data ingestion, which we have covered in the previous +section.
The second is ``live`` to use your algorithm to trade live against a +given exchange, and the third mode ``run`` is to backtest your algorithm before +trading live with it. Let's start with backtesting, so run this other command to learn more about the available options: @@ -210,22 +248,24 @@ the available options: As you can see there are a couple of flags that specify where to find your -algorithm (``-f``) as well as a parameter to specify which exchange to use. -There are also arguments for the date range to run the algorithm over -(``--start`` and ``--end``). Finally, you'll want to save the performance -metrics of your algorithm so that you can analyze how it performed. This is -done via the ``--output`` flag and will cause it to write the performance -``DataFrame`` in the pickle Python file format. Note that you can also define -a configuration file with these parameters that you can then conveniently pass -to the ``-c`` option so that you don't have to supply the command line args -all the time (see the .conf files in the examples directory). +algorithm (``-f``) as well as the ``-x`` flag to specify which exchange to +use. There are also arguments for the date range to run the algorithm over +(``--start`` and ``--end``). You also need to set the base currency for your +algorithm through the ``-c`` flag, and the ``--capital-base``. All the +aforementioned parameters are required. Optionally, you will want to save the +performance metrics of your algorithm so that you can analyze how it performed. +This is done via the ``--output`` flag and will cause it to write the +performance ``DataFrame`` in the pickle Python file format. Note that you can +also define a configuration file with these parameters that you can then +conveniently pass to the ``-c`` option so that you don't have to supply the +command line args all the time.
Thus, to execute our algorithm from above and save the results to ``buy_btc_simple_out.pickle`` we would call ``catalyst run`` as follows: .. code-block:: python - catalyst run -f buy_btc_simple.py -x bitfinex --start 2016-1-1 --end 2017-9-30 -o buy_btc_simple_out.pickle + catalyst run -f buy_btc_simple.py -x bitfinex --start 2016-1-1 --end 2017-9-30 -c usd --capital-base 100000 -o buy_btc_simple_out.pickle .. parsed-literal:: @@ -253,17 +293,25 @@ slippage model that ``catalyst`` uses). .. see the `Quantopian docs `__ .. for more information). -Let's take a quick look at the performance ``DataFrame``. For this, we -use ``pandas`` from inside the IPython Notebook and print the first ten -rows. Note that ``catalyst`` makes heavy usage of -`pandas `_, especially for data input and -outputting so it's worth spending some time to learn it. + +Let's take a quick look at the performance ``DataFrame``. For this, we write +a different Python script--let's call it ``print_results.py``--and we make use of +the fantastic ``pandas`` library to print the first five rows. Note that +``catalyst`` makes heavy usage of `pandas `_, +especially for data analysis and outputting so it's worth spending some time to +learn it. .. code-block:: python import pandas as pd perf = pd.read_pickle('buy_btc_simple_out.pickle') # read in perf DataFrame - perf.head() + print(perf.head()) + +Which we execute by running: + +.. code-block:: bash + + $ python print_results.py .. raw:: html @@ -429,30 +477,48 @@ and allows us to plot the price of bitcoin. For example, we could easily examine now how our portfolio value changed over time compared to the bitcoin price. -.. code-block:: python - - %load_ext catalyst +Now we will run the simulation again, but this time we extend our original +algorithm with the addition of the ``analyze()`` function.
Somewhat analogously +to how ``initialize()`` gets called once before the start of the algorithm, +``analyze()`` gets called once at the end of the algorithm, and receives two +variables: ``context``, which we discussed at the very beginning, and ``perf``, +which is the pandas dataframe containing the performance data for our algorithm +that we reviewed above. Inside the ``analyze()`` function is where we can +analyze and visualize the results of our strategy. Here's the revised simple +algorithm (note the addition of Line 1, and Lines 11-18): .. code-block:: python - %pylab inline - figsize(12, 12) import matplotlib.pyplot as plt + from catalyst.api import order, record, symbol - ax1 = plt.subplot(211) - perf.portfolio_value.plot(ax=ax1) - ax1.set_ylabel('portfolio value') - ax2 = plt.subplot(212, sharex=ax1) - perf.btc.plot(ax=ax2) - ax2.set_ylabel('bitcoin price') + def initialize(context): + context.asset = symbol('btc_usd') -.. parsed-literal:: + def handle_data(context, data): + order(context.asset, 1) + record(btc = data.current(context.asset, 'price')) - Populating the interactive namespace from numpy and matplotlib + def analyze(context, perf): + ax1 = plt.subplot(211) + perf.portfolio_value.plot(ax=ax1) + ax1.set_ylabel('portfolio value') + ax2 = plt.subplot(212, sharex=ax1) + perf.btc.plot(ax=ax2) + ax2.set_ylabel('bitcoin price') + plt.show() -.. parsed-literal:: +Here we make use of the external visualization library called +`matplotlib `_, which you might recall we installed +alongside enigma-catalyst (with the exception of the ``Conda`` install, where it +was included by default inside the conda environment we created). If for any +reason you don't have it installed, you can add it by running: - +.. code-block:: bash + + (catalyst)$ pip install matplotlib + +If everything works well, you'll see the following chart: ..
image:: https://s3.amazonaws.com/enigmaco-docs/github.io/buy_btc_simple_graph.png @@ -460,6 +526,22 @@ Our algorithm performance as assessed by the ``portfolio_value`` closely matches that of the bitcoin price. This is not surprising as our algorithm only bought bitcoin every chance it got. + If you get an error when invoking matplotlib to visualize the performance + results, refer to `MacOS + Matplotlib `_. + Alternatively, some users have reported the following error when running an algo + in a Linux environment: + + .. parsed-literal:: + + ImportError: No module named _tkinter, please install the python-tk package + + Which can easily be solved by running (in Ubuntu/Debian-based systems): + + .. code-block:: bash + + sudo apt install python-tk + + Access to previous prices using ``history`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/install.rst b/docs/source/install.rst index 53ccbf29..e95eea65 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -80,7 +80,7 @@ Once either Conda or MiniConda has been set up you can install Catalyst: 4. Activate the environment (which you need to do every time you start a new session to run Catalyst): - **Linux or OSX:** + **Linux or MacOS:** .. code-block:: bash @@ -125,7 +125,7 @@ with the following steps: 3. Activate the environment: - **Linux or OSX:** + **Linux or MacOS:** .. code-block:: bash @@ -358,11 +358,11 @@ beginning of this page. MacOS Requirements ------------------ -The version of Python shipped with OSX by default is generally out of date, +The version of Python shipped with MacOS by default is generally out of date, and has a number of quirks because it's used directly by the operating system. For these reasons, many developers choose to install and use a separate Python installation.
The `Hitchhiker's Guide to Python`_ provides an excellent guide -to `Installing Python on OSX `_, +to `Installing Python on MacOS `_, which explains how to install Python with the `Homebrew`_ manager. Assuming you've installed Python with Homebrew, you'll also likely need the @@ -372,17 +372,17 @@ following brew packages: $ brew install freetype pkg-config gcc openssl -OSX + virtualenv + matplotlib +MacOS + virtualenv + matplotlib ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A note about using matplotlib in virtual enviroments on OSX: it may be +A note about using matplotlib in virtual environments on MacOS: it may be necessary to run .. code-block:: bash echo "backend: TkAgg" > ~/.matplotlib/matplotlibrc -in order to override the default ``macosx`` backend for your system, which +in order to override the default ``macosx`` backend for your system, which may not be accessible from inside the virtual environment. This will allow Catalyst to open matplotlib charts from within a virtual environment, which is useful for displaying the performance of your backtests. To learn more From 841acf0203efc999a710c9114cf65be023ce7f90 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 22 Nov 2017 16:03:21 -0500 Subject: [PATCH 27/37] BLD: implemented issue #79, using the capital_base parameter to override the amount of base currency available for trading --- catalyst/__main__.py | 5 +++-- catalyst/utils/run_algo.py | 30 ++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 77cfb164..86e49ca2 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -177,7 +177,7 @@ def ipython_only(option): default='-', metavar='FILENAME', show_default=True, - help="The location to write the perf data. If this is '-' the perf" + help="The location to write the perf data.
If this is '-' the perf" " will be written to stdout.", ) @click.option( @@ -549,9 +549,10 @@ def ingest_exchange(exchange_name, data_frequency, start, end, @click.pass_context def clean_algo(ctx, algo_namespace): click.echo( - 'Deleting the state folder of algo: {}...'.format(algo_namespace) + 'Cleaning algo state: {}'.format(algo_namespace) ) delete_algo_folder(algo_namespace) + click.echo('Done') @main.command(name='clean-exchange') diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index dfb97c3e..bf359ef4 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -170,8 +170,8 @@ def _run(handle_data, # This corresponds to the json file containing api token info exchange_auth = get_exchange_auth(exchange_name) - if live and ( - exchange_auth['key'] == '' or exchange_auth['secret'] == ''): + if live and (exchange_auth['key'] == '' \ + or exchange_auth['secret'] == ''): raise ExchangeAuthEmpty( exchange=exchange_name.title(), filename=os.path.join( @@ -263,17 +263,35 @@ def _run(handle_data, ) if base_currency in balances: - return balances[base_currency] + base_currency_available = balances[base_currency] + log.info( + 'base currency available in the account: {} {}'.format( + base_currency_available, base_currency + ) + ) + + if capital_base is not None \ + and capital_base < base_currency_available: + log.info( + 'using capital base limit: {} {}'.format( + capital_base, base_currency + ) + ) + amount = capital_base + else: + amount = base_currency_available + + return amount else: raise BaseCurrencyNotFoundError( base_currency=base_currency, exchange=exchange_name ) - capital_base = 0 + combined_capital_base = 0 for exchange_name in exchanges: exchange = exchanges[exchange_name] - capital_base += fetch_capital_base(exchange) + combined_capital_base += fetch_capital_base(exchange) sim_params = create_simulation_parameters( start=start, @@ -338,7 +356,7 @@ def _run(handle_data, raise ValueError( "invalid url %r, must begin with 
'sqlite:///'" % str(bundle_data.asset_finder.engine.url), - ) + ) env = TradingEnvironment(asset_db_path=connstr, environ=environ) first_trading_day = \ From 7dddc0a85f8290ce6bc49c3cc64c04205f47cd13 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Wed, 22 Nov 2017 21:11:16 -0500 Subject: [PATCH 28/37] BUG: fixed issue #80 but updated performance stats immediately after registering transactions in live mode --- catalyst/exchange/exchange_algorithm.py | 26 ++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index bb965b58..7ec8efd2 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -288,7 +288,7 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): self.live_graph = kwargs.pop('live_graph', None) self._clock = None - self.minute_stats = deque(maxlen=60) + self.frame_stats = deque(maxlen=60) self.pnl_stats = get_algo_df(self.algo_namespace, 'pnl_stats') @@ -557,8 +557,9 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): ) self.exposure_stats = pd.concat([self.exposure_stats, df]) - save_algo_df(self.algo_namespace, 'exposure_stats', - self.exposure_stats) + save_algo_df( + self.algo_namespace, 'exposure_stats', self.exposure_stats + ) def handle_data(self, data): """ @@ -575,8 +576,11 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): self._synchronize_portfolio() transactions = self._check_open_orders() - for transaction in transactions: - self.perf_tracker.process_transaction(transaction) + if len(transactions) > 0: + for transaction in transactions: + self.perf_tracker.process_transaction(transaction) + + self.perf_tracker.update_performance() if self._handle_data: self._handle_data(self, data) @@ -591,22 +595,22 @@ class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): # Performance tracker and keep only minute and cumulative 
self.perf_tracker.update_performance() - minute_stats = self.prepare_period_stats( + frame_stats = self.prepare_period_stats( data.current_dt, data.current_dt + timedelta(minutes=1)) # Saving the last hour in memory - self.minute_stats.append(minute_stats) + self.frame_stats.append(frame_stats) - self.add_pnl_stats(minute_stats) + self.add_pnl_stats(frame_stats) if self.recorded_vars: - self.add_custom_signals_stats(minute_stats) + self.add_custom_signals_stats(frame_stats) recorded_cols = list(self.recorded_vars.keys()) else: recorded_cols = None - self.add_exposure_stats(minute_stats) + self.add_exposure_stats(frame_stats) - print_df = pd.DataFrame(list(self.minute_stats)) + print_df = pd.DataFrame(list(self.frame_stats)) log.info( 'statistics for the last {stats_minutes} minutes:\n{stats}'.format( stats_minutes=self.stats_minutes, From 7daf295e633bd709b64c8f188677717584ccf026 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 23 Nov 2017 22:11:23 -0500 Subject: [PATCH 29/37] BLD: refactoring to decrease reliance on the Exchange in preparation to support ad-hoc CSV bundles --- catalyst/__main__.py | 20 ++- catalyst/constants.py | 2 +- catalyst/examples/mean_reversion_simple.py | 72 +++++--- catalyst/exchange/bitfinex/bitfinex.py | 2 +- catalyst/exchange/bittrex/bittrex.py | 2 +- catalyst/exchange/bundle_utils.py | 40 ++++- catalyst/exchange/exchange.py | 3 +- catalyst/exchange/exchange_bundle.py | 186 +++++++++++++-------- catalyst/exchange/exchange_data_portal.py | 51 +++--- catalyst/exchange/poloniex/poloniex.py | 2 +- catalyst/exchange/validator.py | 7 +- catalyst/utils/run_algo.py | 2 +- 12 files changed, 249 insertions(+), 140 deletions(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 86e49ca2..ea20c957 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -499,6 +499,13 @@ def live(ctx, help='A list of symbols to exclude from the ingestion ' '(optional comma separated list)', ) +@click.option( + '--csv', + default=None, + 
help='The path of a CSV file containing the data. If specified, start, ' + 'end, include-symbols and exclude-symbols will be ignored. Instead,' + 'all data in the file will be ingested.', +) @click.option( '--show-progress/--no-show-progress', default=True, @@ -515,8 +522,8 @@ def live(ctx, help='Report potential anomalies found in data bundles.' ) def ingest_exchange(exchange_name, data_frequency, start, end, - include_symbols, exclude_symbols, show_progress, verbose, - validate): + include_symbols, exclude_symbols, csv, show_progress, + verbose, validate): """ Ingest data for the given exchange. """ @@ -524,8 +531,7 @@ def ingest_exchange(exchange_name, data_frequency, start, end, if exchange_name is None: ctx.fail("must specify an exchange name '-x'") - exchange = get_exchange(exchange_name) - exchange_bundle = ExchangeBundle(exchange) + exchange_bundle = ExchangeBundle(exchange_name) click.echo('Ingesting exchange bundle {}...'.format(exchange_name)) exchange_bundle.ingest( @@ -536,7 +542,8 @@ def ingest_exchange(exchange_name, data_frequency, start, end, end=end, show_progress=show_progress, show_breakdown=verbose, - show_report=validate + show_report=validate, + csv=csv ) @@ -579,8 +586,7 @@ def clean_exchange(ctx, exchange_name, data_frequency): if exchange_name is None: ctx.fail("must specify an exchange name '-x'") - exchange = get_exchange(exchange_name) - exchange_bundle = ExchangeBundle(exchange) + exchange_bundle = ExchangeBundle(exchange_name) click.echo('Cleaning exchange bundle {}...'.format(exchange_name)) exchange_bundle.clean( diff --git a/catalyst/constants.py b/catalyst/constants.py index bbdae343..cde29914 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -2,7 +2,7 @@ import logbook -LOG_LEVEL = logbook.DEBUG +LOG_LEVEL = logbook.INFO DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index 34d9c60c..6660c829 100644 --- 
a/catalyst/examples/mean_reversion_simple.py +++ b/catalyst/examples/mean_reversion_simple.py @@ -1,6 +1,9 @@ # For this example, we're going to write a simple momentum script. When the # stock goes up quickly, we're going to buy; when it goes down quickly, we're # going to sell. Hopefully we'll ride the waves. +import os +import tempfile +import time import pandas as pd import talib @@ -9,14 +12,16 @@ from logbook import Logger from catalyst import run_algorithm from catalyst.api import symbol, record, order_target_percent, get_open_orders from catalyst.exchange.stats_utils import extract_transactions - # We give a name to the algorithm which Catalyst will use to persist its state. # In this example, Catalyst will create the `.catalyst/data/live_algos` # directory. If we stop and start the algorithm, Catalyst will resume its # state using the files included in the folder. +from catalyst.utils.paths import ensure_directory + NAMESPACE = 'mean_reversion_simple' log = Logger(NAMESPACE) + # To run an algorithm in Catalyst, you need two functions: initialize and # handle_data. @@ -27,10 +32,16 @@ def initialize(context): # parameters or values you're going to use. # In our example, we're looking at Ether in USD Tether. - context.neo_usd = symbol('neo_btc') + context.neo_eth = symbol('neo_eth') context.base_price = None context.current_day = None + context.RSI_OVERSOLD = 50 + context.RSI_OVERBOUGHT = 80 + context.CANDLE_SIZE = '5T' + + context.start_time = time.time() + def handle_data(context, data): # This handle_data function is where the real work is done. Our data is @@ -47,17 +58,17 @@ def handle_data(context, data): context.current_day = today # We're computing the volume-weighted-average-price of the security - # defined above, in the context.neo_usd variable. For this example, we're + # defined above, in the context.neo_eth variable. For this example, we're # using three bars on the 15 min bars. # The frequency attribute determine the bar size. 
We use this convention # for the frequency alias: # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases prices = data.history( - context.neo_usd, + context.neo_eth, fields='close', bar_count=50, - frequency='30T' + frequency=context.CANDLE_SIZE ) # Ta-lib calculates various technical indicator based on price and @@ -69,7 +80,7 @@ def handle_data(context, data): # We need a variable for the current price of the security to compare to # the average. Since we are requesting two fields, data.current() # returns a DataFrame with - current = data.current(context.neo_usd, fields=['close', 'volume']) + current = data.current(context.neo_eth, fields=['close', 'volume']) price = current['close'] # If base_price is not set, we use the current value. This is the @@ -98,42 +109,51 @@ def handle_data(context, data): # Since we are using limit orders, some orders may not execute immediately # we wait until all orders are executed before considering more trades. - orders = get_open_orders(context.neo_usd) + orders = get_open_orders(context.neo_eth) if len(orders) > 0: return # Exit if we cannot trade - if not data.can_trade(context.neo_usd): + if not data.can_trade(context.neo_eth): return # Another powerful built-in feature of the Catalyst backtester is the # portfolio object. The portfolio object tracks your positions, cash, # cost basis of specific holdings, and more. In this line, we calculate # how long or short our position is at this minute. 
- pos_amount = context.portfolio.positions[context.neo_usd].amount + pos_amount = context.portfolio.positions[context.neo_eth].amount - if rsi[-1] <= 30 and pos_amount == 0: + if rsi[-1] <= context.RSI_OVERSOLD and pos_amount == 0: log.info( '{}: buying - price: {}, rsi: {}'.format( data.current_dt, price, rsi[-1] ) ) - order_target_percent(context.neo_usd, 1) + # Set a style for limit orders, + limit_price = price * 1.005 + order_target_percent( + context.neo_eth, 1, limit_price=limit_price + ) context.traded_today = True - elif rsi[-1] >= 80 and pos_amount > 0: + elif rsi[-1] >= context.RSI_OVERBOUGHT and pos_amount > 0: log.info( '{}: selling - price: {}, rsi: {}'.format( data.current_dt, price, rsi[-1] ) ) - order_target_percent(context.neo_usd, 0) + limit_price = price * 0.995 + order_target_percent( + context.neo_eth, 0, limit_price=limit_price + ) context.traded_today = True def analyze(context=None, perf=None): - import matplotlib.pyplot as plt + end = time.time() + log.info('elapsed time: {}'.format(end - context.start_time)) + import matplotlib.pyplot as plt # The base currency of the algo exchange base_currency = context.exchanges.values()[0].base_currency.upper() @@ -147,7 +167,7 @@ def analyze(context=None, perf=None): perf.loc[:, 'price'].plot(ax=ax2, label='Price') ax2.set_ylabel('{asset} ({base})'.format( - asset=context.neo_usd.symbol, base=base_currency + asset=context.neo_eth.symbol, base=base_currency )) transaction_df = extract_transactions(perf) @@ -156,7 +176,7 @@ def analyze(context=None, perf=None): sell_df = transaction_df[transaction_df['amount'] < 0] ax2.scatter( buy_df.index.to_pydatetime(), - perf.loc[buy_df.index, 'price'], + perf.loc[buy_df.index.floor('1 min'), 'price'], marker='^', s=100, c='green', @@ -164,7 +184,7 @@ def analyze(context=None, perf=None): ) ax2.scatter( sell_df.index.to_pydatetime(), - perf.loc[sell_df.index, 'price'], + perf.loc[sell_df.index.floor('1 min'), 'price'], marker='v', s=100, c='red', @@ -191,7 +211,7 
@@ def analyze(context=None, perf=None): if not transaction_df.empty: ax6.scatter( buy_df.index.to_pydatetime(), - perf.loc[buy_df.index, 'rsi'], + perf.loc[buy_df.index.floor('1 min'), 'rsi'], marker='^', s=100, c='green', @@ -199,7 +219,7 @@ def analyze(context=None, perf=None): ) ax6.scatter( sell_df.index.to_pydatetime(), - perf.loc[sell_df.index, 'rsi'], + perf.loc[sell_df.index.floor('1 min'), 'rsi'], marker='v', s=100, c='red', @@ -218,6 +238,13 @@ if __name__ == '__main__': MODE = 'live' if MODE == 'backtest': + folder = os.path.join( + tempfile.gettempdir(), 'catalyst', NAMESPACE + ) + ensure_directory(folder) + + timestr = time.strftime('%Y%m%d-%H%M%S') + out = os.path.join(folder, '{}.p'.format(timestr)) # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-10-1 -e 2017-11-10 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 run_algorithm( capital_base=10000, @@ -228,18 +255,21 @@ if __name__ == '__main__': exchange_name='bitfinex', algo_namespace=NAMESPACE, base_currency='usd', - start=pd.to_datetime('2017-10-1', utc=True), + start=pd.to_datetime('2017-10-01', utc=True), end=pd.to_datetime('2017-11-10', utc=True), + output=out ) + log.info('saved perf stats: {}'.format(out)) elif MODE == 'live': run_algorithm( + capital_base=0.5, initialize=initialize, handle_data=handle_data, analyze=analyze, exchange_name='bittrex', live=True, algo_namespace=NAMESPACE, - base_currency='btc', + base_currency='eth', live_graph=False ) diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 4c878e04..66a72421 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -61,7 +61,7 @@ class Bitfinex(Exchange): self.max_requests_per_minute = 80 self.request_cpt = dict() - self.bundle = ExchangeBundle(self) + self.bundle = ExchangeBundle(self.name) def _request(self, operation, data, version='v1'): payload_object = { diff --git 
a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index d50d9bb8..fc248847 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -46,7 +46,7 @@ class Bittrex(Exchange): self.assets = dict() self.load_assets() - self.bundle = ExchangeBundle(self) + self.bundle = ExchangeBundle(self.name) @property def account(self): diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 9b0c8c49..c010479a 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -6,6 +6,7 @@ from datetime import timedelta, datetime, date import numpy as np import pandas as pd import pytz +from catalyst.assets._assets import TradingPair from catalyst.data.bundles.core import download_without_progress from catalyst.exchange.exchange_utils import get_exchange_bundles_folder @@ -13,7 +14,6 @@ from catalyst.exchange.exchange_utils import get_exchange_bundles_folder EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' - def get_date_from_ms(ms): """ The date from the number of miliseconds from the epoch. @@ -317,3 +317,41 @@ def range_in_bundle(asset, start_dt, end_dt, reader): has_data = False return has_data + + +def get_assets(exchange, include_symbols, exclude_symbols): + """ + Get assets from an exchange, including or excluding the specified + symbols. 
+ + Parameters + ---------- + exchange: Exchange + include_symbols: str + exclude_symbols: str + + Returns + ------- + list[TradingPair] + + """ + if include_symbols is not None: + include_symbols_list = include_symbols.split(',') + + return exchange.get_assets(include_symbols_list) + + else: + all_assets = exchange.get_assets() + + if exclude_symbols is not None: + exclude_symbols_list = exclude_symbols.split(',') + + assets = [] + for asset in all_assets: + if asset.symbol not in exclude_symbols_list: + assets.append(asset) + + return assets + + else: + return all_assets diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index bd9cca9d..d21bf273 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -24,7 +24,6 @@ from catalyst.exchange.exchange_utils import get_exchange_symbols, \ get_frequency, resample_history_df from catalyst.finance.order import ORDER_STATUS from catalyst.finance.transaction import Transaction -from catalyst.utils.deprecate import deprecated log = Logger('Exchange', level=LOG_LEVEL) @@ -43,7 +42,7 @@ class Exchange: self.num_candles_limit = None self.max_requests_per_minute = None self.request_cpt = None - self.bundle = ExchangeBundle(self) + self.bundle = ExchangeBundle(self.name) @property def positions(self): diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 23fa1737..da7908e2 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -20,7 +20,7 @@ from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ from catalyst.exchange.bundle_utils import range_in_bundle, \ get_bcolz_chunk, get_month_start_end, \ get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label, \ - get_delta + get_delta, get_assets from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ @@ -41,23 +41,14 @@ 
def _cachpath(symbol, type_): class ExchangeBundle: - def __init__(self, exchange): - self.exchange = exchange + def __init__(self, exchange_name): + self.exchange_name = exchange_name self.minutes_per_day = 1440 self.default_ohlc_ratio = 1000000 self._writers = dict() self._readers = dict() self.calendar = get_calendar('OPEN') - - def get_assets(self, include_symbols, exclude_symbols): - # TODO: filter exclude symbols assets - if include_symbols is not None: - include_symbols_list = include_symbols.split(',') - - return self.exchange.get_assets(include_symbols_list) - - else: - return self.exchange.get_assets() + self.exchange = None def get_reader(self, data_frequency, path=None): """ @@ -69,7 +60,7 @@ class ExchangeBundle: """ if path is None: - root = get_exchange_folder(self.exchange.name) + root = get_exchange_folder(self.exchange_name) path = BUNDLE_NAME_TEMPLATE.format( root=root, frequency=data_frequency @@ -100,7 +91,7 @@ class ExchangeBundle: BcolzMinuteBarWriter | BcolzDailyBarWriter """ - root = get_exchange_folder(self.exchange.name) + root = get_exchange_folder(self.exchange_name) path = BUNDLE_NAME_TEMPLATE.format( root=root, frequency=data_frequency @@ -158,9 +149,9 @@ class ExchangeBundle: ---------- assets: list[TradingPair] The assets is scope. - start_dt: datetime + start_dt: pd.Timestamp The chunk start date. - end_dt: datetime + end_dt: pd.Timestamp The chunk end date. 
data_frequency: str @@ -209,8 +200,8 @@ class ExchangeBundle: Parameters ---------- - start_dt: datetime - end_dt: datetime + start_dt: pd.Timestamp + end_dt: pd.Timestamp data_frequency: str Returns @@ -367,7 +358,7 @@ class ExchangeBundle: # Download and extract the bundle path = get_bcolz_chunk( - exchange_name=self.exchange.name, + exchange_name=self.exchange_name, symbol=asset.symbol, data_frequency=data_frequency, period=period @@ -436,14 +427,14 @@ class ExchangeBundle: Parameters ---------- - start: datetime - end: datetime + start: pd.Timestamp + end: pd.Timestamp assets: list[TradingPair] data_frequency: str Returns ------- - datetime, datetime + pd.Timestamp, pd.Timestamp """ earliest_trade = None last_entry = None @@ -490,8 +481,8 @@ class ExchangeBundle: ---------- assets: list[TradingPair] data_frequency: str - start_dt: datetime - end_dt: datetime + start_dt: pd.Timestamp + end_dt: pd.Timestamp Returns ------- @@ -574,8 +565,8 @@ class ExchangeBundle: ---------- assets: list[TradingPair] data_frequency: str - start_dt: datetime - end_dt: datetime + start_dt: pd.Timestamp + end_dt: pd.Timestamp show_progress: bool show_breakdown: bool @@ -611,7 +602,7 @@ class ExchangeBundle: show_progress, label='Ingesting {frequency} price data for ' '{symbol} on {exchange}'.format( - exchange=self.exchange.name, + exchange=self.exchange_name, frequency=data_frequency, symbol=asset.symbol )) as it: @@ -636,7 +627,7 @@ class ExchangeBundle: show_progress, label='Ingesting {frequency} price data on ' '{exchange}'.format( - exchange=self.exchange.name, + exchange=self.exchange_name, frequency=data_frequency, )) as it: for chunk in it: @@ -654,8 +645,41 @@ class ExchangeBundle: '\n'.join(problems) )) + def ingest_csv(self, path, data_frequency): + """ + Ingest price data from a CSV file. + + Parameters + ---------- + path: str + data_frequency: str + + Returns + ------- + list[str] + A list of potential problems detected during ingestion. 
+ + """ + log.info('ingesting csv file: {}'.format(path)) + problems = [] + + df = pd.read_csv( + path, + names=['symbol', 'last_traded', 'open', 'high', 'close', 'volume'], + parse_dates=[1] + ) + # problems += self.ingest_df( + # ohlcv_df=df, + # data_frequency=data_frequency, + # asset=asset, + # writer=writer, + # empty_rows_behavior=empty_rows_behavior, + # duplicates_threshold=duplicates_threshold + # ) + return filter(partial(is_not, None), problems) + def ingest(self, data_frequency, include_symbols=None, - exclude_symbols=None, start=None, end=None, + exclude_symbols=None, start=None, end=None, csv=None, show_progress=True, show_breakdown=True, show_report=True): """ Inject data based on specified parameters. @@ -665,17 +689,34 @@ class ExchangeBundle: data_frequency: str include_symbols: str exclude_symbols: str - start: datetime - end: datetime + start: pd.Timestamp + end: pd.Timestamp show_progress: bool environ: """ - assets = self.get_assets(include_symbols, exclude_symbols) + if csv is not None: + self.ingest_csv(csv, data_frequency) - for frequency in data_frequency.split(','): - self.ingest_assets(assets, frequency, start, end, - show_progress, show_breakdown, show_report) + else: + if self.exchange is None: + # Avoid circular dependencies + from catalyst.exchange.factory import get_exchange + self.exchange = get_exchange(self.exchange_name) + + assets = get_assets( + self.exchange, include_symbols, exclude_symbols + ) + for frequency in data_frequency.split(','): + self.ingest_assets( + assets=assets, + data_frequency=frequency, + start_dt=start, + end_dt=end, + show_progress=show_progress, + show_breakdown=show_breakdown, + show_report=show_report + ) def get_history_window_series_and_load(self, assets, @@ -693,11 +734,11 @@ class ExchangeBundle: Parameters ---------- assets: list[TradingPair] - end_dt: datetime + end_dt: pd.Timestamp bar_count: int field: str data_frequency: str - algo_end_dt: datetime + algo_end_dt: pd.Timestamp Returns ------- 
@@ -802,7 +843,7 @@ class ExchangeBundle: raise PricingDataNotLoadedError( field=field, first_trading_day=min([asset.start_date for asset in assets]), - exchange=self.exchange.name, + exchange=self.exchange_name, symbols=symbols, symbol_list=','.join(symbols), data_frequency=data_frequency, @@ -840,7 +881,7 @@ class ExchangeBundle: raise PricingDataNotLoadedError( field=field, first_trading_day=min([asset.start_date for asset in assets]), - exchange=self.exchange.name, + exchange=self.exchange_name, symbols=symbols, symbol_list=','.join(symbols), data_frequency=data_frequency, @@ -860,7 +901,7 @@ class ExchangeBundle: raise PricingDataNotLoadedError( field=field, first_trading_day=asset.start_date, - exchange=self.exchange.name, + exchange=self.exchange_name, symbols=asset.symbol, symbol_list=asset.symbol, data_frequency=data_frequency, @@ -884,7 +925,7 @@ class ExchangeBundle: ) if len(arrays) == 0: raise DataCorruptionError( - exchange=self.exchange.name, + exchange=self.exchange_name, symbols=asset.symbol, start_dt=asset_start_dt, end_dt=asset_end_dt @@ -897,42 +938,41 @@ class ExchangeBundle: return series + def clean(self, data_frequency): + """ + Removing the bundle data from the catalyst folder. -def clean(self, data_frequency): - """ - Removing the bundle data from the catalyst folder. 
+ Parameters + ---------- + data_frequency: str - Parameters - ---------- - data_frequency: str + """ + log.debug('cleaning exchange {}, frequency {}'.format( + self.exchange_name, data_frequency + )) + root = get_exchange_folder(self.exchange_name) - """ - log.debug('cleaning exchange {}, frequency {}'.format( - self.exchange.name, data_frequency - )) - root = get_exchange_folder(self.exchange.name) + symbols = os.path.join(root, 'symbols.json') + if os.path.isfile(symbols): + os.remove(symbols) - symbols = os.path.join(root, 'symbols.json') - if os.path.isfile(symbols): - os.remove(symbols) + temp_bundles = os.path.join(root, 'temp_bundles') - temp_bundles = os.path.join(root, 'temp_bundles') + if os.path.isdir(temp_bundles): + log.debug('removing folder and content: {}'.format(temp_bundles)) + shutil.rmtree(temp_bundles) + log.debug('{} removed'.format(temp_bundles)) - if os.path.isdir(temp_bundles): - log.debug('removing folder and content: {}'.format(temp_bundles)) - shutil.rmtree(temp_bundles) - log.debug('{} removed'.format(temp_bundles)) + frequencies = ['daily', 'minute'] if data_frequency is None \ + else [data_frequency] - frequencies = ['daily', 'minute'] if data_frequency is None \ - else [data_frequency] + for frequency in frequencies: + label = '{}_bundle'.format(frequency) + frequency_bundle = os.path.join(root, label) - for frequency in frequencies: - label = '{}_bundle'.format(frequency) - frequency_bundle = os.path.join(root, label) - - if os.path.isdir(frequency_bundle): - log.debug( - 'removing folder and content: {}'.format(frequency_bundle) - ) - shutil.rmtree(frequency_bundle) - log.debug('{} removed'.format(frequency_bundle)) + if os.path.isdir(frequency_bundle): + log.debug( + 'removing folder and content: {}'.format(frequency_bundle) + ) + shutil.rmtree(frequency_bundle) + log.debug('{} removed'.format(frequency_bundle)) diff --git a/catalyst/exchange/exchange_data_portal.py b/catalyst/exchange/exchange_data_portal.py index 
c5d7ac85..4cfac2b7 100644 --- a/catalyst/exchange/exchange_data_portal.py +++ b/catalyst/exchange/exchange_data_portal.py @@ -21,7 +21,6 @@ log = Logger('DataPortalExchange', level=LOG_LEVEL) class DataPortalExchangeBase(DataPortal): def __init__(self, *args, **kwargs): - self.exchanges = kwargs.pop('exchanges', None) # TODO: put somewhere accessible by each algo self.retry_get_history_window = 5 self.retry_get_spot_value = 5 @@ -49,11 +48,10 @@ class DataPortalExchangeBase(DataPortal): if len(exchange_assets) > 1: df_list = [] for exchange_name in exchange_assets: - exchange = self.exchanges[exchange_name] assets = exchange_assets[exchange_name] df_exchange = self.get_exchange_history_window( - exchange, + exchange_name, assets, end_dt, bar_count, @@ -68,9 +66,9 @@ class DataPortalExchangeBase(DataPortal): return pd.concat(df_list) else: - exchange = self.exchanges[list(exchange_assets.keys())[0]] + exchange_name = list(exchange_assets.keys())[0] return self.get_exchange_history_window( - exchange, + exchange_name, assets, end_dt, bar_count, @@ -122,7 +120,7 @@ class DataPortalExchangeBase(DataPortal): @abc.abstractmethod def get_exchange_history_window(self, - exchange, + exchange_name, assets, end_dt, bar_count, @@ -136,9 +134,8 @@ class DataPortalExchangeBase(DataPortal): attempt_index=0): try: if isinstance(assets, TradingPair): - exchange = self.exchanges[assets.exchange] spot_values = self.get_exchange_spot_value( - exchange, [assets], field, dt, data_frequency) + assets.exchange, [assets], field, dt, data_frequency) if not spot_values: return np.nan @@ -154,17 +151,16 @@ class DataPortalExchangeBase(DataPortal): exchange_assets[asset.exchange].append(asset) if len(list(exchange_assets.keys())) == 1: - exchange = self.exchanges[list(exchange_assets.keys())[0]] + exchange_name = list(exchange_assets.keys())[0] return self.get_exchange_spot_value( - exchange, assets, field, dt, data_frequency) + exchange_name, assets, field, dt, data_frequency) else: 
spot_values = [] for exchange_name in exchange_assets: - exchange = self.exchanges[exchange_name] assets = exchange_assets[exchange_name] exchange_spot_values = self.get_exchange_spot_value( - exchange, + exchange_name, assets, field, dt, @@ -199,7 +195,7 @@ class DataPortalExchangeBase(DataPortal): return self._get_spot_value(assets, field, dt, data_frequency) @abc.abstractmethod - def get_exchange_spot_value(self, exchange, assets, field, dt, + def get_exchange_spot_value(self, exchange_name, assets, field, dt, data_frequency): return @@ -214,10 +210,11 @@ class DataPortalExchangeBase(DataPortal): class DataPortalExchangeLive(DataPortalExchangeBase): def __init__(self, *args, **kwargs): + self.exchanges = kwargs.pop('exchanges', None) super(DataPortalExchangeLive, self).__init__(*args, **kwargs) def get_exchange_history_window(self, - exchange, + exchange_name, assets, end_dt, bar_count, @@ -230,7 +227,7 @@ class DataPortalExchangeLive(DataPortalExchangeBase): Parameters ---------- - exchange: Exchange + exchange_name: Exchange assets: list[TradingPair] end_dt: datetime bar_count: int @@ -244,6 +241,7 @@ class DataPortalExchangeLive(DataPortalExchangeBase): DataFrame """ + exchange = self.exchanges[exchange_name] df = exchange.get_history_window( assets, end_dt, @@ -254,14 +252,14 @@ class DataPortalExchangeLive(DataPortalExchangeBase): ffill) return df - def get_exchange_spot_value(self, exchange, assets, field, dt, + def get_exchange_spot_value(self, exchange_name, assets, field, dt, data_frequency): """ A spot value for the exchange. 
Parameters ---------- - exchange: Exchange + exchange_name: str assets: list[TradingPair] field: str dt: datetime @@ -272,6 +270,7 @@ class DataPortalExchangeLive(DataPortalExchangeBase): float """ + exchange = self.exchanges[exchange_name] exchange_spot_values = exchange.get_spot_value( assets, field, dt, data_frequency) @@ -280,16 +279,16 @@ class DataPortalExchangeLive(DataPortalExchangeBase): class DataPortalExchangeBacktest(DataPortalExchangeBase): def __init__(self, *args, **kwargs): + self.exchange_names = kwargs.pop('exchange_names', None) + super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) self.exchange_bundles = dict() - self.history_loaders = dict() self.minute_history_loaders = dict() - for exchange_name in self.exchanges: - exchange = self.exchanges[exchange_name] - self.exchange_bundles[exchange_name] = ExchangeBundle(exchange) + for name in self.exchange_names: + self.exchange_bundles[name] = ExchangeBundle(name) def _get_first_trading_day(self, assets): first_date = None @@ -299,7 +298,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): return first_date def get_exchange_history_window(self, - exchange, + exchange_name, assets, end_dt, bar_count, @@ -326,7 +325,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): DataFrame """ - bundle = self.exchange_bundles[exchange.name] # type: ExchangeBundle + bundle = self.exchange_bundles[exchange_name] # type: ExchangeBundle freq, candle_size, unit, adj_data_frequency = get_frequency( frequency, data_frequency @@ -351,7 +350,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): return df def get_exchange_spot_value(self, - exchange, + exchange_name, assets, field, dt, @@ -363,7 +362,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): Parameters ---------- - exchange: Exchange + exchange_name: str assets: list[TradingPair] field: str dt: datetime @@ -374,7 +373,7 @@ class DataPortalExchangeBacktest(DataPortalExchangeBase): float """ - bundle = 
self.exchange_bundles[exchange.name] + bundle = self.exchange_bundles[exchange_name] if data_frequency == 'daily': dt = dt.floor('1D') else: diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 0d283d8b..ece0c659 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -47,7 +47,7 @@ class Poloniex(Exchange): self.max_requests_per_minute = 60 self.request_cpt = dict() - self.bundle = ExchangeBundle(self) + self.bundle = ExchangeBundle(self.name) def sanitize_curency_symbol(self, exchange_symbol): """ diff --git a/catalyst/exchange/validator.py b/catalyst/exchange/validator.py index 2037fd59..5cd0b1e2 100644 --- a/catalyst/exchange/validator.py +++ b/catalyst/exchange/validator.py @@ -1,20 +1,17 @@ import os import tempfile +import pandas as pd import six from catalyst.assets._assets import TradingPair, get_calendar from logbook import Logger - -import pandas as pd from pandas.util.testing import assert_frame_equal from catalyst.constants import LOG_LEVEL from catalyst.exchange.asset_finder_exchange import AssetFinderExchange -from catalyst.exchange.bundle_utils import get_start_dt from catalyst.exchange.exchange_data_portal import DataPortalExchangeBacktest -from catalyst.exchange.factory import get_exchange, get_exchanges +from catalyst.exchange.factory import get_exchanges from catalyst.utils.paths import ensure_directory -from catalyst.exchange.exchange import Exchange log = Logger('Validator', level=LOG_LEVEL) diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index bf359ef4..af62f97d 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -320,7 +320,7 @@ def _run(handle_data, # can handle this later. 
data = DataPortalExchangeBacktest( - exchanges=exchanges, + exchange_names=[exchange_name for exchange_name in exchanges], asset_finder=None, trading_calendar=open_calendar, first_trading_day=start, From 6a7c47f3a9990c08e436ba32700141ad6a8b82e0 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 24 Nov 2017 01:58:33 -0500 Subject: [PATCH 30/37] BLD: implementing issue #65, adding local symbols definition --- catalyst/constants.py | 1 + catalyst/exchange/exchange_bundle.py | 72 ++++++++++++++++++++++++++-- catalyst/exchange/exchange_utils.py | 37 ++++++++++---- tests/exchange/test_bundle.py | 10 ++++ 4 files changed, 107 insertions(+), 13 deletions(-) diff --git a/catalyst/constants.py b/catalyst/constants.py index cde29914..172de34e 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -5,5 +5,6 @@ import logbook LOG_LEVEL = logbook.INFO DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' +DATE_FORMAT = '%Y-%m-%d' AUTO_INGEST = False \ No newline at end of file diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index da7908e2..25eaab86 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,3 +1,4 @@ +import json import os import shutil from datetime import datetime, timedelta @@ -26,8 +27,9 @@ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ TempBundleNotFoundError, \ NoDataAvailableOnExchange, \ - PricingDataNotLoadedError, DataCorruptionError -from catalyst.exchange.exchange_utils import get_exchange_folder + PricingDataNotLoadedError, DataCorruptionError, ExchangeSymbolsNotFound +from catalyst.exchange.exchange_utils import get_exchange_folder, \ + get_exchange_symbols, perf_serial, symbols_serial from catalyst.utils.cli import maybe_show_progress from catalyst.utils.paths import ensure_directory @@ -661,13 +663,73 @@ class ExchangeBundle: """ log.info('ingesting csv file: 
{}'.format(path)) - problems = [] + try: + symbols_def = get_exchange_symbols( + self.exchange_name, is_local=True + ) + except ExchangeSymbolsNotFound: + symbols_def = dict() + problems = [] df = pd.read_csv( path, - names=['symbol', 'last_traded', 'open', 'high', 'close', 'volume'], - parse_dates=[1] + header=0, + sep=',', + dtype=dict( + symbol=np.object_, + last_traded=np.object_, + open=np.float64, + high=np.float64, + close=np.float64, + volume=np.float64 + ), + parse_dates=['last_traded'], + index_col=None ) + + symbols = df['symbol'].unique() + trading_pairs = dict() + for symbol in symbols: + start_dt = df['last_traded'].min() + end_dt = df['last_traded'].max() + end_dt_key = 'end_{}'.format(data_frequency) + + if symbol is symbols_def: + symbol_def = symbols_def[symbol] + + start_dt = symbol_def['start_date'] \ + if symbol_def['start_date'] < start_dt else start_dt + + end_dt = symbol_def[end_dt_key] \ + if symbol_def[end_dt_key] > end_dt else end_dt + + end_daily = end_dt \ + if data_frequency == 'daily' else symbol_def['end_daily'] + + end_minute = end_dt \ + if data_frequency == 'minute' else symbol_def['end_minute'] + + else: + end_daily = end_dt if data_frequency == 'daily' else None + end_minute = end_dt if data_frequency == 'minute' else None + + trading_pair = TradingPair( + symbol=symbol, + exchange=self.exchange_name, + start_date=start_dt, + end_date=end_dt, + leverage=0, # TODO: add as an optional column + asset_name=symbol, + min_trade_size=0, # TODO: add as an optional column + end_daily=end_daily, + end_minute=end_minute, + exchange_symbol=symbol + ) + trading_pairs[symbol] = trading_pair.to_dict() + + symbols_def_json = json.dumps(trading_pairs, default=symbols_serial) + df.set_index(['symbol', 'last_traded'], drop=True, inplace=True) + df.tz_localize('UTC', level=1) # problems += self.ingest_df( # ohlcv_df=df, # data_frequency=data_frequency, diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 
d4119491..3546ed28 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -9,6 +9,7 @@ import pandas as pd from catalyst.assets._assets import TradingPair from six.moves.urllib import request +from catalyst.constants import DATE_TIME_FORMAT, DATE_FORMAT from catalyst.exchange.exchange_errors import ExchangeSymbolsNotFound, \ InvalidHistoryFrequencyError, InvalidHistoryFrequencyAlias from catalyst.utils.paths import data_root, ensure_directory, \ @@ -42,7 +43,7 @@ def get_exchange_folder(exchange_name, environ=None): return exchange_folder -def get_exchange_symbols_filename(exchange_name, environ=None): +def get_exchange_symbols_filename(exchange_name, is_local=False, environ=None): """ The absolute path of the exchange's symbol.json file. @@ -56,8 +57,9 @@ def get_exchange_symbols_filename(exchange_name, environ=None): str """ + name = 'symbols.json' if not is_local else 'symbols_local.json' exchange_folder = get_exchange_folder(exchange_name, environ) - return os.path.join(exchange_folder, 'symbols.json') + return os.path.join(exchange_folder, name) def download_exchange_symbols(exchange_name, environ=None): @@ -80,13 +82,14 @@ def download_exchange_symbols(exchange_name, environ=None): return response -def get_exchange_symbols(exchange_name, environ=None): +def get_exchange_symbols(exchange_name, is_local=False, environ=None): """ The de-serialized content of the exchange's symbols.json. 
Parameters ---------- exchange_name: str + is_local: bool environ: Returns @@ -94,12 +97,11 @@ def get_exchange_symbols(exchange_name, environ=None): Object """ - filename = get_exchange_symbols_filename(exchange_name) + filename = get_exchange_symbols_filename(exchange_name, is_local) - if not os.path.isfile(filename) or \ - pd.Timedelta(pd.Timestamp('now', - tz='UTC') - last_modified_time( - filename)).days > 1: + if not is_local and (not os.path.isfile(filename) or pd.Timedelta( + pd.Timestamp('now', tz='UTC') - last_modified_time( + filename)).days > 1): download_exchange_symbols(exchange_name, environ) if os.path.isfile(filename): @@ -363,6 +365,25 @@ def get_exchange_bundles_folder(exchange_name, environ=None): return temp_bundles +def symbols_serial(obj): + """ + JSON serializer for objects not serializable by default json code + + Parameters + ---------- + obj: Object + + Returns + ------- + str + + """ + if isinstance(obj, (datetime, date)): + return obj.floor('1D').strftime(DATE_FORMAT) + + raise TypeError("Type %s not serializable" % type(obj)) + + def perf_serial(obj): """ JSON serializer for objects not serializable by default json code diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index cd5f2bde..eed7af4f 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -525,3 +525,13 @@ class TestExchangeBundle: print('TAIL\n{}'.format(df.tail(10))) df.to_csv(path) pass + + def test_ingest_csv(self): + data_frequency = 'minute' + exchange_name = 'bittrex' + path = '/Users/fredfortier/Dropbox/Enigma/Data/bat_eth.csv' + + exchange_bundle = ExchangeBundle(exchange_name) + exchange_bundle.ingest_csv(path, data_frequency) + + pass From 968e70b69bf2aaa0b07295191cacb461214a293a Mon Sep 17 00:00:00 2001 From: fredfortier Date: Sat, 25 Nov 2017 07:43:20 -0500 Subject: [PATCH 31/37] BLD: implementing issue #65, implemented custom exchange data --- catalyst/assets/_assets.pyx | 18 +- catalyst/constants.py | 5 +- 
catalyst/examples/mean_reversion.py | 283 --------------------- catalyst/exchange/asset_finder_exchange.py | 12 +- catalyst/exchange/bitfinex/bitfinex.py | 7 +- catalyst/exchange/bittrex/bittrex.py | 3 + catalyst/exchange/bundle_utils.py | 4 +- catalyst/exchange/exchange.py | 54 ++-- catalyst/exchange/exchange_algorithm.py | 1 + catalyst/exchange/exchange_bundle.py | 84 ++++-- catalyst/exchange/exchange_utils.py | 58 ++++- catalyst/exchange/poloniex/poloniex.py | 7 +- tests/exchange/test_bundle.py | 31 ++- 13 files changed, 223 insertions(+), 344 deletions(-) delete mode 100644 catalyst/examples/mean_reversion.py diff --git a/catalyst/assets/_assets.pyx b/catalyst/assets/_assets.pyx index 19ddacd3..3537493f 100644 --- a/catalyst/assets/_assets.pyx +++ b/catalyst/assets/_assets.pyx @@ -38,6 +38,7 @@ from numpy cimport int64_t import warnings cimport numpy as np +from catalyst.exchange.exchange_utils import get_sid from catalyst.utils.calendars import get_calendar from catalyst.exchange.exchange_errors import InvalidSymbolError, SidHashError @@ -503,11 +504,7 @@ cdef class TradingPair(Asset): if sid == 0 or sid is None: try: - # sid = abs(hash(symbol)) % (10 ** 4) - # TODO: try to encode the symbol in the main scope - sid = int( - hashlib.sha256(symbol.encode('utf-8')).hexdigest(), 16 - ) % 10 ** 6 + sid = get_sid(symbol) except Exception as e: raise SidHashError(symbol=symbol) @@ -559,6 +556,17 @@ cdef class TradingPair(Asset): end_minute=self.end_minute ) + cpdef to_dict(self): + """ + Convert to a python dict. 
+ """ + super_dict = super(TradingPair, self).to_dict() + super_dict['end_daily'] = self.end_daily + super_dict['end_minute'] = self.end_minute + super_dict['leverage'] = self.leverage + super_dict['min_trade_size'] = self.min_trade_size + return super_dict + def is_exchange_open(self, dt_minute): """ Parameters diff --git a/catalyst/constants.py b/catalyst/constants.py index 172de34e..93c118b6 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -4,7 +4,10 @@ import logbook LOG_LEVEL = logbook.INFO +SYMBOLS_URL = 'https://s3.amazonaws.com/enigmaco/catalyst-exchanges/' \ + '{exchange}/symbols.json' + DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' DATE_FORMAT = '%Y-%m-%d' -AUTO_INGEST = False \ No newline at end of file +AUTO_INGEST = False diff --git a/catalyst/examples/mean_reversion.py b/catalyst/examples/mean_reversion.py deleted file mode 100644 index 3e0f2ecf..00000000 --- a/catalyst/examples/mean_reversion.py +++ /dev/null @@ -1,283 +0,0 @@ -# For this example, we're going to write a simple momentum script. When the -# stock goes up quickly, we're going to buy; when it goes down quickly, we're -# going to sell. Hopefully we'll ride the waves. -from datetime import timedelta - -import pandas as pd -import talib -# To run an algorithm in Catalyst, you need two functions: initialize and -# handle_data. -from logbook import Logger -from talib.common import MA_Type - -from catalyst import run_algorithm -from catalyst.api import symbol, record, order_target_percent, \ - get_open_orders -# We give a name to the algorithm which Catalyst will use to persist its state. -# In this example, Catalyst will create the `.catalyst/data/live_algos` -# directory. If we stop and start the algorithm, Catalyst will resume its -# state using the files included in the folder. 
-from catalyst.exchange.stats_utils import extract_transactions, trend_direction - -algo_namespace = 'mean_reversion' -log = Logger(algo_namespace) - - -def initialize(context): - # This initialize function sets any data or variables that you'll use in - # your algorithm. For instance, you'll want to define the trading pair (or - # trading pairs) you want to backtest. You'll also want to define any - # parameters or values you're going to use. - - # In our example, we're looking at Ether in USD Tether. - context.eth_btc = symbol('neo_usd') - context.base_price = None - context.current_day = None - context.trigger = None - - -def handle_data(context, data): - # This handle_data function is where the real work is done. Our data is - # minute-level tick data, and each minute is called a frame. This function - # runs on each frame of the data. - - # We flag the first period of each day. - # Since cryptocurrencies trade 24/7 the `before_trading_starts` handle - # would only execute once. This method works with minute and daily - # frequencies. - today = data.current_dt.floor('1D') - if today != context.current_day: - context.traded_today = False - context.current_day = today - - # We're computing the volume-weighted-average-price of the security - # defined above, in the context.eth_btc variable. For this example, we're - # using three bars on the 15 min bars. - - # The frequency attribute determine the bar size. We use this convention - # for the frequency alias: - # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases - prices = data.history( - context.eth_btc, - fields='close', - bar_count=50, - frequency='15T' - ) - - # Ta-lib calculates various technical indicator based on price and - # volume arrays. 
- - # In this example, we are comp - rsi = talib.RSI(prices.values, timeperiod=14) - upper, middle, lower = talib.BBANDS( - prices.values, - timeperiod=20, - nbdevup=2, - nbdevdn=2, - matype=MA_Type.EMA - ) - - # We need a variable for the current price of the security to compare to - # the average. Since we are requesting two fields, data.current() - # returns a DataFrame with - current = data.current(context.eth_btc, fields=['close', 'volume']) - price = current['close'] - - # If base_price is not set, we use the current value. This is the - # price at the first bar which we reference to calculate price_change. - if context.base_price is None: - context.base_price = price - - price_change = (price - context.base_price) / context.base_price - cash = context.portfolio.cash - - # Now that we've collected all current data for this frame, we use - # the record() method to save it. This data will be available as - # a parameter of the analyze() function for further analysis. - record( - price=price, - volume=current['volume'], - upper_band=upper[-1], - lower_band=lower[-1], - price_change=price_change, - rsi=rsi[-1], - cash=cash - ) - - # We are trying to avoid over-trading by limiting our trades to - # one per day. - if context.traded_today: - return - - # Since we are using limit orders, some orders may not execute immediately - # we wait until all orders are executed before considering more trades. - orders = get_open_orders(context.eth_btc) - if len(orders) > 0: - return - - # Exit if we cannot trade - if not data.can_trade(context.eth_btc): - return - - # Another powerful built-in feature of the Catalyst backtester is the - # portfolio object. The portfolio object tracks your positions, cash, - # cost basis of specific holdings, and more. In this line, we calculate - # how long or short our position is at this minute. 
- pos_amount = context.portfolio.positions[context.eth_btc].amount - - # In this example, we're using a trigger instead of buying directly after - # a signal. Since this is mean reversion, our signals go against the - # momentum. Using a trigger allow us to spot the opportunity but trade - # only when a trade reversal begins. - if context.trigger is not None: - # The tread_direction() method determines the trend based on the last - # two bars of the series. - direction = trend_direction(rsi) - if context.trigger[1] == 'buy' and direction == 'up': - log.info( - '{}: buying - price: {}, rsi: {}, bband: {}'.format( - data.current_dt, price, rsi[-1], lower[-1] - ) - ) - order_target_percent(context.eth_btc, 1) - context.traded_today = True - context.trigger = None - - elif context.trigger[1] == 'sell' and direction == 'down': - log.info( - '{}: selling - price: {}, rsi: {}, bband: {}'.format( - data.current_dt, price, rsi[-1], upper[-1] - ) - ) - order_target_percent(context.eth_btc, 0) - context.traded_today = True - context.trigger = None - - # If we found a signal but no trade reversal within two hours, we - # reset the trigger. - elif context.trigger[0] + timedelta(hours=2) < data.current_dt: - context.trigger = None - - else: - # Determining the entry and exit signals based on RSI and SMA - if rsi[-1] <= 30 and pos_amount == 0: - context.trigger = (data.current_dt, 'buy') - - elif rsi[-1] >= 80 and pos_amount > 0: - context.trigger = (data.current_dt, 'sell') - - -def analyze(context=None, perf=None): - import matplotlib.pyplot as plt - - # The base currency of the algo exchange - base_currency = context.exchanges.values()[0].base_currency.upper() - - # Plot the portfolio value over time. - ax1 = plt.subplot(611) - perf.loc[:, 'portfolio_value'].plot(ax=ax1) - ax1.set_ylabel('Portfolio Value ({})'.format(base_currency)) - - # Plot the price increase or decrease over time. 
- ax2 = plt.subplot(612, sharex=ax1) - perf.loc[:, 'price'].plot(ax=ax2, label='Price') - perf.loc[:, 'upper_band'].plot(ax=ax2, label='Upper') - perf.loc[:, 'lower_band'].plot(ax=ax2, label='Lower') - - ax2.set_ylabel('{asset} ({base})'.format( - asset=context.eth_btc.symbol, base=base_currency - )) - - transaction_df = extract_transactions(perf) - if not transaction_df.empty: - buy_df = transaction_df[transaction_df['amount'] > 0] - sell_df = transaction_df[transaction_df['amount'] < 0] - ax2.scatter( - buy_df.index.to_pydatetime(), - perf.loc[buy_df.index, 'price'], - marker='^', - s=100, - c='green', - label='' - ) - ax2.scatter( - sell_df.index.to_pydatetime(), - perf.loc[sell_df.index, 'price'], - marker='v', - s=100, - c='red', - label='' - ) - - ax4 = plt.subplot(613, sharex=ax1) - perf.loc[:, 'cash'].plot( - ax=ax4, label='Base Currency ({})'.format(base_currency) - ) - ax4.set_ylabel('Cash ({})'.format(base_currency)) - - perf['algorithm'] = perf.loc[:, 'algorithm_period_return'] - - ax5 = plt.subplot(614, sharex=ax1) - perf.loc[:, ['algorithm', 'price_change']].plot(ax=ax5) - ax5.set_ylabel('Percent Change') - - ax6 = plt.subplot(615, sharex=ax1) - perf.loc[:, 'rsi'].plot(ax=ax6, label='RSI') - ax6.axhline(70, color='darkgoldenrod') - ax6.axhline(30, color='darkgoldenrod') - - if not transaction_df.empty: - ax6.scatter( - buy_df.index.to_pydatetime(), - perf.loc[buy_df.index, 'rsi'], - marker='^', - s=100, - c='green', - label='' - ) - ax6.scatter( - sell_df.index.to_pydatetime(), - perf.loc[sell_df.index, 'rsi'], - marker='v', - s=100, - c='red', - label='' - ) - plt.legend(loc=3) - - # Show the plot. 
- plt.gcf().set_size_inches(18, 8) - plt.show() - pass - - -if __name__ == '__main__': - # The execution mode: backtest or live - MODE = 'backtest' - - if MODE == 'backtest': - # catalyst run -f catalyst/examples/mean_reversion_simple.py -x poloniex -s 2017-7-1 -e 2017-7-31 -c usdt -n mean-reversion --data-frequency minute --capital-base 10000 - run_algorithm( - capital_base=1, - data_frequency='minute', - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bitfinex', - algo_namespace=algo_namespace, - base_currency='usd', - start=pd.to_datetime('2017-10-1', utc=True), - end=pd.to_datetime('2017-11-13', utc=True), - ) - - elif MODE == 'live': - run_algorithm( - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bitfinex', - live=True, - algo_namespace=algo_namespace, - base_currency='usd', - live_graph=True - ) diff --git a/catalyst/exchange/asset_finder_exchange.py b/catalyst/exchange/asset_finder_exchange.py index ceaa5d38..2cf3aa4e 100644 --- a/catalyst/exchange/asset_finder_exchange.py +++ b/catalyst/exchange/asset_finder_exchange.py @@ -48,7 +48,8 @@ class AssetFinderExchange(object): # log.debug('fetching asset: {}'.format(sid)) return list() - def lookup_symbol(self, symbol, exchange, as_of_date=None, fuzzy=False): + def lookup_symbol(self, symbol, exchange, data_frequency=None, + as_of_date=None, fuzzy=False): """Lookup an asset by symbol. 
Parameters @@ -84,10 +85,15 @@ class AssetFinderExchange(object): """ log.debug('looking up symbol: {} {}'.format(symbol, exchange.name)) - key = ','.join([exchange.name, symbol]) + if data_frequency is not None: + key = ','.join([exchange.name, symbol, data_frequency]) + + else: + key = ','.join([exchange.name, symbol]) + if key in self._asset_cache: return self._asset_cache[key] else: - asset = exchange.get_asset(symbol) + asset = exchange.get_asset(symbol, data_frequency) self._asset_cache[key] = asset return asset diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index 66a72421..12c051cd 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -46,8 +46,13 @@ class Bitfinex(Exchange): self.secret = secret.encode('UTF-8') self.name = 'bitfinex' self.color = 'green' - self.assets = {} + + self.assets = dict() self.load_assets() + + self.local_assets = dict() + self.load_assets(is_local=True) + self.base_currency = base_currency self._portfolio = portfolio self.minute_writer = None diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index fc248847..af85aef1 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -46,6 +46,9 @@ class Bittrex(Exchange): self.assets = dict() self.load_assets() + self.local_assets = dict() + self.load_assets(is_local=True) + self.bundle = ExchangeBundle(self.name) @property diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index c010479a..29509bfa 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -9,11 +9,13 @@ import pytz from catalyst.assets._assets import TradingPair from catalyst.data.bundles.core import download_without_progress -from catalyst.exchange.exchange_utils import get_exchange_bundles_folder +from catalyst.exchange.exchange_utils import get_exchange_bundles_folder, \ + 
get_exchange_symbols EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] API_URL = 'http://data.enigma.co/api/v1' + def get_date_from_ms(ms): """ The date from the number of miliseconds from the epoch. diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index d21bf273..6315c82d 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -16,7 +16,7 @@ from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ PricingDataNotLoadedError, \ - NoDataAvailableOnExchange + NoDataAvailableOnExchange, ExchangeSymbolsNotFound from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -33,7 +33,8 @@ class Exchange: def __init__(self): self.name = None - self.assets = {} + self.assets = dict() + self.local_assets = dict() self._portfolio = None self.minute_writer = None self.minute_reader = None @@ -173,7 +174,7 @@ class Exchange: return symbols - def get_assets(self, symbols=None): + def get_assets(self, symbols=None, data_frequency=None): """ The list of markets for the specified symbols. 
@@ -190,7 +191,7 @@ class Exchange: if symbols is not None: for symbol in symbols: - asset = self.get_asset(symbol) + asset = self.get_asset(symbol, data_frequency) assets.append(asset) else: for key in self.assets: @@ -198,7 +199,19 @@ class Exchange: return assets - def get_asset(self, symbol): + def _find_asset(self, asset, symbol, data_frequency, is_local=False): + assets = self.assets if not is_local else self.local_assets + + for key in assets: + if not asset and assets[key].symbol.lower() == symbol.lower() and ( + not data_frequency or ( + data_frequency == 'minute' and assets[ + key].end_minute is not None)): + asset = assets[key] + + return asset + + def get_asset(self, symbol, data_frequency=None): """ The market for the specified symbol. @@ -213,13 +226,17 @@ class Exchange: """ asset = None - for key in self.assets: - if not asset and self.assets[key].symbol.lower() == symbol.lower(): - asset = self.assets[key] + log.debug('searching asset {} on the server') + asset = self._find_asset(asset, symbol, data_frequency, False) + + log.debug('asset {} not found on the server, searching local assets') + asset = self._find_asset(asset, symbol, data_frequency, True) if not asset: + all_values = list(self.assets.values()) + \ + list(self.local_assets.values()) supported_symbols = [ - pair.symbol for pair in list(self.assets.values()) + asset.symbol for asset in all_values ] raise SymbolNotFoundOnExchange( @@ -230,10 +247,10 @@ class Exchange: return asset - def fetch_symbol_map(self): - return get_exchange_symbols(self.name) + def fetch_symbol_map(self, is_local=False): + return get_exchange_symbols(self.name, is_local) - def load_assets(self): + def load_assets(self, is_local=False): """ Populate the 'assets' attribute with a dictionary of Assets. The key of the resulting dictionary is the exchange specific @@ -246,11 +263,15 @@ class Exchange: universal symbol. This simple approach avoids maintaining a mapping of sids. 
- This method can be overridden if an exchange offers equivalent data + This method can be omerridden if an exchange offers equivalent data via its api. """ - symbol_map = self.fetch_symbol_map() + try: + symbol_map = self.fetch_symbol_map(is_local) + except ExchangeSymbolsNotFound: + return None + for exchange_symbol in symbol_map: asset = symbol_map[exchange_symbol] @@ -302,7 +323,10 @@ class Exchange: exchange_symbol=exchange_symbol ) - self.assets[exchange_symbol] = trading_pair + if is_local: + self.local_assets[exchange_symbol] = trading_pair + else: + self.assets[exchange_symbol] = trading_pair def check_open_orders(self): """ diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index 7ec8efd2..d03d6efa 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -117,6 +117,7 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): return self.asset_finder.lookup_symbol( symbol=symbol_str, exchange=exchange, + data_frequency=self.data_frequency, as_of_date=_lookup_date ) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 25eaab86..3a2a57b2 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,4 +1,4 @@ -import json +import os import os import shutil from datetime import datetime, timedelta @@ -8,6 +8,7 @@ from operator import is_not import numpy as np import pandas as pd +import pytz from catalyst.assets._assets import TradingPair from logbook import Logger from pytz import UTC @@ -29,7 +30,7 @@ from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ NoDataAvailableOnExchange, \ PricingDataNotLoadedError, DataCorruptionError, ExchangeSymbolsNotFound from catalyst.exchange.exchange_utils import get_exchange_folder, \ - get_exchange_symbols, perf_serial, symbols_serial + get_exchange_symbols, save_exchange_symbols from catalyst.utils.cli import maybe_show_progress from 
catalyst.utils.paths import ensure_directory @@ -647,7 +648,8 @@ class ExchangeBundle: '\n'.join(problems) )) - def ingest_csv(self, path, data_frequency): + def ingest_csv(self, path, data_frequency, empty_rows_behavior='strip', + duplicates_threshold=100): """ Ingest price data from a CSV file. @@ -686,12 +688,19 @@ class ExchangeBundle: parse_dates=['last_traded'], index_col=None ) + min_start_dt = None + max_end_dt = None symbols = df['symbol'].unique() - trading_pairs = dict() + + # Apply the timezone before creating an index for simplicity + df['last_traded'] = df['last_traded'].dt.tz_localize(pytz.UTC) + df.set_index(['symbol', 'last_traded'], drop=True, inplace=True) + + assets = dict() for symbol in symbols: - start_dt = df['last_traded'].min() - end_dt = df['last_traded'].max() + start_dt = df.index.get_level_values(1).min() + end_dt = df.index.get_level_values(1).max() end_dt_key = 'end_{}'.format(data_frequency) if symbol is symbols_def: @@ -710,10 +719,16 @@ class ExchangeBundle: if data_frequency == 'minute' else symbol_def['end_minute'] else: - end_daily = end_dt if data_frequency == 'daily' else None - end_minute = end_dt if data_frequency == 'minute' else None + end_daily = end_dt if data_frequency == 'daily' else 'N/A' + end_minute = end_dt if data_frequency == 'minute' else 'N/A' - trading_pair = TradingPair( + if min_start_dt is None or start_dt < min_start_dt: + min_start_dt = start_dt + + if max_end_dt is None or end_dt > max_end_dt: + max_end_dt = end_dt + + asset = TradingPair( symbol=symbol, exchange=self.exchange_name, start_date=start_dt, @@ -725,19 +740,42 @@ class ExchangeBundle: end_minute=end_minute, exchange_symbol=symbol ) - trading_pairs[symbol] = trading_pair.to_dict() + assets[symbol] = asset - symbols_def_json = json.dumps(trading_pairs, default=symbols_serial) - df.set_index(['symbol', 'last_traded'], drop=True, inplace=True) - df.tz_localize('UTC', level=1) - # problems += self.ingest_df( - # ohlcv_df=df, - # 
data_frequency=data_frequency, - # asset=asset, - # writer=writer, - # empty_rows_behavior=empty_rows_behavior, - # duplicates_threshold=duplicates_threshold - # ) + save_exchange_symbols(self.exchange_name, assets, True) + + writer = self.get_writer( + start_dt=min_start_dt.replace(hour=00, minute=00), + end_dt=max_end_dt.replace(hour=23, minute=59), + data_frequency=data_frequency + ) + + for symbol in assets: + asset = assets[symbol] + ohlcv_df = df.loc[ + (df.index.get_level_values(0) == symbol) + ] # type: pd.DataFrame + ohlcv_df.index = ohlcv_df.index.droplevel(0) + + period_start = start_dt.replace(hour=00, minute=00) + period_end = end_dt.replace(hour=23, minute=59) + periods = self.get_calendar_periods_range( + period_start, period_end, data_frequency + ) + + # We're not really resampling but ensuring that each frame + # contains data + ohlcv_df = ohlcv_df.reindex(periods, method='ffill') + ohlcv_df['volume'] = ohlcv_df['volume'].fillna(0) + + problems += self.ingest_df( + ohlcv_df=ohlcv_df, + data_frequency=data_frequency, + asset=asset, + writer=writer, + empty_rows_behavior=empty_rows_behavior, + duplicates_threshold=duplicates_threshold + ) return filter(partial(is_not, None), problems) def ingest(self, data_frequency, include_symbols=None, @@ -1018,6 +1056,10 @@ class ExchangeBundle: if os.path.isfile(symbols): os.remove(symbols) + local_symbols = os.path.join(root, 'symbols_local.json') + if os.path.isfile(local_symbols): + os.remove(local_symbols) + temp_bundles = os.path.join(root, 'temp_bundles') if os.path.isdir(temp_bundles): diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 3546ed28..678d9945 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -1,3 +1,4 @@ +import hashlib import json import os import pickle @@ -9,14 +10,31 @@ import pandas as pd from catalyst.assets._assets import TradingPair from six.moves.urllib import request -from catalyst.constants import 
DATE_TIME_FORMAT, DATE_FORMAT +from catalyst.constants import DATE_FORMAT, SYMBOLS_URL from catalyst.exchange.exchange_errors import ExchangeSymbolsNotFound, \ InvalidHistoryFrequencyError, InvalidHistoryFrequencyAlias from catalyst.utils.paths import data_root, ensure_directory, \ last_modified_time -SYMBOLS_URL = 'https://s3.amazonaws.com/enigmaco/catalyst-exchanges/' \ - '{exchange}/symbols.json' + +def get_sid(symbol): + """ + Create a sid by hashing the symbol of a currency pair. + + Parameters + ---------- + symbol: str + + Returns + ------- + int + The resulting sid. + + """ + sid = int( + hashlib.sha256(symbol.encode('utf-8')).hexdigest(), 16 + ) % 10 ** 6 + return sid def get_exchange_folder(exchange_name, environ=None): @@ -106,8 +124,12 @@ def get_exchange_symbols(exchange_name, is_local=False, environ=None): if os.path.isfile(filename): with open(filename) as data_file: - data = json.load(data_file) - return data + try: + data = json.load(data_file) + return data + + except ValueError: + return dict() else: raise ExchangeSymbolsNotFound( exchange=exchange_name, @@ -115,6 +137,32 @@ def get_exchange_symbols(exchange_name, is_local=False, environ=None): ) +def save_exchange_symbols(exchange_name, assets, is_local=False, environ=None): + """ + Save assets into an exchange_symbols file. + + Parameters + ---------- + exchange_name: str + assets: list[dict[str, object]] + is_local: bool + environ + + Returns + ------- + + """ + asset_dicts = dict() + for symbol in assets: + asset_dicts[symbol] = assets[symbol].to_dict() + + filename = get_exchange_symbols_filename( + exchange_name, is_local, environ + ) + with open(filename, 'wt') as handle: + json.dump(asset_dicts, handle, indent=4, default=symbols_serial) + + def get_symbols_string(assets): """ A concatenated string of symbols from a list of assets. 
diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index ece0c659..1c2028ee 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -35,8 +35,13 @@ class Poloniex(Exchange): def __init__(self, key, secret, base_currency, portfolio=None): self.api = Poloniex_api(key=key, secret=secret) self.name = 'poloniex' - self.assets = {} + + self.assets = dict() self.load_assets() + + self.local_assets = dict() + self.load_assets(is_local=True) + self.base_currency = base_currency self._portfolio = portfolio self.minute_writer = None diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index eed7af4f..89ef3062 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -448,7 +448,7 @@ class TestExchangeBundle: end_dt = pd.to_datetime('2016-6-1', utc=True) self._bundle_to_csv( asset=asset, - exchange=exchange, + exchange_name=exchange.name, data_frequency=data_frequency, filename='{}_{}_{}'.format( exchange_name, data_frequency, asset.symbol @@ -474,16 +474,16 @@ class TestExchangeBundle: ) self._bundle_to_csv( asset=asset, - exchange=exchange, + exchange_name=exchange.name, data_frequency=data_frequency, path=path, filename=period ) pass - def _bundle_to_csv(self, asset, exchange, data_frequency, filename, + def _bundle_to_csv(self, asset, exchange_name, data_frequency, filename, path=None, start_dt=None, end_dt=None): - bundle = ExchangeBundle(exchange) + bundle = ExchangeBundle(exchange_name) reader = bundle.get_reader(data_frequency, path=path) if start_dt is None: @@ -514,24 +514,39 @@ class TestExchangeBundle: df = get_df_from_arrays(arrays, periods) folder = os.path.join( - tempfile.gettempdir(), 'catalyst', exchange.name, asset.symbol + tempfile.gettempdir(), 'catalyst', exchange_name, asset.symbol ) ensure_directory(folder) path = os.path.join(folder, filename + '.csv') log.info('creating csv file: {}'.format(path)) - 
print('HEAD\n{}'.format(df.head(10))) - print('TAIL\n{}'.format(df.tail(10))) + print('HEAD\n{}'.format(df.head(100))) + print('TAIL\n{}'.format(df.tail(100))) df.to_csv(path) pass def test_ingest_csv(self): data_frequency = 'minute' exchange_name = 'bittrex' - path = '/Users/fredfortier/Dropbox/Enigma/Data/bat_eth.csv' + path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv' exchange_bundle = ExchangeBundle(exchange_name) exchange_bundle.ingest_csv(path, data_frequency) + exchange = get_exchange(exchange_name) + asset = exchange.get_asset('bat_eth') + + start_dt = pd.to_datetime('2017-6-3', utc=True) + end_dt = pd.to_datetime('2017-8-3 19:24', utc=True) + self._bundle_to_csv( + asset=asset, + exchange_name=exchange.name, + data_frequency=data_frequency, + filename='{}_{}_{}'.format( + exchange_name, data_frequency, asset.symbol + ), + start_dt=start_dt, + end_dt=end_dt + ) pass From c6fe45371c71041751a898186c7bb38840430efd Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 27 Nov 2017 13:20:00 -0700 Subject: [PATCH 32/37] ENH: removed default for --capital_base in backtesting --- catalyst/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 86e49ca2..feddb5b8 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -139,7 +139,6 @@ def ipython_only(option): @click.option( '--capital-base', type=float, - default=10e6, show_default=True, help='The starting capital for the simulation.', ) From 12d5915c8ee3503770c387b0b6d623e53aef4915 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 27 Nov 2017 15:02:13 -0700 Subject: [PATCH 33/37] ENH: DEBUG level can be easily overriden from the local environment --- catalyst/constants.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/catalyst/constants.py b/catalyst/constants.py index bbdae343..1793257e 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -1,8 +1,13 @@ # -*- coding: utf-8 -*- 
+import os import logbook -LOG_LEVEL = logbook.DEBUG +''' You can override the LOG level from your environment. + For example, if you want to see the DEBUG messages, run: + $ export CATALYST_LOG_LEVEL=10 +''' +LOG_LEVEL = int(os.environ.get('CATALYST_LOG_LEVEL', logbook.INFO)) DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' From ba46015bae48ef25e0e48a969ae1f6c41241046a Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 27 Nov 2017 17:16:44 -0500 Subject: [PATCH 34/37] BLD: completed implementation of issue #65, support for custom exchange data --- catalyst/exchange/exchange_algorithm.py | 4 +++- catalyst/exchange/exchange_bundle.py | 31 +++++++++++++++++-------- catalyst/exchange/exchange_errors.py | 4 ++++ 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/catalyst/exchange/exchange_algorithm.py b/catalyst/exchange/exchange_algorithm.py index d03d6efa..bd7c84a1 100644 --- a/catalyst/exchange/exchange_algorithm.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -114,10 +114,12 @@ class ExchangeTradingAlgorithmBase(TradingAlgorithm): else: exchange = self.exchanges[exchange_name] + data_frequency = self.data_frequency \ + if self.sim_params.arena == 'backtest' else None return self.asset_finder.lookup_symbol( symbol=symbol_str, exchange=exchange, - data_frequency=self.data_frequency, + data_frequency=data_frequency, as_of_date=_lookup_date ) diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index 3a2a57b2..2bfa6201 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -28,7 +28,8 @@ from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ TempBundleNotFoundError, \ NoDataAvailableOnExchange, \ - PricingDataNotLoadedError, DataCorruptionError, ExchangeSymbolsNotFound + PricingDataNotLoadedError, DataCorruptionError, ExchangeSymbolsNotFound, \ + PricingDataValueError from 
catalyst.exchange.exchange_utils import get_exchange_folder, \ get_exchange_symbols, save_exchange_symbols from catalyst.utils.cli import maybe_show_progress @@ -464,7 +465,8 @@ class ExchangeBundle: start = earliest_trade if end is None or (last_entry is not None and end > last_entry): - end = last_entry + end = last_entry.replace(minute=59, hour=23) \ + if data_frequency == 'minute' else last_entry if end is None or start is None or start > end: raise NoDataAvailableOnExchange( @@ -960,7 +962,7 @@ class ExchangeBundle: trailing_bar_count=None, reset_reader=False): start_dt = get_start_dt(end_dt, bar_count, data_frequency, False) - start_dt, end_dt = self.get_adj_dates( + start_dt, _ = self.get_adj_dates( start_dt, end_dt, assets, data_frequency ) @@ -991,11 +993,11 @@ class ExchangeBundle: series = dict() for asset in assets: - asset_start_dt, asset_end_dt = self.get_adj_dates( + asset_start_dt, _ = self.get_adj_dates( start_dt, end_dt, assets, data_frequency ) in_bundle = range_in_bundle( - asset, asset_start_dt, asset_end_dt, reader + asset, asset_start_dt, end_dt, reader ) if not in_bundle: raise PricingDataNotLoadedError( @@ -1006,11 +1008,11 @@ class ExchangeBundle: symbol_list=asset.symbol, data_frequency=data_frequency, start_dt=asset_start_dt, - end_dt=asset_end_dt + end_dt=end_dt ) periods = self.get_calendar_periods_range( - asset_start_dt, asset_end_dt, data_frequency + asset_start_dt, end_dt, data_frequency ) # This does not behave well when requesting multiple assets # when the start or end date of one asset is outside of the range @@ -1028,13 +1030,22 @@ class ExchangeBundle: exchange=self.exchange_name, symbols=asset.symbol, start_dt=asset_start_dt, - end_dt=asset_end_dt + end_dt=end_dt ) field_values = arrays[0][:, 0] - value_series = pd.Series(field_values, index=periods) - series[asset] = value_series + try: + value_series = pd.Series(field_values, index=periods) + series[asset] = value_series + except ValueError as e: + raise 
PricingDataValueError( + exchange=asset.exchange, + symbol=asset.symbol, + start_dt=asset_start_dt, + end_dt=end_dt, + error=e + ) return series diff --git a/catalyst/exchange/exchange_errors.py b/catalyst/exchange/exchange_errors.py index a36bb23a..cb4f4d32 100644 --- a/catalyst/exchange/exchange_errors.py +++ b/catalyst/exchange/exchange_errors.py @@ -217,6 +217,10 @@ class PricingDataNotLoadedError(ZiplineError): '{data_frequency} -i {symbol_list}`. See catalyst documentation ' 'for details.').strip() +class PricingDataValueError(ZiplineError): + msg = ('Unable to retrieve pricing data for {exchange} {symbol} ' + '[{start_dt} - {end_dt}]: {error}').strip() + class DataCorruptionError(ZiplineError): msg = ('Unable to validate data for {exchange} {symbols} in date range ' From c2f3e00d99730d09ed9ee03a695030984ba6c608 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 27 Nov 2017 17:27:43 -0500 Subject: [PATCH 35/37] BUG: Adding back missing constants --- catalyst/constants.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/catalyst/constants.py b/catalyst/constants.py index 1793257e..35e1f727 100644 --- a/catalyst/constants.py +++ b/catalyst/constants.py @@ -9,6 +9,10 @@ import logbook ''' LOG_LEVEL = int(os.environ.get('CATALYST_LOG_LEVEL', logbook.INFO)) +SYMBOLS_URL = 'https://s3.amazonaws.com/enigmaco/catalyst-exchanges/' \ + '{exchange}/symbols.json' + DATE_TIME_FORMAT = '%Y-%m-%d %H:%M' +DATE_FORMAT = '%Y-%m-%d' AUTO_INGEST = False \ No newline at end of file From 2bb11db412ba77bd1d8a545a12f60672df3ecdd0 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 27 Nov 2017 17:28:13 -0500 Subject: [PATCH 36/37] BLD: modified sample algo for testing --- catalyst/examples/mean_reversion_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/examples/mean_reversion_simple.py b/catalyst/examples/mean_reversion_simple.py index 6660c829..6d4922e1 100644 --- a/catalyst/examples/mean_reversion_simple.py +++ 
b/catalyst/examples/mean_reversion_simple.py @@ -235,7 +235,7 @@ def analyze(context=None, perf=None): if __name__ == '__main__': # The execution mode: backtest or live - MODE = 'live' + MODE = 'backtest' if MODE == 'backtest': folder = os.path.join( From 9397b3fd5a32c6e9b8d076902bc257f952167484 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Mon, 27 Nov 2017 17:36:44 -0500 Subject: [PATCH 37/37] BLD: testing simple universe with Bitfinex --- catalyst/examples/simple_universe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index 20a4fc78..ff79c763 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -108,7 +108,7 @@ def fill(series): if __name__ == '__main__': - start_date = pd.to_datetime('2017-01-01', utc=True) + start_date = pd.to_datetime('2017-11-10', utc=True) end_date = pd.to_datetime('2017-11-13', utc=True) performance = run_algorithm(start=start_date, end=end_date, @@ -116,7 +116,7 @@ if __name__ == '__main__': initialize=initialize, handle_data=handle_data, analyze=analyze, - exchange_name='poloniex', + exchange_name='bitfinex', data_frequency='minute', base_currency='btc', live=False,