From cb870422c30caaed8af082dab6cd2028c1e6404f Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Wed, 25 Oct 2017 12:11:40 -0700 Subject: [PATCH 1/7] Create simple_universe.py This example aims to help users get familiar with catalyst API's to collect and handle data. --- catalyst/examples/simple_universe.py | 107 +++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 catalyst/examples/simple_universe.py diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py new file mode 100644 index 00000000..4909fee2 --- /dev/null +++ b/catalyst/examples/simple_universe.py @@ -0,0 +1,107 @@ +""" +Requires Catalyst version 0.3.0 or above +Tested on Catalyst version 0.3.2 + +These example aims to provide and easy way for users to learn how to collect data from the different exchanges. +You simply need to specify the exchange and the market that you want to focus on. +You will all see how to create a universe and filter it base on the exchange and the market you desire. + +The example prints out the closing price of all the pairs for a given market-exchange every 30 minutes. +The example also contains the ohlcv minute data for the past seven days which could be used to create indicators +Use this as the backbone to create your own trading strategies. +""" + +import pandas as pd +from catalyst import run_algorithm +from catalyst.exchange.exchange_utils import get_exchange_symbols + +from catalyst.api import ( + symbols, +) + + +def initialize(context): + context.i = -1 # counts the minutes + context.exchange = 'poloniex' # must match the exchange specified in run_algorithm + context.base_currency = 'btc' # must match the base currency specified in run_algorithm + + +def handle_data(context, data): + lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history + context.i += 1 + + # we must first wait until enough minutes, hours or days have passed for data history to work + if context.i < lookback: + return + + # current date formatted into a string + today = context.blotter.current_dt.strftime('%Y-%m-%d %H:%M:%S') + date, time = str(today).split(' ') + + # update universe everyday + new_day = 60 * 24 + if not context.i % new_day: + context.universe = universe(context, date) + + # get data every 30 minutes + minutes = 30 + if not context.i % minutes and context.universe: + # we iterate for every pair in the current universe + for coin in context.coins: + pair = str(coin.symbol) + + # ohlcv data (the standard data required for candlestick or indicators/signals) + open = data.history(coin, 'open', bar_count=lookback, frequency='1m').ffill().bfill() + high = data.history(coin, 'high', bar_count=lookback, frequency='1m').ffill().bfill() + low = data.history(coin, 'low', bar_count=lookback, frequency='1m').ffill().bfill() + close = data.history(coin, 'price', bar_count=lookback, frequency='1m').ffill().bfill() + volume = data.history(coin, 'volume', bar_count=lookback, frequency='1m').ffill().bfill() + + # close[-1] is the equivalent to current price + # displays the minute price for each pair every 30 minutes + print(today, pair, close[-1]) + + +def analyze(context=None, results=None): + pass + + +def universe(context, date): + # Get the universe for a given exchange and a given base_currency market + # Example: Poloniex BTC Market + json_symbols = get_exchange_symbols(context.exchange) # get all the pairs for the exchange + poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe + poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], + axis=1) + poloniex_universe_df['market_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[0], + axis=1) + # Filter all the exchange pairs to only the ones for a give base currency + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] + + # Filter all the pairs to ensure that pair existed in the current date + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < date] + context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols + return poloniex_universe_df.symbol.tolist() + + +if __name__ == '__main__': + start_date = pd.to_datetime('2017-01-01', utc=True) + end_date = pd.to_datetime('2017-10-15', utc=True) + + performance = run_algorithm(start=start_date, end=end_date, + capital_base=10000.0, + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex', + bundle='poloniex', + data_frequency='minute', + base_currency='btc', + live=False, + live_graph=False, + algo_namespace='simple_universe') + +""" +Run in Terminal (inside catalyst environment): +python simple_universe.py +""" From c8cc2edd36e2d069b7e0be94a1ecc7ab18880661 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 15:16:13 -0700 Subject: [PATCH 2/7] Convert to 30 minutes ohlcv data --- catalyst/examples/simple_universe.py | 56 ++++++++++++++++++---------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index 4909fee2..172837bb 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -9,9 +9,13 @@ You will all see how to create a universe and filter it base on the exchange and The example prints out the closing price of all the pairs for a given market-exchange every 30 minutes. The example also contains the ohlcv minute data for the past seven days which could be used to create indicators Use this as the backbone to create your own trading strategies. + +Variables lookback date and date are used to ensure data for a coin existed on the lookback period specified. """ +import numpy as np import pandas as pd +from datetime import timedelta from catalyst import run_algorithm from catalyst.exchange.exchange_utils import get_exchange_symbols @@ -30,18 +34,16 @@ def handle_data(context, data): lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history context.i += 1 - # we must first wait until enough minutes, hours or days have passed for data history to work - if context.i < lookback: - return - # current date formatted into a string - today = context.blotter.current_dt.strftime('%Y-%m-%d %H:%M:%S') - date, time = str(today).split(' ') + today = context.blotter.current_dt + date, time = today.strftime('%Y-%m-%d %H:%M:%S').split(' ') + lookback_date = today - timedelta(days=(lookback / (60 * 24))) # subtract the amount of days specified in lookback + lookback_date = lookback_date.strftime('%Y-%m-%d %H:%M:%S').split(' ')[0] # get only the date as a string # update universe everyday new_day = 60 * 24 if not context.i % new_day: - context.universe = universe(context, date) + context.universe = universe(context, lookback_date, date) # get data every 30 minutes minutes = 30 @@ -50,25 +52,30 @@ def handle_data(context, data): for coin in context.coins: pair = str(coin.symbol) - # ohlcv data (the standard data required for candlestick or indicators/signals) - open = data.history(coin, 'open', bar_count=lookback, frequency='1m').ffill().bfill() - high = data.history(coin, 'high', bar_count=lookback, frequency='1m').ffill().bfill() - low = data.history(coin, 'low', bar_count=lookback, frequency='1m').ffill().bfill() - close = data.history(coin, 'price', bar_count=lookback, frequency='1m').ffill().bfill() - volume = data.history(coin, 'volume', bar_count=lookback, frequency='1m').ffill().bfill() + # 30 minute interval ohlcv data (the standard data required for candlestick or indicators/signals) + # 30T means 30 minutes re-sampling of one minute data. change to your desire time interval. + open = fill(data.history(coin, 'open', bar_count=lookback, frequency='1m')).resample('30T').first() + high = fill(data.history(coin, 'high', bar_count=lookback, frequency='1m')).resample('30T').max() + low = fill(data.history(coin, 'low', bar_count=lookback, frequency='1m')).resample('30T').min() + close = fill(data.history(coin, 'price', bar_count=lookback, frequency='1m')).resample('30T').last() + volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='1m')).resample('30T').sum() # close[-1] is the equivalent to current price # displays the minute price for each pair every 30 minutes - print(today, pair, close[-1]) + print(today, pair, open[-1], high[-1], low[-1], close[-1], volume[-1]) + + # ---------------------------------------------------------------------------------------------------------- + # -------------------------------------- Insert Your Strategy Here ----------------------------------------- + # ---------------------------------------------------------------------------------------------------------- def analyze(context=None, results=None): pass -def universe(context, date): - # Get the universe for a given exchange and a given base_currency market - # Example: Poloniex BTC Market +# Get the universe for a given exchange and a given base_currency market +# Example: Poloniex BTC Market +def universe(context, lookback_date, current_date): json_symbols = get_exchange_symbols(context.exchange) # get all the pairs for the exchange poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], @@ -78,12 +85,23 @@ def universe(context, date): # Filter all the exchange pairs to only the ones for a give base currency poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] - # Filter all the pairs to ensure that pair existed in the current date - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < date] + # Filter all the pairs to ensure that pair existed in the current date range + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < lookback_date] + poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.end_daily >= current_date] context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols return poloniex_universe_df.symbol.tolist() +# Replace all NA, NAN or infinite values with its nearest value +def fill(series): + if isinstance(series, pd.Series): + return series.replace([np.inf, -np.inf], np.nan).ffill().bfill() + elif isinstance(series, np.ndarray): + return pd.Series(series).replace([np.inf, -np.inf], np.nan).ffill().bfill().values + else: + return series + + if __name__ == '__main__': start_date = pd.to_datetime('2017-01-01', utc=True) end_date = pd.to_datetime('2017-10-15', utc=True) From 76a8362e3d390a196bc421c7af958de0cf467452 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 15:17:02 -0700 Subject: [PATCH 3/7] Update simple_universe.py --- catalyst/examples/simple_universe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index 172837bb..f5c0a1d2 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -1,6 +1,6 @@ """ Requires Catalyst version 0.3.0 or above -Tested on Catalyst version 0.3.2 +Tested on Catalyst version 0.3.3 These example aims to provide and easy way for users to learn how to collect data from the different exchanges. You simply need to specify the exchange and the market that you want to focus on. From cd0157347fa16735df06ba0b9cb085c334b25a46 Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 26 Oct 2017 18:10:12 -0700 Subject: [PATCH 4/7] Refactoring --- catalyst/examples/simple_universe.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index f5c0a1d2..c4e24182 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -77,19 +77,19 @@ def analyze(context=None, results=None): # Example: Poloniex BTC Market def universe(context, lookback_date, current_date): json_symbols = get_exchange_symbols(context.exchange) # get all the pairs for the exchange - poloniex_universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe - poloniex_universe_df['base_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[1], + universe_df = pd.DataFrame.from_dict(json_symbols).transpose().astype(str) # convert into a dataframe + universe_df['base_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[1], axis=1) - poloniex_universe_df['market_currency'] = poloniex_universe_df.apply(lambda row: row.symbol.split('_')[0], + universe_df['market_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[0], axis=1) # Filter all the exchange pairs to only the ones for a give base currency - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df['base_currency'] == context.base_currency] + universe_df = universe_df[universe_df['base_currency'] == context.base_currency] # Filter all the pairs to ensure that pair existed in the current date range - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.start_date < lookback_date] - poloniex_universe_df = poloniex_universe_df[poloniex_universe_df.end_daily >= current_date] - context.coins = symbols(*poloniex_universe_df.symbol) # convert all the pairs to symbols - return poloniex_universe_df.symbol.tolist() + universe_df = universe_df[universe_df.start_date < lookback_date] + universe_df = universe_df[universe_df.end_daily >= current_date] + context.coins = symbols(*universe_df.symbol) # convert all the pairs to symbols + return universe_df.symbol.tolist() # Replace all NA, NAN or infinite values with its nearest value @@ -112,7 +112,6 @@ if __name__ == '__main__': handle_data=handle_data, analyze=analyze, exchange_name='poloniex', - bundle='poloniex', data_frequency='minute', base_currency='btc', live=False, From df14a949189a5e6b27875005f9ade8d211186fbf Mon Sep 17 00:00:00 2001 From: Abner Ayala-Acevedo Date: Thu, 16 Nov 2017 11:22:35 -0800 Subject: [PATCH 5/7] Modified for examples consistency. Fully tested on v0.3.8 --- catalyst/examples/simple_universe.py | 33 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/catalyst/examples/simple_universe.py b/catalyst/examples/simple_universe.py index c4e24182..20a4fc78 100644 --- a/catalyst/examples/simple_universe.py +++ b/catalyst/examples/simple_universe.py @@ -26,27 +26,29 @@ from catalyst.api import ( def initialize(context): context.i = -1 # counts the minutes - context.exchange = 'poloniex' # must match the exchange specified in run_algorithm - context.base_currency = 'btc' # must match the base currency specified in run_algorithm + context.exchange = context.exchanges.values()[0].name.lower() # exchange name + context.base_currency = context.exchanges.values()[0].base_currency.lower() # market base currency def handle_data(context, data): - lookback = 60 * 24 * 7 # (minutes, hours, days) of how far to lookback in the data history context.i += 1 + lookback_days = 7 # 7 days # current date formatted into a string - today = context.blotter.current_dt + today = data.current_dt date, time = today.strftime('%Y-%m-%d %H:%M:%S').split(' ') - lookback_date = today - timedelta(days=(lookback / (60 * 24))) # subtract the amount of days specified in lookback + lookback_date = today - timedelta(days=lookback_days) # subtract the amount of days specified in lookback lookback_date = lookback_date.strftime('%Y-%m-%d %H:%M:%S').split(' ')[0] # get only the date as a string # update universe everyday - new_day = 60 * 24 + new_day = 60 * 24 # assuming data_frequency='minute' if not context.i % new_day: context.universe = universe(context, lookback_date, date) # get data every 30 minutes minutes = 30 + one_day_in_minutes = 1440 # 1440 assumes data_frequency='minute' + lookback = one_day_in_minutes / minutes * lookback_days # get N lookback_days of history data if not context.i % minutes and context.universe: # we iterate for every pair in the current universe for coin in context.coins: @@ -54,15 +56,15 @@ def handle_data(context, data): # 30 minute interval ohlcv data (the standard data required for candlestick or indicators/signals) # 30T means 30 minutes re-sampling of one minute data. change to your desire time interval. - open = fill(data.history(coin, 'open', bar_count=lookback, frequency='1m')).resample('30T').first() - high = fill(data.history(coin, 'high', bar_count=lookback, frequency='1m')).resample('30T').max() - low = fill(data.history(coin, 'low', bar_count=lookback, frequency='1m')).resample('30T').min() - close = fill(data.history(coin, 'price', bar_count=lookback, frequency='1m')).resample('30T').last() - volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='1m')).resample('30T').sum() + opened = fill(data.history(coin, 'open', bar_count=lookback, frequency='30T')).values + high = fill(data.history(coin, 'high', bar_count=lookback, frequency='30T')).values + low = fill(data.history(coin, 'low', bar_count=lookback, frequency='30T')).values + close = fill(data.history(coin, 'price', bar_count=lookback, frequency='30T')).values + volume = fill(data.history(coin, 'volume', bar_count=lookback, frequency='30T')).values # close[-1] is the equivalent to current price # displays the minute price for each pair every 30 minutes - print(today, pair, open[-1], high[-1], low[-1], close[-1], volume[-1]) + print(today, pair, opened[-1], high[-1], low[-1], close[-1], volume[-1]) # ---------------------------------------------------------------------------------------------------------- # -------------------------------------- Insert Your Strategy Here ----------------------------------------- @@ -82,6 +84,7 @@ def universe(context, lookback_date, current_date): axis=1) universe_df['market_currency'] = universe_df.apply(lambda row: row.symbol.split('_')[0], axis=1) + # Filter all the exchange pairs to only the ones for a give base currency universe_df = universe_df[universe_df['base_currency'] == context.base_currency] @@ -89,6 +92,8 @@ def universe(context, lookback_date, current_date): universe_df = universe_df[universe_df.start_date < lookback_date] universe_df = universe_df[universe_df.end_daily >= current_date] context.coins = symbols(*universe_df.symbol) # convert all the pairs to symbols + + # print(universe_df.symbol.tolist()) return universe_df.symbol.tolist() @@ -104,10 +109,10 @@ def fill(series): if __name__ == '__main__': start_date = pd.to_datetime('2017-01-01', utc=True) - end_date = pd.to_datetime('2017-10-15', utc=True) + end_date = pd.to_datetime('2017-11-13', utc=True) performance = run_algorithm(start=start_date, end=end_date, - capital_base=10000.0, + capital_base=100.0, # amount of base_currency, not always in dollars unless usd initialize=initialize, handle_data=handle_data, analyze=analyze, From c260e188b0c220f79173892299ffb0f63145ec1f Mon Sep 17 00:00:00 2001 From: fredfortier Date: Thu, 16 Nov 2017 18:14:24 -0500 Subject: [PATCH 6/7] BUG: looking a potential resampling issue --- tests/exchange/test_data_portal.py | 37 ++++++++++++++++++++++++++++++ tests/exchange/test_utils.py | 15 +++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py index 31c67295..efd2ffe6 100644 --- a/tests/exchange/test_data_portal.py +++ b/tests/exchange/test_data_portal.py @@ -113,3 +113,40 @@ class TestExchangeDataPortal: ) log.info('found history window: {}'.format(data)) + + def test_validate_resample(self): + symbol = ['eth_btc'] + exchange_name = 'poloniex' + exchange = get_exchange(exchange_name, base_currency=symbol) + + assets = exchange.get_assets(symbols=symbol) + + date = rnd_history_date_days( + max_days=10, + last_dt=pd.to_datetime('2017-11-1', utc=True) + ) + bar_count = rnd_bar_count(max_bars=10) + sample_minutes = 15 + sample_data = self.data_portal_backtest.get_history_window( + assets=assets, + end_dt=date, + bar_count=bar_count, + frequency='{}T'.format(sample_minutes), + field='close', + data_frequency='daily' + ) + minute_data = self.data_portal_backtest.get_history_window( + assets=assets, + end_dt=date, + bar_count=bar_count * sample_minutes, + frequency='1T', + field='close', + data_frequency='daily' + ) + resampled_minute_data = minute_data.resample( + '{}T'.format(sample_minutes)) + + print(sample_data.tail(10)) + print(resampled_minute_data.tail(10)) + print(minute_data.tail(10)) + pass diff --git a/tests/exchange/test_utils.py b/tests/exchange/test_utils.py index eb53bff5..d7f1df87 100644 --- a/tests/exchange/test_utils.py +++ b/tests/exchange/test_utils.py @@ -4,11 +4,20 @@ from random import randint import pandas as pd -def rnd_history_date_days(max_days=30): - now = pd.Timestamp.utcnow() +def rnd_history_date_days(max_days=30, last_dt=None): + if last_dt is None: + last_dt = pd.Timestamp.utcnow() + days = randint(0, max_days) - return now - timedelta(days=days) + return last_dt - timedelta(days=days) + + +def rnd_history_date_minutes(max_minutes=1440): + now = pd.Timestamp.utcnow() + days = randint(0, max_minutes) + + return now - timedelta(minutes=days) def rnd_bar_count(max_bars=21): From f3dca74e87dae8e9c144b1d31369956e3eb702d6 Mon Sep 17 00:00:00 2001 From: fredfortier Date: Fri, 17 Nov 2017 19:40:49 -0500 Subject: [PATCH 7/7] BUG: fixed a get_candles issue with the Poloniex exchange --- catalyst/exchange/poloniex/poloniex.py | 5 ++--- tests/exchange/test_poloniex.py | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 62845845..0d283d8b 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -226,10 +226,9 @@ class Poloniex(Exchange): ohlc_map = dict() for asset in asset_list: + delta = end_dt - pd.to_datetime('1970-1-1', utc=True) + end = int(delta.total_seconds()) - # TODO: what's wrong with this? - # end = int(time.mktime(end_dt.timetuple())) - end = int(time.time()) if bar_count is None: start = end - 2 * frequency else: diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py index b2ad56c3..60ff1e65 100644 --- a/tests/exchange/test_poloniex.py +++ b/tests/exchange/test_poloniex.py @@ -1,9 +1,10 @@ -from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.exchange.poloniex.poloniex import Poloniex from catalyst.finance.order import Order from base import BaseExchangeTestCase from logbook import Logger from catalyst.exchange.exchange_utils import get_exchange_auth +import pandas as pd +from test_utils import output_df log = Logger('test_poloniex') @@ -51,18 +52,19 @@ class TestPoloniex(BaseExchangeTestCase): def test_get_candles(self): log.info('retrieving candles') - ohlcv_neo = self.exchange.get_candles( - freq='5T', - assets=self.exchange.get_asset('eth_btc') - ) - ohlcv_neo_ubq = self.exchange.get_candles( - freq='5T', - assets=[ - self.exchange.get_asset('neos_btc'), - self.exchange.get_asset('via_btc') - ], - bar_count=14 + assets = self.exchange.get_asset('eth_btc') + ohlcv = self.exchange.get_candles( + end_dt=pd.to_datetime('2017-11-01', utc=True), + freq='30T', + assets=assets, + bar_count=200 ) + df = pd.DataFrame(ohlcv) + df.set_index('last_traded', drop=True, inplace=True) + log.info(df.tail(25)) + + path = output_df(df, assets, 'candles') + log.info('saved candles: {}'.format(path)) pass def test_tickers(self):