diff --git a/catalyst/__main__.py b/catalyst/__main__.py index 7a9e6316..1edb53f4 100644 --- a/catalyst/__main__.py +++ b/catalyst/__main__.py @@ -8,6 +8,8 @@ import pandas as pd from six import text_type from catalyst.data import bundles as bundles_module +from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst.exchange.init_utils import get_exchange from catalyst.utils.cli import Date, Timestamp from catalyst.utils.run_algo import _run, load_extensions @@ -38,6 +40,7 @@ except NameError: default=True, help="Don't load the default catalyst extension.py file in $ZIPLINE_HOME.", ) +@click.version_option() def main(extension, strict_extensions, default_extension): """Top level catalyst entry point. """ @@ -126,7 +129,7 @@ def ipython_only(option): ) @click.option( '--data-frequency', - type=click.Choice({'daily', '5-minute', 'minute'}), + type=click.Choice({'daily', 'minute'}), default='daily', show_default=True, help='The data frequency of the simulation.', @@ -187,17 +190,11 @@ def ipython_only(option): default=None, help='Should the algorithm methods be resolved in the local namespace.' )) -@click.option( - '--live/--no-live', - is_flag=True, - default=False, - help='Enable live trading.', -) @click.option( '-x', '--exchange-name', - type=click.Choice({'bitfinex', 'bittrex'}), - help='The name of the targeted exchange (supported: bitfinex, bittrex).', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', ) @click.option( '-n', @@ -210,12 +207,6 @@ def ipython_only(option): help='The base currency used to calculate statistics ' '(e.g. 
usd, btc, eth).', ) -@click.option( - '--live-graph/--no-live-graph', - is_flag=True, - default=False, - help='Display live graph.', -) @click.pass_context def run(ctx, algofile, @@ -230,44 +221,34 @@ def run(ctx, output, print_algo, local_namespace, - live, exchange_name, algo_namespace, - base_currency, - live_graph): + base_currency): """Run a backtest for the given algorithm. """ - if live: - if exchange_name is None: - ctx.fail("must specify an exchange name '-x' in live execution " - "mode '--live'") - if algo_namespace is None: - ctx.fail("must specify an algorithm name '-n' in live execution " - "mode '--live'") - if base_currency is None: - ctx.fail("must specify a base currency '-c' in live " - "execution mode '--live'") - else: - # check that the start and end dates are passed correctly - if start is None and end is None: - # check both at the same time to avoid the case where a user - # does not pass either of these and then passes the first only - # to be told they need to pass the second argument also - ctx.fail( - "must specify dates with '-s' / '--start' and '-e' / '--end'", - ) - if start is None: - ctx.fail("must specify a start date with '-s' / '--start'") - if end is None: - ctx.fail("must specify an end date with '-e' / '--end'") - if (algotext is not None) == (algofile is not None): ctx.fail( "must specify exactly one of '-f' / '--algofile' or" " '-t' / '--algotext'", ) + # check that the start and end dates are passed correctly + if start is None and end is None: + # check both at the same time to avoid the case where a user + # does not pass either of these and then passes the first only + # to be told they need to pass the second argument also + ctx.fail( + "must specify dates with '-s' / '--start' and '-e' / '--end'", + ) + if start is None: + ctx.fail("must specify a start date with '-s' / '--start'") + if end is None: + ctx.fail("must specify an end date with '-e' / '--end'") + + if exchange_name is None: + ctx.fail("must specify an 
exchange name '-x'") + perf = _run( initialize=None, handle_data=None, @@ -287,11 +268,11 @@ def run(ctx, print_algo=print_algo, local_namespace=local_namespace, environ=os.environ, - live=live, + live=False, exchange=exchange_name, algo_namespace=algo_namespace, base_currency=base_currency, - live_graph=live_graph + live_graph=False ) if output == '-': @@ -335,15 +316,215 @@ def catalyst_magic(line, cell=None): raise ValueError('main returned non-zero status code: %d' % e.code) +@main.command() +@click.option( + '-f', + '--algofile', + default=None, + type=click.File('r'), + help='The file that contains the algorithm to run.', +) +@click.option( + '-t', + '--algotext', + help='The algorithm script to run.', +) +@click.option( + '-D', + '--define', + multiple=True, + help="Define a name to be bound in the namespace before executing" + " the algotext. For example '-Dname=value'. The value may be any python" + " expression. These are evaluated in order so they may refer to previously" + " defined names.", +) +@click.option( + '-o', + '--output', + default='-', + metavar='FILENAME', + show_default=True, + help="The location to write the perf data. If this is '-' the perf will" + " be written to stdout.", +) +@click.option( + '--print-algo/--no-print-algo', + is_flag=True, + default=False, + help='Print the algorithm to stdout.', +) +@ipython_only(click.option( + '--local-namespace/--no-local-namespace', + is_flag=True, + default=None, + help='Should the algorithm methods be resolved in the local namespace.' +)) +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the targeted exchange (supported: bitfinex, bittrex, poloniex).', +) +@click.option( + '-n', + '--algo-namespace', + help='A label assigned to the algorithm for data storage purposes.' +) +@click.option( + '-c', + '--base-currency', + help='The base currency used to calculate statistics ' + '(e.g. 
usd, btc, eth).', +) +@click.option( + '--live-graph/--no-live-graph', + is_flag=True, + default=False, + help='Display live graph.', +) +@click.pass_context +def live(ctx, + algofile, + algotext, + define, + output, + print_algo, + local_namespace, + exchange_name, + algo_namespace, + base_currency, + live_graph): + """Trade live with the given algorithm. + """ + if (algotext is not None) == (algofile is not None): + ctx.fail( + "must specify exactly one of '-f' / '--algofile' or" + " '-t' / '--algotext'", + ) + + if exchange_name is None: + ctx.fail("must specify an exchange name '-x'") + if algo_namespace is None: + ctx.fail("must specify an algorithm name '-n' in live execution mode") + if base_currency is None: + ctx.fail("must specify a base currency '-c' in live execution mode") + + perf = _run( + initialize=None, + handle_data=None, + before_trading_start=None, + analyze=None, + algofile=algofile, + algotext=algotext, + defines=define, + data_frequency=None, + capital_base=None, + data=None, + bundle=None, + bundle_timestamp=None, + start=None, + end=None, + output=output, + print_algo=print_algo, + local_namespace=local_namespace, + environ=os.environ, + live=True, + exchange=exchange_name, + algo_namespace=algo_namespace, + base_currency=base_currency, + live_graph=live_graph + ) + + if output == '-': + click.echo(str(perf)) + elif output != os.devnull: # make the catalyst magic not write any data + perf.to_pickle(output) + + return perf + + +@main.command(name='ingest-exchange') +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the exchange bundle to ingest (supported: bitfinex,' + ' bittrex, poloniex).', +) +@click.option( + '-f', + '--data-frequency', + type=click.Choice({'daily', 'minute', 'daily,minute', 'minute,daily'}), + default='daily', + show_default=True, + help='The data frequency of the desired OHLCV bars.', +) +@click.option( + '-s', + '--start', + default=None, + 
type=Date(tz='utc', as_timestamp=True), + help='The start date of the data range. (default: one year from end date)', +) +@click.option( + '-e', + '--end', + default=None, + type=Date(tz='utc', as_timestamp=True), + help='The end date of the data range. (default: today)', +) +@click.option( + '-i', + '--include-symbols', + default=None, + help='A list of symbols to ingest (optional comma separated list)', +) +@click.option( + '--exclude-symbols', + default=None, + help='A list of symbols to exclude from the ingestion ' + '(optional comma separated list)', +) +@click.option( + '--show-progress/--no-show-progress', + default=True, + help='Print progress information to the terminal.' +) +def ingest_exchange(exchange_name, data_frequency, start, end, + include_symbols, exclude_symbols, show_progress): + """ + Ingest data for the given exchange. + """ + exchange = get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) + + click.echo('Ingesting exchange bundle {}...'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency=data_frequency, + include_symbols=include_symbols, + exclude_symbols=exclude_symbols, + start=start, + end=end, + show_progress=show_progress + ) + + @main.command() @click.option( '-b', '--bundle', - default='poloniex', metavar='BUNDLE-NAME', - show_default=True, + default=None, + show_default=False, help='The data bundle to ingest.', ) +@click.option( + '-x', + '--exchange-name', + type=click.Choice({'bitfinex', 'bittrex', 'poloniex'}), + help='The name of the exchange bundle to ingest (supported: bitfinex,' + ' bittrex, poloniex).', +) @click.option( '-c', '--compile-locally', @@ -362,9 +543,12 @@ def catalyst_magic(line, cell=None): default=True, help='Print progress information to the terminal.' ) -def ingest(bundle, compile_locally, assets_version, show_progress): +@click.pass_context +def ingest(ctx, bundle, exchange_name, compile_locally, assets_version, + show_progress): """Ingest the data for the given bundle. 
""" + bundles_module.ingest( bundle, os.environ, @@ -384,6 +568,13 @@ def ingest(bundle, compile_locally, assets_version, show_progress): show_default=True, help='The data bundle to clean.', ) +@click.option( + '-x', + '--exchange_name', + metavar='EXCHANGE-NAME', + show_default=True, + help='The exchange bundle name to clean.', +) @click.option( '-e', '--before', diff --git a/catalyst/algorithm.py b/catalyst/algorithm.py index 5601f133..65e2fa60 100644 --- a/catalyst/algorithm.py +++ b/catalyst/algorithm.py @@ -134,10 +134,7 @@ from catalyst.utils.security_list import SecurityList import catalyst.protocol from catalyst.sources.requests_csv import PandasRequestsCSV -from catalyst.gens.sim_engine import ( - MinuteSimulationClock, - FiveMinuteSimulationClock, -) +from catalyst.gens.sim_engine import MinuteSimulationClock from catalyst.sources.benchmark_source import BenchmarkSource from catalyst.catalyst_warnings import ZiplineDeprecationWarning @@ -174,7 +171,7 @@ class TradingAlgorithm(object): algo_filename : str, optional The filename for the algoscript. This will be used in exception tracebacks. default: ''. - data_frequency : {'daily', '5-minute', 'minute'}, optional + data_frequency : {'daily', 'minute'}, optional The duration of the bars. instant_fill : bool, optional Whether to fill orders immediately or on next bar. default: False @@ -227,7 +224,7 @@ class TradingAlgorithm(object): script : str Algoscript that contains initialize and handle_data function definition. - data_frequency : {'daily', '5-minute', 'minute'} + data_frequency : {'daily', 'minute'} The duration of the bars. capital_base : float How much capital to start with. 
@@ -435,8 +432,6 @@ class TradingAlgorithm(object): if get_loader is not None: if data_frequency == 'daily': all_dates = self.trading_calendar.all_sessions - elif data_frequency == '5-minute': - all_dates = self.trading_calendar.all_five_minutes elif data_frequency == 'minute': all_dates = self.trading_calendar.all_minutes else: @@ -468,7 +463,7 @@ class TradingAlgorithm(object): self._in_before_trading_start = True with handle_non_market_minutes(data) if \ - self.data_frequency in ('minute', '5-minute') else ExitStack(): + self.data_frequency == 'minute' else ExitStack(): self._before_trading_start(self, data) self._in_before_trading_start = False @@ -524,11 +519,10 @@ class TradingAlgorithm(object): market_closes = trading_o_and_c['market_close'] minutely_emission = False - if self.sim_params.data_frequency in set(('minute', '5-minute')): + if self.sim_params.data_frequency == 'minute': market_opens = trading_o_and_c['market_open'] - minutely_emission = self.sim_params.emission_rate in \ - set(('minute', '5-minute')) + minutely_emission = self.sim_params.emission_rate == 'minute' else: # in daily mode, we want to have one bar per session, timestamped # as the last minute of the session. 
@@ -552,15 +546,6 @@ class TradingAlgorithm(object): 'UTC', ) - if self.sim_params.data_frequency == '5-minute': - return FiveMinuteSimulationClock( - self.sim_params.sessions, - execution_opens, - execution_closes, - before_trading_start_minutes, - minute_emission=minutely_emission, - ) - return MinuteSimulationClock( self.sim_params.sessions, execution_opens, @@ -692,8 +677,6 @@ class TradingAlgorithm(object): time_count = times.nunique() if time_count == 1: self.sim_params.data_frequency = 'daily' - elif time_count == 288: - self.sim_params.data_frequency = '5-minute' else: self.sim_params.data_frequency = 'minute' @@ -715,8 +698,6 @@ class TradingAlgorithm(object): if self.sim_params.data_frequency == 'daily': equity_reader_arg = 'equity_daily_reader' - elif self.sim_params.data_frequency == '5-minute': - equity_daily_reader = 'equity_5_minute_reader' elif self.sim_params.data_frequency == 'minute': equity_reader_arg = 'equity_minute_reader' equity_reader = PanelBarReader( @@ -960,9 +941,9 @@ class TradingAlgorithm(object): The arena from the simulation parameters. This will normally be ``'backtest'`` but some systems may use this distinguish live trading from backtesting. - data_frequency : {'daily', '5-minute', 'minute'} + data_frequency : {'daily', 'minute'} data_frequency tells the algorithm if it is running with - daily, minute, or five-minute mode. + daily or minute mode. start : datetime The start date for the simulation. end : datetime @@ -1136,19 +1117,12 @@ class TradingAlgorithm(object): 'date_rule. You should use keyword argument ' 'time_rule= when calling schedule_function without ' 'specifying a date_rule', stacklevel=3) - - freq = self.sim_params.data_frequency date_rule = date_rule or date_rules.every_day() - if freq is 'daily': - # ignore time rule in daily mode - time_rule = time_rules.every_minute() - else: - # use provided time rule or default to every minute or 5 minutes - # based on desired data frequency. 
- time_rule = time_rule or (time_rules.every_5_minutes() - if freq is '5-minute' else - time_rules.every_minute()) + time_rule = ((time_rule or time_rules.every_minute()) + if self.sim_params.data_frequency == 'minute' else + # If we are in daily mode the time_rule is ignored. + time_rules.every_minute()) # Check the type of the algorithm's schedule before pulling calendar # Note that the ExchangeTradingSchedule is currently the only @@ -1819,7 +1793,7 @@ class TradingAlgorithm(object): @data_frequency.setter def data_frequency(self, value): - assert value in ('daily', '5-minute', 'minute') + assert value in ('daily', 'minute') self.sim_params.data_frequency = value @api_method diff --git a/catalyst/assets/_assets.pyx b/catalyst/assets/_assets.pyx index 05144223..986d2174 100644 --- a/catalyst/assets/_assets.pyx +++ b/catalyst/assets/_assets.pyx @@ -395,6 +395,9 @@ cdef class TradingPair(Asset): cdef readonly float leverage cdef readonly object market_currency cdef readonly object base_currency + cdef readonly object end_daily + cdef readonly object end_minute + cdef readonly object exchange_symbol _kwargnames = frozenset({ 'sid', @@ -408,7 +411,11 @@ cdef class TradingPair(Asset): 'exchange_full', 'leverage', 'market_currency', - 'base_currency' + 'base_currency', + 'end_daily', + 'end_minute', + 'exchange_symbol', + 'min_trade_size' }) def __init__(self, object symbol, @@ -417,10 +424,14 @@ cdef class TradingPair(Asset): object asset_name=None, int sid=0, float leverage=1.0, + object end_daily=None, + object end_minute=None, object end_date=None, + object exchange_symbol=None, object first_traded=None, object auto_close_date=None, - object exchange_full=None): + object exchange_full=None, + object min_trade_size=None): """ Replicates the Asset constructor with some built-in conventions and a new 'leverage' attribute. 
@@ -472,10 +483,14 @@ cdef class TradingPair(Asset): :param asset_name: :param sid: :param leverage: + :param end_daily + :param end_minute :param end_date: + :param exchange_symbol: :param first_traded: :param auto_close_date: :param exchange_full: + :param min_trade_size: """ symbol = symbol.lower() @@ -509,23 +524,33 @@ cdef class TradingPair(Asset): first_traded=first_traded, auto_close_date=auto_close_date, exchange_full=exchange_full, + min_trade_size=min_trade_size ) self.leverage = leverage + self.end_daily = end_daily + self.end_minute = end_minute + self.exchange_symbol = exchange_symbol def __repr__(self): return 'Trading Pair {symbol}({sid}) Exchange: {exchange}, ' \ 'Introduced On: {start_date}, ' \ 'Market Currency: {market_currency}, ' \ 'Base Currency: {base_currency}, ' \ - 'Exchange Leverage: {leverage}'.format( + 'Exchange Leverage: {leverage}, ' \ + 'Minimum Trade Size: {min_trade_size} ' \ + 'Last daily ingestion: {end_daily} ' \ + 'Last minutely ingestion: {end_minute}'.format( symbol=self.symbol, sid=self.sid, exchange=self.exchange, start_date=self.start_date, market_currency=self.market_currency, base_currency=self.base_currency, - leverage=self.leverage + leverage=self.leverage, + min_trade_size=self.min_trade_size, + end_daily=self.end_daily, + end_minute=self.end_minute ) cpdef __reduce__(self): @@ -544,7 +569,8 @@ cdef class TradingPair(Asset): self.end_date, self.first_traded, self.auto_close_date, - self.exchange_full)) + self.exchange_full, + self.min_trade_size)) def make_asset_array(int size, Asset asset): cdef np.ndarray out = np.empty([size], dtype=object) diff --git a/catalyst/curate/poloniex.py b/catalyst/curate/poloniex.py index 911ac25f..e2a88476 100644 --- a/catalyst/curate/poloniex.py +++ b/catalyst/curate/poloniex.py @@ -2,10 +2,13 @@ import json, time, csv from datetime import datetime import pandas as pd import os, time, shutil, requests, logbook +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename 
+ DT_START = int(time.mktime(datetime(2010, 1, 1, 0, 0).timetuple())) DT_END = int(time.time()) CSV_OUT_FOLDER = '/var/tmp/catalyst/data/poloniex/' +CSV_OUT_FOLDER = '/Volumes/enigma/data/poloniex/' CONN_RETRIES = 2 logbook.StderrHandler().push_application() @@ -247,11 +250,45 @@ class PoloniexCurator(object): df.set_index('date', inplace=True) return df[start : end] + ''' + Generates a symbols.json file with corresponding start_date for each currencyPair + ''' + def generate_symbols_json(self, filename=None): + symbol_map = {} + + if(filename is None): + filename = get_exchange_symbols_filename('poloniex') + + with open(filename, 'w') as symbols: + for currencyPair in self.currency_pairs: + start = None + csv_fn = CSV_OUT_FOLDER + 'crypto_trades-' + currencyPair + '.csv' + with open(csv_fn, 'r') as f: + f.seek(0, os.SEEK_END) + if(f.tell() > 2): # First check file is not zero size + f.seek(-2, os.SEEK_END) # Jump to the second last byte. + while f.read(1) != b"\n": # Until EOL is found... + f.seek(-2, os.SEEK_CUR) # ...jump back the read byte plus one more. 
+ start = pd.to_datetime( f.readline().split(',')[1], infer_datetime_format=True) + + if(start is None): + start = time.gmtime() + base, market = currencyPair.lower().split('_') + symbol = '{market}_{base}'.format( market=market, base=base ) + symbol_map[currencyPair] = dict( + symbol = symbol, + start_date = start.strftime("%Y-%m-%d") + ) + json.dump(symbol_map, symbols, sort_keys=True, indent=2, separators=(',',':')) + if __name__ == '__main__': pc = PoloniexCurator() pc.get_currency_pairs() - + #pc.generate_symbols_json() + for currencyPair in pc.currency_pairs: pc.retrieve_trade_history(currencyPair) pc.write_ohlcv_file(currencyPair) + + \ No newline at end of file diff --git a/catalyst/data/_equities.pyx b/catalyst/data/_equities.pyx index c0652b02..b035b768 100644 --- a/catalyst/data/_equities.pyx +++ b/catalyst/data/_equities.pyx @@ -220,6 +220,8 @@ cpdef _read_bcolz_data(ctable_t table, outbuf_as_float = outbuf.astype(float64) * .000000001 outbuf_as_float[where_nan] = NAN results.append(outbuf_as_float) + elif column_name in ['volume']: + results.append(outbuf.astype(float64) * .000000001) else: results.append(outbuf) return results diff --git a/catalyst/data/_minute_bar_internal.pyx b/catalyst/data/_minute_bar_internal.pyx index 9ebb0841..bea12bc2 100644 --- a/catalyst/data/_minute_bar_internal.pyx +++ b/catalyst/data/_minute_bar_internal.pyx @@ -35,17 +35,6 @@ def minute_value(ndarray[long_t, ndim=1] market_opens, return market_opens[q] + r -@cython.cdivision(True) -def five_minute_value(ndarray[long_t, ndim=1] market_opens, - Py_ssize_t pos, - short five_minutes_per_day): - - cdef short q, r - q = cython.cdiv(pos, five_minutes_per_day) - r = cython.cmod(pos, five_minutes_per_day) - - return market_opens[q] + r - def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, ndarray[long_t, ndim=1] market_closes, long_t minute_val, @@ -99,26 +88,6 @@ def find_position_of_minute(ndarray[long_t, ndim=1] market_opens, return (market_open_loc * 
minutes_per_day) + delta -def find_position_of_five_minute(ndarray[long_t, ndim=1] market_opens, - ndarray[long_t, ndim=1] market_closes, - long_t five_minute_val, - short five_minutes_per_day, - bool forward_fill): - - cdef Py_ssize_t market_open_loc, market_open, delta - - market_open_loc = \ - searchsorted(market_opens, five_minute_val, side='right') - 1 - market_open = market_opens[market_open_loc] - market_close = market_closes[market_open_loc] - - if not forward_fill and ((five_minute_val - market_open) >= five_minutes_per_day): - raise ValueError("Given five minutes is not between an open and a close") - - delta = int_min(five_minute_val - market_open, market_close - market_open) - - return (market_open_loc * five_minutes_per_day) + delta - def find_last_traded_position_internal( ndarray[long_t, ndim=1] market_opens, ndarray[long_t, ndim=1] market_closes, @@ -189,50 +158,3 @@ def find_last_traded_position_internal( # found a trade event return -1 -def find_last_traded_five_minute_position_internal( - ndarray[long_t, ndim=1] market_opens, - ndarray[long_t, ndim=1] market_closes, - long_t end_five_minute, - long_t start_five_minute, - volumes, - short five_minutes_per_day): - cdef Py_ssize_t minute_pos, current_minute, q - - five_minute_pos = int_min( - find_position_of_five_minute( - market_opens, - market_closes, - end_five_minute, - five_minutes_per_day, - True, - ), - len(volumes) - 1, - ) - - while five_minute_pos >= 0: - current_five_minute = five_minute_value( - market_opens, five_minute_pos, five_minutes_per_day - ) - - q = cython.cdiv(five_minute_pos, five_minutes_per_day) - if current_five_minute > market_closes[q]: - five_minute_pos = find_position_of_five_minute( - market_opens, - market_closes, - market_closes[q], - five_minutes_per_day, - False, - ) - continue - - if current_five_minute < start_five_minute: - return -1 - - if volumes[five_minute_pos] != 0: - return five_minute_pos - - five_minute_pos -= 1 - - # we've gone to the beginning of this 
asset's range, and still haven't - # found a trade event - return -1 diff --git a/catalyst/data/bundles/base.py b/catalyst/data/bundles/base.py index 135dd531..6af7a0eb 100644 --- a/catalyst/data/bundles/base.py +++ b/catalyst/data/bundles/base.py @@ -60,10 +60,6 @@ class BaseBundle(object): def minutes_per_day(self): raise NotImplementedError() - @lazyval - def five_minutes_per_day(self): - raise NotImplementedError() - @lazyval def frequencies(self): raise NotImplementedError() @@ -115,7 +111,6 @@ class BaseBundle(object): environ, asset_db_writer, minute_bar_writer, - five_minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, @@ -162,7 +157,7 @@ class BaseBundle(object): # Post-process metadata using cached symbol frames, and write to # disk. This metadata must be written before any attempt to write - # either minute or 5-minute data. + # minute data. metadata = self._post_process_metadata( raw_metadata, cache, @@ -170,26 +165,6 @@ class BaseBundle(object): ) asset_db_writer.write(metadata) - # Compile 5-minute symbol data if bundle supports 5-minute mode and - # persist the dataset to disk. - ''' - if '5-minute' in self.frequencies: - five_minute_bar_writer.write( - self._fetch_symbol_iter( - api_key, - cache, - symbol_map, - calendar, - start_session, - end_session, - '5-minute', - retries, - ), - length=len(symbol_map), - show_progress=show_progress, - ) - ''' - # Compile minute symbol data if bundle supports minute mode and # persist the dataset to disk. 
if 'minute' in self.frequencies: diff --git a/catalyst/data/bundles/base_pricing.py b/catalyst/data/bundles/base_pricing.py index c5281fdd..7b94e4bc 100644 --- a/catalyst/data/bundles/base_pricing.py +++ b/catalyst/data/bundles/base_pricing.py @@ -47,10 +47,6 @@ class BaseCryptoPricingBundle(BasePricingBundle): def minutes_per_day(self): return 1440 - @lazyval - def five_minutes_per_day(self): - return 288 - @property def splits(self): return [] @@ -68,10 +64,6 @@ class BaseEquityPricingBundle(BasePricingBundle): def minutes_per_day(self): return 390 - @lazyval - def five_minutes_per_day(self): - return 78 - @property def splits(self): return self._splits diff --git a/catalyst/data/bundles/core.py b/catalyst/data/bundles/core.py index 29aceb7a..a25591de 100644 --- a/catalyst/data/bundles/core.py +++ b/catalyst/data/bundles/core.py @@ -17,10 +17,6 @@ from ..us_equity_pricing import ( SQLiteAdjustmentReader, SQLiteAdjustmentWriter, ) -from ..five_minute_bars import ( - BcolzFiveMinuteBarReader, - BcolzFiveMinuteBarWriter, -) from ..minute_bars import ( BcolzMinuteBarReader, BcolzMinuteBarWriter, @@ -54,11 +50,6 @@ def minute_path(bundle_name, timestr, environ=None): environ=environ, ) -def five_minute_path(bundle_name, timestr, environ=None): - return pth.data_path( - five_minute_relative(bundle_name, timestr, environ), - environ=environ, - ) def daily_path(bundle_name, timestr, environ=None): return pth.data_path( @@ -92,8 +83,6 @@ def cache_relative(bundle_name, timestr, environ=None): def daily_relative(bundle_name, timestr, environ=None): return bundle_name, timestr, 'daily_equities.bcolz' -def five_minute_relative(bundle_name, timestr, environ=None): - return bundle_name, timestr, 'five_minute.bcolz' def minute_relative(bundle_name, timestr, environ=None): return bundle_name, timestr, 'minute_equities.bcolz' @@ -206,14 +195,13 @@ RegisteredBundle = namedtuple( 'start_session', 'end_session', 'minutes_per_day', - 'five_minutes_per_day', 'ingest', 
'create_writers'] ) BundleData = namedtuple( 'BundleData', - 'asset_finder minute_bar_reader five_minute_bar_reader daily_bar_reader ' + 'asset_finder minute_bar_reader daily_bar_reader ' 'adjustment_reader', ) @@ -303,7 +291,6 @@ def _make_bundle_core(): bundle.ingest, calendar_name=bundle.calendar_name, minutes_per_day=bundle.minutes_per_day, - five_minutes_per_day=bundle.five_minutes_per_day, start_session=start_session, end_session=end_session, create_writers=create_writers, @@ -316,7 +303,6 @@ def _make_bundle_core(): start_session=None, end_session=None, minutes_per_day=1440, - five_minutes_per_day=288, create_writers=True): """Register a data bundle ingest function. @@ -397,7 +383,6 @@ def _make_bundle_core(): start_session=start_session, end_session=end_session, minutes_per_day=minutes_per_day, - five_minutes_per_day=five_minutes_per_day, ingest=f, create_writers=create_writers, ) @@ -496,16 +481,6 @@ def _make_bundle_core(): # that it can compute the adjustment ratios for the dividends. 
daily_bar_writer.write(()) - five_minute_bar_writer = BcolzFiveMinuteBarWriter( - wd.ensure_dir(*five_minute_relative( - name, timestr, environ=environ) - ), - calendar, - start_session, - end_session, - five_minutes_per_day=bundle.five_minutes_per_day, - ) - minute_bar_writer = BcolzMinuteBarWriter( wd.ensure_dir(*minute_relative( name, timestr, environ=environ) @@ -532,7 +507,6 @@ def _make_bundle_core(): ) else: daily_bar_writer = None - five_minute_bar_writer = None minute_bar_writer = None asset_db_writer = None adjustment_db_writer = None @@ -544,7 +518,6 @@ def _make_bundle_core(): environ, asset_db_writer, minute_bar_writer, - five_minute_bar_writer, daily_bar_writer, adjustment_db_writer, calendar, @@ -631,9 +604,6 @@ def _make_bundle_core(): minute_bar_reader=BcolzMinuteBarReader( minute_path(name, timestr, environ=environ), ), - five_minute_bar_reader=BcolzFiveMinuteBarReader( - five_minute_path(name, timestr, environ=environ), - ), daily_bar_reader=BcolzDailyBarReader( daily_path(name, timestr, environ=environ), ), diff --git a/catalyst/data/bundles/poloniex.py b/catalyst/data/bundles/poloniex.py index 60bbac95..64cc2d27 100644 --- a/catalyst/data/bundles/poloniex.py +++ b/catalyst/data/bundles/poloniex.py @@ -97,6 +97,8 @@ class PoloniexBundle(BaseCryptoPricingBundle): end_date, frequency): + # TODO: replace this with direct exchange call + # The end date and frequency should be used to calculate the number of bars if(frequency == 'minute'): pc = PoloniexCurator() raw = pc.onemin_to_dataframe(symbol, start_date, end_date) @@ -146,7 +148,6 @@ class PoloniexBundle(BaseCryptoPricingBundle): data_frequency): period_map = { 'daily': 86400, -# '5-minute': 300, } try: @@ -165,6 +166,7 @@ class PoloniexBundle(BaseCryptoPricingBundle): return self._format_polo_query(query_params) def _format_polo_query(self, query_params): + # TODO: got against the exchange object return 'https://poloniex.com/public?{query}'.format( query=urlencode(query_params), ) diff --git 
a/catalyst/data/data_portal.py b/catalyst/data/data_portal.py index 69aa166e..6bfe6047 100644 --- a/catalyst/data/data_portal.py +++ b/catalyst/data/data_portal.py @@ -42,7 +42,6 @@ from catalyst.assets.roll_finder import ( ) from catalyst.data.dispatch_bar_reader import ( AssetDispatchMinuteBarReader, - AssetDispatchFiveMinuteBarReader, AssetDispatchSessionBarReader ) from catalyst.data.resample import ( @@ -120,10 +119,6 @@ class DataPortal(object): daily data backtests or daily history calls in a minute backetest. If a daily bar reader is not provided but a minute bar reader is, the minutes will be rolled up to serve the daily requests. - five_minute_reader : BcolzFiveMinuteBarReader, optional - The five minute bar reader for equities. This will be used to service - 5-minute data backtests or five-minute history calls. This can be used - to serve daily calls if no daily bar reader is provided. minute_reader : BcolzMinuteBarReader, optional The minute bar reader for equities. This will be used to service minute data backtests or minute history calls. 
This can be used @@ -150,7 +145,6 @@ class DataPortal(object): trading_calendar, first_trading_day, daily_reader=None, - five_minute_reader=None, minute_reader=None, future_daily_reader=None, future_minute_reader=None, @@ -202,7 +196,6 @@ class DataPortal(object): reader.last_available_dt for reader in [ minute_reader, - five_minute_reader, future_minute_reader, ] if reader is not None @@ -214,8 +207,6 @@ class DataPortal(object): aligned_minute_reader = self._ensure_reader_aligned( minute_reader) - aligned_five_minute_reader = self._ensure_reader_aligned( - five_minute_reader) aligned_session_reader = self._ensure_reader_aligned( daily_reader) aligned_future_minute_reader = self._ensure_reader_aligned( @@ -229,13 +220,10 @@ class DataPortal(object): } aligned_minute_readers = {} - aligned_five_minute_readers = {} aligned_session_readers = {} if aligned_minute_reader is not None: aligned_minute_readers[Equity] = aligned_minute_reader - if aligned_five_minute_reader is not None: - aligned_five_minute_readers[Equity] = aligned_five_minute_reader if aligned_session_reader is not None: aligned_session_readers[Equity] = aligned_session_reader @@ -267,13 +255,6 @@ class DataPortal(object): self._last_available_minute, ) - _dispatch_five_minute_reader = AssetDispatchFiveMinuteBarReader( - self.trading_calendar, - self.asset_finder, - aligned_five_minute_readers, - self._last_available_minute, - ) - _dispatch_session_reader = AssetDispatchSessionBarReader( self.trading_calendar, self.asset_finder, @@ -283,7 +264,6 @@ class DataPortal(object): self._pricing_readers = { 'minute': _dispatch_minute_reader, - '5-minute': _dispatch_five_minute_reader, 'daily': _dispatch_session_reader, } @@ -719,17 +699,6 @@ class DataPortal(object): spot_value=result ) - - def _get_five_minute_spot_value(self, asset, column, dt, ffill=False): - return self._get_minutely_spot_value( - asset, - column, - dt, - ffill, - '5-minute', - ) - - def _get_minute_spot_value(self, asset, column, dt, 
ffill=False): return self._get_minutely_spot_value( asset, diff --git a/catalyst/data/dispatch_bar_reader.py b/catalyst/data/dispatch_bar_reader.py index d96252c9..7dfd7e95 100644 --- a/catalyst/data/dispatch_bar_reader.py +++ b/catalyst/data/dispatch_bar_reader.py @@ -138,12 +138,6 @@ class AssetDispatchMinuteBarReader(AssetDispatchBarReader): def _dt_window_size(self, start_dt, end_dt): return len(self.trading_calendar.minutes_in_range(start_dt, end_dt)) - -class AssetDispatchFiveMinuteBarReader(AssetDispatchBarReader): - - def _dt_window_size(self, start_dt, end_dt): - return len(self.trading_calendar.five_minutes_in_range(start_dt, end_dt)) - class AssetDispatchSessionBarReader(AssetDispatchBarReader): def _dt_window_size(self, start_dt, end_dt): diff --git a/catalyst/data/five_minute_bars.py b/catalyst/data/five_minute_bars.py deleted file mode 100644 index 9021dc0a..00000000 --- a/catalyst/data/five_minute_bars.py +++ /dev/null @@ -1,1385 +0,0 @@ -# Copyright 2016 Quantopian, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from abc import ABCMeta, abstractmethod -import json -import os -from glob import glob -from os.path import join -from textwrap import dedent - -from lru import LRU -import bcolz -from bcolz import ctable -from intervaltree import IntervalTree -import logbook -import numpy as np -from numpy import ( - iinfo, - uint64, -) -import pandas as pd -from pandas import HDFStore -import tables -from six import with_metaclass -from toolz import keymap, valmap - -from catalyst.data._minute_bar_internal import ( - five_minute_value, - find_position_of_five_minute, - find_last_traded_five_minute_position_internal, -) - -from catalyst.gens.sim_engine import NANOS_IN_MINUTE - -from catalyst.data.bar_reader import BarReader, NoDataOnDate -from catalyst.data.us_equity_pricing import ( - winsorise_uint64, - check_uint64_safe, -) -from catalyst.utils.calendars import get_calendar -from catalyst.utils.cli import ( - item_show_count, - maybe_show_progress, -) -from catalyst.utils.memoize import lazyval - -logger = logbook.Logger('FiveMinuteBars') - -OPEN_FIVE_MINUTES_PER_DAY = 288 - -DEFAULT_EXPECTEDLEN_CRYPTO = OPEN_FIVE_MINUTES_PER_DAY * 366 * 15 - -OHLC_RATIO = 1000000 - -OHLC = frozenset(['open', 'high', 'low', 'close']) -OHLCV = frozenset(['open', 'high', 'low', 'close', 'volume']) - -UINT64_MAX = iinfo(uint64).max - -NANOS_IN_FIVE_MINUTES = 5 * NANOS_IN_MINUTE - -class BcolzFiveMinuteOverlappingData(Exception): - pass - - -class BcolzFiveMinuteWriterColumnMismatch(Exception): - pass - - -class FiveMinuteBarReader(BarReader): - @property - def data_frequency(self): - return "5-minute" - - -def _calc_five_minute_index(market_opens, five_minutes_per_day): - five_minutes = np.zeros(len(market_opens) * five_minutes_per_day, - dtype='datetime64[ns]') - deltas = 5 * np.arange(0, five_minutes_per_day, dtype='timedelta64[m]') - for i, market_open in enumerate(market_opens): - start = market_open.asm8 - five_minute_values = start + deltas - start_ix = five_minutes_per_day * i - end_ix = 
start_ix + five_minutes_per_day - five_minutes[start_ix:end_ix] = five_minute_values - return pd.to_datetime(five_minutes, utc=True, box=True) - - -def _sid_subdir_path(sid): - """ - Format subdir path to limit the number directories in any given - subdirectory to 100. - - The number in each directory is designed to support at least 100000 - equities. - - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : string - A path for the bcolz rootdir, including subdirectory prefixes based on - the padded string representation of the given sid. - - e.g. 1 is formatted as 00/00/000001.bcolz - """ - padded_sid = format(sid, '06') - return os.path.join( - # subdir 1 00/XX - padded_sid[0:2], - # subdir 2 XX/00 - padded_sid[2:4], - "{0}.bcolz".format(str(padded_sid)) - ) - - -def convert_cols(cols, scale_factor, sid, invalid_data_behavior): - """Adapt OHLCV columns into uint64 columns. - - Parameters - ---------- - cols : dict - A dict mapping each column name (open, high, low, close, volume) - to a float column to convert to uint64. - scale_factor : int - Factor to use to scale float values before converting to uint64. - sid : int - Sid of the relevant asset, for logging. - invalid_data_behavior : str - Specifies behavior when data cannot be converted to uint64. - If 'raise', raises an exception. - If 'warn', logs a warning and filters out incompatible values. - If 'ignore', silently filters out incompatible values. 
- """ - scaled_opens = np.nan_to_num(cols['open']) * scale_factor - scaled_highs = np.nan_to_num(cols['high']) * scale_factor - scaled_lows = np.nan_to_num(cols['low']) * scale_factor - scaled_closes = np.nan_to_num(cols['close']) * scale_factor - volumes = np.nan_to_num(cols['volume']) - - exclude_mask = np.zeros_like(scaled_opens, dtype=bool) - - for col_name, scaled_col in [ - ('open', scaled_opens), - ('high', scaled_highs), - ('low', scaled_lows), - ('close', scaled_closes), - ('volume', volumes), - ]: - max_val = scaled_col.max() - - try: - check_uint64_safe(max_val, col_name) - except ValueError: - if invalid_data_behavior == 'raise': - raise - - if invalid_data_behavior == 'warn': - logger.warn( - 'Values for sid={}, col={} contain some too large for ' - 'uint64 (max={}), filtering them out', - sid, col_name, max_val, - ) - - # We want to exclude all rows that have an unsafe value in - # this column. - exclude_mask &= (scaled_col >= iinfo(uint64).max) - - # Convert all cols to uint64. - opens = scaled_opens.astype(uint64) - highs = scaled_highs.astype(uint64) - lows = scaled_lows.astype(uint64) - closes = scaled_closes.astype(uint64) - volumes = volumes.astype(uint64) - - # Exclude rows with unsafe values by setting to zero. - opens[exclude_mask] = 0 - highs[exclude_mask] = 0 - lows[exclude_mask] = 0 - closes[exclude_mask] = 0 - volumes[exclude_mask] = 0 - - return opens, highs, lows, closes, volumes - - -class BcolzFiveMinuteBarMetadata(object): - """ - Parameters - ---------- - ohlc_ratio : int - The factor by which the pricing data is multiplied so that the - float data can be stored as an integer. - calendar : catalyst.utils.calendars.trading_calendar.TradingCalendar - The TradingCalendar on which the five minute bars are based. - start_session : datetime - The first trading session in the data set. - end_session : datetime - The last trading session in the data set. - five_minutes_per_day : int - The number of minutes per each period. 
- """ - FORMAT_VERSION = 3 - - METADATA_FILENAME = 'five-minute-metadata.json' - - @classmethod - def metadata_path(cls, rootdir): - return os.path.join(rootdir, cls.METADATA_FILENAME) - - @classmethod - def read(cls, rootdir): - path = cls.metadata_path(rootdir) - with open(path) as fp: - raw_data = json.load(fp) - - try: - version = raw_data['version'] - except KeyError: - # Version was first written with version 1, assume 0, - # if version does not match. - version = 0 - - default_ohlc_ratio = raw_data['ohlc_ratio'] - - if version >= 1: - five_minutes_per_day = raw_data['five_minutes_per_day'] - else: - # version 0 always assumed crypto assets. - five_minutes_per_day = CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY - - if version >= 2: - calendar = get_calendar(raw_data['calendar_name']) - start_session = pd.Timestamp( - raw_data['start_session'], tz='UTC') - end_session = pd.Timestamp(raw_data['end_session'], tz='UTC') - else: - # No calendar info included in older versions, so - # default to OPEN. 
- calendar = get_calendar('OPEN') - - start_session = pd.Timestamp( - raw_data['first_trading_day'], tz='UTC') - end_session = calendar.minute_to_session_label( - pd.Timestamp( - raw_data['market_closes'][-1], unit='m', tz='UTC') - ) - - if version >= 3: - ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid'] - if ohlc_ratios_per_sid is not None: - ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid) - else: - ohlc_ratios_per_sid = None - - return cls( - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - five_minutes_per_day, - version=version, - ) - - def __init__( - self, - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - five_minutes_per_day, - version=FORMAT_VERSION, - ): - self.calendar = calendar - self.start_session = start_session - self.end_session = end_session - self.default_ohlc_ratio = default_ohlc_ratio - self.ohlc_ratios_per_sid = ohlc_ratios_per_sid - self.five_minutes_per_day = five_minutes_per_day - self.version = version - - def write(self, rootdir): - """ - Write the metadata to a JSON file in the rootdir. - - Values contained in the metadata are: - - version : int - The value of FORMAT_VERSION of this class. - ohlc_ratio : int - The default ratio by which to multiply the pricing data to - convert the floats from floats to an integer to fit within - the np.uint64. If ohlc_ratios_per_sid is None or does not - contain a mapping for a given sid, this ratio is used. - ohlc_ratios_per_sid : dict - A dict mapping each sid in the output to the factor by - which the pricing data is multiplied so that the float data - can be stored as an integer. - five_minutes_per_day : int - The number of minutes per each period. - calendar_name : str - The name of the TradingCalendar on which the minute bars are - based. - start_session : datetime - 'YYYY-MM-DD' formatted representation of the first trading - session in the data set. 
- end_session : datetime - 'YYYY-MM-DD' formatted representation of the last trading - session in the data set. - - Deprecated, but included for backwards compatibility: - - first_trading_day : string - 'YYYY-MM-DD' formatted representation of the first trading day - available in the dataset. - market_opens : list - List of int64 values representing UTC market opens as - minutes since epoch. - market_closes : list - List of int64 values representing UTC market closes as - minutes since epoch. - """ - - calendar = self.calendar - slicer = calendar.schedule.index.slice_indexer( - self.start_session, - self.end_session, - ) - schedule = calendar.schedule[slicer] - market_opens = schedule.market_open - market_closes = schedule.market_close - - metadata = { - 'version': self.version, - 'ohlc_ratio': self.default_ohlc_ratio, - 'ohlc_ratios_per_sid': self.ohlc_ratios_per_sid, - 'five_minutes_per_day': self.five_minutes_per_day, - 'calendar_name': self.calendar.name, - 'start_session': str(self.start_session.date()), - 'end_session': str(self.end_session.date()), - # Write these values for backwards compatibility - 'first_trading_day': str(self.start_session.date()), - 'market_opens': ( - market_opens.values.astype('datetime64[m]'). - astype(np.int64).tolist()), - 'market_closes': ( - market_closes.values.astype('datetime64[m]'). - astype(np.int64).tolist()), - } - with open(self.metadata_path(rootdir), 'w+') as fp: - json.dump(metadata, fp) - - -class BcolzFiveMinuteBarWriter(object): - """ - Class capable of writing minute OHLCV data to disk into bcolz format. - - Parameters - ---------- - rootdir : string - Path to the root directory into which to write the metadata and - bcolz subdirectories. - calendar : catalyst.utils.calendars.trading_calendar.TradingCalendar - The trading calendar on which to base the minute bars. 
Used to - get the market opens used as a starting point for each periodic - span of minutes in the index, and the market closes that - correspond with the market opens. - five_minutes_per_day : int - The number of minutes per each period. Defaults to 390, the mode - of minutes in NYSE trading days. - start_session : datetime - The first trading session in the data set. - end_session : datetime - The last trading session in the data set. - default_ohlc_ratio : int, optional - The default ratio by which to multiply the pricing data to - convert from floats to integers that fit within np.uint64. If - ohlc_ratios_per_sid is None or does not contain a mapping for a - given sid, this ratio is used. Default is OHLC_RATIO (1000). - ohlc_ratios_per_sid : dict, optional - A dict mapping each sid in the output to the ratio by which to - multiply the pricing data to convert the floats from floats to - an integer to fit within the np.uint64. - expectedlen : int, optional - The expected length of the dataset, used when creating the initial - bcolz ctable. - - If the expectedlen is not used, the chunksize and corresponding - compression ratios are not ideal. - - Defaults to supporting 15 years of NYSE equity market data. - see: http://bcolz.blosc.org/opt-tips.html#informing-about-the-length-of-your-carrays # noqa - write_metadata : bool, optional - If True, writes the minute bar metadata (on init of the writer). - If False, no metadata is written (existing metadata is - retained). Default is True. - - Notes - ----- - Writes a bcolz directory for each individual sid, all contained within - a root directory which also contains metadata about the entire dataset. 
- - Each individual asset's data is stored as a bcolz table with a column for - each pricing field: (open, high, low, close, volume) - - The open, high, low, and close columns are integers which are 1000 times - the quoted price, so that the data can represented and stored as an - np.uint64, supporting market prices quoted up to the thousands place. - - volume is a np.uint64 with no mutation of the tens place. - - The 'index' for each individual asset are a repeating period of minutes of - length `minutes_per_day` starting from each market open. - The file format does not account for half-days. - e.g.: - 2016-01-19 14:31 - 2016-01-19 14:32 - ... - 2016-01-19 20:59 - 2016-01-19 21:00 - 2016-01-20 14:31 - 2016-01-20 14:32 - ... - 2016-01-20 20:59 - 2016-01-20 21:00 - - All assets are written with a common 'index', sharing a common first - trading day. Assets that do not begin trading until after the first trading - day will have zeros for all pricing data up and until data is traded. - - 'index' is in quotations, because bcolz does not provide an index. The - format allows index-like behavior by writing each minute's data into the - corresponding position of the enumeration of the aforementioned datetime - index. - - The datetimes which correspond to each position are written in the metadata - as integer nanoseconds since the epoch into the `minute_index` key. 
- - See Also - -------- - catalyst.data.minute_bars.BcolzMinuteBarReader - """ - COL_NAMES = ('open', 'high', 'low', 'close', 'volume') - - def __init__(self, - rootdir, - calendar, - start_session, - end_session, - five_minutes_per_day, - default_ohlc_ratio=OHLC_RATIO, - ohlc_ratios_per_sid=None, - expectedlen=DEFAULT_EXPECTEDLEN_CRYPTO, - write_metadata=True): - - self._rootdir = rootdir - self._start_session = start_session - self._end_session = end_session - self._calendar = calendar - slicer = ( - calendar.schedule.index.slice_indexer(start_session, end_session)) - self._schedule = calendar.schedule[slicer] - self._session_labels = self._schedule.index - self._five_minutes_per_day = five_minutes_per_day - self._expectedlen = expectedlen - self._default_ohlc_ratio = default_ohlc_ratio - self._ohlc_ratios_per_sid = ohlc_ratios_per_sid - - self._five_minute_index = _calc_five_minute_index( - self._schedule.market_open, self._five_minutes_per_day) - - if write_metadata: - metadata = BcolzFiveMinuteBarMetadata( - self._default_ohlc_ratio, - self._ohlc_ratios_per_sid, - self._calendar, - self._start_session, - self._end_session, - self._five_minutes_per_day, - ) - metadata.write(self._rootdir) - - @classmethod - def open(cls, rootdir, end_session=None): - """ - Open an existing ``rootdir`` for writing. - - Parameters - ---------- - end_session : Timestamp (optional) - When appending, the intended new ``end_session``. 
- """ - metadata = BcolzFiveMinuteBarMetadata.read(rootdir) - return BcolzFiveMinuteBarWriter( - rootdir, - metadata.calendar, - metadata.start_session, - end_session if end_session is not None else metadata.end_session, - metadata.five_minutes_per_day, - metadata.default_ohlc_ratio, - metadata.ohlc_ratios_per_sid, - write_metadata=end_session is not None - ) - - @property - def first_trading_day(self): - return self._start_session - - def ohlc_ratio_for_sid(self, sid): - if self._ohlc_ratios_per_sid is not None: - try: - return self._ohlc_ratios_per_sid[sid] - except KeyError: - pass - - # If no ohlc_ratios_per_sid dict is passed, or if the specified - # sid is not in the dict, fallback to the general ohlc_ratio. - return self._default_ohlc_ratio - - def sidpath(self, sid): - """ - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : string - Full path to the bcolz rootdir for the given sid. - """ - sid_subdir = _sid_subdir_path(sid) - return join(self._rootdir, sid_subdir) - - def last_date_in_output_for_sid(self, sid): - """ - Parameters - ---------- - sid : int - Asset identifier. - - Returns - ------- - out : pd.Timestamp - The midnight of the last date written in to the output for the - given sid. - """ - sizes_path = "{0}/close/meta/sizes".format(self.sidpath(sid)) - if not os.path.exists(sizes_path): - return pd.NaT - with open(sizes_path, mode='r') as f: - sizes = f.read() - data = json.loads(sizes) - # use integer division so that the result is an int - # for pandas index later https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/base.py#L247 # noqa - num_days = data['shape'][0] // self._five_minutes_per_day - if num_days == 0: - # empty container - return pd.NaT - return self._session_labels[num_days - 1] - - def _init_ctable(self, path): - """ - Create empty ctable for given path. - - Parameters - ---------- - path : string - The path to rootdir of the new ctable. 
- """ - # Only create the containing subdir on creation. - # This is not to be confused with the `.bcolz` directory, but is the - # directory up one level from the `.bcolz` directories. - sid_containing_dirname = os.path.dirname(path) - if not os.path.exists(sid_containing_dirname): - # Other sids may have already created the containing directory. - os.makedirs(sid_containing_dirname) - initial_array = np.empty(0, np.uint64) - table = ctable( - rootdir=path, - columns=[ - initial_array, - initial_array, - initial_array, - initial_array, - initial_array, - ], - names=[ - 'open', - 'high', - 'low', - 'close', - 'volume' - ], - expectedlen=self._expectedlen, - mode='w', - ) - table.flush() - return table - - def _ensure_ctable(self, sid): - """Ensure that a ctable exists for ``sid``, then return it.""" - sidpath = self.sidpath(sid) - if not os.path.exists(sidpath): - return self._init_ctable(sidpath) - return bcolz.ctable(rootdir=sidpath, mode='a') - - def _zerofill(self, table, numdays): - # Compute the number of minutes to be filled, accounting for the - # possibility of a partial day's worth of minutes existing for - # the previous day. - five_minute_offset = len(table) % self._five_minutes_per_day - num_to_prepend = numdays * self._five_minutes_per_day - five_minute_offset - - prepend_array = np.zeros(num_to_prepend, np.uint64) - # Fill all OHLCV with zeros. - table.append([prepend_array] * 5) - table.flush() - - def pad(self, sid, date): - """ - Fill sid container with empty data through the specified date. - - If the last recorded trade is not at the close, then that day will be - padded with zeros until its close. Any day after that (up to and - including the specified date) will be padded with `minute_per_day` - worth of zeros - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - date : datetime-like - The date used to calculate how many slots to be pad. - The padding is done through the date, i.e. 
after the padding is - done the `last_date_in_output_for_sid` will be equal to `date` - """ - table = self._ensure_ctable(sid) - - last_date = self.last_date_in_output_for_sid(sid) - - tds = self._session_labels - - if date <= last_date or date < tds[0]: - # No need to pad. - return - - if last_date == pd.NaT: - # If there is no data, determine how many days to add so that - # desired days are written to the correct slots. - days_to_zerofill = tds[tds.slice_indexer(end=date)] - else: - days_to_zerofill = tds[tds.slice_indexer( - start=last_date + tds.freq, - end=date)] - - self._zerofill(table, len(days_to_zerofill)) - - new_last_date = self.last_date_in_output_for_sid(sid) - assert new_last_date == date, "new_last_date={0} != date={1}".format( - new_last_date, date) - - def set_sid_attrs(self, sid, **kwargs): - """Write all the supplied kwargs as attributes of the sid's file. - """ - table = self._ensure_ctable(sid) - for k, v in kwargs.items(): - table.attrs[k] = v - - def write(self, - data, - length=None, - show_progress=False, - invalid_data_behavior='warn'): - """Write a stream of minute data. - - Parameters - ---------- - data : iterable[(int, pd.DataFrame)] - The data to write. Each element should be a tuple of sid, data - where data has the following format: - columns : ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - index : DatetimeIndex of market minutes. - A given sid may appear more than once in ``data``; however, - the dates must be strictly increasing. - show_progress : bool, optional - Whether or not to show a progress bar while writing. 
- """ - with maybe_show_progress( - data, - length=length, - show_percent=False, - show_progress=show_progress, - item_show_func=item_show_count(length), - label='Compiling five-minute data', - ) as it: - write_sid = self.write_sid - for e in it: - write_sid(*e, invalid_data_behavior=invalid_data_behavior) - - def write_sid(self, sid, df, invalid_data_behavior='warn'): - """ - Write the OHLCV data for the given sid. - If there is no bcolz ctable yet created for the sid, create it. - If the length of the bcolz ctable is not exactly to the date before - the first day provided, fill the ctable with 0s up to that date. - - Parameters - ---------- - sid : int - The asset identifer for the data being written. - df : pd.DataFrame - DataFrame of market data with the following characteristics. - columns : ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - index : DatetimeIndex of market minutes. - """ - cols = { - 'open': df.open.values, - 'high': df.high.values, - 'low': df.low.values, - 'close': df.close.values, - 'volume': df.volume.values, - } - dts = df.index.values - # Call internal method, since DataFrame has already ensured matching - # index and value lengths. - self._write_cols(sid, dts, cols, invalid_data_behavior) - - def write_cols(self, sid, dts, cols, invalid_data_behavior='warn'): - """ - Write the OHLCV data for the given sid. - If there is no bcolz ctable yet created for the sid, create it. - If the length of the bcolz ctable is not exactly to the date before - the first day provided, fill the ctable with 0s up to that date. - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - dts : datetime64 array - The dts corresponding to values in cols. - cols : dict of str -> np.array - dict of market data with the following characteristics. 
- keys are ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - """ - if not all(len(dts) == len(cols[name]) for name in self.COL_NAMES): - raise BcolzFiveMinuteWriterColumnMismatch( - "Length of dts={0} should match cols: {1}".format( - len(dts), - " ".join("{0}={1}".format(name, len(cols[name])) - for name in self.COL_NAMES))) - self._write_cols(sid, dts, cols, invalid_data_behavior) - - def _write_cols(self, sid, dts, cols, invalid_data_behavior): - """ - Internal method for `write_cols` and `write`. - - Parameters - ---------- - sid : int - The asset identifier for the data being written. - dts : datetime64 array - The dts corresponding to values in cols. - cols : dict of str -> np.array - dict of market data with the following characteristics. - keys are ('open', 'high', 'low', 'close', 'volume') - open : float64 - high : float64 - low : float64 - close : float64 - volume : float64|int64 - """ - table = self._ensure_ctable(sid) - - tds = self._session_labels - input_first_day = self._calendar.minute_to_session_label( - pd.Timestamp(dts[0]), direction='previous') - - last_date = self.last_date_in_output_for_sid(sid) - - day_before_input = input_first_day - tds.freq - - self.pad(sid, day_before_input) - table = self._ensure_ctable(sid) - - # Get the number of minutes already recorded in this sid's ctable - num_rec_mins = table.size - - all_minutes = self._five_minute_index - # Get the latest minute we wish to write to the ctable - last_minute_to_write = pd.Timestamp(dts[-1], tz='UTC') - - #print 'all_minutes[-1]:', all_minutes[num_rec_mins-1] - #print 'last_minute_to_write:', last_minute_to_write - - # In the event that we've already written some minutely data to the - # ctable, guard against overwriting that data. 
- if num_rec_mins > 0: - last_recorded_minute = all_minutes[num_rec_mins - 1] - if last_minute_to_write <= last_recorded_minute: - raise BcolzMinuteOverlappingData(dedent(""" - Data with last_date={0} already includes input start={1} for - sid={2}""".strip()).format(last_date, input_first_day, sid)) - - latest_min_count = all_minutes.get_loc(last_minute_to_write) - - # Get all the minutes we wish to write (all market minutes after the - # latest currently written, up to and including last_minute_to_write) - all_minutes_in_window = all_minutes[num_rec_mins:latest_min_count + 1] - - minutes_count = all_minutes_in_window.size - - open_col = np.zeros(minutes_count, dtype=uint64) - high_col = np.zeros(minutes_count, dtype=uint64) - low_col = np.zeros(minutes_count, dtype=uint64) - close_col = np.zeros(minutes_count, dtype=uint64) - vol_col = np.zeros(minutes_count, dtype=uint64) - - dt_ixs = np.searchsorted(all_minutes_in_window.values, - dts.astype('datetime64[ns]')) - - ohlc_ratio = self.ohlc_ratio_for_sid(sid) - - ( - open_col[dt_ixs], - high_col[dt_ixs], - low_col[dt_ixs], - close_col[dt_ixs], - vol_col[dt_ixs], - ) = convert_cols(cols, ohlc_ratio, sid, invalid_data_behavior) - - table.append([ - open_col, - high_col, - low_col, - close_col, - vol_col - ]) - table.flush() - - def data_len_for_day(self, day): - """ - Return the number of data points up to and including the - provided day. - """ - day_ix = self._session_labels.get_loc(day) - # Add one to the 0-indexed day_ix to get the number of days. 
- num_days = day_ix + 1 - return num_days * self._five_minutes_per_day - - def truncate(self, date): - """Truncate data beyond this date in all ctables.""" - truncate_slice_end = self.data_len_for_day(date) - - glob_path = os.path.join(self._rootdir, "*", "*", "*.bcolz") - sid_paths = sorted(glob(glob_path)) - - for sid_path in sid_paths: - file_name = os.path.basename(sid_path) - - try: - table = bcolz.open(rootdir=sid_path) - except IOError: - continue - if table.len <= truncate_slice_end: - logger.info("{0} not past truncate date={1}.", file_name, date) - continue - - logger.info( - "Truncating {0} at end_date={1}", file_name, date.date() - ) - - table.resize(truncate_slice_end) - - # Update end session in metadata. - metadata = BcolzFiveMinuteBarMetadata.read(self._rootdir) - metadata.end_session = date - metadata.write(self._rootdir) - - -class BcolzFiveMinuteBarReader(FiveMinuteBarReader): - """ - Reader for data written by BcolzFiveMinuteBarWriter - - Parameters - ---------- - rootdir : string - The root directory containing the metadata and asset bcolz - directories. 
- - See Also - -------- - catalyst.data.minute_bars.BcolzFiveMinuteBarWriter - """ - FIELDS = ('open', 'high', 'low', 'close', 'volume') - - def __init__(self, rootdir, sid_cache_size=1000): - self._rootdir = rootdir - - metadata = self._get_metadata() - - self._start_session = metadata.start_session - self._end_session = metadata.end_session - - self.calendar = metadata.calendar - slicer = self.calendar.schedule.index.slice_indexer( - self._start_session, - self._end_session, - ) - self._schedule = self.calendar.schedule[slicer] - self._market_opens = self._schedule.market_open - self._market_open_values = self._market_opens.values.\ - astype('datetime64[m]').astype(np.int64) - self._market_closes = self._schedule.market_close - self._market_close_values = self._market_closes.values.\ - astype('datetime64[m]').astype(np.int64) - - self._default_ohlc_inverse = 1.0 / metadata.default_ohlc_ratio - ohlc_ratios = metadata.ohlc_ratios_per_sid - if ohlc_ratios: - self._ohlc_inverses_per_sid = ( - valmap(lambda x: 1.0 / x, ohlc_ratios)) - else: - self._ohlc_inverses_per_sid = None - - self._five_minutes_per_day = metadata.five_minutes_per_day - - self._carrays = { - field: LRU(sid_cache_size) - for field in self.FIELDS - } - - self._last_get_value_dt_position = None - self._last_get_value_dt_value = None - - # This is to avoid any bad data or other performance-killing situation - # where there a consecutive streak of 0 (no volume) starting at an - # asset's start date. - # if asset 1 started on 2015-01-03 but its first trade is 2015-01-06 - # 10:31 AM US/Eastern, this dict would store {1: 23675971}, - # which is the minute epoch of that date. 
- self._known_zero_volume_dict = {} - - def _get_metadata(self): - return BcolzFiveMinuteBarMetadata.read(self._rootdir) - - @property - def trading_calendar(self): - return self.calendar - - @lazyval - def last_available_dt(self): - _, close = self.calendar.open_and_close_for_session(self._end_session) - return close - - @property - def first_trading_day(self): - return self._start_session - - def _ohlc_ratio_inverse_for_sid(self, sid): - if self._ohlc_inverses_per_sid is not None: - try: - return self._ohlc_inverses_per_sid[sid] - except KeyError: - pass - - # If we can not get a sid-specific OHLC inverse for this sid, - # fallback to the default. - return self._default_ohlc_inverse - - def _minutes_to_exclude(self): - """ - Calculate the minutes which should be excluded when a window - occurs on days which had an early close, i.e. days where the close - based on the regular period of minutes per day and the market close - do not match. - - Returns - ------- - List of DatetimeIndex representing the minutes to exclude because - of early closes. - """ - market_opens = self._market_opens.values.astype('datetime64[m]') - market_closes = self._market_closes.values.astype('datetime64[m]') - minutes_per_day = (market_closes - market_opens).astype(np.int64) / 5 - early_indices = np.where( - minutes_per_day != self._five_minutes_per_day - 1)[0] - early_opens = self._market_opens[early_indices] - early_closes = self._market_closes[early_indices] - minutes = [(market_open, early_close) - for market_open, early_close - in zip(early_opens, early_closes)] - return minutes - - @lazyval - def _minute_exclusion_tree(self): - """ - Build an interval tree keyed by the start and end of each range - of positions should be dropped from windows. (These are the minutes - between an early close and the minute which would be the close based - on the regular period if there were no early close.) - The value of each node is the same start and end position stored as - a tuple. 
- - The data is stored as such in support of a fast answer to the question, - does a given start and end position overlap any of the exclusion spans? - - Returns - ------- - IntervalTree containing nodes which represent the minutes to exclude - because of early closes. - """ - itree = IntervalTree() - for market_open, early_close in self._minutes_to_exclude(): - start_pos = self._find_position_of_five_minute(early_close) + 1 - end_pos = ( - self._find_position_of_five_minute(market_open) - + - self._five_minutes_per_day - - - 1 - ) - data = (start_pos, end_pos) - itree[start_pos:end_pos + 1] = data - return itree - - def _exclusion_indices_for_range(self, start_idx, end_idx): - """ - Returns - ------- - List of tuples of (start, stop) which represent the ranges of minutes - which should be excluded when a market minute window is requested. - """ - itree = self._minute_exclusion_tree - if itree.overlaps(start_idx, end_idx): - ranges = [] - intervals = itree[start_idx:end_idx] - for interval in intervals: - ranges.append(interval.data) - return sorted(ranges) - else: - return None - - def _get_carray_path(self, sid, field): - sid_subdir = _sid_subdir_path(sid) - # carrays are subdirectories of the sid's rootdir - return os.path.join(self._rootdir, sid_subdir, field) - - def _open_minute_file(self, field, sid): - sid = int(sid) - - try: - carray = self._carrays[field][sid] - except KeyError: - carray = self._carrays[field][sid] = \ - bcolz.carray(rootdir=self._get_carray_path(sid, field), - mode='r') - - return carray - - def table_len(self, sid): - """Returns the length of the underlying table for this sid.""" - return len(self._open_minute_file('close', sid)) - - def get_sid_attr(self, sid, name): - sid_subdir = _sid_subdir_path(sid) - sid_path = os.path.join(self._rootdir, sid_subdir) - attrs = bcolz.attrs.attrs(sid_path, 'r') - try: - return attrs[name] - except KeyError: - return None - - def get_value(self, sid, dt, field): - """ - Retrieve the pricing info for 
the given sid, dt, and field. - - Parameters - ---------- - sid : int - Asset identifier. - dt : datetime-like - The datetime at which the trade occurred. - field : string - The type of pricing data to retrieve. - ('open', 'high', 'low', 'close', 'volume') - - Returns - ------- - out : float|int - - The market data for the given sid, dt, and field coordinates. - - For OHLC: - Returns a float if a trade occurred at the given dt. - If no trade occurred, a np.nan is returned. - - For volume: - Returns the integer value of the volume. - (A volume of 0 signifies no trades for the given dt.) - """ - if self._last_get_value_dt_value == dt.value: - minute_pos = self._last_get_value_dt_position - else: - try: - minute_pos = self._find_position_of_five_minute(dt) - except ValueError: - raise NoDataOnDate() - - self._last_get_value_dt_value = dt.value - self._last_get_value_dt_position = minute_pos - - try: - value = self._open_minute_file(field, sid)[minute_pos] - except IndexError: - value = 0 - if value == 0: - if field == 'volume': - return 0 - else: - return np.nan - - if field != 'volume': - value *= self._ohlc_ratio_inverse_for_sid(sid) - return value - - def get_last_traded_dt(self, asset, dt): - minute_pos = self._find_last_traded_five_minute_position(asset, dt) - if minute_pos == -1: - return pd.NaT - return self._pos_to_minute(minute_pos) - - def _find_last_traded_five_minute_position(self, asset, dt): - volumes = self._open_minute_file('volume', asset) - start_date_minute = asset.start_date.value / NANOS_IN_FIVE_MINUTE - dt_minute = dt.value / NANOS_IN_FIVE_MINUTE - - try: - # if we know of a dt before which this asset has no volume, - # don't look before that dt - earliest_dt_to_search = self._known_zero_volume_dict[asset.sid] - except KeyError: - earliest_dt_to_search = start_date_minute - - if dt_minute < earliest_dt_to_search: - return -1 - - pos = find_last_traded_five_minute_position_internal( - self._market_open_values, - self._market_close_values, - 
dt_minute, - earliest_dt_to_search, - volumes, - self._five_minutes_per_day, - ) - - if pos == -1: - # if we didn't find any volume before this dt, save it to avoid - # work in the future. - try: - self._known_zero_volume_dict[asset.sid] = max( - dt_minute, - self._known_zero_volume_dict[asset.sid] - ) - except KeyError: - self._known_zero_volume_dict[asset.sid] = dt_minute - - return pos - - def _pos_to_minute(self, pos): - minute_epoch = five_minute_value( - self._market_open_values, - pos, - self._five_minutes_per_day - ) - - return pd.Timestamp(minute_epoch, tz='UTC', unit="m") - - def _find_position_of_five_minute(self, minute_dt): - """ - Internal method that returns the position of the given minute in the - list of every trading minute since market open of the first trading - day. Adjusts non market minutes to the last close. - - ex. this method would return 1 for 2002-01-02 9:32 AM Eastern, if - 2002-01-02 is the first trading day of the dataset. - - Parameters - ---------- - minute_dt: pd.Timestamp - The minute whose position should be calculated. - - Returns - ------- - int: The position of the given minute in the list of all trading - minutes since market open on the first trading day. - """ - return find_position_of_five_minute( - self._market_open_values, - self._market_close_values, - minute_dt.value / NANOS_IN_FIVE_MINUTE, - self._five_minutes_per_day, - False, - ) - - def load_raw_arrays(self, fields, start_dt, end_dt, sids): - """ - Parameters - ---------- - fields : list of str - 'open', 'high', 'low', 'close', or 'volume' - start_dt: Timestamp - Beginning of the window range. - end_dt: Timestamp - End of the window range. - sids : list of int - The asset identifiers in the window. - - Returns - ------- - list of np.ndarray - A list with an entry per field of ndarrays with shape - (minutes in range, sids) with a dtype of float64, containing the - values for the respective field over start and end dt range. 
- """ - start_idx = self._find_position_of_five_minute(start_dt) - end_idx = self._find_position_of_five_minute(end_dt) - - num_minutes = (end_idx - start_idx + 1) - - results = [] - - indices_to_exclude = self._exclusion_indices_for_range( - start_idx, end_idx) - if indices_to_exclude is not None: - for excl_start, excl_stop in indices_to_exclude: - length = excl_stop - excl_start + 1 - num_minutes -= length - - shape = num_minutes, len(sids) - - for field in fields: - if field != 'volume': - out = np.full(shape, np.nan) - else: - out = np.zeros(shape, dtype=uint64) - - for i, sid in enumerate(sids): - carray = self._open_minute_file(field, sid) - values = carray[start_idx:end_idx + 1] - if indices_to_exclude is not None: - for excl_start, excl_stop in indices_to_exclude[::-1]: - excl_slice = np.s_[ - excl_start - start_idx:excl_stop - start_idx + 1] - values = np.delete(values, excl_slice) - - where = values != 0 - # first slice down to len(where) because we might not have - # written data for all the minutes requested - if field != 'volume': - out[:len(where), i][where] = ( - values[where] * self._ohlc_ratio_inverse_for_sid(sid)) - else: - out[:len(where), i][where] = values[where] - - results.append(out) - return results - - -class MinuteBarUpdateReader(with_metaclass(ABCMeta, object)): - """ - Abstract base class for minute update readers. - """ - - @abstractmethod - def read(self, dts, sids): - """ - Read and return pricing update data. - - Parameters - ---------- - dts : DatetimeIndex - The minutes for which to read the pricing updates. - sids : iter[int] - The sids for which to read the pricing updates. - - Returns - ------- - data : iter[(int, DataFrame)] - Returns an iterable of ``sid`` to the corresponding OHLCV data. - """ - raise NotImplementedError() - - -class H5MinuteBarUpdateWriter(object): - """ - Writer for files containing minute bar updates for consumption by a writer - for a ``MinuteBarReader`` format. 
- - Parameters - ---------- - path : str - The destination path. - complevel : int, optional - The HDF5 complevel, defaults to ``5``. - complib : str, optional - The HDF5 complib, defaults to ``zlib``. - """ - - FORMAT_VERSION = 0 - - _COMPLEVEL = 5 - _COMPLIB = 'zlib' - - def __init__(self, path, complevel=None, complib=None): - self._complevel = complevel if complevel \ - is not None else self._COMPLEVEL - self._complib = complib if complib \ - is not None else self._COMPLIB - self._path = path - - def write(self, frames): - """ - Write the frames to the target HDF5 file, using the format used by - ``pd.Panel.to_hdf`` - - Parameters - ---------- - frames : iter[(int, DataFrame)] or dict[int -> DataFrame] - An iterable or other mapping of sid to the corresponding OHLCV - pricing data. - """ - with HDFStore(self._path, 'w', - complevel=self._complevel, complib=self._complib) \ - as store: - panel = pd.Panel.from_dict(dict(frames)) - panel.to_hdf(store, 'updates') - with tables.open_file(self._path, mode='r+') as h5file: - h5file.set_node_attr('/', 'version', 0) - - -class H5MinuteBarUpdateReader(MinuteBarUpdateReader): - """ - Reader for minute bar updates stored in HDF5 files. - - Parameters - ---------- - path : str - The path of the HDF5 file from which to source data. - """ - def __init__(self, path): - self._panel = pd.read_hdf(path) - - def read(self, dts, sids): - panel = self._panel[sids, dts, :] - return panel.iteritems() diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index a9dd8167..ce293e9b 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -12,41 +12,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime import os from collections import OrderedDict import logbook import pandas as pd -import numpy as np -from pandas_datareader.data import DataReader -import datetime -import time import pytz +from pandas_datareader.data import DataReader from six import iteritems from six.moves.urllib_error import HTTPError -from .benchmarks import get_benchmark_returns +from catalyst.utils.calendars import get_calendar from . import treasuries, treasuries_can +from .benchmarks import get_benchmark_returns +from ..utils.deprecate import deprecated from ..utils.paths import ( cache_root, data_root, ) -from ..utils.deprecate import deprecated - -from catalyst.data.bundles.poloniex import PoloniexBundle -from catalyst.utils.calendars import get_calendar - logger = logbook.Logger('Loader') # Mapping from index symbol to appropriate bond data INDEX_MAPPING = { 'SPY': - (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), + (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), '^GSPTSE': - (treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'), + (treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'), '^FTSE': # use US treasuries until UK bonds implemented - (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), + (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'), } ONE_HOUR = pd.Timedelta(hours=1) @@ -94,18 +89,27 @@ def has_data_for_dates(series_or_df, first_date, last_date): if not isinstance(dts, pd.DatetimeIndex): raise TypeError("Expected a DatetimeIndex, but got %s." 
% type(dts)) first, last = dts[[0, -1]].tz_localize(None) - return (first <= first_date.tz_localize(None)) and (last >= last_date.tz_localize(None)) + return (first <= first_date.tz_localize(None)) and ( + last >= last_date.tz_localize(None)) -def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT_BTC', - bundle=None, bundle_data=None, environ=None): +def load_crypto_market_data(trading_day=None, trading_days=None, + bm_symbol=None, bundle=None, bundle_data=None, + environ=None, exchange=None, start_dt=None, + end_dt=None): if trading_day is None: trading_day = get_calendar('OPEN').trading_day - if trading_days is None: - trading_days = get_calendar('OPEN').all_sessions - first_date = trading_days[1] - now = pd.Timestamp.utcnow() + # TODO: consider making configurable + bm_symbol = 'btc_usdt' + # if trading_days is None: + # trading_days = get_calendar('OPEN').schedule + + if start_dt is None: + start_dt = get_calendar('OPEN').first_trading_session + + if end_dt is None: + end_dt = pd.Timestamp.utcnow() # We expect to have benchmark and treasury data that's current up until # **two** full trading days prior to the most recently completed trading @@ -121,6 +125,7 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT # We'll attempt to download new data if the latest entry in our cache is # before this date. 
+ ''' if(bundle_data): # If we are using the bundle to retrieve the cryptobenchmark, find the last # date for which there is trading data in the bundle @@ -129,31 +134,44 @@ def load_crypto_market_data(trading_day=None, trading_days=None, bm_symbol='USDT last_date = pd.to_datetime(bundle_data.daily_bar_reader._spot_col('day')[ix],unit='s') else: last_date = trading_days[trading_days.get_loc(now, method='ffill') - 2] - - br = ensure_crypto_benchmark_data( - bm_symbol, - first_date, - last_date, - now, - # We need the trading_day to figure out the close prior to the first - # date so that we can compute returns for the first date. - trading_day, - bundle, - bundle_data, - environ, - ) + ''' + last_date = trading_days[trading_days.get_loc(end_dt, method='ffill') - 1] + + if exchange is None: + # This is exceptional, since placing the import at the module scope + # breaks things and it's only needed here + from catalyst.exchange.poloniex.poloniex import Poloniex + exchange = Poloniex('', '', '') + + benchmark_asset = exchange.get_asset(bm_symbol) + + # exchange.get_history_window() already ensures that we have the right data + # for the right dates + br = exchange.get_history_window( + assets=[benchmark_asset], + end_dt=last_date, + bar_count=pd.Timedelta(last_date - start_dt).days, + frequency='1d', + field='close', + data_frequency='daily') + br.columns = ['close'] + br = br.pct_change(1).iloc[1:] + br.loc[start_dt] = 0 + br = br.sort_index() + # Override first_date for treasury data since we have it for many more years # and is independent of crypto data - first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC') + first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC') tc = ensure_treasury_data( bm_symbol, first_date_treasury, last_date, - now, + end_dt, environ, ) - benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] - treasury_curves = tc[tc.index.slice_indexer(first_date_treasury, last_date)] + benchmark_returns = 
br[br.index.slice_indexer(start_dt, last_date)] + treasury_curves = tc[ + tc.index.slice_indexer(first_date_treasury, last_date)] return benchmark_returns, treasury_curves @@ -251,12 +269,11 @@ def ensure_crypto_benchmark_data(symbol, bundle, bundle_data, environ=None): - filename = get_benchmark_filename(symbol) logger.info( ('Loading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date @@ -277,7 +294,7 @@ def ensure_crypto_benchmark_data(symbol, # If no cached data was found or it was missing any dates then download the # necessary data. - if(bundle == 'poloniex'): + if (bundle == 'poloniex'): ''' If we're using the Poloniex bundle, we'll get the benchmark from the bundle instead of downloading it from Poloniex every time we need it. @@ -285,43 +302,51 @@ def ensure_crypto_benchmark_data(symbol, prevents users abroad from getting Catalyst to work ''' logger.info( - ('Retrieving benchmark data from bundle for {symbol!r} from {first_date} to {last_date}'), + ( + 'Retrieving benchmark data from bundle for {symbol!r} from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date) - asset = bundle_data.asset_finder.lookup_symbol(symbol=symbol,as_of_date=None) + asset = bundle_data.asset_finder.lookup_symbol(symbol=symbol, + as_of_date=None) fields = ['day', 'close'] raw = bundle_data.daily_bar_reader.load_raw_arrays( columns=fields, start_date=first_date - trading_day, end_date=last_date, - assets=[asset,]) - bench_raw = pd.concat([pd.DataFrame(raw[0], columns=['date']),pd.DataFrame(raw[1], columns=['close'])], axis=1) - bench_raw['date'] = pd.to_datetime(bench_raw['date'],unit='s') + assets=[asset, ]) + bench_raw = pd.concat([pd.DataFrame(raw[0], columns=['date']), + pd.DataFrame(raw[1], columns=['close'])], + axis=1) + bench_raw['date'] = pd.to_datetime(bench_raw['date'], unit='s') bench_raw.set_index('date', inplace=True) 
bench_raw.sort_index(inplace=True) - bench_raw = bench_raw[pd.to_datetime(first_date - trading_day):pd.to_datetime(last_date)] + bench_raw = bench_raw[ + pd.to_datetime(first_date - trading_day):pd.to_datetime( + last_date)] else: # This is how it used to be: downloading the benchmark everytime. # Leaving this code here to be repurposed in the future for other bundles. logger.info( - ('Downloading benchmark data for {symbol!r} from {first_date} to {last_date}'), + ( + 'Downloading benchmark data for {symbol!r} from {first_date} to {last_date}'), symbol=symbol, first_date=first_date, last_date=last_date) + raise DeprecationWarning('poloniex bundle deprecated') # Load benchmark symbol from Poloniex API - try: - bundle = PoloniexBundle() - bench_raw = bundle._fetch_symbol_frame( - None, - symbol, - get_calendar(bundle.calendar_name), - first_date - trading_day, - last_date, - 'daily', - ) - except (OSError, IOError, HTTPError): - logger.exception('Failed to fetch new crypto benchmark returns') - raise + # try: + # bundle = PoloniexBundle() + # bench_raw = bundle._fetch_symbol_frame( + # None, + # symbol, + # get_calendar(bundle.calendar_name), + # first_date - trading_day, + # last_date, + # 'daily', + # ) + # except (OSError, IOError, HTTPError): + # logger.exception('Failed to fetch new crypto benchmark returns') + # raise # select close column and compute percent change between days daily_close = bench_raw[['close']] @@ -380,7 +405,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, # necessary data. logger.info( ('Downloading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date - trading_day, last_date=last_date @@ -441,7 +466,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, # necessary data. 
logger.info( ('Downloading benchmark data for {symbol!r} ' - 'from {first_date} to {last_date}'), + 'from {first_date} to {last_date}'), symbol=symbol, first_date=first_date - trading_day, last_date=last_date @@ -525,7 +550,8 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name, data = pd.DataFrame.from_csv(path) if data.empty: raise ValueError("File is empty.") - data.index = pd.to_datetime(data.index, infer_datetime_format=True, errors='coerce' ).tz_localize('UTC') + data.index = pd.to_datetime(data.index, infer_datetime_format=True, + errors='coerce').tz_localize('UTC') if has_data_for_dates(data, first_date, last_date): return data diff --git a/catalyst/data/minute_bars.py b/catalyst/data/minute_bars.py index a08c4ead..83dafa1c 100644 --- a/catalyst/data/minute_bars.py +++ b/catalyst/data/minute_bars.py @@ -44,7 +44,6 @@ from catalyst.utils.calendars import get_calendar from catalyst.utils.cli import maybe_show_progress from catalyst.utils.memoize import lazyval - logger = logbook.Logger('MinuteBars') US_EQUITIES_MINUTES_PER_DAY = 390 @@ -262,14 +261,14 @@ class BcolzMinuteBarMetadata(object): ) def __init__( - self, - default_ohlc_ratio, - ohlc_ratios_per_sid, - calendar, - start_session, - end_session, - minutes_per_day, - version=FORMAT_VERSION, + self, + default_ohlc_ratio, + ohlc_ratios_per_sid, + calendar, + start_session, + end_session, + minutes_per_day, + version=FORMAT_VERSION, ): self.calendar = calendar self.start_session = start_session @@ -342,10 +341,10 @@ class BcolzMinuteBarMetadata(object): 'first_trading_day': str(self.start_session.date()), 'market_opens': ( market_opens.values.astype('datetime64[m]'). - astype(np.int64).tolist()), + astype(np.int64).tolist()), 'market_closes': ( market_closes.values.astype('datetime64[m]'). 
- astype(np.int64).tolist()), + astype(np.int64).tolist()), } with open(self.metadata_path(rootdir), 'w+') as fp: json.dump(metadata, fp) @@ -914,10 +913,10 @@ class BcolzMinuteBarReader(MinuteBarReader): ) self._schedule = self.calendar.schedule[slicer] self._market_opens = self._schedule.market_open - self._market_open_values = self._market_opens.values.\ + self._market_open_values = self._market_opens.values. \ astype('datetime64[m]').astype(np.int64) self._market_closes = self._schedule.market_close - self._market_close_values = self._market_closes.values.\ + self._market_close_values = self._market_closes.values. \ astype('datetime64[m]').astype(np.int64) self._default_ohlc_inverse = 1.0 / metadata.default_ohlc_ratio @@ -1125,7 +1124,7 @@ class BcolzMinuteBarReader(MinuteBarReader): else: return np.nan - #if field != 'volume': + # if field != 'volume': value *= self._ohlc_ratio_inverse_for_sid(sid) return value @@ -1256,16 +1255,16 @@ class BcolzMinuteBarReader(MinuteBarReader): if indices_to_exclude is not None: for excl_start, excl_stop in indices_to_exclude[::-1]: excl_slice = np.s_[ - excl_start - start_idx:excl_stop - start_idx + 1] + excl_start - start_idx:excl_stop - start_idx + 1] values = np.delete(values, excl_slice) where = values != 0 # first slice down to len(where) because we might not have # written data for all the minutes requested - #if field != 'volume': + # if field != 'volume': out[:len(where), i][where] = ( - values[where] * self._ohlc_ratio_inverse_for_sid(sid)) - #else: + values[where] * self._ohlc_ratio_inverse_for_sid(sid)) + # else: # out[:len(where), i][where] = values[where] results.append(out) @@ -1319,9 +1318,9 @@ class H5MinuteBarUpdateWriter(object): def __init__(self, path, complevel=None, complib=None): self._complevel = complevel if complevel \ - is not None else self._COMPLEVEL + is not None else self._COMPLEVEL self._complib = complib if complib \ - is not None else self._COMPLIB + is not None else self._COMPLIB self._path 
= path def write(self, frames): @@ -1353,6 +1352,7 @@ class H5MinuteBarUpdateReader(MinuteBarUpdateReader): path : str The path of the HDF5 file from which to source data. """ + def __init__(self, path): self._panel = pd.read_hdf(path) diff --git a/catalyst/examples/arbitrage_with_interface.py b/catalyst/examples/arbitrage_with_interface.py new file mode 100644 index 00000000..f3625e7a --- /dev/null +++ b/catalyst/examples/arbitrage_with_interface.py @@ -0,0 +1,275 @@ +from logbook import Logger + +from catalyst.api import ( + record, + order, + symbol, + get_open_orders +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'arbitrage_eth_btc' +log = Logger(algo_namespace) + + +def initialize(context): + log.info('initializing arbitrage algorithm') + + # The context contains a new "exchanges" attribute which is a dictionary + # of exchange objects by exchange name. This allow easy access to the + # exchanges. + context.buying_exchange = context.exchanges['poloniex'] + context.selling_exchange = context.exchanges['bitfinex'] + + context.trading_pair_symbol = 'eth_btc' + context.trading_pairs = dict() + + # Note the second parameter of the symbol() method + # Passing the exchange name here returns a TradingPair object including + # the exchange information. This allow all other operations using + # the TradingPair to target the correct exchange. 
+ context.trading_pairs[context.buying_exchange] = \ + symbol('eth_btc', context.buying_exchange.name) + + context.trading_pairs[context.selling_exchange] = \ + symbol(context.trading_pair_symbol, context.selling_exchange.name) + + context.entry_points = [ + dict(gap=0.03, amount=0.05), + dict(gap=0.04, amount=0.1), + dict(gap=0.05, amount=0.5), + ] + context.exit_points = [ + dict(gap=-0.02, amount=0.5), + ] + + context.SLIPPAGE_ALLOWED = 0.02 + pass + + +def place_orders(context, amount, buying_price, selling_price, action): + """ + This method will always place two orders of the same amount to keep + the currency position the same as it moves between the two exchanges. + + :param context: TradingAlgorithm + :param amount: float + The trading pair amount to trade on both exchanges. + :param buying_price: float + The current trading pair price on the buying exchange. + :param selling_price: float + The current trading pair price on the selling exchange. + :param action: string + "enter": buys on the buying exchange and sells on the selling exchange + "exit": buys on the selling exchange and sells on the buying exchange + + :return: + """ + if action == 'enter': + enter_exchange = context.buying_exchange + entry_price = buying_price + + exit_exchange = context.selling_exchange + exit_price = selling_price + + elif action == 'exit': + enter_exchange = context.selling_exchange + entry_price = selling_price + + exit_exchange = context.buying_exchange + exit_price = buying_price + + else: + raise ValueError('invalid order action') + + base_currency = enter_exchange.base_currency + base_currency_amount = enter_exchange.portfolio.cash + + exit_balances = exit_exchange.get_balances() + exit_currency = context.trading_pairs[ + context.selling_exchange].market_currency + + if exit_currency in exit_balances: + market_currency_amount = exit_balances[exit_currency] + else: + log.warn( + 'the selling exchange {exchange_name} does not hold ' + 'currency {currency}'.format( + 
exchange_name=exit_exchange.name, + currency=exit_currency + ) + ) + return + + if base_currency_amount < (amount * entry_price): + adj_amount = base_currency_amount / entry_price + log.warn( + 'not enough {base_currency} ({base_currency_amount}) to buy ' + '{amount}, adjusting the amount to {adj_amount}'.format( + base_currency=base_currency, + base_currency_amount=base_currency_amount, + amount=amount, + adj_amount=adj_amount + ) + ) + amount = adj_amount + + elif market_currency_amount < amount: + log.warn( + 'not enough {currency} ({currency_amount}) to sell ' + '{amount}, aborting'.format( + currency=exit_currency, + currency_amount=market_currency_amount, + amount=amount + ) + ) + return + + adj_buy_price = entry_price * (1 + context.SLIPPAGE_ALLOWED) + log.info( + 'buying {amount} {trading_pair} on {exchange_name} with price ' + 'limit {limit_price}'.format( + amount=amount, + trading_pair=context.trading_pair_symbol, + exchange_name=enter_exchange.name, + limit_price=adj_buy_price + ) + ) + order( + asset=context.trading_pairs[enter_exchange], + amount=amount, + limit_price=adj_buy_price + ) + + adj_sell_price = exit_price * (1 - context.SLIPPAGE_ALLOWED) + log.info( + 'selling {amount} {trading_pair} on {exchange_name} with price ' + 'limit {limit_price}'.format( + amount=-amount, + trading_pair=context.trading_pair_symbol, + exchange_name=exit_exchange.name, + limit_price=adj_sell_price + ) + ) + order( + asset=context.trading_pairs[exit_exchange], + amount=-amount, + limit_price=adj_sell_price + ) + pass + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + + buying_price = data.current( + context.trading_pairs[context.buying_exchange], 'price') + + log.info('price on buying exchange {exchange}: {price}'.format( + exchange=context.buying_exchange.name.upper(), + price=buying_price, + )) + + selling_price = data.current( + context.trading_pairs[context.selling_exchange], 'price') + + log.info('price on selling 
exchange {exchange}: {price}'.format( + exchange=context.selling_exchange.name.upper(), + price=selling_price, + )) + + # If for example, + # selling price = 50 + # buying price = 25 + # expected gap = 1 + + # If follows that, + # selling price - buying price / buying price + # 50 - 25 / 25 = 1 + gap = (selling_price - buying_price) / buying_price + log.info( + 'the price gap: {gap} ({gap_percent}%)'.format( + gap=gap, + gap_percent=gap * 100 + ) + ) + record(buying_price=buying_price, selling_price=selling_price, gap=gap) + + # Waiting for orders to close before initiating new ones + for exchange in context.trading_pairs: + asset = context.trading_pairs[exchange] + + orders = get_open_orders(asset) + if orders: + log.info( + 'found {order_count} open orders on {exchange_name} ' + 'skipping bar until all open orders execute'.format( + order_count=len(orders), + exchange_name=exchange.name + ) + ) + return + + # Consider the least ambitious entry point first + # Override of wider gap is found + entry_points = sorted( + context.entry_points, + key=lambda point: point['gap'], + ) + + buy_amount = None + for entry_point in entry_points: + if gap > entry_point['gap']: + buy_amount = entry_point['amount'] + + if buy_amount: + log.info('found buy trigger for amount: {}'.format(buy_amount)) + place_orders( + context=context, + amount=buy_amount, + buying_price=buying_price, + selling_price=selling_price, + action='enter' + ) + + else: + # Consider the narrowest exit gap first + # Override of wider gap is found + exit_points = sorted( + context.exit_points, + key=lambda point: point['gap'], + reverse=True + ) + + sell_amount = None + for exit_point in exit_points: + if gap < exit_point['gap']: + sell_amount = exit_point['amount'] + + if sell_amount: + log.info('found sell trigger for amount: {}'.format(sell_amount)) + place_orders( + context=context, + amount=sell_amount, + buying_price=buying_price, + selling_price=selling_price, + action='exit' + ) + + +def 
analyze(context, stats): + log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass + + +run_algorithm( + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='poloniex,bitfinex', + live=True, + algo_namespace=algo_namespace, + base_currency='btc', + live_graph=False +) diff --git a/catalyst/examples/buy_btc_simple.py b/catalyst/examples/buy_btc_simple.py new file mode 100644 index 00000000..f7eb8aa0 --- /dev/null +++ b/catalyst/examples/buy_btc_simple.py @@ -0,0 +1,8 @@ +from catalyst.api import order, record, symbol + +def initialize(context): + context.asset = symbol('btc_usd') + +def handle_data(context, data): + order(asset, 1) + record(btc=data.current(context.asset, 'price')) \ No newline at end of file diff --git a/catalyst/examples/buy_low_sell_high.py b/catalyst/examples/buy_low_sell_high.py index e1459ee6..feadf49b 100644 --- a/catalyst/examples/buy_low_sell_high.py +++ b/catalyst/examples/buy_low_sell_high.py @@ -38,6 +38,8 @@ def initialize(context): context.retry_update_portfolio = 10 context.retry_order = 5 + context.swallow_errors = True + context.errors = [] pass @@ -49,6 +51,7 @@ def _handle_data(context, data): bar_count=20, frequency='15m' ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] log.info('got rsi: {}'.format(rsi)) @@ -135,11 +138,11 @@ def _handle_data(context, data): def handle_data(context, data): log.info('handling bar {}'.format(data.current_dt)) - # try: - _handle_data(context, data) - # except Exception as e: - # log.warn('aborting the bar on error {}'.format(e)) - # context.errors.append(e) + try: + _handle_data(context, data) + except Exception as e: + log.warn('aborting the bar on error {}'.format(e)) + context.errors.append(e) log.info('completed bar {}, total execution errors {}'.format( data.current_dt, diff --git a/catalyst/examples/buy_low_sell_high_live.py b/catalyst/examples/buy_low_sell_high_live.py new file mode 100644 index 00000000..6742896f --- /dev/null +++ 
b/catalyst/examples/buy_low_sell_high_live.py @@ -0,0 +1,168 @@ +import talib +from logbook import Logger + +import pandas as pd +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'buy_the_dip_live' +log = Logger('buy low sell high') + + +def initialize(context): + log.info('initializing algo') + context.ASSET_NAME = 'etc_btc' + context.asset = symbol(context.ASSET_NAME) + + context.TARGET_POSITIONS = 3 + context.PROFIT_TARGET = 0.1 + context.SLIPPAGE_ALLOWED = 0.02 + + context.retry_check_open_orders = 10 + context.retry_update_portfolio = 10 + context.retry_order = 5 + + context.errors = [] + pass + + +def _handle_data(context, data): + price = data.current(context.asset, 'price') + log.info('got price {price}'.format(price=price)) + + prices = data.history( + context.asset, + fields='price', + bar_count=20, + frequency='15m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + log.info('got rsi: {}'.format(rsi)) + + # Buying more when RSI is low, this should lower our cost basis + if rsi <= 30: + buy_increment = 1 + elif rsi <= 40: + buy_increment = 0.5 + elif rsi <= 70: + buy_increment = 0.2 + else: + buy_increment = None + + cash = context.portfolio.cash + log.info('base currency available: {cash}'.format(cash=cash)) + + record( + price=price, + rsi=rsi, + ) + + orders = get_open_orders(context.asset) + if orders: + log.info('skipping bar until all open orders execute') + return + + is_buy = False + cost_basis = None + if context.asset in context.portfolio.positions: + position = context.portfolio.positions[context.asset] + + cost_basis = position.cost_basis + log.info( + 'found {amount} positions with cost basis {cost_basis}'.format( + amount=position.amount, + cost_basis=cost_basis + ) + ) + + if position.amount >= context.TARGET_POSITIONS: + log.info('reached positions 
target: {}'.format(position.amount)) + return + + if price < cost_basis: + is_buy = True + elif position.amount > 0 and \ + price > cost_basis * (1 + context.PROFIT_TARGET): + profit = (price * position.amount) - (cost_basis * position.amount) + log.info('closing position, taking profit: {}'.format(profit)) + order_target_percent( + asset=context.asset, + target=0, + limit_price=price * (1 - context.SLIPPAGE_ALLOWED), + ) + else: + log.info('no buy or sell opportunity found') + else: + is_buy = True + + if is_buy: + if buy_increment is None: + log.info('the rsi is too high to consider buying {}'.format(rsi)) + return + + if price * buy_increment > cash: + log.info('not enough base currency to consider buying') + return + + log.info( + 'buying position cheaper than cost basis {} < {}'.format( + price, + cost_basis + ) + ) + order( + asset=context.asset, + amount=buy_increment, + limit_price=price * (1 + context.SLIPPAGE_ALLOWED) + ) + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + # try: + _handle_data(context, data) + # except Exception as e: + # log.warn('aborting the bar on error {}'.format(e)) + # context.errors.append(e) + + log.info('completed bar {}, total execution errors {}'.format( + data.current_dt, + len(context.errors) + )) + + if len(context.errors) > 0: + log.info('the errors:\n{}'.format(context.errors)) + + +def analyze(context, stats): + log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + pass + + +run_algorithm( + capital_base=1, + initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + start=pd.to_datetime('2017-5-01', utc=True), + end=pd.to_datetime('2017-10-01', utc=True), + base_currency='btc', + data_frequency='daily' +) +# run_algorithm( +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='poloniex', +# live=True, +# algo_namespace=algo_namespace, +# base_currency='btc' +# ) diff --git 
a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py new file mode 100644 index 00000000..a7630ef2 --- /dev/null +++ b/catalyst/examples/buy_low_sell_high_neo_with_interface.py @@ -0,0 +1,173 @@ +import talib +from logbook import Logger +import pandas as pd + +from catalyst.api import ( + order, + order_target_percent, + symbol, + record, + get_open_orders, +) +from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.utils.run_algo import run_algorithm + +algo_namespace = 'buy_low_sell_high_neo' +log = Logger(algo_namespace) + + +def initialize(context): + log.info('initializing algo') + context.asset = symbol('neo_btc', 'bitfinex') + + context.TARGET_POSITIONS = 50000 + context.PROFIT_TARGET = 0.1 + context.SLIPPAGE_ALLOWED = 0.02 + + context.retry_check_open_orders = 10 + context.retry_update_portfolio = 10 + context.retry_order = 5 + + context.errors = [] + pass + + +def _handle_data(context, data): + price = data.current(context.asset, 'close') + log.info('got price {price}'.format(price=price)) + + if price is None: + log.warn('no pricing data') + return + + prices = data.history( + context.asset, + fields='price', + bar_count=1, + frequency='1m' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + log.info('got rsi: {}'.format(rsi)) + + # Buying more when RSI is low, this should lower our cost basis + if rsi <= 30: + buy_increment = 1 + elif rsi <= 40: + buy_increment = 0.5 + elif rsi <= 70: + buy_increment = 0.1 + else: + buy_increment = None + + cash = context.portfolio.cash + log.info('base currency available: {cash}'.format(cash=cash)) + + record(price=price) + + orders = get_open_orders(context.asset) + if len(orders) > 0: + log.info('skipping bar until all open orders execute') + return + + is_buy = False + cost_basis = None + if context.asset in context.portfolio.positions: + position = context.portfolio.positions[context.asset] + + cost_basis = position.cost_basis 
+ log.info( + 'found {amount} positions with cost basis {cost_basis}'.format( + amount=position.amount, + cost_basis=cost_basis + ) + ) + + if position.amount >= context.TARGET_POSITIONS: + log.info('reached positions target: {}'.format(position.amount)) + return + + if price < cost_basis: + is_buy = True + elif position.amount > 0 and \ + price > cost_basis * (1 + context.PROFIT_TARGET): + profit = (price * position.amount) - (cost_basis * position.amount) + + log.info('closing position, taking profit: {}'.format(profit)) + order_target_percent( + asset=context.asset, + target=0, + limit_price=price * (1 - context.SLIPPAGE_ALLOWED), + ) + else: + log.info('no buy or sell opportunity found') + else: + is_buy = True + + if is_buy: + if buy_increment is None: + return + + if price * buy_increment > cash: + log.info('not enough base currency to consider buying') + return + + log.info( + 'buying position cheaper than cost basis {} < {}'.format( + price, + cost_basis + ) + ) + limit_price = price * (1 + context.SLIPPAGE_ALLOWED) + order( + asset=context.asset, + amount=buy_increment, + limit_price=limit_price + ) + pass + + +def handle_data(context, data): + log.info('handling bar {}'.format(data.current_dt)) + # try: + _handle_data(context, data) + # except Exception as e: + # log.warn('aborting the bar on error {}'.format(e)) + # context.errors.append(e) + + log.info('completed bar {}, total execution errors {}'.format( + data.current_dt, + len(context.errors) + )) + + if len(context.errors) > 0: + log.info('the errors:\n{}'.format(context.errors)) + + +def analyze(context, stats): + log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) + + pass + + +# run_algorithm( +# initialize=initialize, +# handle_data=handle_data, +# analyze=analyze, +# exchange_name='bitfinex', +# live=True, +# algo_namespace=algo_namespace, +# base_currency='btc', +# live_graph=False +# ) + +# Backtest +run_algorithm( + capital_base=250, + data_frequency='minute', + 
initialize=initialize, + handle_data=handle_data, + analyze=analyze, + exchange_name='bitfinex', + algo_namespace=algo_namespace, + base_currency='btc' +) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py new file mode 100644 index 00000000..dc130b3a --- /dev/null +++ b/catalyst/examples/simple_loop.py @@ -0,0 +1,51 @@ +import pandas as pd +import talib + +from catalyst import run_algorithm +from catalyst.api import symbol + + +def initialize(context): + print('initializing') + context.asset = symbol('xrp_btc') + + +def handle_data(context, data): + print('handling bar: {}'.format(data.current_dt)) + + price = data.current(context.asset, 'close') + print('got price {price}'.format(price=price)) + + prices = data.history( + context.asset, + fields='price', + bar_count=15, + frequency='1d' + ) + rsi = talib.RSI(prices.values, timeperiod=14)[-1] + print('got rsi: {}'.format(rsi)) + pass + + +# run_algorithm( +# capital_base=250, +# start=pd.to_datetime('2015-08-01', utc=True), +# end=pd.to_datetime('2017-9-30', utc=True), +# data_frequency='daily', +# initialize=initialize, +# handle_data=handle_data, +# analyze=None, +# exchange_name='poloniex', +# algo_namespace='simple_loop', +# base_currency='eth' +# ) +run_algorithm( + initialize=initialize, + handle_data=handle_data, + analyze=None, + exchange_name='bitfinex', + live=True, + algo_namespace='simple_loop', + base_currency='eth', + live_graph=False +) diff --git a/catalyst/exchange/asset_finder_exchange.py b/catalyst/exchange/asset_finder_exchange.py index 2239f8a8..be8c4a6e 100644 --- a/catalyst/exchange/asset_finder_exchange.py +++ b/catalyst/exchange/asset_finder_exchange.py @@ -4,8 +4,7 @@ log = Logger('AssetFinderExchange') class AssetFinderExchange(object): - def __init__(self, exchange): - self.exchange = exchange + def __init__(self): self._asset_cache = {} @property @@ -47,7 +46,7 @@ class AssetFinderExchange(object): log.info('fetching asset: {}'.format(sid)) return list() - 
def lookup_symbol(self, symbol, as_of_date, fuzzy=False): + def lookup_symbol(self, symbol, exchange, as_of_date=None, fuzzy=False): """Lookup an asset by symbol. Parameters @@ -81,11 +80,12 @@ class AssetFinderExchange(object): there are multiple candidates for the given ``symbol`` on the ``as_of_date``. """ - log.debug('looking up symbol: {}'.format(symbol)) + log.debug('looking up symbol: {} {}'.format(symbol, exchange.name)) - if symbol in self._asset_cache: - return self._asset_cache[symbol] + key = ','.join([exchange.name, symbol]) + if key in self._asset_cache: + return self._asset_cache[key] else: - asset = self.exchange.get_asset(symbol) - self._asset_cache[symbol] = asset + asset = exchange.get_asset(symbol) + self._asset_cache[key] = asset return asset diff --git a/catalyst/exchange/bitfinex/bitfinex.py b/catalyst/exchange/bitfinex/bitfinex.py index cb33c3ee..1817ea2d 100644 --- a/catalyst/exchange/bitfinex/bitfinex.py +++ b/catalyst/exchange/bitfinex/bitfinex.py @@ -4,6 +4,7 @@ import hmac import json import re import time +import datetime import numpy as np import pandas as pd @@ -13,8 +14,8 @@ import six from catalyst.assets._assets import TradingPair from logbook import Logger -# from websocket import create_connection from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, InvalidHistoryFrequencyError, @@ -23,7 +24,8 @@ from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ ExchangeStopLimitOrder, ExchangeStopOrder from catalyst.finance.order import Order, ORDER_STATUS from catalyst.protocol import Account -from catalyst.exchange.exchange_utils import get_exchange_symbols_filename +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols # Trying to account for REST api instability # 
https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request @@ -41,6 +43,7 @@ class Bitfinex(Exchange): self.key = key self.secret = secret.encode('UTF-8') self.name = 'bitfinex' + self.color = 'green' self.assets = {} self.load_assets() self.base_currency = base_currency @@ -48,6 +51,16 @@ class Bitfinex(Exchange): self.minute_writer = None self.minute_reader = None + # The candle limit for each request + self.num_candles_limit = 1000 + + # Max is 90 but playing it safe + # https://www.bitfinex.com/posts/188 + self.max_requests_per_minute = 20 + self.request_cpt = dict() + + self.bundle = ExchangeBundle(self) + def _request(self, operation, data, version='v1'): payload_object = { 'request': '/{}/{}'.format(version, operation), @@ -174,6 +187,7 @@ class Bitfinex(Exchange): def get_balances(self): log.debug('retrieving wallets balances') try: + self.ask_request() response = self._request('balances', None) balances = response.json() except Exception as e: @@ -224,7 +238,8 @@ class Bitfinex(Exchange): # TODO: fetch account data and keep in cache return None - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_dt=None, end_dt=None): """ Retrieve OHLVC candles from Bitfinex @@ -239,7 +254,6 @@ class Bitfinex(Exchange): '1M' """ - # TODO: use BcolzMinuteBarReader to read from cache freq_match = re.match(r'([0-9].*)(m|h|d)', data_frequency, re.M | re.I) if freq_match: number = int(freq_match.group(1)) @@ -281,11 +295,27 @@ class Bitfinex(Exchange): if bar_count: is_list = True url += '/hist?limit={}'.format(int(bar_count)) + + def get_ms(date): + epoch = datetime.datetime.utcfromtimestamp(0) + epoch = epoch.replace(tzinfo=pytz.UTC) + + return (date - epoch).total_seconds() * 1000.0 + + if start_dt is not None: + start_ms = get_ms(start_dt) + url += '&start={0:f}'.format(start_ms) + + if end_dt is not None: + end_ms = get_ms(end_dt) + url += 
'&end={0:f}'.format(end_ms) + else: is_list = False url += '/last' try: + self.ask_request() response = requests.get(url) except Exception as e: raise ExchangeRequestError(error=e) @@ -299,6 +329,9 @@ class Bitfinex(Exchange): candles = response.json() def ohlc_from_candle(candle): + last_traded = pd.Timestamp.utcfromtimestamp( + candle[0] / 1000.0) + last_traded = last_traded.replace(tzinfo=pytz.UTC) ohlc = dict( open=np.float64(candle[1]), high=np.float64(candle[3]), @@ -306,8 +339,7 @@ class Bitfinex(Exchange): close=np.float64(candle[2]), volume=np.float64(candle[5]), price=np.float64(candle[2]), - last_traded=pd.Timestamp.utcfromtimestamp( - candle[0] / 1000.0) + last_traded=last_traded ) return ohlc @@ -368,6 +400,7 @@ class Bitfinex(Exchange): date = pd.Timestamp.utcnow() try: + self.ask_request() response = self._request('order/new', req) order_status = response.json() except Exception as e: @@ -409,6 +442,7 @@ class Bitfinex(Exchange): orders for this asset. """ try: + self.ask_request() response = self._request('orders', None) order_statuses = response.json() except Exception as e: @@ -420,7 +454,7 @@ class Bitfinex(Exchange): order_statuses['message']) ) - orders = list() + orders = [] for order_status in order_statuses: order, executed_price = self._create_order(order_status) if asset is None or asset == order.sid: @@ -443,6 +477,7 @@ class Bitfinex(Exchange): The order object. 
""" try: + self.ask_request() response = self._request( 'order/status', {'order_id': int(order_id)}) order_status = response.json() @@ -468,6 +503,7 @@ class Bitfinex(Exchange): if isinstance(order_param, Order) else order_param try: + self.ask_request() response = self._request('order/cancel', {'order_id': order_id}) status = response.json() except Exception as e: @@ -492,6 +528,7 @@ class Bitfinex(Exchange): log.debug('fetching tickers {}'.format(symbols)) try: + self.ask_request() response = requests.get( '{url}/v2/tickers?symbols={symbols}'.format( url=self.url, @@ -507,7 +544,10 @@ class Bitfinex(Exchange): response.content) ) - tickers = response.json() + try: + tickers = response.json() + except Exception as e: + raise ExchangeRequestError(error=e) ticks = dict() for index, ticker in enumerate(tickers): @@ -529,14 +569,122 @@ class Bitfinex(Exchange): log.debug('got tickers {}'.format(ticks)) return ticks - def generate_symbols_json(self, filename=None): + def generate_symbols_json(self, filename=None, source_dates=False): symbol_map = {} - response = self._request('symbols', None) - for symbol in response.json(): - symbol_map[symbol]= {"symbol":symbol[:-3]+'_'+symbol[-3:], "start_date": "2010-01-01"} - if(filename is None): + if not source_dates: + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + + response = self._request('symbols', None) + + for symbol in response.json(): + if (source_dates): + start_date = self.get_symbol_start_date(symbol) + else: + try: + start_date = cached_symbols[symbol]['start_date'] + except KeyError as e: + start_date = time.strftime('%Y-%m-%d') + + try: + end_daily = cached_symbols[symbol]['end_daily'] + except KeyError as e: + end_daily = 'N/A' + + try: + end_minute = cached_symbols[symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + + symbol_map[symbol] = dict( + symbol=symbol[:-3] + '_' + symbol[-3:], + start_date=start_date, + 
end_daily=end_daily, + end_minute=end_minute, + ) + + if (filename is None): filename = get_exchange_symbols_filename(self.name) - with open(filename,'w') as f: - json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + with open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) + + def get_symbol_start_date(self, symbol): + + print(symbol) + symbol_v2 = 't' + symbol.upper() + + """ + For each symbol we retrieve candles with Monhtly resolution + We get the first month, and query again with daily resolution + around that date, and we get the first date + """ + url = '{url}/v2/candles/trade:1M:{symbol}/hist'.format( + url=self.url, + symbol=symbol_v2 + ) + + try: + self.ask_request() + response = requests.get(url) + except Exception as e: + raise ExchangeRequestError(error=e) + + """ + If we don't get any data back for our monthly-resolution query + it means that symbol started trading less than a month ago, so + arbitrarily set the ref. date to 15 days ago to be safe with + +/- 31 days + """ + if (len(response.json())): + startmonth = response.json()[-1][0] + else: + startmonth = int((time.time() - 15 * 24 * 3600) * 1000) + + """ + Query again with daily resolution setting the start and end around + the startmonth we got above. 
Avoid end dates greater than now: time.time() + """ + url = '{url}/v2/candles/trade:1D:{symbol}/hist?start={start}&end={end}'.format( + url=self.url, + symbol=symbol_v2, + start=startmonth - 3600 * 24 * 31 * 1000, + end=min(startmonth + 3600 * 24 * 31 * 1000, + int(time.time() * 1000)) + ) + + try: + self.ask_request() + response = requests.get(url) + except Exception as e: + raise ExchangeRequestError(error=e) + + return time.strftime('%Y-%m-%d', + time.gmtime(int(response.json()[-1][0] / 1000))) + + def get_orderbook(self, asset, order_type='all'): + exchange_symbol = asset.exchange_symbol + try: + self.ask_request() + response = self._request( + 'book/{}'.format(exchange_symbol), None) + data = response.json() + + except Exception as e: + raise ExchangeRequestError(error=e) + + # TODO: filter by type + result = dict() + for order_type in data: + result[order_type] = [] + + for entry in data[order_type]: + result[order_type].append(dict( + rate=float(entry['price']), + quantity=float(entry['amount']) + )) + + return result diff --git a/catalyst/exchange/bitfinex/symbols.json b/catalyst/exchange/bitfinex/symbols.json index 8ab44191..ab0f38f9 100644 --- a/catalyst/exchange/bitfinex/symbols.json +++ b/catalyst/exchange/bitfinex/symbols.json @@ -1,4 +1,17 @@ { + "neobtc": { + "symbol": "neo_btc", + "start_date": "2017-09-07", + "precision": 5 + }, + "neousd": { + "symbol": "neo_usd", + "start_date": "2017-09-07" + }, + "neoeth": { + "symbol": "neo_eth", + "start_date": "2017-09-07" + }, "btcusd": { "symbol": "btc_usd", "start_date": "2010-01-01" @@ -17,19 +30,19 @@ }, "ethusd": { "symbol": "eth_usd", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "ethbtc": { "symbol": "eth_btc", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "etcbtc": { "symbol": "etc_btc", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "etcusd": { "symbol": "etc_usd", - "start_date": "2010-01-01" + "start_date": "2017-01-01" }, "rrtusd": { "symbol": 
"rrt_usd", diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 6c0dfbc1..dcc9b943 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -7,13 +7,14 @@ from six.moves import urllib from catalyst.exchange.bittrex.bittrex_api import Bittrex_api from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import InvalidHistoryFrequencyError, \ ExchangeRequestError, InvalidOrderStyle, OrderNotFound, OrderCancelError, \ CreateOrderError from catalyst.finance.execution import LimitOrder, StopLimitOrder from catalyst.finance.order import Order, ORDER_STATUS -from catalyst.exchange.exchange_utils import get_exchange_symbols_filename - +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols log = Logger('Bittrex') @@ -24,15 +25,25 @@ class Bittrex(Exchange): def __init__(self, key, secret, base_currency, portfolio=None): self.api = Bittrex_api(key=key, secret=secret.encode('UTF-8')) self.name = 'bittrex' + self.color = 'blue' self.base_currency = base_currency self._portfolio = portfolio + self.num_candles_limit = 2000 + + # Not sure what the rate limit is but trying to play it safe + # https://bitcoin.stackexchange.com/questions/53778/bittrex-api-rate-limit + self.max_requests_per_minute = 60 + self.request_cpt = dict() + self.minute_writer = None self.minute_reader = None self.assets = dict() self.load_assets() + self.bundle = ExchangeBundle(self) + @property def account(self): pass @@ -55,14 +66,21 @@ class Bittrex(Exchange): def get_balances(self): try: log.debug('retrieving wallet balances') + self.ask_request() balances = self.api.getbalances() + except Exception as e: raise ExchangeRequestError(error=e) std_balances = dict() - for balance in balances: - currency = balance['Currency'].lower() - std_balances[currency] = balance['Available'] + try: + 
for balance in balances: + currency = balance['Currency'].lower() + std_balances[currency] = balance['Available'] + + except TypeError: + raise ExchangeRequestError(error=balances) + return std_balances def create_order(self, asset, amount, is_buy, style): @@ -75,6 +93,7 @@ class Bittrex(Exchange): price = style.get_limit_price(is_buy) try: + self.ask_request() if is_buy: order_status = self.api.buylimit(exchange_symbol, amount, price) @@ -115,6 +134,7 @@ class Bittrex(Exchange): def get_open_orders(self, asset): symbol = self.get_symbol(asset) try: + self.ask_request() open_orders = self.api.getopenorders(symbol) except Exception as e: raise ExchangeRequestError(error=e) @@ -158,6 +178,7 @@ class Bittrex(Exchange): def get_order(self, order_id): log.info('retrieving order {}'.format(order_id)) try: + self.ask_request() order_status = self.api.getorder(order_id) except Exception as e: raise ExchangeRequestError(error=e) @@ -173,6 +194,7 @@ class Bittrex(Exchange): log.info('cancelling order {}'.format(order_id)) try: + self.ask_request() status = self.api.cancel(order_id) except Exception as e: raise ExchangeRequestError(error=e) @@ -184,7 +206,8 @@ class Bittrex(Exchange): error=status['message'] ) - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_date=None): """ Supported Intervals ------------------- @@ -275,6 +298,7 @@ class Bittrex(Exchange): for asset in assets: symbol = self.get_symbol(asset) try: + self.ask_request() ticker = self.api.getticker(symbol) except Exception as e: raise ExchangeRequestError(error=e) @@ -296,6 +320,11 @@ class Bittrex(Exchange): def generate_symbols_json(self, filename=None): symbol_map = {} + + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + markets = self.api.getmarkets() for market in markets: exchange_symbol = market['MarketName'] @@ -303,13 +332,57 @@ class 
Bittrex(Exchange): market=self.sanitize_curency_symbol(market['MarketCurrency']), base=self.sanitize_curency_symbol(market['BaseCurrency']) ) + + try: + end_daily = cached_symbols[exchange_symbol]['end_daily'] + except KeyError as e: + end_daily = 'N/A' + + try: + end_minute = cached_symbols[exchange_symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + symbol_map[exchange_symbol] = dict( symbol=symbol, - start_date=pd.to_datetime(market['Created'], utc=True).strftime("%Y-%m-%d") + start_date=pd.to_datetime(market['Created'], + utc=True).strftime("%Y-%m-%d"), + end_daily=end_daily, + end_minute=end_minute, ) - if(filename is None): + if (filename is None): filename = get_exchange_symbols_filename(self.name) - with open(filename,'w') as f: - json.dump(symbol_map, f, sort_keys=True, indent=2, separators=(',',':')) + with open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) + + def get_orderbook(self, asset, order_type='all'): + if order_type == 'all': + order_type = 'both' + elif order_type == 'bid': + order_type = 'buy' + elif order_type == 'ask': + order_type = 'sell' + else: + raise ValueError('invalid type') + + exchange_symbol = asset.exchange_symbol + data = self.api.getorderbook(market=exchange_symbol, type=order_type) + + result = dict() + for exchange_type in data: + if exchange_type == 'buy': + order_type = 'bids' + elif exchange_type == 'sell': + order_type = 'asks' + + result[order_type] = [] + for entry in data[exchange_type]: + result[order_type].append(dict( + rate=entry['Rate'], + quantity=entry['Quantity'] + )) + + return result diff --git a/catalyst/exchange/bittrex/extensions-example.py b/catalyst/exchange/bittrex/extensions-example.py new file mode 100644 index 00000000..33ffb4f5 --- /dev/null +++ b/catalyst/exchange/bittrex/extensions-example.py @@ -0,0 +1,7 @@ +from catalyst.data.bundles import register +from catalyst.exchange.exchange_bundle import exchange_bundle + +symbols = ( 
+ 'neo_btc', +) +register('exchange_bitfinex', exchange_bundle('bitfinex', symbols)) \ No newline at end of file diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py new file mode 100644 index 00000000..fe622b52 --- /dev/null +++ b/catalyst/exchange/bundle_utils.py @@ -0,0 +1,254 @@ +import calendar +import os +import tarfile +from datetime import timedelta, datetime, date + +import numpy as np +import pandas as pd +import pytz + +from catalyst.data.bundles import from_bundle_ingest_dirname +from catalyst.data.bundles.core import download_without_progress +from catalyst.exchange.exchange_errors import NoDataAvailableOnExchange +from catalyst.exchange.exchange_utils import get_exchange_bundles_folder +from catalyst.utils.deprecate import deprecated +from catalyst.utils.paths import data_path + +EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex'] +API_URL = 'http://data.enigma.co/api/v1' + + +def get_date_from_ms(ms): + return datetime.fromtimestamp(ms / 1000.0) + + +def get_seconds_from_date(date): + epoch = datetime.utcfromtimestamp(0) + epoch = epoch.replace(tzinfo=pytz.UTC) + + return int((date - epoch).total_seconds()) + + +def get_bcolz_chunk(exchange_name, symbol, data_frequency, period): + """ + Download and extract a bcolz bundle. 
+ + :param exchange_name: + :param symbol: + :param data_frequency: + :param period: + :return: + + Note: + Filename: bitfinex-daily-neo_eth-2017-10.tar.gz + """ + + root = get_exchange_bundles_folder(exchange_name) + name = '{exchange}-{frequency}-{symbol}-{period}'.format( + exchange=exchange_name, + frequency=data_frequency, + symbol=symbol, + period=period + ) + path = os.path.join(root, name) + + if not os.path.isdir(path): + url = 'https://s3.amazonaws.com/enigmaco/catalyst-bundles/' \ + 'exchange-{exchange}/{name}.tar.gz'.format( + exchange=exchange_name, + name=name + ) + + bytes = download_without_progress(url) + with tarfile.open('r', fileobj=bytes) as tar: + tar.extractall(path) + + return path + + +def get_delta(periods, data_frequency): + return timedelta(minutes=periods) \ + if data_frequency == 'minute' else timedelta(days=periods) + + +def get_periods_range(start_dt, end_dt, data_frequency): + freq = 'T' if data_frequency == 'minute' else 'D' + + return pd.date_range(start_dt, end_dt, freq=freq) + + +def get_periods(start_dt, end_dt, data_frequency): + delta = end_dt - start_dt + + if data_frequency == 'minute': + delta_periods = delta.total_seconds() / 60 + + elif data_frequency == 'daily': + delta_periods = delta.total_seconds() / 60 / 60 / 24 + + else: + raise ValueError('frequency not supported') + + return int(delta_periods) + + +def get_start_dt(end_dt, bar_count, data_frequency): + periods = bar_count + if periods > 1: + delta = get_delta(periods, data_frequency) + start_dt = end_dt - delta + else: + start_dt = end_dt + + return start_dt + + +def get_adj_dates(start, end, assets, data_frequency): + """ + Contains a date range to the trading availability of the specified pairs. 
+ + :param start: + :param end: + :param assets: + :param data_frequency: + :return: + """ + earliest_trade = None + last_entry = None + for asset in assets: + if earliest_trade is None or earliest_trade > asset.start_date: + earliest_trade = asset.start_date + + end_asset = asset.end_minute if data_frequency == 'minute' else \ + asset.end_daily + if end_asset is not None and \ + (last_entry is None or end_asset > last_entry): + last_entry = end_asset + + if start is None or earliest_trade > start: + start = earliest_trade + + if end is None or (last_entry is not None and end > last_entry): + end = last_entry + + if end is None or start >= end: + raise NoDataAvailableOnExchange( + exchange=asset.exchange.title(), + symbol=[asset.symbol.encode('utf-8')], + data_frequency=data_frequency, + ) + + return start, end + + +def get_month_start_end(dt): + """ + Returns the first and last day of the month for the specified date. + + :param dt: + :return: + """ + month_range = calendar.monthrange(dt.year, dt.month) + month_start = pd.to_datetime(datetime( + dt.year, dt.month, 1, 0, 0, 0, 0 + ), utc=True) + + month_end = pd.to_datetime(datetime( + dt.year, dt.month, month_range[1], 23, 59, 0, 0 + ), utc=True) + + return month_start, month_end + + +def get_year_start_end(dt): + """ + Returns the first and last day of the year for the specified date. + + :param dt: + :return: + """ + year_start = pd.to_datetime(date(dt.year, 1, 1), utc=True) + year_end = pd.to_datetime(date(dt.year, 12, 31), utc=True) + + return year_start, year_end + + +def get_df_from_arrays(arrays, periods): + ohlcv = dict() + for index, field in enumerate( + ['open', 'high', 'low', 'close', 'volume']): + ohlcv[field] = arrays[index].flatten() + + df = pd.DataFrame( + data=ohlcv, + index=periods + ) + return df + + +def range_in_bundle(asset, start_dt, end_dt, reader): + """ + Evaluate whether price data of an asset is included has been ingested in + the exchange bundle for the given date range. 
+ + :param asset: + :param start_dt: + :param end_dt: + :param reader: + :return: + """ + has_data = True + if has_data and reader is not None: + try: + start_close = \ + reader.get_value(asset.sid, start_dt, 'close') + + if np.isnan(start_close): + has_data = False + + else: + end_close = reader.get_value(asset.sid, end_dt, 'close') + + if np.isnan(end_close): + has_data = False + + except Exception as e: + has_data = False + + else: + has_data = False + + return has_data + + +@deprecated +def find_most_recent_time(bundle_name): + """ + Find most recent "time folder" for a given bundle. + + :param bundle_name: + The name of the targeted bundle. + + :return folder: + The name of the time folder. + """ + try: + bundle_folders = os.listdir( + data_path([bundle_name]), + ) + except OSError: + return None + + most_recent_bundle = dict() + for folder in bundle_folders: + date = from_bundle_ingest_dirname(folder) + if not most_recent_bundle or date > \ + most_recent_bundle[most_recent_bundle.keys()[0]]: + most_recent_bundle = dict() + most_recent_bundle[folder] = date + + if most_recent_bundle: + return most_recent_bundle.keys()[0] + else: + return None + diff --git a/catalyst/exchange/data_portal_exchange.py b/catalyst/exchange/data_portal_exchange.py index 77a7cb76..468fb8a1 100644 --- a/catalyst/exchange/data_portal_exchange.py +++ b/catalyst/exchange/data_portal_exchange.py @@ -11,29 +11,37 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import abc from time import sleep +import numpy as np +import pandas as pd +from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import DataPortal +from catalyst.exchange.bundle_utils import get_start_dt +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import ( ExchangeRequestError, - ExchangeBarDataError -) + ExchangeBarDataError, + PricingDataBeforeTradingError, + PricingDataNotLoadedError, InvalidHistoryFrequencyError, + BundleNotFoundError) log = Logger('DataPortalExchange') -class DataPortalExchange(DataPortal): - def __init__(self, exchange, *args, **kwargs): - self.exchange = exchange +class DataPortalExchangeBase(DataPortal): + def __init__(self, *args, **kwargs): + self.exchanges = kwargs.pop('exchanges', None) # TODO: put somewhere accessible by each algo self.retry_get_history_window = 5 self.retry_get_spot_value = 5 self.retry_delay = 5 - super(DataPortalExchange, self).__init__(*args, **kwargs) + super(DataPortalExchangeBase, self).__init__(*args, **kwargs) def _get_history_window(self, assets, @@ -45,14 +53,46 @@ class DataPortalExchange(DataPortal): ffill=True, attempt_index=0): try: - return self.exchange.get_history_window( - assets, - end_dt, - bar_count, - frequency, - field, - data_frequency, - ffill) + exchange_assets = dict() + for asset in assets: + if asset.exchange not in exchange_assets: + exchange_assets[asset.exchange] = list() + + exchange_assets[asset.exchange].append(asset) + + if len(exchange_assets) > 1: + df_list = [] + for exchange_name in exchange_assets: + exchange = self.exchanges[exchange_name] + assets = exchange_assets[exchange_name] + + df_exchange = self.get_exchange_history_window( + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + + df_list.append(df_exchange) + + # Merging the values values of each exchange + return pd.concat(df_list) + + else: + exchange = 
self.exchanges[exchange_assets.keys()[0]] + return self.get_exchange_history_window( + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + except ExchangeRequestError as e: log.warn( 'get history attempt {}: {}'.format(attempt_index, e) @@ -80,8 +120,12 @@ class DataPortalExchange(DataPortal): bar_count, frequency, field, - data_frequency, + data_frequency=None, ffill=True): + + if field == 'price': + field = 'close' + return self._get_history_window(assets, end_dt, bar_count, @@ -90,11 +134,63 @@ class DataPortalExchange(DataPortal): data_frequency, ffill) + @abc.abstractmethod + def get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + pass + def _get_spot_value(self, assets, field, dt, data_frequency, attempt_index=0): try: - return self.exchange.get_spot_value(assets, field, dt, - data_frequency) + if isinstance(assets, TradingPair): + exchange = self.exchanges[assets.exchange] + spot_values = self.get_exchange_spot_value( + exchange, [assets], field, dt, data_frequency) + + if not spot_values: + return np.nan + + return spot_values[0] + + else: + exchange_assets = dict() + for asset in assets: + if asset.exchange not in exchange_assets: + exchange_assets[asset.exchange] = list() + + exchange_assets[asset.exchange].append(asset) + + if len(exchange_assets.keys()) == 1: + exchange = self.exchanges[exchange_assets.keys()[0]] + return self.get_exchange_spot_value( + exchange, assets, field, dt, data_frequency) + + else: + spot_values = [] + for exchange_name in exchange_assets: + exchange = self.exchanges[exchange_name] + assets = exchange_assets[exchange_name] + exchange_spot_values = self.get_exchange_spot_value( + exchange, + assets, + field, + dt, + data_frequency + ) + if len(assets) == 1: + spot_values.append(exchange_spot_values) + else: + spot_values += exchange_spot_values + + return spot_values + except ExchangeRequestError as e: 
log.warn( 'get spot value attempt {}: {}'.format(attempt_index, e) @@ -111,11 +207,139 @@ class DataPortalExchange(DataPortal): ) def get_spot_value(self, assets, field, dt, data_frequency): + if field == 'price': + field = 'close' + return self._get_spot_value(assets, field, dt, data_frequency) + @abc.abstractmethod + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + return + def get_adjusted_value(self, asset, field, dt, perspective_dt, data_frequency, spot_value=None): # TODO: does this pertain to cryptocurrencies? - raise NotImplementedError("get_adjusted_value is not implemented yet!") + log.warn('get_adjusted_value is not implemented yet!') + return spot_value + + +class DataPortalExchangeLive(DataPortalExchangeBase): + def __init__(self, *args, **kwargs): + super(DataPortalExchangeLive, self).__init__(*args, **kwargs) + + def get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + df = exchange.get_history_window( + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill) + return df + + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + exchange_spot_values = exchange.get_spot_value( + assets, field, dt, data_frequency) + + return exchange_spot_values + + +class DataPortalExchangeBacktest(DataPortalExchangeBase): + def __init__(self, *args, **kwargs): + super(DataPortalExchangeBacktest, self).__init__(*args, **kwargs) + + self.exchange_bundles = dict() + + self.history_loaders = dict() + self.minute_history_loaders = dict() + + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] + self.exchange_bundles[exchange_name] = ExchangeBundle(exchange) + + def _get_first_trading_day(self, assets): + first_date = None + for asset in assets: + if first_date is None or asset.start_date > first_date: + first_date = asset.start_date + return first_date + + def 
get_exchange_history_window(self, + exchange, + assets, + end_dt, + bar_count, + frequency, + field, + data_frequency, + ffill=True): + """ + Fetching price history window from the exchange bundle. + + Using a try... except approach to minimize reads most of the time, + when the data exists. + + :param exchange: + :param assets: + :param end_dt: + :param bar_count: + :param frequency: + :param field: + :param data_frequency: + :param ffill: + :return: + """ + + bundle = self.exchange_bundles[exchange.name] + series = bundle.get_history_window_series_and_load( + assets=assets, + end_dt=end_dt, + bar_count=bar_count, + field=field, + data_frequency=data_frequency + ) + return pd.DataFrame(series) + + def get_exchange_spot_value(self, exchange, assets, field, dt, + data_frequency): + bundle = self.exchange_bundles[exchange.name] + + if data_frequency == 'daily': + dt = dt.floor('1D') + else: + dt = dt.floor('1 min') + + try: + return bundle.get_spot_values(assets, field, dt, data_frequency) + + except PricingDataNotLoadedError: + log.info( + 'pricing data for {symbol} not found on {dt}' + ', updating the bundles.'.format( + symbol=[asset.symbol for asset in assets], + dt=dt + ) + ) + bundle.ingest_assets( + assets=assets, + start_dt=self._first_trading_day, + end_dt=self._last_available_session, + data_frequency=data_frequency, + show_progress=True + ) + return bundle.get_spot_values( + assets, field, dt, data_frequency, True + ) diff --git a/catalyst/exchange/exchange.py b/catalyst/exchange/exchange.py index b0d0f5e2..f8c902a8 100644 --- a/catalyst/exchange/exchange.py +++ b/catalyst/exchange/exchange.py @@ -1,7 +1,7 @@ import abc -import collections -import random +import re from abc import ABCMeta, abstractmethod, abstractproperty +from datetime import timedelta from time import sleep import numpy as np @@ -10,11 +10,13 @@ from catalyst.assets._assets import TradingPair from logbook import Logger from catalyst.data.data_portal import BASE_FIELDS -from 
catalyst.errors import ( - SymbolNotFound, -) +from catalyst.exchange.bundle_utils import get_start_dt, \ + get_delta, get_periods, get_adj_dates +from catalyst.exchange.exchange_bundle import ExchangeBundle from catalyst.exchange.exchange_errors import MismatchingBaseCurrencies, \ - InvalidOrderStyle, BaseCurrencyNotFoundError + InvalidOrderStyle, BaseCurrencyNotFoundError, SymbolNotFoundOnExchange, \ + InvalidHistoryFrequencyError, MismatchingFrequencyError, \ + BundleNotFoundError, NoDataAvailableOnExchange, PricingDataNotLoadedError from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_portfolio import ExchangePortfolio @@ -30,13 +32,17 @@ class Exchange: def __init__(self): self.name = None - self.trading_pairs = None self.assets = {} self._portfolio = None self.minute_writer = None self.minute_reader = None self.base_currency = None + self.num_candles_limit = None + self.max_requests_per_minute = None + self.request_cpt = None + self.bundle = ExchangeBundle(self) + @property def positions(self): return self.portfolio.positions @@ -64,6 +70,44 @@ class Exchange: def time_skew(self): pass + def ask_request(self): + """ + Asks permission to issue a request to the exchange. + The primary purpose is to avoid hitting rate limits. + + The application will pause if the maximum requests per minute + permitted by the exchange is exceeded. 
+ + :return boolean: + + """ + now = pd.Timestamp.utcnow() + if not self.request_cpt: + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + cpt_date = self.request_cpt.keys()[0] + cpt = self.request_cpt[cpt_date] + + if now > cpt_date + timedelta(minutes=1): + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + + if cpt >= self.max_requests_per_minute: + delta = now - cpt_date + + sleep_period = 60 - delta.total_seconds() + sleep(sleep_period) + + now = pd.Timestamp.utcnow() + self.request_cpt = dict() + self.request_cpt[now] = 0 + return True + else: + self.request_cpt[cpt_date] += 1 + def get_symbol(self, asset): """ Get the exchange specific symbol of the given asset. @@ -79,7 +123,7 @@ class Exchange: if not symbol: raise ValueError('Currency %s not supported by exchange %s' % - (asset['symbol'], self.name)) + (asset['symbol'], self.name.title())) return symbol @@ -97,6 +141,19 @@ class Exchange: return symbols + def get_assets(self, symbols=None): + assets = [] + + if symbols is not None: + for symbol in symbols: + asset = self.get_asset(symbol) + assets.append(asset) + else: + for key in self.assets: + assets.append(self.assets[key]) + + return assets + def get_asset(self, symbol): """ Find an Asset on the current exchange based on its Catalyst symbol @@ -110,7 +167,13 @@ class Exchange: asset = self.assets[key] if not asset: - raise SymbolNotFound(symbol=symbol) + supported_symbols = [pair.symbol.encode('utf-8') for pair in + self.assets.values()] + raise SymbolNotFoundOnExchange( + symbol=symbol, + exchange=self.name.title(), + supported_symbols=supported_symbols + ) return asset @@ -159,13 +222,32 @@ class Exchange: else: asset_name = None + if 'min_trade_size' in asset: + min_trade_size = asset['min_trade_size'] + else: + min_trade_size = 0.0000001 + + if 'end_daily' in asset and asset['end_daily'] != 'N/A': + end_daily = pd.to_datetime(asset['end_daily'], utc=True) + else: + end_daily = None + + if 'end_minute' in 
asset and asset['end_minute'] != 'N/A': + end_minute = pd.to_datetime(asset['end_minute'], utc=True) + else: + end_minute = None + trading_pair = TradingPair( symbol=asset['symbol'], exchange=self.name, start_date=start_date, end_date=end_date, leverage=leverage, - asset_name=asset_name + asset_name=asset_name, + min_trade_size=min_trade_size, + end_daily=end_daily, + end_minute=end_minute, + exchange_symbol=exchange_symbol ) self.assets[exchange_symbol] = trading_pair @@ -247,19 +329,14 @@ class Exchange: '1D', '7D', '14D', '1M' """ if field not in BASE_FIELDS: - raise KeyError('Invalid column: ' + str(field)) + raise KeyError('Invalid column: {}'.format(field)) - if isinstance(assets, collections.Iterable): - values = list() - for asset in assets: - value = self.get_single_spot_value( - asset, field, data_frequency) - values.append(value) + values = [] + for asset in assets: + value = self.get_single_spot_value(asset, field, data_frequency) + values.append(value) - return values - else: - return self.get_single_spot_value( - assets, field, data_frequency) + return values def get_single_spot_value(self, asset, field, data_frequency): """ @@ -284,64 +361,45 @@ class Exchange: ) ) - if field == 'price': - field = 'close' + ohlc = self.get_candles(data_frequency, asset) + if field not in ohlc: + raise KeyError('Invalid column: %s' % field) - # Don't use a timezone here - dt = pd.Timestamp.utcnow().floor('1 min') - value = None - if self.minute_reader is not None: - try: - # Slight delay to minimize the chances that multiple algos - # might try to hit the cache at the exact same time. - sleep_time = random.uniform(0.5, 0.8) - sleep(sleep_time) - # TODO: This does not always! Why is that? Open an issue with zipline. 
- # See: https://github.com/zipline-live/zipline/issues/26 - value = self.minute_reader.get_value( - sid=asset.sid, - dt=dt, - field=field - ) - except Exception as e: - log.warn('minute data not found: {}'.format(e)) - - if value is None or np.isnan(value): - ohlc = self.get_candles(data_frequency, asset) - if field not in ohlc: - raise KeyError('Invalid column: %s' % field) - - if self.minute_writer is not None: - df = pd.DataFrame( - [ohlc], - index=pd.DatetimeIndex([dt]), - columns=['open', 'high', 'low', 'close', 'volume'] - ) - - try: - self.minute_writer.write_sid( - sid=asset.sid, - df=df - ) - log.debug('wrote minute data: {}'.format(dt)) - except Exception as e: - log.warn( - 'unable to write minute data: {} {}'.format(dt, e)) - - value = ohlc[field] - log.debug('got spot value: {}'.format(value)) - else: - log.debug('got spot value from cache: {}'.format(value)) + value = ohlc[field] + log.debug('got spot value: {}'.format(value)) return value + def get_series_from_candles(self, candles, start_dt, end_dt, + field, previous_value=None): + """ + Get a series of field data for the specified candles. + + :param candles: + :param start_dt: + :param end_dt: + :param field: + :param previous_value: + :return: + """ + + dates = [candle['last_traded'] for candle in candles] + values = [candle[field] for candle in candles] + + periods = pd.date_range(start_dt, end_dt) + series = pd.Series(values, index=dates) + + series.reindex(periods, method='ffill', fill_value=previous_value) + + return series + def get_history_window(self, assets, end_dt, bar_count, frequency, field, - data_frequency, + data_frequency=None, ffill=True): """ @@ -378,23 +436,93 @@ class Exchange: A dataframe containing the requested data. 
""" - candles = self.get_candles( - data_frequency=frequency, - assets=assets, - bar_count=bar_count, - ) + freq_match = re.match(r'([0-9].*)(m|M|d|D)', frequency, re.M | re.I) + if freq_match: + candle_size = int(freq_match.group(1)) + unit = freq_match.group(2) + + else: + raise InvalidHistoryFrequencyError(frequency) + + if unit.lower() == 'd': + if data_frequency == 'minute': + data_frequency = 'daily' + + elif unit.lower() == 'm': + if data_frequency == 'daily': + data_frequency = 'minute' + + else: + raise InvalidHistoryFrequencyError(frequency) + + adj_bar_count = candle_size * bar_count + try: + series = self.bundle.get_history_window_series_and_load( + assets=assets, + end_dt=end_dt, + bar_count=adj_bar_count, + field=field, + data_frequency=data_frequency + ) + except PricingDataNotLoadedError: + series = dict() - series = dict() for asset in assets: - asset_candles = candles[asset] + if asset not in series or series[asset].index[-1] < end_dt: + # Adding bars too recent to be contained in the consolidated + # exchanges bundles. We go directly against the exchange + # to retrieve the candles. 
- values = map(lambda candle: candle[field], asset_candles) - dates = map(lambda candle: candle['last_traded'], asset_candles) + trailing_dt = \ + series[asset].index[-1] + get_delta(1, data_frequency) \ + if asset in series else start_dt - value_series = pd.Series(values, index=dates) - series[asset] = value_series + trailing_bar_count = \ + get_periods(trailing_dt, end_dt, data_frequency) + + # The get_history method supports multiple asset + candles = self.get_candles( + data_frequency=data_frequency, + assets=asset, + bar_count=trailing_bar_count, + end_dt=end_dt + ) + + last_value = series[asset].iloc(0) if asset in series \ + else np.nan + + candle_series = self.get_series_from_candles( + candles=candles, + start_dt=trailing_dt, + end_dt=end_dt, + field=field, + previous_value=last_value + ) + + if asset in series: + series[asset].append(candle_series) + + else: + series[asset] = candle_series + + df = pd.DataFrame(series) + + if candle_size > 1: + if field == 'open': + agg = 'first' + elif field == 'high': + agg = 'max' + elif field == 'low': + agg = 'min' + elif field == 'close': + agg = 'last' + elif field == 'volume': + agg = 'sum' + else: + raise ValueError('Invalid field.') + + df = df.resample('{}T'.format(candle_size)).agg(agg) - df = pd.concat(series) return df def synchronize_portfolio(self): @@ -413,7 +541,7 @@ class Exchange: if base_position_available is None: raise BaseCurrencyNotFoundError( base_currency=self.base_currency, - exchange=self.name + exchange=self.name.title() ) portfolio = self._portfolio @@ -440,18 +568,6 @@ class Exchange: portfolio.portfolio_value = \ portfolio.positions_value + portfolio.cash - @abstractmethod - def get_balances(self): - """ - Retrieve wallet balances for the exchange - :return balances: A dict of currency => available balance - """ - pass - - @abstractmethod - def create_order(self, asset, amount, is_buy, style): - pass - def order(self, asset, amount, limit_price=None, stop_price=None, style=None): """Place 
an order. @@ -515,7 +631,7 @@ class Exchange: style = ExchangeStopOrder(stop_price, exchange=self.name) elif style is not None: - raise InvalidOrderStyle(exchange=self.name, + raise InvalidOrderStyle(exchange=self.name.title(), style=style.__class__.__name__) else: raise ValueError('Incomplete order data.') @@ -537,6 +653,34 @@ class Exchange: else: return None + # The methods below must be implemented for each exchange. + @abstractmethod + def get_balances(self): + """ + Retrieve wallet balances for the exchange + :return balances: A dict of currency => available balance + """ + pass + + @abstractmethod + def create_order(self, asset, amount, is_buy, style): + """ + Place an order on the exchange. + + :param asset : Asset + The asset that this order is for. + :param amount : int + The amount of shares to order. If ``amount`` is positive, this is + the number of shares to buy or cover. If ``amount`` is negative, + this is the number of shares to sell or short. + :param style : ExecutionStyle + The execution style for the order. + :param is_buy: boolean + Is it a buy order? + :return: + """ + pass + @abstractmethod def get_open_orders(self, asset): """Retrieve all of the current open orders. @@ -588,16 +732,34 @@ class Exchange: pass @abstractmethod - def get_candles(self, data_frequency, assets, bar_count=None): + def get_candles(self, data_frequency, assets, bar_count=None, + start_dt=None, end_dt=None): """ Retrieve OHLCV candles for the given assets :param data_frequency: - :param assets: - :param end_dt: + The candle frequency: minute or daily + :param assets: list[TradingPair] + The targeted assets. :param bar_count: - :param limit: - :return: + The number of bar desired. (default 1) + :param end_dt: datetime, optional + The last bar date. + :param start_dt: datetime, optional + The first bar date. + + :return dict[TradingPair, dict[str, Object]]: OHLCV data + A dictionary of OHLCV candles. 
Each TradingPair instance is + mapped to a list of dictionaries with this structure: + open: float + high: float + low: float + close: float + volume: float + last_traded: datetime + + See definition here: + http://www.investopedia.com/terms/o/ohlcchart.asp """ pass @@ -618,3 +780,16 @@ class Exchange: :return: """ pass + + @abc.abstractmethod + def get_orderbook(self, asset, order_type): + """ + Retrieve the the orderbook for the given trading pair. + + :param asset: TradingPair + :param order_type: str + The type of orders: bid, ask or all + + :return: + """ + pass diff --git a/catalyst/exchange/algorithm_exchange.py b/catalyst/exchange/exchange_algorithm.py similarity index 66% rename from catalyst/exchange/algorithm_exchange.py rename to catalyst/exchange/exchange_algorithm.py index e7bced7e..80f6a312 100644 --- a/catalyst/exchange/algorithm_exchange.py +++ b/catalyst/exchange/exchange_algorithm.py @@ -11,43 +11,50 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os +import pickle import signal import sys -import pickle +from collections import deque from datetime import timedelta -from time import sleep from os import listdir from os.path import isfile, join -from collections import deque -import numpy as np +from time import sleep import logbook import pandas as pd +from catalyst.assets._assets import TradingPair import catalyst.protocol as zp from catalyst.algorithm import TradingAlgorithm from catalyst.data.minute_bars import BcolzMinuteBarWriter, \ BcolzMinuteBarReader from catalyst.errors import OrderInBeforeTradingStart -from catalyst.exchange.simple_clock import SimpleClock -from catalyst.exchange.live_graph_clock import LiveGraphClock +from catalyst.exchange.exchange_blotter import ExchangeBlotter from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangePortfolioDataError, - ExchangeTransactionError -) + ExchangeTransactionError, + OrphanOrderError) +from catalyst.exchange.exchange_execution import ExchangeStopLimitOrder, \ + ExchangeLimitOrder, ExchangeStopOrder from catalyst.exchange.exchange_utils import get_exchange_minute_writer_root, \ save_algo_object, get_algo_object, get_algo_folder, get_algo_df, \ save_algo_df +from catalyst.exchange.live_graph_clock import LiveGraphClock +from catalyst.exchange.simple_clock import SimpleClock from catalyst.exchange.stats_utils import get_pretty_stats +from catalyst.finance.execution import MarketOrder from catalyst.finance.performance.period import calc_period_stats from catalyst.gens.tradesimulation import AlgorithmSimulator from catalyst.utils.api_support import ( api_method, disallowed_in_before_trading_start) -from catalyst.utils.input_validation import error_keywords +from catalyst.utils.input_validation import error_keywords, ensure_upper_case, \ + expect_types +from catalyst.utils.preprocess import preprocess +from catalyst.utils.math_utils import round_nearest -log = logbook.Logger("ExchangeTradingAlgorithm") +log = 
logbook.Logger('exchange_algorithm') class ExchangeAlgorithmExecutor(AlgorithmSimulator): @@ -55,247 +62,65 @@ class ExchangeAlgorithmExecutor(AlgorithmSimulator): super(self.__class__, self).__init__(*args, **kwargs) -class ExchangeTradingAlgorithm(TradingAlgorithm): +class ExchangeTradingAlgorithmBase(TradingAlgorithm): def __init__(self, *args, **kwargs): - self.exchange = kwargs.pop('exchange', None) - self.algo_namespace = kwargs.pop('algo_namespace', None) - self.live_graph = kwargs.pop('live_graph', None) + self.exchanges = kwargs.pop('exchanges', None) - self._clock = None - self.minute_stats = deque(maxlen=60) + super(ExchangeTradingAlgorithmBase, self).__init__(*args, **kwargs) - self.pnl_stats = get_algo_df(self.algo_namespace, 'pnl_stats') - - self.custom_signals_stats = \ - get_algo_df(self.algo_namespace, 'custom_signals_stats') - - self.exposure_stats = \ - get_algo_df(self.algo_namespace, 'exposure_stats') - - self.is_running = True - - self.retry_check_open_orders = 5 - self.retry_synchronize_portfolio = 5 - self.retry_get_open_orders = 5 - self.retry_order = 2 - self.retry_delay = 5 - - self.stats_minutes = 5 - - super(self.__class__, self).__init__(*args, **kwargs) - # self._create_minute_writer() - - signal.signal(signal.SIGINT, self.signal_handler) - - log.info('exchange trading algorithm successfully initialized') - - def _create_minute_writer(self): - root = get_exchange_minute_writer_root(self.exchange.name) - filename = os.path.join(root, 'metadata.json') - - if os.path.isfile(filename): - writer = BcolzMinuteBarWriter.open( - root, self.sim_params.end_session) - else: - writer = BcolzMinuteBarWriter( - rootdir=root, - calendar=self.trading_calendar, - minutes_per_day=1440, - start_session=self.sim_params.start_session, - end_session=self.sim_params.end_session, - write_metadata=True - ) - - self.exchange.minute_writer = writer - self.exchange.minute_reader = BcolzMinuteBarReader(root) - - def signal_handler(self, signal, frame): - 
self.is_running = False - - if self._analyze is None: - log.info('Interruption signal detected {}, exiting the ' - 'algorithm'.format(signal)) - - else: - log.info('Interruption signal detected {}, calling `analyze()` ' - 'before exiting the algorithm'.format(signal)) - - algo_folder = get_algo_folder(self.algo_namespace) - folder = join(algo_folder, 'daily_perf') - files = [f for f in listdir(folder) if isfile(join(folder, f))] - - daily_perf_list = [] - for item in files: - filename = join(folder, item) - with open(filename, 'rb') as handle: - daily_perf_list.append(pickle.load(handle)) - - stats = pd.DataFrame(daily_perf_list) - - self.analyze(stats) - - sys.exit(0) - - @property - def clock(self): - if self._clock is None: - return self._create_clock() - else: - return self._clock - - def _create_clock(self): - - # The calendar's execution times are the minutes over which we actually - # want to run the clock. Typically the execution times simply adhere to - # the market open and close times. In the case of the futures calendar, - # for example, we only want to simulate over a subset of the full 24 - # hour calendar, so the execution times dictate a market open time of - # 6:31am US/Eastern and a close of 5:00pm US/Eastern. - - # In our case, we are trading around the clock, so the market close - # corresponds to the last minute of the day. - - # This method is taken from TradingAlgorithm. - # The clock has been replaced to use RealtimeClock - # TODO: should we apply a time skew? not sure to understand the utility. 
- - log.debug('creating clock') - if self.live_graph: - self._clock = LiveGraphClock( - self.sim_params.sessions, - time_skew=self.exchange.time_skew, - context=self - ) - else: - self._clock = SimpleClock( - self.sim_params.sessions, - time_skew=self.exchange.time_skew - ) - - return self._clock - - def _create_generator(self, sim_params): - if self.perf_tracker is None: - self.perf_tracker = get_algo_object( - algo_name=self.algo_namespace, - key='perf_tracker' - ) - - # Call the simulation trading algorithm for side-effects: - # it creates the perf tracker - TradingAlgorithm._create_generator(self, sim_params) - self.trading_client = ExchangeAlgorithmExecutor( - self, - sim_params, - self.data_portal, - self.clock, - self._create_benchmark_source(), - self.restrictions, - universe_func=self._calculate_universe - ) - - return self.trading_client.transform() - - def updated_portfolio(self): + def round_order(self, amount, asset): """ - We skip the entire performance tracker business and update the - portfolio directly. + We need fractions with cryptocurrencies + + :param amount: :return: """ - return self.exchange.portfolio + return round_nearest(amount, asset.min_trade_size) - def updated_account(self): - return self.exchange.account + @api_method + @preprocess(symbol_str=ensure_upper_case) + def symbol(self, symbol_str, exchange_name=None): + """Lookup an Equity by its ticker symbol. - def _synchronize_portfolio(self, attempt_index=0): - try: - self.exchange.synchronize_portfolio() + Parameters + ---------- + symbol_str : str + The ticker symbol for the equity to lookup. + exchange_name: str + The name of the exchange containing the symbol - # Applying the updated last_sales_price to the positions - # in the performance tracker. This seems a bit redundant - # but it will make sense when we have multiple exchange portfolios - # feeding into the same performance tracker. 
- tracker = self.perf_tracker.todays_performance.position_tracker - for asset in self.exchange.portfolio.positions: - position = self.exchange.portfolio.positions[asset] - tracker.update_position( - asset=asset, - last_sale_date=position.last_sale_date, - last_sale_price=position.last_sale_price - ) - except ExchangeRequestError as e: - log.warn( - 'update portfolio attempt {}: {}'.format(attempt_index, e) - ) - if attempt_index < self.retry_synchronize_portfolio: - sleep(self.retry_delay) - self._synchronize_portfolio(attempt_index + 1) - else: - raise ExchangePortfolioDataError( - data_type='update-portfolio', - attempts=attempt_index, - error=e - ) + Returns + ------- + equity : Equity + The equity that held the ticker symbol on the current + symbol lookup date. - def _check_open_orders(self, attempt_index=0): - try: - return self.exchange.check_open_orders() - except ExchangeRequestError as e: - log.warn( - 'check open orders attempt {}: {}'.format(attempt_index, e) - ) - if attempt_index < self.retry_check_open_orders: - sleep(self.retry_delay) - return self._check_open_orders(attempt_index + 1) - else: - raise ExchangePortfolioDataError( - data_type='order-status', - attempts=attempt_index, - error=e - ) + Raises + ------ + SymbolNotFound + Raised when the symbols was not held on the current lookup date. - def add_pnl_stats(self, period_stats): - starting = period_stats['starting_cash'] - current = period_stats['portfolio_value'] - appreciation = (current / starting) - 1 - perc = (appreciation * 100) if current != 0 else 0 + See Also + -------- + :func:`catalyst.api.set_symbol_lookup_date` + """ + # If the user has not set the symbol lookup date, + # use the end_session as the date for sybmol->sid resolution. 
- log.debug('adding pnl stats: {:6f}%'.format(perc)) + _lookup_date = self._symbol_lookup_date \ + if self._symbol_lookup_date is not None \ + else self.sim_params.end_session - df = pd.DataFrame( - data=[dict(performance=perc)], - index=[period_stats['period_close']] + if exchange_name is None: + exchange = self.exchanges.values()[0] + else: + exchange = self.exchanges[exchange_name] + + return self.asset_finder.lookup_symbol( + symbol=symbol_str, + exchange=exchange, + as_of_date=_lookup_date ) - self.pnl_stats = pd.concat([self.pnl_stats, df]) - - save_algo_df(self.algo_namespace, 'pnl_stats', self.pnl_stats) - - def add_custom_signals_stats(self, period_stats): - log.debug('adding custom signals stats: {}'.format(self.recorded_vars)) - df = pd.DataFrame( - data=[self.recorded_vars], - index=[period_stats['period_close']], - ) - self.custom_signals_stats = pd.concat([self.custom_signals_stats, df]) - - save_algo_df(self.algo_namespace, 'custom_signals_stats', - self.custom_signals_stats) - - def add_exposure_stats(self, period_stats): - data = dict( - long_exposure=period_stats['long_exposure'], - base_currency=period_stats['ending_cash'] - ) - log.debug('adding exposure stats: {}'.format(data)) - - df = pd.DataFrame( - data=[data], - index=[period_stats['period_close']], - ) - self.exposure_stats = pd.concat([self.exposure_stats, df]) - - save_algo_df(self.algo_namespace, 'exposure_stats', - self.exposure_stats) def prepare_period_stats(self, start_dt, end_dt): """ @@ -364,6 +189,308 @@ class ExchangeTradingAlgorithm(TradingAlgorithm): return stats + +class ExchangeTradingAlgorithmBacktest(ExchangeTradingAlgorithmBase): + def __init__(self, *args, **kwargs): + super(ExchangeTradingAlgorithmBacktest, self).__init__(*args, **kwargs) + + self.blotter = ExchangeBlotter( + data_frequency=self.data_frequency, + # Default to NeverCancel in catalyst + cancel_policy=self.cancel_policy, + ) + log.info('initialized trading algorithm in backtest mode') + + def 
_calculate_order(self, asset, amount, + limit_price=None, stop_price=None, style=None): + # Raises a ZiplineError if invalid parameters are detected. + self.validate_order_params(asset, + amount, + limit_price, + stop_price, + style) + + # Convert deprecated limit_price and stop_price parameters to use + # ExecutionStyle objects. + style = self.__convert_order_params_for_blotter(limit_price, + stop_price, + style) + return amount, style + + @staticmethod + def __convert_order_params_for_blotter(limit_price, stop_price, style): + """ + Helper method for converting deprecated limit_price and stop_price + arguments into ExecutionStyle instances. + + This function assumes that either style == None or (limit_price, + stop_price) == (None, None). + """ + if style: + assert (limit_price, stop_price) == (None, None) + return style + if limit_price and stop_price: + return ExchangeStopLimitOrder(limit_price, stop_price) + if limit_price: + return ExchangeLimitOrder(limit_price) + if stop_price: + return ExchangeStopOrder(stop_price) + else: + return MarketOrder() + + +class ExchangeTradingAlgorithmLive(ExchangeTradingAlgorithmBase): + def __init__(self, *args, **kwargs): + self.algo_namespace = kwargs.pop('algo_namespace', None) + self.live_graph = kwargs.pop('live_graph', None) + + self._clock = None + self.minute_stats = deque(maxlen=60) + + self.pnl_stats = get_algo_df(self.algo_namespace, 'pnl_stats') + + self.custom_signals_stats = \ + get_algo_df(self.algo_namespace, 'custom_signals_stats') + + self.exposure_stats = \ + get_algo_df(self.algo_namespace, 'exposure_stats') + + self.is_running = True + + self.retry_check_open_orders = 5 + self.retry_synchronize_portfolio = 5 + self.retry_get_open_orders = 5 + self.retry_order = 2 + self.retry_delay = 5 + + self.stats_minutes = 5 + + super(ExchangeTradingAlgorithmLive, self).__init__(*args, **kwargs) + # TODO: fix precision before re-enabling + # self._create_minute_writer() + + signal.signal(signal.SIGINT, 
self.signal_handler) + + log.info('initialized trading algorithm in live mode') + + def _create_minute_writer(self): + root = get_exchange_minute_writer_root(self.exchange.name) + filename = os.path.join(root, 'metadata.json') + + if os.path.isfile(filename): + writer = BcolzMinuteBarWriter.open( + root, self.sim_params.end_session) + else: + # TODO: need to be able to write more precise numbers + writer = BcolzMinuteBarWriter( + rootdir=root, + calendar=self.trading_calendar, + minutes_per_day=1440, + start_session=self.sim_params.start_session, + end_session=self.sim_params.end_session, + write_metadata=True + ) + + self.exchange.minute_writer = writer + self.exchange.minute_reader = BcolzMinuteBarReader(root) + + def signal_handler(self, signal, frame): + self.is_running = False + + if self._analyze is None: + log.info('Interruption signal detected {}, exiting the ' + 'algorithm'.format(signal)) + + else: + log.info('Interruption signal detected {}, calling `analyze()` ' + 'before exiting the algorithm'.format(signal)) + + algo_folder = get_algo_folder(self.algo_namespace) + folder = join(algo_folder, 'daily_perf') + files = [f for f in listdir(folder) if isfile(join(folder, f))] + + daily_perf_list = [] + for item in files: + filename = join(folder, item) + with open(filename, 'rb') as handle: + daily_perf_list.append(pickle.load(handle)) + + stats = pd.DataFrame(daily_perf_list) + + self.analyze(stats) + + sys.exit(0) + + @property + def clock(self): + if self._clock is None: + return self._create_clock() + else: + return self._clock + + def _create_clock(self): + + # The calendar's execution times are the minutes over which we actually + # want to run the clock. Typically the execution times simply adhere to + # the market open and close times. 
In the case of the futures calendar, + # for example, we only want to simulate over a subset of the full 24 + # hour calendar, so the execution times dictate a market open time of + # 6:31am US/Eastern and a close of 5:00pm US/Eastern. + + # In our case, we are trading around the clock, so the market close + # corresponds to the last minute of the day. + + # This method is taken from TradingAlgorithm. + # The clock has been replaced to use RealtimeClock + # TODO: should we apply a time skew? not sure to understand the utility. + + log.debug('creating clock') + if self.live_graph: + self._clock = LiveGraphClock( + self.sim_params.sessions, + context=self + ) + else: + self._clock = SimpleClock( + self.sim_params.sessions, + ) + + return self._clock + + def _create_generator(self, sim_params): + if self.perf_tracker is None: + self.perf_tracker = get_algo_object( + algo_name=self.algo_namespace, + key='perf_tracker' + ) + + # Call the simulation trading algorithm for side-effects: + # it creates the perf tracker + TradingAlgorithm._create_generator(self, sim_params) + self.trading_client = ExchangeAlgorithmExecutor( + self, + sim_params, + self.data_portal, + self.clock, + self._create_benchmark_source(), + self.restrictions, + universe_func=self._calculate_universe + ) + + return self.trading_client.transform() + + def updated_portfolio(self): + """ + We skip the entire performance tracker business and update the + portfolio directly. + :return: + """ + # TODO: build cumulative portfolio + return self.perf_tracker.get_portfolio(False) + + def updated_account(self): + return self.perf_tracker.get_account(False) + + def _synchronize_portfolio(self, attempt_index=0): + try: + for exchange_name in self.exchanges: + exchange = self.exchanges[exchange_name] + + exchange.synchronize_portfolio() + + # Applying the updated last_sales_price to the positions + # in the performance tracker. 
def add_pnl_stats(self, period_stats):
    """
    Append the period's profit-and-loss percentage to `self.pnl_stats`
    and persist the updated frame.

    The percentage is the change of `portfolio_value` relative to
    `starting_cash`. It is reported as 0 when either value is zero, so a
    zero starting cash no longer raises a ZeroDivisionError.

    :param period_stats: dict
        Must contain 'starting_cash', 'portfolio_value' and
        'period_close'.
    """
    starting = period_stats['starting_cash']
    current = period_stats['portfolio_value']

    if starting == 0 or current == 0:
        # Nothing meaningful to compare against yet.
        perc = 0
    else:
        # float() guards against integer floor division on Python 2.
        perc = ((float(current) / starting) - 1) * 100

    log.debug('adding pnl stats: {:6f}%'.format(perc))

    df = pd.DataFrame(
        data=[dict(performance=perc)],
        index=[period_stats['period_close']]
    )
    self.pnl_stats = pd.concat([self.pnl_stats, df])

    save_algo_df(self.algo_namespace, 'pnl_stats', self.pnl_stats)
def add_exposure_stats(self, period_stats):
    """
    Append the period's exposure figures to `self.exposure_stats` and
    persist the updated frame.

    :param period_stats: dict
        Must contain 'long_exposure', 'ending_cash' and 'period_close'.
    """
    row = {
        'long_exposure': period_stats['long_exposure'],
        'base_currency': period_stats['ending_cash'],
    }
    log.debug('adding exposure stats: {}'.format(row))

    # One-row frame indexed by the period close, appended to the history.
    period_frame = pd.DataFrame(
        data=[row],
        index=[period_stats['period_close']],
    )
    self.exposure_stats = pd.concat([self.exposure_stats, period_frame])

    save_algo_df(
        self.algo_namespace,
        'exposure_stats',
        self.exposure_stats
    )
@api_method
@disallowed_in_before_trading_start(OrderInBeforeTradingStart())
@expect_types(asset=TradingPair)
def order(self,
          asset,
          amount,
          limit_price=None,
          stop_price=None,
          style=None):
    """
    Place an order through the exchange the asset trades on.

    The exchange-specific portfolio is used to look the order up: the
    cumulative portfolio does not contain open orders but exchange
    portfolios do.

    :param asset: TradingPair
    :param amount: float
    :param limit_price: float
    :param stop_price: float
    :param style: Style
    :return order: Order
        The catalyst order object, or None when no order id was obtained.
    :raises OrphanOrderError:
        When an order id was obtained but is missing from the open
        orders of the exchange portfolio.
    """
    amount, style = self._calculate_order(
        asset, amount, limit_price, stop_price, style
    )
    order_id = self._order(asset, amount, limit_price, stop_price, style)

    exchange = self.exchanges[asset.exchange]
    exchange_portfolio = exchange.portfolio

    if order_id is None:
        log.warn('unable to order {} {} on exchange {}'.format(
            amount, asset.symbol, asset.exchange))
        return None

    if order_id not in exchange_portfolio.open_orders:
        raise OrphanOrderError(
            order_id=order_id,
            exchange=exchange.name
        )

    placed = exchange_portfolio.open_orders[order_id]
    self.perf_tracker.process_order(placed)
    return placed
@api_method
def cancel_order(self, order_param, exchange_name):
    """
    Cancel an order on the named exchange.

    :param order_param: an order id, or a `zp.Order` whose `id` is used.
    :param exchange_name: key into `self.exchanges`.
    """
    target_exchange = self.exchanges[exchange_name]

    if isinstance(order_param, zp.Order):
        order_id = order_param.id
    else:
        order_id = order_param

    target_exchange.cancel_order(order_id)
def load_raw_arrays(self, fields, start_dt, end_dt, sids):
    """
    Load the raw arrays for the requested fields, date range and sids.

    The arguments are forwarded unchanged to `_load_raw_arrays`, whose
    result is returned as-is.
    """
    arrays = self._load_raw_arrays(fields, start_dt, end_dt, sids)
    return arrays
class TradingPairFixedSlippage(SlippageModel):
    """
    Model slippage as a fixed fractional spread applied to the close price.

    Parameters
    ----------
    spread : float, optional
        Fraction of the close price by which fills are adjusted: buys are
        priced at ``close * (1 + spread)`` and sells at
        ``close * (1 - spread)``. Defaults to DEFAULT_SLIPPAGE_SPREAD.
    """

    def __init__(self, spread=DEFAULT_SLIPPAGE_SPREAD):
        super(TradingPairFixedSlippage, self).__init__()
        self.spread = spread

    def __repr__(self):
        return '{class_name}(spread={spread})'.format(
            class_name=self.__class__.__name__, spread=self.spread,
        )

    def simulate(self, data, asset, orders_for_asset):
        """
        Yield an (order, transaction) pair for every triggered order.

        Orders with no open amount, or whose triggers are not reached at
        the current close price, are skipped. The absolute transacted
        amounts are accumulated in `self._volume_for_bar`, which is reset
        at the start of each call.
        """
        self._volume_for_bar = 0

        price = data.current(asset, 'close')

        dt = data.current_dt
        for order in orders_for_asset:
            if order.open_amount == 0:
                continue

            order.check_triggers(price, dt)
            if not order.triggered:
                log.debug('order has not reached the trigger at current '
                          'price {}'.format(price))
                continue

            execution_price, execution_volume = self.process_order(data, order)

            # NOTE(review): abs() makes the transaction amount non-negative
            # even for sell orders -- confirm that this is intended.
            transaction = Transaction(
                asset=order.asset,
                amount=abs(execution_volume),
                dt=dt,
                price=execution_price,
                order_id=order.id
            )

            self._volume_for_bar += abs(transaction.amount)
            yield order, transaction

    def process_order(self, data, order):
        """
        Compute the slippage-adjusted price for an order.

        :return: tuple (adjusted price, order.amount); the whole order
            amount is returned as the execution volume.
        """
        price = data.current(order.asset, 'close')

        if order.amount > 0:
            # Buy order
            adj_price = price * (1 + self.spread)
        else:
            # Sell order
            adj_price = price * (1 - self.spread)

        log.debug('added slippage to price: {} => {}'.format(price, adj_price))

        return adj_price, order.amount
def _cachpath(symbol, type_):
    """Build a cache key by joining `symbol` and `type_` with a dash."""
    parts = (symbol, type_)
    return '-'.join(parts)
def get_reader(self, data_frequency, path=None):
    """
    Get a data reader object, either a new object or from cache.

    :param data_frequency: str
        Passed to the reader and used to derive the default bundle path.
    :param path: str, optional
        Bundle directory; defaults to the exchange bundle folder for
        `data_frequency`.
    :return: BcolzExchangeBarReader, or None when opening the bundle
        raised an IOError.
    """
    if path is None:
        root = get_exchange_folder(self.exchange.name)
        path = BUNDLE_NAME_TEMPLATE.format(
            root=root,
            frequency=data_frequency
        )

    cached = self._readers.get(path)
    if cached is not None:
        return cached

    # A failed attempt is stored as None, so it is retried on later calls.
    try:
        reader = BcolzExchangeBarReader(
            rootdir=path,
            data_frequency=data_frequency
        )
    except IOError:
        reader = None

    self._readers[path] = reader
    return reader
def ingest_ctable(self, asset, data_frequency, period, start_dt, end_dt,
                  writer, empty_rows_behavior='strip', cleanup=False):
    """
    Merge a ctable bundle chunk into the main bundle for the exchange.

    :param asset: TradingPair
    :param data_frequency: str
    :param period: str
        Label of the chunk to fetch.
    :param start_dt:
        First date to load from the chunk.
    :param end_dt:
        Last date to load from the chunk.
    :param writer:
        The bundle writer receiving the data.
    :param empty_rows_behavior: str
        How rows with missing values are handled: 'ignore' leaves them
        untouched, 'warn' logs the affected ranges, 'raise' raises
        EmptyValuesInBundleError, and any other value (default 'strip')
        drops them.
    :param cleanup: bool
        Remove the temp bundle directory after ingestion.

    :return: str
        The path of the chunk, as returned by `get_bcolz_chunk`.
    :raises TempBundleNotFoundError:
        When no reader can be opened on the chunk path.
    """

    path = get_bcolz_chunk(
        exchange_name=self.exchange.name,
        symbol=asset.symbol,
        data_frequency=data_frequency,
        period=period
    )

    reader = self.get_reader(data_frequency, path=path)
    if reader is None:
        raise TempBundleNotFoundError(path=path)

    arrays = reader.load_raw_arrays(
        sids=[asset.sid],
        fields=['open', 'high', 'low', 'close', 'volume'],
        start_dt=start_dt,
        end_dt=end_dt
    )

    if not arrays:
        return path

    periods = self.get_calendar_periods_range(
        start_dt, end_dt, data_frequency
    )

    df = get_df_from_arrays(arrays, periods)

    # Compare by value: identity (`is not`) on a str only works when the
    # interpreter happens to intern both strings.
    if empty_rows_behavior != 'ignore':
        nan_rows = df[df.isnull().T.any().T].index

        if len(nan_rows) > 0:
            # Collapse consecutive empty rows into (start, end) boundaries.
            dates = []
            previous_date = None
            for row_date in nan_rows.values:
                row_date = pd.to_datetime(row_date)

                if previous_date is None:
                    dates.append(row_date)

                else:
                    seq_date = previous_date + get_delta(1, data_frequency)

                    if row_date > seq_date:
                        dates.append(previous_date)
                        dates.append(row_date)

                previous_date = row_date

            dates.append(pd.to_datetime(nan_rows.values[-1]))

            name = path.split('/')[-1]
            if empty_rows_behavior == 'warn':
                log.warn(
                    '\n{name} with end minute {end_minute} has empty rows '
                    'in ranges: {dates}'.format(
                        name=name,
                        end_minute=asset.end_minute,
                        dates=dates
                    )
                )

            elif empty_rows_behavior == 'raise':
                raise EmptyValuesInBundleError(
                    name=name,
                    end_minute=asset.end_minute,
                    dates=dates
                )
            else:
                df.dropna(inplace=True)

    data = []
    if not df.empty:
        df.sort_index(inplace=True)
        data.append((asset.sid, df))
        self._write(data, writer, data_frequency)

    if cleanup:
        log.debug('removing bundle folder following '
                  'ingestion: {}'.format(path))
        shutil.rmtree(path)

    return path
def ingest_assets(self, assets, start_dt, end_dt, data_frequency,
                  show_progress=False):
    """
    Work out which bundle chunks are missing and ingest each of them.

    :param assets: the assets to ingest
    :param start_dt: start of the requested range
    :param end_dt: end of the requested range
    :param data_frequency: str
    :param show_progress: bool
        Forwarded to `maybe_show_progress`.
    :return:
    """
    writer = self.get_writer(start_dt, end_dt, data_frequency)
    chunks = self.prepare_chunks(
        assets=assets,
        data_frequency=data_frequency,
        start_dt=start_dt,
        end_dt=end_dt
    )
    progress_label = 'Fetching {exchange} {frequency} candles: '.format(
        exchange=self.exchange.name,
        frequency=data_frequency
    )

    with maybe_show_progress(
            chunks, show_progress, label=progress_label) as pending:
        for chunk in pending:
            self.ingest_ctable(
                asset=chunk['asset'],
                data_frequency=data_frequency,
                period=chunk['period'],
                start_dt=chunk['period_start'],
                end_dt=chunk['period_end'],
                writer=writer,
                empty_rows_behavior='strip'
            )
def get_history_window_series_and_load(self,
                                       assets,
                                       end_dt,
                                       bar_count,
                                       field,
                                       data_frequency):
    """
    Return a history window, ingesting the missing data when needed.

    The window is first read from the bundle. If that raises
    PricingDataNotLoadedError, the missing range is ingested and the
    read is retried once with a fresh reader.

    :param assets: the assets to read
    :param end_dt: last date of the window
    :param bar_count: number of bars in the window
    :param field: the pricing field to read
    :param data_frequency: str
    :return: pd.DataFrame
        Built from the series of `get_history_window_series`. Both the
        direct and the retry path return a DataFrame; the retry path
        previously returned the raw dict.
    """
    try:
        series = self.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field=field,
            data_frequency=data_frequency
        )
        return pd.DataFrame(series)

    except PricingDataNotLoadedError:
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)
        log.info(
            'pricing data for {symbol} not found in range '
            '{start} to {end}, updating the bundles.'.format(
                symbol=[asset.symbol for asset in assets],
                start=start_dt,
                end=end_dt
            )
        )
        self.ingest_assets(
            assets=assets,
            start_dt=start_dt,
            end_dt=end_dt,
            data_frequency=data_frequency,
            show_progress=True
        )
        # The cached reader predates the ingestion, so ask for a new one.
        series = self.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field=field,
            data_frequency=data_frequency,
            reset_reader=True
        )
        return pd.DataFrame(series)
def silent_except_hook(exctype, excvalue, exctraceback):
    """
    Exception hook printing a short message for known pricing errors.

    For the listed exchange errors only the file and line of the last
    traceback entry are printed, followed by the exception name and
    message. Any other exception is handed to the default
    `sys.__excepthook__`.

    :param exctype: the exception class
    :param excvalue: the exception instance
    :param exctraceback: the traceback object
    """
    if exctype in [PricingDataBeforeTradingError, PricingDataNotLoadedError,
                   SymbolNotFoundOnExchange, NoDataAvailableOnExchange, ]:
        last_entry = traceback.extract_tb(exctraceback)[-1]
        fn = last_entry[0]
        ln = last_entry[1]
        # Parenthesised single argument: valid on both Python 2 and 3.
        print("Error traceback: {1} (line {2})\n"
              "{0.__name__}: {3}".format(exctype, fn, ln, excvalue))
    else:
        sys.__excepthook__(exctype, excvalue, exctraceback)
class BundleNotFoundError(ZiplineError):
    """
    Error reporting that no bundle data exists for an exchange and data
    frequency.
    """
    # Each fragment ends with a space so the sentences do not run together.
    msg = ('Unable to find bundle data for exchange {exchange} and '
           'data frequency {data_frequency}. '
           'Please ingest some price data. '
           'See `catalyst ingest-exchange --help` for details.').strip()
' + 'Check `http://enigma.co/catalyst/status` for market coverage.').strip() diff --git a/catalyst/exchange/exchange_portfolio.py b/catalyst/exchange/exchange_portfolio.py index ded8a2a4..d7105480 100644 --- a/catalyst/exchange/exchange_portfolio.py +++ b/catalyst/exchange/exchange_portfolio.py @@ -70,6 +70,30 @@ class ExchangePortfolio(Portfolio): log.debug('updated portfolio with executed order') + def execute_transaction(self, transaction): + log.debug('executing transaction {}'.format(transaction.order_id)) + + order_position = self.positions[transaction.asset] \ + if transaction.asset in self.positions else None + + if order_position is None: + raise ValueError( + 'Trying to execute transaction for a position not held: %s' % transaction.order_id + ) + + self.capital_used += transaction.amount * transaction.price + + if transaction.amount > 0: + if order_position.cost_basis > 0: + order_position.cost_basis = np.average( + [order_position.cost_basis, transaction.price], + weights=[order_position.amount, transaction.amount] + ) + else: + order_position.cost_basis = transaction.price + + log.debug('updated portfolio with executed order') + def remove_order(self, order): log.info('removing cancelled order {}'.format(order.id)) del self.open_orders[order.id] diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index ebb33023..1335561c 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -3,11 +3,12 @@ import os import pickle import urllib from datetime import date, datetime + import pandas as pd from catalyst.exchange.exchange_errors import ExchangeAuthNotFound, \ ExchangeSymbolsNotFound -from catalyst.utils.paths import data_root, ensure_directory +from catalyst.utils.paths import data_root, ensure_directory, last_modified_time SYMBOLS_URL = 'https://s3.amazonaws.com/enigmaco/catalyst-exchanges/' \ '{exchange}/symbols.json' @@ -39,7 +40,8 @@ def download_exchange_symbols(exchange_name, 
environ=None): def get_exchange_symbols(exchange_name, environ=None): filename = get_exchange_symbols_filename(exchange_name) - if not os.path.isfile(filename): + if not os.path.isfile(filename) or \ + pd.Timedelta(pd.Timestamp('now', tz='UTC') - last_modified_time(filename)).days > 1: download_exchange_symbols(exchange_name, environ) if os.path.isfile(filename): @@ -80,6 +82,9 @@ def get_algo_folder(algo_name, environ=None): def get_algo_object(algo_name, key, environ=None, rel_path=None): + if algo_name is None: + return None + folder = get_algo_folder(algo_name, environ) if rel_path is not None: @@ -158,6 +163,14 @@ def get_exchange_minute_writer_root(exchange_name, environ=None): return minute_data_folder +def get_exchange_bundles_folder(exchange_name, environ=None): + exchange_folder = get_exchange_folder(exchange_name, environ) + + temp_bundles = os.path.join(exchange_folder, 'temp_bundles') + ensure_directory(temp_bundles) + + return temp_bundles + def perf_serial(obj): """JSON serializer for objects not serializable by default json code""" diff --git a/catalyst/exchange/init_utils.py b/catalyst/exchange/init_utils.py new file mode 100644 index 00000000..a37f0441 --- /dev/null +++ b/catalyst/exchange/init_utils.py @@ -0,0 +1,32 @@ +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.exchange_errors import ExchangeNotFoundError +from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.exchange.poloniex.poloniex import Poloniex + + +def get_exchange(exchange_name): + exchange_auth = get_exchange_auth(exchange_name) + if exchange_name == 'bitfinex': + return Bitfinex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, # TODO: make optional at the exchange + portfolio=None + ) + elif exchange_name == 'bittrex': + return Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, + 
portfolio=None + ) + elif exchange_name == 'poloniex': + return Poloniex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=None, + portfolio=None + ) + else: + raise ExchangeNotFoundError(exchange_name=exchange_name) diff --git a/catalyst/exchange/live_graph_clock.py b/catalyst/exchange/live_graph_clock.py index 59b2c3f4..06b1086c 100644 --- a/catalyst/exchange/live_graph_clock.py +++ b/catalyst/exchange/live_graph_clock.py @@ -19,6 +19,10 @@ from catalyst.gens.sim_engine import ( ) from logbook import Logger +from catalyst.exchange.exchange_errors import \ + MismatchingBaseCurrenciesExchanges + + log = Logger('LiveGraphClock') @@ -50,11 +54,11 @@ class LiveGraphClock(object): def __init__(self, sessions, context, time_skew=pd.Timedelta('0s')): + global mdates, plt #TODO: Could be cleaner import matplotlib.dates as mdates from matplotlib import pyplot as plt from matplotlib import style - self.sessions = sessions self.time_skew = time_skew self._last_emit = None @@ -155,17 +159,31 @@ class LiveGraphClock(object): context = self.context df = context.exposure_stats + # TODO: list exchanges in graph + base_currency = None + positions = [] + for exchange_name in context.exchanges: + exchange = context.exchanges[exchange_name] + + if not base_currency: + base_currency = exchange.base_currency + elif base_currency != exchange.base_currency: + raise MismatchingBaseCurrenciesExchanges( + base_currency=base_currency, + exchange_name=exchange.name, + exchange_currency=exchange.base_currency + ) + + positions += exchange.portfolio.positions + ax.clear() ax.set_title('Exposure') ax.plot(df.index, df['base_currency'], '-', color='green', linewidth=1.0, - label='Base Currency: {}'.format( - context.exchange.base_currency.upper() - ) + label='Base Currency: {}'.format(base_currency.upper()) ) - positions = context.exchange.portfolio.positions symbols = [] for position in positions: symbols.append(position.symbol) @@ -173,10 +191,7 @@ class 
LiveGraphClock(object): ax.plot(df.index, df['long_exposure'], '-', color='blue', linewidth=1.0, - label='Long Exposure: {}'.format( - ', '.join(symbols).upper() - ) - ) + label='Long Exposure: {}'.format(', '.join(symbols).upper())) self.set_legend(ax) self.format_ax(ax) diff --git a/tests/exchange/__init__.py b/catalyst/exchange/poloniex/__init__.py similarity index 100% rename from tests/exchange/__init__.py rename to catalyst/exchange/poloniex/__init__.py diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py new file mode 100644 index 00000000..e24acc75 --- /dev/null +++ b/catalyst/exchange/poloniex/poloniex.py @@ -0,0 +1,638 @@ +import base64 +import hashlib +import hmac +import json +import re +import time +from collections import defaultdict + +import numpy as np +import pandas as pd +import pytz +import requests +# import six +from six import iteritems +from catalyst.assets._assets import TradingPair +from logbook import Logger + +from catalyst.exchange.exchange_bundle import ExchangeBundle +from catalyst.exchange.poloniex.poloniex_api import Poloniex_api + +# from websocket import create_connection +from catalyst.exchange.exchange import Exchange +from catalyst.exchange.exchange_errors import ( + ExchangeRequestError, + InvalidHistoryFrequencyError, + InvalidOrderStyle, OrderCancelError, + OrphanOrderReverseError) +from catalyst.exchange.exchange_execution import ExchangeLimitOrder, \ + ExchangeStopLimitOrder, ExchangeStopOrder +from catalyst.finance.order import Order, ORDER_STATUS +from catalyst.protocol import Account +from catalyst.exchange.exchange_utils import get_exchange_symbols_filename, \ + download_exchange_symbols +from catalyst.finance.transaction import Transaction + +log = Logger('Poloniex') + + +class Poloniex(Exchange): + def __init__(self, key, secret, base_currency, portfolio=None): + self.api = Poloniex_api(key=key, secret=secret.encode('UTF-8')) + self.name = 'poloniex' + self.assets = {} + 
self.load_assets() + self.base_currency = base_currency + self._portfolio = portfolio + self.minute_writer = None + self.minute_reader = None + self.transactions = defaultdict(list) + + self.num_candles_limit = 2000 + self.max_requests_per_minute = 20 + self.request_cpt = dict() + + self.bundle = ExchangeBundle(self) + + def sanitize_curency_symbol(self, exchange_symbol): + """ + Helper method used to build the universal pair. + Include any symbol mapping here if appropriate. + + :param exchange_symbol: + :return universal_symbol: + """ + return exchange_symbol.lower() + + def _create_order(self, order_status): + """ + Create a Catalyst order object from the Exchange order dictionary + :param order_status: + :return: Order + """ + # if order_status['is_cancelled']: + # status = ORDER_STATUS.CANCELLED + # elif not order_status['is_live']: + # log.info('found executed order {}'.format(order_status)) + # status = ORDER_STATUS.FILLED + # else: + status = ORDER_STATUS.OPEN + + amount = float(order_status['amount']) + # filled = float(order_status['executed_amount']) + filled = None + + if order_status['type'] == 'sell': + amount = -amount + # filled = -filled + + price = float(order_status['rate']) + order_type = order_status['type'] + + stop_price = None + limit_price = None + + # TODO: is this comprehensive enough? + # if order_type.endswith('limit'): + # limit_price = price + # elif order_type.endswith('stop'): + # stop_price = price + + # executed_price = float(order_status['avg_execution_price']) + executed_price = price + + # TODO: bitfinex does not specify comission. I could calculate it but not sure if it's worth it. 
+ commission = None + + # date = pd.Timestamp.utcfromtimestamp(float(order_status['timestamp'])) + # date = pytz.utc.localize(date) + date = None + + order = Order( + dt=date, + asset=self.assets[order_status['symbol']], + # No such field in Poloniex + amount=amount, + stop=stop_price, + limit=limit_price, + filled=filled, + id=str(order_status['orderNumber']), + commission=commission + ) + order.status = status + + return order, executed_price + + def get_balances(self): + log.debug('retrieving wallets balances') + try: + balances = self.api.returnbalances() + except Exception as e: + log.debug(e) + raise ExchangeRequestError(error=e) + + if 'error' in balances: + raise ExchangeRequestError( + error='unable to fetch balance {}'.format(balances['error']) + ) + + std_balances = dict() + for (key, value) in iteritems(balances): + currency = key.lower() + std_balances[currency] = float(value) + + return std_balances + + @property + def account(self): + account = Account() + + account.settled_cash = None + account.accrued_interest = None + account.buying_power = None + account.equity_with_loan = None + account.total_positions_value = None + account.total_positions_exposure = None + account.regt_equity = None + account.regt_margin = None + account.initial_margin_requirement = None + account.maintenance_margin_requirement = None + account.available_funds = None + account.excess_liquidity = None + account.cushion = None + account.day_trades_remaining = None + account.leverage = None + account.net_leverage = None + account.net_liquidation = None + + return account + + @property + def time_skew(self): + # TODO: research the time skew conditions + return pd.Timedelta('0s') + + def get_account(self): + # TODO: fetch account data and keep in cache + return None + + def get_candles(self, data_frequency, assets, bar_count=None, + start_dt=None, end_dt=None): + """ + Retrieve OHLVC candles from Poloniex + + :param data_frequency: + :param assets: + :param bar_count: + :return: + 
+
+        Available Frequencies
+        ---------------------
+        '5m', '15m', '30m', '2h', '4h', '1D'
+        """
+
+        # TODO: implement end_dt and start_dt filters
+
+        # Map the requested frequency label to the candle period in seconds
+        # passed to the chart-data endpoint.
+        if (
+                data_frequency == '5m' or data_frequency == 'minute'):  # TODO: Polo does not have '1m'
+            frequency = 300
+        elif (data_frequency == '15m'):
+            frequency = 900
+        elif (data_frequency == '30m'):
+            frequency = 1800
+        elif (data_frequency == '2h'):
+            frequency = 7200
+        elif (data_frequency == '4h'):
+            frequency = 14400
+        elif (data_frequency == '1D' or data_frequency == 'daily'):
+            frequency = 86400
+        else:
+            raise InvalidHistoryFrequencyError(
+                frequency=data_frequency
+            )
+
+        # Making sure that assets are iterable
+        asset_list = [assets] if isinstance(assets, TradingPair) else assets
+        ohlc_map = dict()
+
+        for asset in asset_list:
+
+            # Request a window ending now; two periods when no bar count is
+            # given, otherwise `bar_count` periods.
+            end = int(time.time())
+            if (bar_count is None):
+                start = end - 2 * frequency
+            else:
+                start = end - bar_count * frequency
+
+            try:
+                response = self.api.returnchartdata(self.get_symbol(asset),
+                                                    frequency, start, end)
+            except Exception as e:
+                raise ExchangeRequestError(error=e)
+
+            if 'error' in response:
+                # The API wrapper returns the decoded JSON payload, which has
+                # no `.content` attribute; report the exchange's own error
+                # text instead of failing with an AttributeError.
+                raise ExchangeRequestError(
+                    error='Unable to retrieve candles: {}'.format(
+                        response['error'])
+                )
+
+            def ohlc_from_candle(candle):
+                # Convert one raw candle dict into the OHLCV dict shape
+                # returned by this method, with a UTC `last_traded` stamp.
+                last_traded = pd.Timestamp.utcfromtimestamp(candle['date'])
+                last_traded = last_traded.replace(tzinfo=pytz.UTC)
+
+                ohlc = dict(
+                    open=np.float64(candle['open']),
+                    high=np.float64(candle['high']),
+                    low=np.float64(candle['low']),
+                    close=np.float64(candle['close']),
+                    volume=np.float64(candle['volume']),
+                    price=np.float64(candle['close']),
+                    last_traded=last_traded
+                )
+
+                return ohlc
+
+            if bar_count is None:
+                ohlc_map[asset] = ohlc_from_candle(response[0])
+            else:
+                ohlc_bars = []
+                for candle in response:
+                    ohlc = ohlc_from_candle(candle)
+                    ohlc_bars.append(ohlc)
+                ohlc_map[asset] = ohlc_bars
+
+        # A single TradingPair in means a single result out (not a dict).
+        return ohlc_map[assets] \
+            if isinstance(assets, TradingPair) else ohlc_map
+
+    def create_order(self, asset, amount, is_buy, style):
+        """
+        Creating order on the exchange.
+
+        :param asset:
+        :param amount:
+        :param is_buy:
+        :param style:
+        :return:
+        """
+        exchange_symbol = self.get_symbol(asset)
+
+        if isinstance(style, ExchangeLimitOrder) or isinstance(style,
+                                                               ExchangeStopLimitOrder):
+            if isinstance(style, ExchangeStopLimitOrder):
+                log.warn('{} will ignore the stop price'.format(self.name))
+
+            price = style.get_limit_price(is_buy)
+
+            try:
+                if (is_buy):
+                    response = self.api.buy(exchange_symbol, amount, price)
+                else:
+                    response = self.api.sell(exchange_symbol, -amount, price)
+            except Exception as e:
+                raise ExchangeRequestError(error=e)
+
+            date = pd.Timestamp.utcnow()
+
+            if ('orderNumber' in response):
+                order_id = str(response['orderNumber'])
+                order = Order(
+                    dt=date,
+                    asset=asset,
+                    amount=amount,
+                    stop=style.get_stop_price(is_buy),
+                    limit=style.get_limit_price(is_buy),
+                    id=order_id
+                )
+                return order
+            else:
+                log.warn(
+                    '{} order failed: {}'.format('buy' if is_buy else 'sell',
+                                                 response['error']))
+                return None
+        else:
+            raise InvalidOrderStyle(exchange=self.name,
+                                    style=style.__class__.__name__)
+
+    def get_open_orders(self, asset='all'):
+        """Retrieve all of the current open orders.
+
+        Parameters
+        ----------
+        asset : Asset
+            If passed and not 'all', return only the open orders for the given
+            asset instead of all open orders.
+
+        Returns
+        -------
+        open_orders : dict[list[Order]] or list[Order]
+            If 'all' is passed this will return a dict mapping Assets
+            to a list containing all the open orders for the asset.
+            If an asset is passed then this will return a list of the open
+            orders for this asset.
+        """
+        # NOTE: the open orders tracked by the local portfolio are returned
+        # as-is; `asset` is not currently used to filter them.
+        # TODO: Why going to the exchange if we already have this info
+        # locally? And why creating all these Orders if we later discard
+        # them? (The unreachable exchange round-trip that used to follow
+        # this return was removed; it referenced undefined names.)
+        return self.portfolio.open_orders
+
+    def get_order(self, order_id):
+        """Lookup an order based on the order id returned from one of the
+        order functions.
+
+        Parameters
+        ----------
+        order_id : str
+            The unique identifier for the order.
+
+        Returns
+        -------
+        order : Order
+            The order object.
+
+        Raises
+        ------
+        OrphanOrderError
+            If `order_id` is not among the portfolio's open orders.
+        """
+        # Imported here because the module-level import list only brings in
+        # OrphanOrderReverseError; without this the raise below would fail
+        # with a NameError instead of the intended error.
+        from catalyst.exchange.exchange_errors import OrphanOrderError
+
+        try:
+            order = self._portfolio.open_orders[order_id]
+        except KeyError:
+            raise OrphanOrderError(order_id=order_id, exchange=self.name)
+
+        # TODO: Need to decide whether we fetch orders locally or from
+        # the exchange. For now only the local portfolio is consulted.
+        return order
+
+    def cancel_order(self, order_param):
+        """Cancel an open order.
+
+        Parameters
+        ----------
+        order_param : str or Order
+            The order_id or order object to cancel.
+ """ + + if (isinstance(order_param, Order)): + order = order_param + else: + order = self._portfolio.open_orders[order_param] + + try: + response = self.api.cancelorder(order.id) + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'error' in response: + log.info( + 'Unable to cancel order {order_id} on exchange {exchange} {error}.'.format( + order_id=order.id, + exchange=self.name, + error=response['error'] + )) + + # raise OrderCancelError( + # order_id=order.id, + # exchange=self.name, + # error=response['error'] + # ) + + self.portfolio.remove_order(order) + + def tickers(self, assets): + """ + Fetch ticket data for assets + https://docs.bitfinex.com/v2/reference#rest-public-tickers + + :param assets: + :return: + """ + symbols = self.get_symbols(assets) + + log.debug('fetching tickers {}'.format(symbols)) + + try: + response = self.api.returnticker() + except Exception as e: + raise ExchangeRequestError(error=e) + + if 'error' in response: + raise ExchangeRequestError( + error='Unable to retrieve tickers: {}'.format( + response['error']) + ) + + ticks = dict() + + for index, symbol in enumerate(symbols): + ticks[assets[index]] = dict( + timestamp=pd.Timestamp.utcnow(), + bid=float(response[symbol]['highestBid']), + ask=float(response[symbol]['lowestAsk']), + last_price=float(response[symbol]['last']), + low=float(response[symbol]['lowestAsk']), + # TODO: Polo does not provide low + high=float(response[symbol]['highestBid']), + # TODO: Polo does not provide high + volume=float(response[symbol]['baseVolume']), + ) + + log.debug('got tickers {}'.format(ticks)) + return ticks + + def generate_symbols_json(self, filename=None, source_dates=False): + symbol_map = {} + + if not source_dates: + fn, r = download_exchange_symbols(self.name) + with open(fn) as data_file: + cached_symbols = json.load(data_file) + + response = self.api.returnticker() + + for exchange_symbol in response: + base, market = 
self.sanitize_curency_symbol(exchange_symbol).split( + '_') + symbol = '{market}_{base}'.format(market=market, base=base) + + if (source_dates): + start_date = self.get_symbol_start_date(exchange_symbol) + else: + try: + start_date = cached_symbols[exchange_symbol]['start_date'] + except KeyError as e: + start_date = time.strftime('%Y-%m-%d') + + try: + end_daily = cached_symbols[exchange_symbol]['end_daily'] + except KeyError as e: + end_daily = 'N/A' + + try: + end_minute = cached_symbols[exchange_symbol]['end_minute'] + except KeyError as e: + end_minute = 'N/A' + + symbol_map[exchange_symbol] = dict( + symbol=symbol, + start_date=start_date, + end_daily=end_daily, + end_minute=end_minute, + ) + + if (filename is None): + filename = get_exchange_symbols_filename(self.name) + + with open(filename, 'w') as f: + json.dump(symbol_map, f, sort_keys=True, indent=2, + separators=(',', ':')) + + def get_symbol_start_date(self, symbol): + try: + r = self.api.returnchartdata(symbol, 86400, pd.to_datetime( + '2010-1-1').value // 10 ** 9) + except Exception as e: + raise ExchangeRequestError(error=e) + + return time.strftime('%Y-%m-%d', time.gmtime(int(r[0]['date']))) + + def check_open_orders(self): + """ + Need to override this function for Poloniex: + + Loop through the list of open orders in the Portfolio object. + Check if any transactions have been executed: + If so, create a transaction and apply to the Portfolio. 
+            Check if the order is still open:
+                If not, remove it from open orders
+
+        :return:
+        transactions: Transaction[]
+        """
+        transactions = list()
+        if self.portfolio.open_orders:
+            # Iterate over a copy of the keys: entries are deleted below.
+            for order_id in list(self.portfolio.open_orders):
+
+                order = self._portfolio.open_orders[order_id]
+                log.debug('found open order: {}'.format(order_id))
+
+                try:
+                    order_open = self.get_order(order_id)
+                except Exception as e:
+                    raise ExchangeRequestError(error=e)
+
+                if (order_open):
+                    delta = pd.Timestamp.utcnow() - order.dt
+                    log.info(
+                        'order {order_id} still open after {delta}'.format(
+                            order_id=order_id,
+                            delta=delta)
+                    )
+
+                try:
+                    response = self.api.returnordertrades(order_id)
+                except Exception as e:
+                    raise ExchangeRequestError(error=e)
+
+                if ('error' in response):
+                    if (not order_open):
+                        raise OrphanOrderReverseError(order_id=order_id,
+                                                      exchange=self.name)
+                else:
+                    for tx in response:
+                        # We maintain a list of transactions that correspond
+                        # to partially filled orders, indexed by order_id.
+                        # Every time we query executed transactions from the
+                        # exchange, we check if we had that transaction for
+                        # that order already. If not, we process it.
+                        #
+                        # When an order is fully filled, we flush the list of
+                        # transactions associated with that order.
+                        #
+                        # `any` is used instead of `not filter(...)`: on
+                        # Python 3 `filter` returns an always-truthy iterator,
+                        # so the old test never detected a new transaction.
+                        already_seen = any(
+                            item['order_id'] == tx['tradeID']
+                            for item in self.transactions[order_id])
+                        if not already_seen:
+                            log.debug(
+                                'Got new transaction for order {}: amount {}, price {}'.format(
+                                    order_id, tx['amount'], tx['rate']))
+                            tx['amount'] = float(tx['amount'])
+                            if (tx['type'] == 'sell'):
+                                tx['amount'] = -tx['amount']
+                            transaction = Transaction(
+                                asset=order.asset,
+                                amount=tx['amount'],
+                                dt=pd.to_datetime(tx['date'], utc=True),
+                                price=float(tx['rate']),
+                                order_id=tx['tradeID'],
+                                # it's a misnomer, but keeping it for compatibility
+                                commission=float(tx['fee'])
+                            )
+                            self.transactions[order_id].append(transaction)
+                            self.portfolio.execute_transaction(transaction)
+                            transactions.append(transaction)
+
+                    if (not order_open):
+                        # Since transactions have been executed individually
+                        # the only thing left to do is remove them from the
+                        # list of open_orders.
+                        del self.portfolio.open_orders[order_id]
+                        del self.transactions[order_id]
+
+        return transactions
+
+    def get_orderbook(self, asset, order_type='all'):
+        """Return the order book of `asset` as ``{'asks': [...], 'bids': [...]}``.
+
+        Each entry is a dict with float ``rate`` and ``quantity``.
+        `order_type` is accepted but not yet used to filter the result.
+        """
+        exchange_symbol = asset.exchange_symbol
+        data = self.api.returnOrderBook(market=exchange_symbol)
+
+        result = dict()
+        # Loop variable deliberately not named `order_type` so the parameter
+        # is no longer shadowed.
+        for side in data:
+            # TODO: filter by type
+            if side != 'asks' and side != 'bids':
+                continue
+
+            result[side] = []
+            for entry in data[side]:
+                if len(entry) == 2:
+                    result[side].append(
+                        dict(
+                            rate=float(entry[0]),
+                            quantity=float(entry[1])
+                        )
+                    )
+        return result
diff --git a/catalyst/exchange/poloniex/poloniex_api.py b/catalyst/exchange/poloniex/poloniex_api.py
new file mode 100644
index 00000000..599a0b65
--- /dev/null
+++ b/catalyst/exchange/poloniex/poloniex_api.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+import json
+import time
+import hmac
+import hashlib
+
+from six.moves import urllib
+
+# Workaround for backwards compatibility
+# https://stackoverflow.com/questions/3745771/urllib-request-in-python-2-7
+urlopen = urllib.request.urlopen
+
+
+class Poloniex_api(object):
+    def
__init__(self, key, secret): + self.key = key + self.secret = secret + + self.max_requests_per_second = 6 + self.request_cpt = dict() + + self.public = ['returnTicker', 'return24Volume', 'returnOrderBook', + 'returnTradeHistory', 'returnChartData', + 'returnCurrencies', 'returnLoanOrders'] + self.trading = ['returnBalances','returnCompleteBalances','returnDepositAddresses', + 'generateNewAddress','returnDepositsWithdrawals','returnOpenOrders', + 'returnTradeHistory','returnOrderTrades', + 'buy', 'sell', 'cancelOrder', 'moveOrder', + 'withdraw', 'returnFeeInfo','returnAvailableAccountBalances', + 'returnTradableBalances', 'transferBalance', + 'returnMarginAccountSummary','marginBuy','marginSell', + 'getMarginPosition', 'closeMarginPosition','createLoanOffer', + 'cancelLoanOffer','returnOpenLoanOffers','returnActiveLoans', + 'returnLendingHistory','toggleAutoRenew'] + + def ask_request(self): + """ + Asks permission to issue a request to the exchange. + The primary purpose is to avoid hitting rate limits. + + The application will pause if the maximum requests per minute + permitted by the exchange is exceeded. 
+
+        :return boolean:
+
+        """
+        now = time.time()
+        if not self.request_cpt:
+            # First request: open a one-second window that already counts
+            # this request (starting at 0 let a 7th request through).
+            self.request_cpt = {now: 1}
+            return True
+
+        # The dict holds a single entry: window start time -> request count.
+        # next(iter(...)) works on both Python 2 and 3, unlike .keys()[0].
+        cpt_date = next(iter(self.request_cpt))
+        cpt = self.request_cpt[cpt_date]
+
+        if now > cpt_date + 1:
+            # The previous window expired; start a new one.
+            self.request_cpt = {now: 1}
+            return True
+
+        if cpt >= self.max_requests_per_second:
+            # Window is full: wait it out, then start a new window.
+            # (The earlier code called undefined `log` and `sleep` here and
+            # raised NameError; no logger is defined in this module.)
+            time.sleep(1)
+            self.request_cpt = {time.time(): 1}
+            return True
+
+        self.request_cpt[cpt_date] += 1
+        return True
+
+    def query(self, method, req=None):
+        """Call a Poloniex API command and return the decoded JSON reply.
+
+        :param method: command name; must be listed in ``self.public`` or
+            ``self.trading``.
+        :param req: optional dict of request parameters (not modified).
+        :return: the JSON-decoded response.
+        :raises ValueError: if ``method`` is in neither endpoint list.
+        """
+        # Work on a copy: the old mutable default (`req={}`) was shared
+        # between calls and mutated with 'command'/'nonce'.
+        params = dict(req) if req else {}
+
+        if method in self.public:
+            url = 'https://poloniex.com/public?command=' + method + '&' + urllib.parse.urlencode(params)
+            headers = {}
+            post_data = None
+        elif method in self.trading:
+            url = 'https://poloniex.com/tradingApi'
+            params['command'] = method
+            params['nonce'] = int(time.time()*1000)
+            # hmac and the request body need bytes on Python 3; encoding the
+            # ASCII urlencoded string is a no-op on Python 2.
+            post_data = urllib.parse.urlencode(params).encode('utf-8')
+            signature = hmac.new(self.secret, post_data, hashlib.sha512).hexdigest()
+            headers = {'Sign': signature, 'Key': self.key}
+        else:
+            raise ValueError('Method "' + method + '" not found in neither the Public API or Trading API endpoints')
+
+        self.ask_request()
+        request = urllib.request.Request(url, data=post_data, headers=headers)
+        # Decode explicitly so json.loads also works where it requires text.
+        return json.loads(urlopen(request).read().decode('utf-8'))
+
+    def returnticker(self):
+        return self.query('returnTicker', {})
+
+    def return24volume(self):
+        return self.query('return24Volume', {})
+
+    def returnOrderBook(self, market='all'):
+        return self.query('returnOrderBook', {'currencyPair': market})
+
+    def returntradehistory(self, market, start=None, end=None):
+        # The command must be spelled 'returnTradeHistory' to match the
+        # endpoint lists; the lowercase spelling always raised ValueError.
+        # NOTE: a second `returntradehistory(self, market)` defined later in
+        # this class overrides this definition.
+        if(start is not None and end is not None):
+            return self.query('returnTradeHistory',
+                              {'currencyPair': market, 'start': start, 'end': end })
+        else:
+            return self.query('returnTradeHistory', {'currencyPair': market })
+
+    def returnchartdata(self, market, period, start, end=9999999999):
+        return self.query('returnChartData', {'currencyPair': market, 'period':
period, + 'start': start, 'end': end}) + + def returncurrencies(self): + return self.query('returnCurrencies', {}) + + def returnloadorders(self, market): + return self.query('returnLoanOrders', {'currency': market}) + + def returnbalances(self): + return self.query('returnBalances') + + def returncompletebalances(self, account): + if(account): + return self.query('returnCompleteBalances', {'account': account}) + else: + return self.query('returnCompleteBalances') + + def returndepositaddresses(self): + return self.query('returnDepositAddresses') + + def generatenewaddress(self, currency): + return self.query('generateNewAddress', {'currency': currency}) + + def returnDepositsWithdrawals(self, start, end): + return self.query('returnDepositsWithdrawals', {'start': start, 'end': end}) + + def returnopenorders(self, market): + return self.query('returnOpenOrders', {'currencyPair': market}) + + def returntradehistory(self, market): + #TODO: optional start and/or end and limit + return self.query('returnTradeHistory', {'currencyPair': market}) + + def returnordertrades(self, ordernumber): + return self.query('returnOrderTrades', {'orderNumber': ordernumber}) + + def buy(self, market, amount, rate, fillorkill=0, immediateorcancel=0, postonly=0): + if(fillorkill): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'fillOrKill': fillorkill, }) + elif(immediateorcancel): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'immediateOrCancel': immediateorcancel, }) + elif(postonly): + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'postOnly': postonly, }) + else: + return self.query('buy', {'currencyPair': market, 'rate':rate, 'amount': amount, }) + + def sell(self, market, amount, rate, fillorkill=0, immediateorcancel=0, postonly=0): + if(fillorkill): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'fillOrKill': fillorkill, }) + 
elif(immediateorcancel): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'immediateOrCancel': immediateorcancel, }) + elif(postonly): + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, + 'postOnly': postonly, }) + else: + return self.query('sell', {'currencyPair': market, 'rate':rate, 'amount': amount, }) + + def cancelorder(self, ordernumber): + return self.query('cancelOrder', {'orderNumber': ordernumber}) + + def withdraw(self, currency, quantity, address): + return self.query('withdraw', + {'currency': currency, 'amount': quantity, + 'address': address}) + + def returnfeeinfo(self): + return self.query('returnFeeInfo') + diff --git a/catalyst/exchange/stats_utils.py b/catalyst/exchange/stats_utils.py index eda1b2fd..2cf65b43 100644 --- a/catalyst/exchange/stats_utils.py +++ b/catalyst/exchange/stats_utils.py @@ -1,7 +1,7 @@ import pandas as pd -def get_pretty_stats(stats_df, num_rows=10): +def get_pretty_stats(stats_df, recorded_cols=None, num_rows=10): """ Format and print the last few rows of a statistics DataFrame. See the pyfolio project for the data structure. 
@@ -22,6 +22,10 @@ def get_pretty_stats(stats_df, num_rows=10): 'pnl', 'long_exposure', 'short_exposure', 'orders', 'transactions', 'positions'] + if recorded_cols is not None: + for column in recorded_cols: + columns.append(column) + def format_positions(positions): parts = [] for position in positions: diff --git a/catalyst/finance/performance/tracker.py b/catalyst/finance/performance/tracker.py index 810f2c32..c6df9292 100644 --- a/catalyst/finance/performance/tracker.py +++ b/catalyst/finance/performance/tracker.py @@ -111,27 +111,11 @@ class PerformanceTracker(object): self.treasury_curves, self.trading_calendar ) - elif self.emission_rate == '5-minute': - self.all_benchmark_returns = pd.Series( - index=pd.date_range( - self.sim_params.first_open, - self.sim_params.last_close, - freq='5min' - ), - ) - self.cumulative_risk_metrics = \ - risk.RiskMetricsCumulative( - self.sim_params, - self.treasury_curves, - self.trading_calendar, - create_first_day_stats=True, - ) elif self.emission_rate == 'minute': self.all_benchmark_returns = pd.Series(index=pd.date_range( self.sim_params.first_open, self.sim_params.last_close, freq='Min') ) - self.cumulative_risk_metrics = \ risk.RiskMetricsCumulative( self.sim_params, diff --git a/catalyst/gens/sim_engine.pyx b/catalyst/gens/sim_engine.pyx index a318f292..aa3a9d51 100644 --- a/catalyst/gens/sim_engine.pyx +++ b/catalyst/gens/sim_engine.pyx @@ -20,9 +20,7 @@ cimport cython from cpython cimport bool cdef np.int64_t _nanos_in_minute = 60000000000 -cdef np.int64_t _nanos_in_five_minutes = 5 * _nanos_in_minute NANOS_IN_MINUTE = _nanos_in_minute -NANOS_IN_FIVE_MINUTES = _nanos_in_five_minutes cpdef enum: BAR = 0 @@ -117,24 +115,3 @@ cdef class MinuteSimulationClock: yield minute, BAR if minute_emission: yield minute, MINUTE_END - -cdef class FiveMinuteSimulationClock(MinuteSimulationClock): - @cython.boundscheck(False) - @cython.wraparound(False) - cdef dict calc_minutes_by_session(self): - cdef dict five_minutes_by_session - 
cdef int session_idx - cdef np.int64_t session_nano - cdef np.ndarray[np.int64_t, ndim=1] five_minutes_nanos - - five_minutes_by_session = {} - for session_idx, session_nano in enumerate(self.sessions_nanos): - five_minutes_nanos = np.arange( - self.market_opens_nanos[session_idx], - self.market_closes_nanos[session_idx], - _nanos_in_five_minutes - ) - five_minutes_by_session[session_nano] = pd.to_datetime( - five_minutes_nanos, utc=True, box=True - ) - return five_minutes_by_session diff --git a/catalyst/gens/tradesimulation.py b/catalyst/gens/tradesimulation.py index 1ef9dfbf..f7daa233 100644 --- a/catalyst/gens/tradesimulation.py +++ b/catalyst/gens/tradesimulation.py @@ -34,7 +34,6 @@ class AlgorithmSimulator(object): EMISSION_TO_PERF_KEY_MAP = { 'minute': 'minute_perf', - '5-minute': '5_minute_perf', 'daily': 'daily_perf' } @@ -202,7 +201,7 @@ class AlgorithmSimulator(object): stack.enter_context(self.processor) stack.enter_context(ZiplineAPI(self.algo)) - if algo.data_frequency in set(('minute', '5-minute')): + if algo.data_frequency == 'minute': def execute_order_cancellation_policy(): algo.blotter.execute_cancel_policy(SESSION_END) diff --git a/catalyst/pipeline/loaders/crypto_pricing_loader.py b/catalyst/pipeline/loaders/crypto_pricing_loader.py index 22c0ccf2..da98c988 100644 --- a/catalyst/pipeline/loaders/crypto_pricing_loader.py +++ b/catalyst/pipeline/loaders/crypto_pricing_loader.py @@ -41,10 +41,6 @@ class CryptoPricingLoader(PipelineLoader): reader = bundle.daily_bar_reader all_sessions = cal.all_sessions - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader - all_sessions = cal.all_five_minutes - elif data_frequency == 'minute': reader = bundle.minute_bar_reader all_sessions = cal.all_minutes diff --git a/catalyst/pipeline/loaders/equity_pricing_loader.py b/catalyst/pipeline/loaders/equity_pricing_loader.py index 4d9ec055..c7dbfd41 100644 --- a/catalyst/pipeline/loaders/equity_pricing_loader.py +++ 
b/catalyst/pipeline/loaders/equity_pricing_loader.py @@ -40,8 +40,6 @@ class USEquityPricingLoader(PipelineLoader): if data_frequency == 'daily': reader = bundle.daily_bar_reader - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader elif daily_bar_reader == 'minute': reader = bundle.minute_bar_reader else: @@ -53,9 +51,6 @@ class USEquityPricingLoader(PipelineLoader): if data_frequency == 'daily': all_sessions = cal.all_sessions - elif data_frequency == '5-minute': - reader = bundle.five_minute_bar_reader - all_sessions = cal.all_five_minutes elif daily_bar_reader == 'minute': reader = bundle.minute_bar_reader all_sessions = cal.all_minutes diff --git a/catalyst/sources/benchmark_source.py b/catalyst/sources/benchmark_source.py index 846b7eb5..05d5c601 100644 --- a/catalyst/sources/benchmark_source.py +++ b/catalyst/sources/benchmark_source.py @@ -65,19 +65,6 @@ class BenchmarkSource(object): ) self._precalculated_series = minute_series - elif self.emission_rate == '5-minute': - five_minutes = \ - trading_calendar.five_minutes_for_sessions_in_range( - sessions[0], - sessions[-1], - ) - - five_minute_series = daily_series.reindex( - index=five_minutes, - method='ffill', - ) - - self._precalculated_series = five_minute_series else: self._precalculated_series = daily_series else: @@ -168,21 +155,6 @@ class BenchmarkSource(object): ffill=True )[asset] - return benchmark_series.pct_change()[1:] - elif self.emission_rate == '5-minute': - five_minutes = trading_calendar.five_minutes_for_sessions_in_range( - self.sessions[0], self.sessions[-1] - ) - benchmark_series = data_portal.get_history_window( - [asset], - five_minutes[-1], - bar_count=len(five_minutes) + 1, - frequency='5m', - field='price', - data_frequency=self.emission_rate, - ffill=True, - )[asset] - return benchmark_series.pct_change()[1:] else: start_date = asset.start_date diff --git a/catalyst/utils/calendars/exchange_calendar_open.py 
b/catalyst/utils/calendars/exchange_calendar_open.py index 1dbc8cd3..54be460f 100644 --- a/catalyst/utils/calendars/exchange_calendar_open.py +++ b/catalyst/utils/calendars/exchange_calendar_open.py @@ -31,4 +31,4 @@ class OpenExchangeCalendar(TradingCalendar): return DateOffset(days=1) def __init__(self, *args, **kwargs): - super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-03-01', tz='UTC'), **kwargs) + super(OpenExchangeCalendar, self).__init__(start=Timestamp('2015-3-1', tz='UTC'), **kwargs) diff --git a/catalyst/utils/calendars/trading_calendar.py b/catalyst/utils/calendars/trading_calendar.py index a1489067..4748b1c2 100644 --- a/catalyst/utils/calendars/trading_calendar.py +++ b/catalyst/utils/calendars/trading_calendar.py @@ -117,9 +117,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): self._trading_minutes_nanos = self.all_minutes.values.\ astype(np.int64) - - self._trading_five_minutes_nanos = self.all_five_minutes.values.\ - astype(np.int64) self.first_trading_session = _all_days[0] self.last_trading_session = _all_days[-1] @@ -182,18 +179,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): """ return int(self._minutes_per_session[start_session:end_session].sum()) - @lazyval - def _five_minutes_per_session(self): - diff = self.schedule.market_close - self.schedule.market_open - diff = diff.astype('timedelta64[m]') - return (diff + 1) // 5 - - def five_minutes_count_for_sessions_in_range(self, - start_session, - end_session): - five_mins = self._five_minutes_per_session[start_session:end_session] - return int(five_mins.sum()) - @property def regular_holidays(self): """ @@ -386,10 +371,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): idx = next_divider_idx(self._trading_minutes_nanos, dt.value) return self.all_minutes[idx] - def next_five_minute(self, dt): - idx = next_divider_idx(self._trading_five_minutes_nanos, dt.values) - return self.all_five_mintutes[idx] - def previous_minute(self, dt): """ Given a dt, return the previous 
exchange minute. @@ -484,12 +465,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): end_minute=self.schedule.at[session_label, 'market_close'], ) - def five_minutes_for_session(self, session_label): - return self.five_minutes_in_range( - start_five_minute=self.schedule.at[session_label, 'market_open'], - end_five_minute=self.schedule.at[session_label, 'market_close'], - ) - def minutes_window(self, start_dt, count): start_dt_nanos = start_dt.value all_minutes_nanos = self._trading_minutes_nanos @@ -591,20 +566,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return abs(end_idx - start_idx) - def five_minutes_in_range(self, start_five_minute, end_five_minute): - start_idx = searchsorted(self._trading_five_minutes_nanos, - start_five_minute.value) - - end_idx = searchsorted(self._trading_five_minutes_nanos, - end_five_minute.value) - - if end_five_minute.value == self._trading_five_minutes_nanos[end_idx]: - # if the end minute is a market minute, increase by 1 - end_idx += 1 - - return self.all_five_minutes[start_idx:end_idx] - - def minutes_in_range(self, start_minute, end_minute): """ Given start and end minutes, return all the calendar minutes @@ -662,15 +623,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return self.minutes_in_range(first_minute, last_minute) - def five_minutes_for_sessions_in_range(self, - start_session_label, - end_session_label): - - first_minute, _ = self.open_and_close_for_session(start_session_label) - _, last_minute = self.open_and_close_for_session(end_session_label) - - return self.five_minutes_in_range(first_minute, last_minute) - def open_and_close_for_session(self, session_label): """ Returns a tuple of timestamps of the open and close of the session @@ -777,13 +729,6 @@ class TradingCalendar(with_metaclass(ABCMeta)): return DatetimeIndex(all_minutes).tz_localize("UTC") - @lazyval - def all_five_minutes(self): - """ - Returns a DatetimeIndex representing all the five minutes in this calendar. 
- """ - return self._all_minutes_with_interval(5) - @lazyval def all_minutes(self): """ diff --git a/catalyst/utils/events.py b/catalyst/utils/events.py index 3fc83dd4..10bb2f55 100644 --- a/catalyst/utils/events.py +++ b/catalyst/utils/events.py @@ -602,7 +602,6 @@ class date_rules(object): class time_rules(object): market_open = AfterOpen market_close = BeforeClose - every_5_minutes = Always every_minute = Always diff --git a/catalyst/utils/run_algo.py b/catalyst/utils/run_algo.py index fc9f6354..3a318748 100644 --- a/catalyst/utils/run_algo.py +++ b/catalyst/utils/run_algo.py @@ -1,16 +1,16 @@ import os -import re -from runpy import run_path import sys import warnings -from time import sleep from datetime import timedelta - -import pandas as pd +from runpy import run_path +from time import sleep import click +import pandas as pd from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.poloniex.poloniex import Poloniex try: from pygments import highlight @@ -23,29 +23,22 @@ except: from toolz import valfilter, concatv from functools import partial -from catalyst.algorithm import TradingAlgorithm -from catalyst.data.bundles.core import load -from catalyst.data.data_portal import DataPortal -from catalyst.data.loader import load_crypto_market_data from catalyst.finance.trading import TradingEnvironment -from catalyst.pipeline.data import USEquityPricing, CryptoPricing -from catalyst.pipeline.loaders import ( - USEquityPricingLoader, - CryptoPricingLoader, -) from catalyst.utils.calendars import get_calendar from catalyst.utils.factory import create_simulation_parameters +from catalyst.data.loader import load_crypto_market_data import catalyst.utils.paths as pth -from catalyst.exchange.algorithm_exchange import ExchangeTradingAlgorithm -from catalyst.exchange.data_portal_exchange import DataPortalExchange -from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from 
catalyst.exchange.exchange_algorithm import ExchangeTradingAlgorithmLive, \ + ExchangeTradingAlgorithmBacktest +from catalyst.exchange.data_portal_exchange import DataPortalExchangeLive, \ + DataPortalExchangeBacktest from catalyst.exchange.asset_finder_exchange import AssetFinderExchange from catalyst.exchange.exchange_portfolio import ExchangePortfolio from catalyst.exchange.exchange_errors import ( ExchangeRequestError, ExchangeRequestErrorTooManyAttempts, - BaseCurrencyNotFoundError) + BaseCurrencyNotFoundError, ExchangeNotFoundError) from catalyst.exchange.exchange_utils import get_exchange_auth, \ get_algo_object from logbook import Logger @@ -148,72 +141,90 @@ def _run(handle_data, mode = 'live' if live else 'backtest' log.info('running algo in {mode} mode'.format(mode=mode)) - if live and exchange is not None: - exchange_name = exchange - start = pd.Timestamp.utcnow() - end = start + timedelta(minutes=1439) + exchange_name = exchange + if exchange_name is None: + raise ValueError('Please specify at least one exchange.') + exchange_list = [x.strip().lower() for x in exchange.split(',')] + + exchanges = dict() + for exchange_name in exchange_list: + + # Looking for the portfolio from the cache first portfolio = get_algo_object( algo_name=algo_namespace, key='portfolio_{}'.format(exchange_name), environ=environ ) + if portfolio is None: portfolio = ExchangePortfolio( start_date=pd.Timestamp.utcnow() ) + # This corresponds to the json file containing api token info exchange_auth = get_exchange_auth(exchange_name) if exchange_name == 'bitfinex': - exchange = Bitfinex( + exchanges[exchange_name] = Bitfinex( key=exchange_auth['key'], secret=exchange_auth['secret'], base_currency=base_currency, portfolio=portfolio ) elif exchange_name == 'bittrex': - exchange = Bittrex( + exchanges[exchange_name] = Bittrex( + key=exchange_auth['key'], + secret=exchange_auth['secret'], + base_currency=base_currency, + portfolio=portfolio + ) + elif exchange_name == 'poloniex': + 
exchanges[exchange_name] = Poloniex( key=exchange_auth['key'], secret=exchange_auth['secret'], base_currency=base_currency, portfolio=portfolio ) else: - raise NotImplementedError( - 'exchange not supported: %s' % exchange_name) + raise ExchangeNotFoundError(exchange_name=exchange_name) open_calendar = get_calendar('OPEN') - sim_params = create_simulation_parameters( - start=start, - end=end, - capital_base=capital_base, - data_frequency=data_frequency, - emission_rate=data_frequency, - ) - if live and exchange is not None: - env = TradingEnvironment( + env = TradingEnvironment( + load=partial( + load_crypto_market_data, environ=environ, - exchange_tz='UTC', - asset_db_path=None - ) - env.asset_finder = AssetFinderExchange(exchange) + start_dt=start, + end_dt=end + ), + environ=environ, + exchange_tz='UTC', + asset_db_path=None # We don't need an asset db, we have exchanges + ) + env.asset_finder = AssetFinderExchange() + choose_loader = None # TODO: use the DataPortal for in the algorithm class for this - data = DataPortalExchange( - exchange=exchange, + if live: + start = pd.Timestamp.utcnow() + + # TODO: fix the end data. + end = start + timedelta(hours=8760) + + data = DataPortalExchangeLive( + exchanges=exchanges, asset_finder=env.asset_finder, trading_calendar=open_calendar, first_trading_day=pd.to_datetime('today', utc=True) ) - choose_loader = None - def fetch_capital_base(attempt_index=0): + def fetch_capital_base(exchange, attempt_index=0): """ Fetch the base currency amount required to bootstrap the algorithm against the exchange. The algorithm cannot continue without this value. 
+ :param exchange: the targeted exchange :param attempt_index: :return capital_base: the amount of base currency available for trading @@ -224,8 +235,11 @@ def _run(handle_data, balances = exchange.get_balances() except ExchangeRequestError as e: if attempt_index < 20: + log.warn('exchange error when retrieving balances, {} ' + 'trying again in 5 seconds'.format(e)) sleep(5) - return fetch_capital_base(attempt_index + 1) + return fetch_capital_base(exchange, attempt_index + 1) + else: raise ExchangeRequestErrorTooManyAttempts( attempts=attempt_index, @@ -240,110 +254,59 @@ def _run(handle_data, exchange=exchange_name ) + capital_base = 0 + for exchange_name in exchanges: + exchange = exchanges[exchange_name] + capital_base += fetch_capital_base(exchange) + sim_params = create_simulation_parameters( start=start, end=end, - capital_base=fetch_capital_base(), + capital_base=capital_base, emission_rate='minute', data_frequency='minute' ) - elif bundle is not None: - bundles = bundle.split(',') - - def get_trading_env_and_data(bundles): - env = data = None - - b = 'poloniex' - if len(bundles) == 0: - return env, data - elif len(bundles) == 1: - b = bundles[0] - - bundle_data = load( - b, - environ, - bundle_timestamp, - ) - - prefix, connstr = re.split( - r'sqlite:///', - str(bundle_data.asset_finder.engine.url), - maxsplit=1, - ) - if prefix: - raise ValueError( - "invalid url %r, must begin with 'sqlite:///'" % - str(bundle_data.asset_finder.engine.url), - ) - - env = TradingEnvironment( - load=partial(load_crypto_market_data, bundle=b, - bundle_data=bundle_data, environ=environ), - bm_symbol='USDT_BTC', - trading_calendar=open_calendar, - asset_db_path=connstr, - environ=environ, - ) - - first_trading_day = bundle_data.minute_bar_reader.first_trading_day - - data = DataPortal( - env.asset_finder, - open_calendar, - first_trading_day=first_trading_day, - minute_reader=bundle_data.minute_bar_reader, - five_minute_reader=bundle_data.five_minute_bar_reader, - 
daily_reader=bundle_data.daily_bar_reader, - adjustment_reader=bundle_data.adjustment_reader, - ) - - return env, data - - def get_loader_for_bundle(b): - bundle_data = load( - b, - environ, - bundle_timestamp, - ) - - if b == 'poloniex': - return CryptoPricingLoader( - bundle_data, - data_frequency, - CryptoPricing, - ) - elif b == 'quandl': - return USEquityPricingLoader( - bundle_data, - data_frequency, - USEquityPricing, - ) - raise ValueError( - "No PipelineLoader registered for bundle %s." % b - ) - - loaders = [get_loader_for_bundle(b) for b in bundles] - env, data = get_trading_env_and_data(bundles) - - def choose_loader(column): - for loader in loaders: - if column in loader.columns: - return loader - raise ValueError( - "No PipelineLoader registered for column %s." % column - ) + # TODO: use the constructor instead + # sim_params._arena = 'live' + algorithm_class = partial( + ExchangeTradingAlgorithmLive, + exchanges=exchanges, + algo_namespace=algo_namespace, + live_graph=live_graph + ) else: - env = TradingEnvironment(environ=environ) - choose_loader = None + # Removed the existing Poloniex fork to keep things simple + # We can add back the complexity if required. - TradingAlgorithmClass = ( - partial(ExchangeTradingAlgorithm, exchange=exchange, - algo_namespace=algo_namespace, live_graph=live_graph) - if live and exchange else TradingAlgorithm) + # I don't think that we should have arbitrary price data bundles + # Instead, we should center this data around exchanges. + # We still need to support bundles for other misc data, but we + # can handle this later. 
- perf = TradingAlgorithmClass( + data = DataPortalExchangeBacktest( + exchanges=exchanges, + asset_finder=None, + trading_calendar=open_calendar, + first_trading_day=start, + last_available_session=end + ) + + sim_params = create_simulation_parameters( + start=start, + end=end, + capital_base=capital_base, + data_frequency=data_frequency, + emission_rate=data_frequency, + ) + + algorithm_class = partial( + ExchangeTradingAlgorithmBacktest, + exchanges=exchanges + ) + + perf = algorithm_class( namespace=namespace, env=env, get_pipeline_loader=choose_loader, @@ -514,6 +477,11 @@ def run_algorithm(initialize, """ load_extensions(default_extension, extensions, strict_extensions, environ) + # I'm not sure that we need this since the modified DataPortal + # does not require extensions to be explicitly loaded. + + # This will be useful for arbitrary non-pricing bundles but we may + # need to modify the logic. if not live: non_none_data = valfilter(bool, { 'data': data is not None, diff --git a/docs/CNAME b/docs/CNAME index 604be5df..79f02e82 100644 --- a/docs/CNAME +++ b/docs/CNAME @@ -1 +1 @@ -www.zipline.io +enigma-catalyst.readthedocs.io \ No newline at end of file diff --git a/docs/source/beginner-tutorial.rst b/docs/source/beginner-tutorial.rst index c2f9a07d..b3b1b133 100644 --- a/docs/source/beginner-tutorial.rst +++ b/docs/source/beginner-tutorial.rst @@ -1,608 +1,281 @@ -Zipline Beginner Tutorial -------------------------- +Catalyst Beginner Tutorial +-------------------------- Basics ~~~~~~ -Zipline is an open-source algorithmic trading simulator written in -Python. +Catalyst is an open-source algorithmic trading simulator for crypto +assets written in Python. -The source can be found at: https://github.com/quantopian/zipline +The source can be found at: https://github.com/enigmampc/catalyst Some benefits include: +- Support for several of the top crypto-exchanges by trading volume. - Realistic: slippage, transaction costs, order delays. 
- Stream-based: Process each event individually, avoids look-ahead bias. - Batteries included: Common transforms (moving average) as well as common risk calculations (Sharpe). - Developed and continuously updated by - `Quantopian `__ which provides an - easy-to-use web-interface to Zipline, 10 years of minute-resolution - historical US stock data, and live-trading capabilities. This - tutorial is directed at users wishing to use Zipline without using - Quantopian. If you instead want to get started on Quantopian, see - `here `__. + `Enigma MPC `__ which is building the Enigma + data marketplace protocol as well as Catalyst, the first application + that will run on our protocol. Powered by our financial data + marketplace, Catalyst empowers users to share and curate data and + build profitable, data-driven investment strategies. -This tutorial assumes that you have zipline correctly installed, see the -`installation -instructions `__ if -you haven't set up zipline yet. +This tutorial assumes that you have Catalyst correctly installed, see the +:doc:`installation instructions ` if you haven't set up +Catalyst yet. -Every ``zipline`` algorithm consists of two functions you have to +Every ``catalyst`` algorithm consists of at least two functions you have to define: * ``initialize(context)`` * ``handle_data(context, data)`` -Before the start of the algorithm, ``zipline`` calls the +Before the start of the algorithm, ``catalyst`` calls the ``initialize()`` function and passes in a ``context`` variable. ``context`` is a persistent namespace for you to store variables you need to access from one algorithm iteration to the next. -After the algorithm has been initialized, ``zipline`` calls the +After the algorithm has been initialized, ``catalyst`` calls the ``handle_data()`` function once for each event. 
At every call, it passes the same ``context`` variable and an event-frame called ``data`` containing the current trading bar with open, high, low, and close -(OHLC) prices as well as volume for each stock in your universe. For -more information on these functions, see the `relevant part of the -Quantopian docs `__. +(OHLC) prices as well as volume for each crypto asset in your universe. + +.. For more information on these functions, see the `relevant part of the +.. Quantopian docs `. My first algorithm ~~~~~~~~~~~~~~~~~~ Lets take a look at a very simple algorithm from the ``examples`` -directory, ``buyapple.py``: +directory, ``buy_btc.py``: .. code-block:: python - from zipline.examples import buyapple - buyapple?? - - -.. code-block:: python - - from zipline.api import order, record, symbol + from catalyst.api import order, record, symbol def initialize(context): - pass + context.asset = symbol('btc_usd') def handle_data(context, data): - order(symbol('AAPL'), 10) - record(AAPL=data.current(symbol('AAPL'), 'price')) + order(context.asset, 1) + record(btc = data.current(context.asset, 'price')) As you can see, we first have to import some functions we would like to use. All functions commonly used in your algorithm can be found in -``zipline.api``. Here we are using :func:`~zipline.api.order()` which takes two -arguments: a security object, and a number specifying how many stocks you would -like to order (if negative, :func:`~zipline.api.order()` will sell/short -stocks). In this case we want to order 10 shares of Apple at each iteration. For -more documentation on ``order()``, see the `Quantopian docs -`__. +``catalyst.api``. Here we are using :func:`~catalyst.api.order()` which takes two +arguments: a cryptoasset object, and a number specifying how many assets you would +like to order (if negative, :func:`~catalyst.api.order()` will sell/short +assets). In this case we want to order 1 bitcoin at each iteration. 
-Finally, the :func:`~zipline.api.record` function allows you to save the value +.. For more documentation on ``order()``, see the `Quantopian docs +.. `__. + +Finally, the :func:`~catalyst.api.record` function allows you to save the value of a variable at each iteration. You provide it with a name for the variable together with the variable itself: ``varname=var``. After the algorithm finished running you will have access to each variable value you tracked -with :func:`~zipline.api.record` under the name you provided (we will see this -further below). You also see how we can access the current price data of the -AAPL stock in the ``data`` event frame (for more information see -`here `__. +with :func:`~catalyst.api.record` under the name you provided (we will see this +further below). You also see how we can access the current price data of +a bitcoin in the ``data`` event frame. + +.. (for more information see `here `__. Running the algorithm ~~~~~~~~~~~~~~~~~~~~~ -To now test this algorithm on financial data, ``zipline`` provides three -interfaces: A command-line interface, ``IPython Notebook`` magic, and -:func:`~zipline.run_algorithm`. +To now test this algorithm on crypto data, ``catalyst`` provides three +interfaces: -Ingesting Data +- A command-line interface, +- ``IPython Notebook`` magic, +- and :func:`~catalyst.run_algorithm`. + +Ingesting data ^^^^^^^^^^^^^^ -If you haven't ingested the data, run: -.. code-block:: bash +In previous versions of Catalyst you needed to manually ingest data before running +your algorithm to make it available at runtime. Starting with version 0.3, the +algorithm will automagically ingest the data it needs the first time that it encounters +a request for data that it doesn't have. - $ zipline ingest [-b ] +Still, we believe it is important for you to have a high-level understanding +of how data is managed: -where ```` is the name of the bundle to ingest, defaulting to -:ref:`quantopian-quandl `.
+- Pricing data is split and packaged into ``bundles``: chunks of data organized + as time series that are kept up to date daily on Enigma's servers. Catalyst + downloads the bundles that it needs at any given time, and reconstructs the whole + dataset on your hard drive. -you can check out the :ref:`ingesting data ` section for -more detail. +- Pricing data is provided in ``daily`` and ``minute`` resolution. Those are different + bundle datasets, and are managed separately. + +- Bundles are exchange-specific, as the pricing data is specific to the trades that + happen on each exchange. You can optionally specify which exchange you want pricing + data from. + +- Catalyst keeps track of all the downloaded bundles, so that it only has to download + them once, and will do incremental updates as needed. + +- When running in ``live trading`` mode, Catalyst will first look for historical + pricing data in the locally stored bundles. If there is anything missing, Catalyst will + hit the exchange for the most recent data, and merge it with the local bundle to make + it available for future iterations. + +If you want to learn more, check out the :ref:`ingesting data ` section +for more detail. Command line interface ^^^^^^^^^^^^^^^^^^^^^^ -After you installed zipline you should be able to execute the following +After you installed Catalyst you should be able to execute the following from your command line (e.g. ``cmd.exe`` on Windows, or the Terminal app -on OSX): +on OSX). The output shown here is simplified for educational purposes: .. code-block:: bash - $ zipline run --help + $ catalyst --help .. parsed-literal:: - Usage: zipline run [OPTIONS] + Usage: catalyst [OPTIONS] COMMAND [ARGS]... - Run a backtest for the given algorithm. + Top level catalyst entry point. + + Options: + --version Show the version and exit. + --help Show this message and exit. + + Commands: + ingest-exchange Ingest data for the given exchange. + live Trade live with the given algorithm.
+ run Run a backtest for the given algorithm. + +There are three main modes you can run on Catalyst. The first being ``ingest-exchange`` +for data ingestion, which we have summarized in the previous section. The second +is ``live`` to use your algorithm to trade live against a given exchange, and the +third mode ``run`` is to backtest your algorithm before trading live with it. + +Let's start with backtesting, so run this other command to learn more about +the available options: + +.. code-block:: bash + + $ catalyst run --help + +.. parsed-literal:: + + Usage: catalyst run [OPTIONS] + + Run a backtest for the given algorithm. + + Options: + -f, --algofile FILENAME The file that contains the algorithm to run. + -t, --algotext TEXT The algorithm script to run. + -D, --define TEXT Define a name to be bound in the namespace + before executing the algotext. For example + '-Dname=value'. The value may be any python + expression. These are evaluated in order so + they may refer to previously defined names. + --data-frequency [daily|minute] + The data frequency of the simulation. + [default: daily] + --capital-base FLOAT The starting capital for the simulation. + [default: 10000000.0] + -b, --bundle BUNDLE-NAME The data bundle to use for the simulation. + [default: poloniex] + --bundle-timestamp TIMESTAMP The date to lookup data on or before. + [default: ] + -s, --start DATE The start date of the simulation. + -e, --end DATE The end date of the simulation. + -o, --output FILENAME The location to write the perf data. If this + is '-' the perf will be written to stdout. + [default: -] + --print-algo / --no-print-algo Print the algorithm to stdout. + -x, --exchange-name [poloniex|bitfinex|bittrex] + The name of the targeted exchange + (supported: bitfinex, bittrex, poloniex). + -n, --algo-namespace TEXT A label assigned to the algorithm for data + storage purposes. + -c, --base-currency TEXT The base currency used to calculate + statistics (e.g. usd, btc, eth). 
+ --help Show this message and exit. - Options: - -f, --algofile FILENAME The file that contains the algorithm to run. - -t, --algotext TEXT The algorithm script to run. - -D, --define TEXT Define a name to be bound in the namespace - before executing the algotext. For example - '-Dname=value'. The value may be any python - expression. These are evaluated in order so - they may refer to previously defined names. - --data-frequency [minute|daily] - The data frequency of the simulation. - [default: daily] - --capital-base FLOAT The starting capital for the simulation. - [default: 10000000.0] - -b, --bundle BUNDLE-NAME The data bundle to use for the simulation. - [default: quantopian-quandl] - --bundle-timestamp TIMESTAMP The date to lookup data on or before. - [default: ] - -s, --start DATE The start date of the simulation. - -e, --end DATE The end date of the simulation. - -o, --output FILENAME The location to write the perf data. If this - is '-' the perf will be written to stdout. - [default: -] - --print-algo / --no-print-algo Print the algorithm to stdout. - --help Show this message and exit. As you can see there are a couple of flags that specify where to find your -algorithm (``-f``) as well as parameters specifying which data to use, -defaulting to the :ref:`quantopian-quandl-mirror`. There are also arguments for -the date range to run the algorithm over (``--start`` and ``--end``). Finally, -you'll want to save the performance metrics of your algorithm so that you can -analyze how it performed. This is done via the ``--output`` flag and will cause -it to write the performance ``DataFrame`` in the pickle Python file format. -Note that you can also define a configuration file with these parameters that -you can then conveniently pass to the ``-c`` option so that you don't have to -supply the command line args all the time (see the .conf files in the examples -directory). +algorithm (``-f``) as well as a parameter to specify which exchange to use. 
+There are also arguments for the date range to run the algorithm over +(``--start`` and ``--end``). Finally, you'll want to save the performance +metrics of your algorithm so that you can analyze how it performed. This is +done via the ``--output`` flag and will cause it to write the performance +``DataFrame`` in the pickle Python file format. Note that you can also define +a configuration file with these parameters that you can then conveniently pass +to the ``-c`` option so that you don't have to supply the command line args +all the time (see the .conf files in the examples directory). Thus, to execute our algorithm from above and save the results to -``buyapple_out.pickle`` we would call ``zipline run`` as follows: +``buy_btc_simple_out.pickle`` we would call ``catalyst run`` as follows: .. code-block:: python - zipline run -f ../../zipline/examples/buyapple.py --start 2000-1-1 --end 2014-1-1 -o buyapple_out.pickle + catalyst run -f buy_btc_simple.py -x bitfinex --start 2016-1-1 --end 2016-9-29 -o buy_btc_simple_out.pickle -.. parsed-literal:: +.. +.. parsed-literal - AAPL - [2015-11-04 22:45:32.820166] INFO: Performance: Simulated 3521 trading days out of 3521. - [2015-11-04 22:45:32.820314] INFO: Performance: first open: 2000-01-03 14:31:00+00:00 - [2015-11-04 22:45:32.820401] INFO: Performance: last close: 2013-12-31 21:00:00+00:00 +.. AAPL +.. [2015-11-04 22:45:32.820166] INFO: Performance: Simulated 3521 trading days out of 3521. +.. [2015-11-04 22:45:32.820314] INFO: Performance: first open: 2000-01-03 14:31:00+00:00 +.. [2015-11-04 22:45:32.820401] INFO: Performance: last close: 2013-12-31 21:00:00+00:00 ``run`` first calls the ``initialize()`` function, and then -streams the historical stock price day-by-day through ``handle_data()``. -After each call to ``handle_data()`` we instruct ``zipline`` to order 10 -stocks of AAPL. After the call of the ``order()`` function, ``zipline`` +streams the historical asset price day-by-day through ``handle_data()``.
+After each call to ``handle_data()`` we instruct ``catalyst`` to order 1 +bitcoin. After the call of the ``order()`` function, ``catalyst`` enters the ordered stock and amount in the order book. After the -``handle_data()`` function has finished, ``zipline`` looks for any open +``handle_data()`` function has finished, ``catalyst`` looks for any open orders and tries to fill them. If the trading volume is high enough for -this stock, the order is executed after adding the commission and +this asset, the order is executed after adding the commission and applying the slippage model which models the influence of your order on the stock price, so your algorithm will be charged more than just the -stock price \* 10. (Note, that you can also change the commission and -slippage model that ``zipline`` uses, see the `Quantopian -docs `__ for more -information). +asset price. (Note, that you can also change the commission and +slippage model that ``catalyst`` uses). -Lets take a quick look at the performance ``DataFrame``. For this, we +.. see the `Quantopian docs `__ +.. for more information). + +Let's take a quick look at the performance ``DataFrame``. For this, we use ``pandas`` from inside the IPython Notebook and print the first ten -rows. Note that ``zipline`` makes heavy usage of ``pandas``, especially -for data input and outputting so it's worth spending some time to learn -it. +rows. Note that ``catalyst`` makes heavy usage of +`pandas `_, especially for data input and +outputting so it's worth spending some time to learn it. .. code-block:: python import pandas as pd - perf = pd.read_pickle('buyapple_out.pickle') # read in perf DataFrame + perf = pd.read_pickle('buy_btc_simple_out.pickle') # read in perf DataFrame perf.head() -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AAPLalgo_volatilityalgorithm_period_returnalphabenchmark_period_returnbenchmark_volatilitybetacapital_usedending_cashending_exposure...short_exposureshort_valueshorts_countsortinostarting_cashstarting_exposurestarting_valuetrading_daystransactionstreasury_period_return
2000-01-03 21:00:003.7383140.000000e+000.000000e+00-0.065800-0.0095490.0000000.0000000.0000010000000.000000.00000...0000.00000010000000.000000.000000.000001[]0.0658
2000-01-04 21:00:003.4231353.367492e-07-3.000000e-08-0.064897-0.0475280.3232290.000001-34.531359999965.4686534.23135...0000.00000010000000.000000.000000.000002[{u'order_id': u'513357725cb64a539e3dd02b47da7...0.0649
2000-01-05 21:00:003.4732294.001918e-07-9.906000e-09-0.066196-0.0456970.3293210.000001-35.032299999930.4363669.46458...0000.0000009999965.4686534.2313534.231353[{u'order_id': u'd7d4ad03cfec4d578c0d817dc3829...0.0662
2000-01-06 21:00:003.1726614.993979e-06-6.410420e-07-0.065758-0.0447850.298325-0.000006-32.026619999898.4097595.17983...000-12731.7805169999930.4363669.4645869.464584[{u'order_id': u'1fbf5e9bfd7c4d9cb2e8383e1085e...0.0657
2000-01-07 21:00:003.3229455.977002e-06-2.201900e-07-0.065206-0.0189080.3753010.000005-33.529459999864.88030132.91780...000-12629.2745839999898.4097595.1798395.179835[{u'order_id': u'9ea6b142ff09466b9113331a37437...0.0652
-

5 rows × 39 columns

-
- - - -As you can see, there is a row for each trading day, starting on the -first business day of 2000. In the columns you can find various +There is a row for each trading day, starting on the first day of our +simulation Jan 1st, 2016. In the columns you can find various information about the state of your algorithm. The very first column -``AAPL`` was placed there by the ``record()`` function mentioned earlier -and allows us to plot the price of apple. For example, we could easily +``btc`` was placed there by the ``record()`` function mentioned earlier +and allows us to plot the price of bitcoin. For example, we could easily examine now how our portfolio value changed over time compared to the -AAPL stock price. +bitcoin price. -.. code-block:: python - - %pylab inline - figsize(12, 12) - import matplotlib.pyplot as plt - - ax1 = plt.subplot(211) - perf.portfolio_value.plot(ax=ax1) - ax1.set_ylabel('portfolio value') - ax2 = plt.subplot(212, sharex=ax1) - perf.AAPL.plot(ax=ax2) - ax2.set_ylabel('AAPL stock price') - -.. parsed-literal:: - - Populating the interactive namespace from numpy and matplotlib - -.. parsed-literal:: - - - -.. image:: tutorial_files/tutorial_11_2.png - - -As you can see, our algorithm performance as assessed by the -``portfolio_value`` closely matches that of the AAPL stock price. This -is not surprising as our algorithm only bought AAPL every chance it got. - -IPython Notebook -~~~~~~~~~~~~~~~~ - -The `IPython Notebook `__ is a very -powerful browser-based interface to a Python interpreter (this tutorial -was written in it). As it is already the de-facto interface for most -quantitative researchers ``zipline`` provides an easy way to run your -algorithm inside the Notebook without requiring you to use the CLI. - -To use it you have to write your algorithm in a cell and let ``zipline`` -know that it is supposed to run this algorithm. 
This is done via the -``%%zipline`` IPython magic command that is available after you -``import zipline`` from within the IPython Notebook. This magic takes -the same arguments as the command line interface described above. Thus -to run the algorithm from above with the same parameters we just have to -execute the following cell after importing ``zipline`` to register the -magic. - -.. code-block:: python - - %load_ext zipline - -.. code-block:: python - - %%zipline --start 2000-1-1 --end 2014-1-1 - from zipline.api import symbol, order, record - - def initialize(context): - pass - - def handle_data(context, data): - order(symbol('AAPL'), 10) - record(AAPL=data[symbol('AAPL')].price) - -Note that we did not have to specify an input file as above since the -magic will use the contents of the cell and look for your algorithm -functions there. Also, instead of defining an output file we are -specifying a variable name with ``-o`` that will be created in the name -space and contain the performance ``DataFrame`` we looked at above. - -.. code-block:: python - - _.head() - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AAPLalgo_volatilityalgorithm_period_returnalphabenchmark_period_returnbenchmark_volatilitybetacapital_usedending_cashending_exposure...short_exposureshort_valueshorts_countsortinostarting_cashstarting_exposurestarting_valuetrading_daystransactionstreasury_period_return
2000-01-03 21:00:003.7383140.000000e+000.000000e+00-0.065800-0.0095490.0000000.0000000.0000010000000.000000.00000...0000.00000010000000.000000.000000.000001[]0.0658
2000-01-04 21:00:003.4231353.367492e-07-3.000000e-08-0.064897-0.0475280.3232290.000001-34.531359999965.4686534.23135...0000.00000010000000.000000.000000.000002[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0649
2000-01-05 21:00:003.4732294.001918e-07-9.906000e-09-0.066196-0.0456970.3293210.000001-35.032299999930.4363669.46458...0000.0000009999965.4686534.2313534.231353[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0662
2000-01-06 21:00:003.1726614.993979e-06-6.410420e-07-0.065758-0.0447850.298325-0.000006-32.026619999898.4097595.17983...000-12731.7805169999930.4363669.4645869.464584[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0657
2000-01-07 21:00:003.3229455.977002e-06-2.201900e-07-0.065206-0.0189080.3753010.000005-33.529459999864.88030132.91780...000-12629.2745839999898.4097595.1798395.179835[{u'commission': 0.3, u'amount': 10, u'sid': 0...0.0652
-

5 rows × 39 columns

-
+Our algorithm performance as assessed by the +``portfolio_value`` closely matches that of the bitcoin price. This +is not surprising as our algorithm only bought bitcoin every chance it got. Access to previous prices using ``history`` @@ -627,22 +300,16 @@ we need a new concept: History ``data.history()`` is a convenience function that keeps a rolling window of data for you. The first argument is the number of bars you want to collect, the second argument is the unit (either ``'1d'`` for ``'1m'`` -but note that you need to have minute-level data for using ``1m``). For -a more detailed description ``history()``'s features, see the -`Quantopian docs `__. -Let's look at the strategy which should make this clear: +but note that you need to have minute-level data for using ``1m``). This is +a function we use in the ``handle_data()`` section: .. code-block:: python - %%zipline --start 2000-1-1 --end 2012-1-1 -o dma.pickle + from catalyst.api import order, order_target, record, symbol - - from zipline.api import order_target, record, symbol - - def initialize(context): + def initialize(context): context.i = 0 - context.asset = symbol('AAPL') - + context.asset = symbol('btc_usd') def handle_data(context, data): # Skip first 300 days to get full windows @@ -665,67 +332,22 @@ Let's look at the strategy which should make this clear: order_target(context.asset, 0) # Save values for later inspection - record(AAPL=data.current(context.asset, 'price'), + record(btc=data.current(context.asset, 'price'), short_mavg=short_mavg, long_mavg=long_mavg) - def analyze(context, perf): - fig = plt.figure() - ax1 = fig.add_subplot(211) - perf.portfolio_value.plot(ax=ax1) - ax1.set_ylabel('portfolio value in $') - - ax2 = fig.add_subplot(212) - perf['AAPL'].plot(ax=ax2) - perf[['short_mavg', 'long_mavg']].plot(ax=ax2) - - perf_trans = perf.ix[[t != [] for t in perf.transactions]] - buys = perf_trans.ix[[t[0]['amount'] > 0 for t in perf_trans.transactions]] - sells = perf_trans.ix[ - [t[0]['amount'] < 0
for perf_trans.transactions]] - ax2.plot(buys.index, perf.short_mavg.ix[buys.index], - '^', markersize=10, color='m') - ax2.plot(sells.index, perf.short_mavg.ix[sells.index], - 'v', markersize=10, color='k') - ax2.set_ylabel('price in $') - plt.legend(loc=0) - plt.show() - -.. image:: tutorial_files/tutorial_22_1.png - -Here we are explicitly defining an ``analyze()`` function that gets -automatically called once the backtest is done (this is not possible on -Quantopian currently). - -Although it might not be directly apparent, the power of ``history()`` -(pun intended) can not be under-estimated as most algorithms make use of -prior market developments in one form or another. You could easily -devise a strategy that trains a classifier with -`scikit-learn `__ which tries to -predict future market movements based on past prices (note, that most of -the ``scikit-learn`` functions require ``numpy.ndarray``\ s rather than -``pandas.DataFrame``\ s, so you can simply pass the underlying -``ndarray`` of a ``DataFrame`` via ``.values``). - -We also used the ``order_target()`` function above. This and other -functions like it can make order management and portfolio rebalancing -much easier. See the `Quantopian documentation on order -functions `__ fore -more details. - Conclusions ~~~~~~~~~~~ We hope that this tutorial gave you a little insight into the -architecture, API, and features of ``zipline``. For next steps, check +architecture, API, and features of ``catalyst``. For next steps, check out some of the -`examples `__. +`examples `__. +The natural next step would be to look into the +`buy_and_hodl `_ +example, which is a more elaborate and realistic version of the ``buy_btc_simple`` example presented in this tutorial. -Feel free to ask questions on `our mailing -list `__, report -problems on our `GitHub issue -tracker `__, -`get -involved `__, -and `checkout Quantopian `__.
+Feel free to ask questions on the ``#catalyst_dev`` channel of our +`Discord group `__ and report +problems on our `GitHub issue tracker `__. diff --git a/docs/source/conf.py b/docs/source/conf.py index 62d0bd16..dd0304d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ import sys import os -from zipline import __version__ as version +#from catalyst import __version__ as version # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -21,14 +21,14 @@ extensions = [ extlinks = { - 'issue': ('https://github.com/quantopian/zipline/issues/%s', '#'), - 'commit': ('https://github.com/quantopian/zipline/commit/%s', ''), + 'issue': ('https://github.com/enigmampc/catalyst/issues/%s', '#'), + 'commit': ('https://github.com/enigmampc/catalyst/commit/%s', ''), } # -- Docstrings --------------------------------------------------------------- -extensions += ['numpydoc'] -numpydoc_show_class_members = False +#extensions += ['numpydoc'] +#numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates'] @@ -40,11 +40,12 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'Zipline' -copyright = u'2016, Quantopian Inc.' +project = u'Catalyst' +copyright = u'2017, Enigma MPC, Inc.' # The full version, including alpha/beta/rc tags, but excluding the commit hash -release = version.split('+', 1)[0] +#release = version.split('+', 1)[0] +release = '0.3' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -84,7 +85,7 @@ html_show_sphinx = True html_show_copyright = True # Output file base name for HTML help builder. 
-htmlhelp_basename = 'ziplinedoc' +htmlhelp_basename = 'catalystdoc' intersphinx_mapping = { 'http://docs.python.org/dev': None, @@ -93,6 +94,6 @@ intersphinx_mapping = { 'pandas': ('http://pandas.pydata.org/pandas-docs/stable/', None), } -doctest_global_setup = "import zipline" +doctest_global_setup = "import catalyst" todo_include_todos = True diff --git a/docs/source/index.rst b/docs/source/index.rst index ee713eb5..0efccaeb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,12 +1,17 @@ -.. include:: ../../README.rst +.. include:: welcome.rst +| +| +Table of Contents +----------------- .. toctree:: :maxdepth: 1 install beginner-tutorial - bundles - development-guidelines - appendix - release-process - releases + naming-convention +.. bundles +.. development-guidelines +.. appendix +.. release-process +.. releases diff --git a/docs/source/install.rst b/docs/source/install.rst index 71e3d21f..028f9295 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -4,16 +4,16 @@ Install Installing with ``pip`` ----------------------- -Installing Zipline via ``pip`` is slightly more involved than the average +Installing Catalyst via ``pip`` is slightly more involved than the average Python package. There are two reasons for the additional complexity: -1. Zipline ships several C extensions that require access to the CPython C API. +1. Catalyst ships several C extensions that require access to the CPython C API. In order to build the C extensions, ``pip`` needs access to the CPython header files for your Python installation. -2. Zipline depends on `numpy `_, the core library for +2. Catalyst depends on `numpy `_, the core library for numerical array computing in Python. Numpy depends on having the `LAPACK `_ linear algebra routines available. @@ -28,13 +28,28 @@ your particular platform), you should be able to simply run .. 
code-block:: bash - $ pip install zipline + $ pip install enigma-catalyst -If you use Python for anything other than Zipline, we **strongly** recommend +If you use Python for anything other than Catalyst, we **strongly** recommend that you install in a `virtualenv `_. The `Hitchhiker's Guide to Python`_ provides an `excellent tutorial on virtualenv -`_. +`_. Here's a summarized +version: + +.. code-block:: bash + + $ virtualenv catalyst-venv + $ source ./catalyst-venv/bin/activate + $ pip install enigma-catalyst + +Though not required by Catalyst directly, our example algorithms use matplotlib +to visually display the results of the trading algorithms. If you wish to run +any examples or use matplotlib during development, it can be installed using: + +.. code-block:: bash + + $ pip install matplotlib GNU/Linux ~~~~~~~~~ @@ -60,15 +75,17 @@ On `Arch Linux`_, you can acquire the additional dependencies via ``pacman``: $ pacman -S lapack gcc gcc-fortran pkg-config -There are also AUR packages available for installing `Python 3.4 -`_ (Arch's default python is now -3.5, but Zipline only currently supports 3.4), and `ta-lib -`_, an optional Zipline dependency. -Python 2 is also installable via: +.. Commenting it out until Catalyst fully supports Python 3.X +.. +.. There are also AUR packages available for installing `Python 3.4 +.. `_ (Arch's default python is now +.. 3.5, but Catalyst only currently supports 3.4), and `ta-lib +.. `_, an optional Catalyst dependency. +.. Python 2 is also installable via: -.. code-block:: bash +.. - $ pacman -S python2 +.. $ pacman -S python2 OSX ~~~ @@ -87,36 +104,238 @@ following brew packages: $ brew install freetype pkg-config gcc openssl +OSX + virtualenv + matplotlib +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A note about using matplotlib in virtual environments on OSX: it may be necessary to run + +..
code-block:: bash + + echo "backend: TkAgg" > ~/.matplotlib/matplotlibrc + +in order to override the default ``macosx`` backend for your system, which may not +be accessible from inside the virtual environment. This will allow Catalyst to open +matplotlib charts from within a virtual environment, which is useful for displaying +the performance of your backtests. To learn more about matplotlib backends, please refer to the +`matplotlib backend documentation `_. + + Windows ~~~~~~~ -For windows, the easiest and best supported way to install zipline is to use +In Windows, you will need the `Microsoft Visual C++ Compiler for Python 2.7 +`_. This package +contains the compiler and the set of system headers necessary for producing +binary wheels for Python 2.7 packages. If it's not already in your system, download +it and install it before proceeding to the next step. + +For Windows, the easiest and best supported way to install Catalyst is to use :ref:`Conda `. +Amazon Linux AMI +~~~~~~~~~~~~~~~~ + +The packages ``pip`` and ``setuptools`` that come shipped by default are very outdated. +Thus, you first need to run: + +.. code-block:: bash + + pip install --upgrade pip setuptools + +The default installation is also missing the C and C++ compilers, which you install by: + +.. code-block:: bash + + sudo yum install gcc gcc-c++ + +Then you should follow the regular installation instructions outlined at the beginning +of this page. + + +Troubleshooting ``pip`` Install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Issue**: + Package enigma-catalyst cannot be found + +**Solution**: + Make sure you have the most up-to-date version of pip installed, by running: + + .. code-block:: bash + + pip install --upgrade pip + + On Windows, the recommended command is: + + .. code-block:: bash + + python -m pip install --upgrade pip + +---- + +**Issue**: + Package enigma-catalyst still cannot be found, even after upgrading pip (see above), with an error similar to: + + ..
code-block:: bash + + Downloading/unpacking enigma-catalyst + Could not find a version that satisfies the requirement enigma-catalyst (from versions: 0.1.dev9, 0.2.dev2, 0.1.dev4, 0.1.dev5, 0.1.dev3, 0.2.dev1, 0.1.dev8, 0.1.dev6) + Cleaning up... + No distributions matching the version for enigma-catalyst + +**Solution**: + In some systems (this error has been reported in Ubuntu), pip is configured to only find stable versions by default. Since Catalyst is in alpha version, pip cannot find a matching version that satisfies the installation requirements. The solution is to include the `--pre` flag to include pre-release and development versions: + + .. code-block:: bash + + pip install --pre enigma-catalyst + +---- + +**Issue**: + Package enigma-catalyst fails to install because of outdated setuptools + +**Solution**: + Upgrade to the most up-to-date setuptools package by running: + + .. code-block:: bash + + pip install --upgrade pip setuptools + +---- + +**Issue**: + Missing required packages + +**Solution**: + Download `requirements.txt + `_ + (click on the *Raw* button and Right click -> Save As...) and use it to + install all the required dependencies by running: + + .. code-block:: bash + + pip install -r requirements.txt + +---- + +**Issue**: + Installation fails with error: ``fatal error: Python.h: No such file or directory`` + +**Solution**: + Some systems (this issue has been reported in Ubuntu) require `python-dev` for the proper build and installation of package dependencies. The solution is to install python-dev, which is independent of the virtual environment. In Ubuntu, you would need to run: + + .. code-block:: bash + + sudo apt-get install python-dev + + .. _conda: Installing with ``conda`` ------------------------- -Another way to install Zipline is via the ``conda`` package manager, which +Another way to install Catalyst is via the ``conda`` package manager, which comes as part of Continuum Analytics' `Anaconda `_ distribution. 
The primary advantage of using Conda over ``pip`` is that conda natively understands the complex binary dependencies of packages like ``numpy`` and -``scipy``. This means that ``conda`` can install Zipline and its dependencies -without requiring the use of a second tool to acquire Zipline's non-Python +``scipy``. This means that ``conda`` can install Catalyst and its dependencies +without requiring the use of a second tool to acquire Catalyst's non-Python dependencies. For instructions on how to install ``conda``, see the `Conda Installation -Documentation `_ +Documentation `_. Alternatively, you +can install MiniConda, which is a smaller footprint (fewer packages and smaller +size) than its big brother Anaconda, but it still contains all the main packages +needed. To install MiniConda, you can follow these steps: -Once conda has been set up you can install Zipline from our ``Quantopian`` -channel: +1. Download `MiniConda `_. Select Python 2.7 for + your Operating System. +2. Install MiniConda. See the `Installation Instructions `_ + if you need help. +3. Ensure the correct installation by running ``conda list`` in a Terminal window, + which should print the list of packages installed with Conda. -.. code-block:: bash +Once either Conda or MiniConda has been set up you can install Catalyst: + +1. Download the file `python2.7-environment.yml `_. +2. Open a Terminal window and enter [``cd/dir``] into the directory where you saved + the above ``python2.7-environment.yml`` file. +3. Install using this file. This step can take about 5-10 minutes to install. + + .. code-block:: bash + + conda env create -f python2.7-environment.yml + +4. Activate the environment (which you need to do every time you start a new session + to run Catalyst): + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +Congratulations! You now have Catalyst installed. 
+ +Troubleshooting ``conda`` Install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the command ``conda env create -f python2.7-environment.yml`` in step 3 above failed +for any reason, you can try setting up the environment manually with the following steps: + +1. Create the environment: + + .. code-block:: bash + + conda create --name catalyst python=2.7 scipy + +2. Activate the environment: + + **Linux or OSX:** + + .. code-block:: bash + + source activate catalyst + + **Windows:** + + .. code-block:: bash + + activate catalyst + +3. Install Catalyst inside the environment: + + .. code-block:: bash + + pip install enigma-catalyst matplotlib + +Getting Help +------------ + +If after following the instructions above, and going through the *Troubleshooting* sections, +you still experience problems installing Catalyst, you can seek additional help through the +following channels: + +- Join our `Discord community `_, and head over to the #catalyst_dev + channel where many other users (as well as the project developers) hang out, and can assist + you with your particular issue. The more descriptive and the more information you can provide, + the easier it will be for others to help you out. + +- Report the problem you are experiencing on our + `GitHub repository `_ following the guidelines + provided therein. Before you do so, take a moment to browse through all `previously reported issues + `_ in the likely case + that someone else experienced that same issue before, and you get a hint on how to solve it. - conda install -c Quantopian zipline .. _`Debian-derived`: https://www.debian.org/misc/children-distros ..
_`RHEL-derived`: https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives diff --git a/docs/source/naming-convention.rst b/docs/source/naming-convention.rst new file mode 100644 index 00000000..2a9e0398 --- /dev/null +++ b/docs/source/naming-convention.rst @@ -0,0 +1,66 @@ +Naming Convention +================= + +Catalyst introduces a standardized naming convention for all asset pairs +trading on any exchange in the following form: + + + **{market_currency}_{base_currency}** + +Where {market_currency} is the asset to be traded using {base_currency} as +the reference, both written in lowercase and separated with an underscore. + +This standardization is needed to overcome the lack of consistency in the +naming of assets across different exchanges, and to make it easier for you +to refer to the asset pairs that you want to trade. + +Catalyst maintains a `Market Coverage Overview `_ +where you can check the mapping between Catalyst naming pairs and that of each +exchange. Catalyst will always expect in all its functions that you will refer to +the asset pairs by using the Catalyst naming convention. + +If at any point, you input the wrong name for an asset pair, you will get an error +stating that the pair was not found on the given exchange, along with a list of pairs available on that exchange: + +.. code-block:: bash + + $ catalyst ingest-exchange -x poloniex -i btc_usd + +.. parsed-literal:: + + Ingesting exchange bundle poloniex... + Error traceback: /Volumes/Data/Users/victoris/Desktop/Enigma/user-install/catalyst-dev/catalyst/exchange/exchange.py (line 175) + SymbolNotFoundOnExchange: Symbol btc_usd not found on exchange Poloniex.
+ Choose from: ['rep_usdt', 'gno_btc', 'xvc_btc', 'pink_btc', 'sys_btc', + 'emc2_btc', 'rads_btc', 'note_btc', 'maid_btc', 'bch_btc', 'gnt_btc', + 'bcn_btc', 'rep_btc', 'bcy_btc', 'cvc_btc', 'nxt_xmr', 'zec_usdt', + 'fct_btc', 'gas_btc', 'pot_btc', 'eth_usdt', 'btc_usdt', 'lbc_btc', + 'dcr_btc', 'etc_usdt', 'omg_eth', 'amp_btc', 'xpm_btc', 'nxt_btc', + 'vtc_btc', 'steem_eth', 'blk_xmr', 'pasc_btc', 'zec_xmr', 'grc_btc', + 'nxc_btc', 'btcd_btc', 'ltc_btc', 'dash_btc', 'naut_btc', 'zec_eth', + 'zec_btc', 'burst_btc', 'zrx_eth', 'bela_btc', 'steem_btc', 'etc_btc', + 'eth_btc', 'huc_btc', 'strat_btc', 'lsk_btc', 'exp_btc', 'clam_btc', + 'rep_eth', 'dash_xmr', 'cvc_eth', 'bch_usdt', 'zrx_btc', 'dash_usdt', + 'blk_btc', 'xrp_btc', 'nxt_usdt', 'neos_btc', 'omg_btc', 'bts_btc', + 'doge_btc', 'gnt_eth', 'sbd_btc', 'gno_eth', 'xcp_btc', 'ltc_usdt', + 'btm_btc', 'xmr_usdt', 'lsk_eth', 'omni_btc', 'nav_btc', 'fldc_btc', + 'ppc_btc', 'xbc_btc', 'dgb_btc', 'sc_btc', 'btcd_xmr', 'vrc_btc', + 'ric_btc', 'str_btc', 'maid_xmr', 'xmr_btc', 'sjcx_btc', 'via_btc', + 'xem_btc', 'nmc_btc', 'etc_eth', 'ltc_xmr', 'ardr_btc', 'gas_eth', + 'flo_btc', 'xrp_usdt', 'game_btc', 'bch_eth', 'bcn_xmr', 'str_usdt'] + +In the example above, exchange Poloniex does not use USD, but uses instead the +USDT cryptocurrency asset that is issued on the Bitcoin blockchain via the Omni +Layer Protocol. Each USDT unit is backed by a U.S. Dollar held in the reserves of +Tether Limited. USDT can be transferred, stored, and spent, just like bitcoins +or any other cryptocurrency. Given its 1:1 mapping to the USD, it is a viable alternative. + +.. code-block:: bash + + $ catalyst ingest-exchange -x poloniex -i btc_usdt + +.. parsed-literal:: + + Ingesting exchange bundle poloniex...
+ [====================================] Fetching poloniex daily candles: : 100% + diff --git a/docs/source/welcome.rst b/docs/source/welcome.rst new file mode 100644 index 00000000..410d8c75 --- /dev/null +++ b/docs/source/welcome.rst @@ -0,0 +1,28 @@ +.. image:: https://s3.amazonaws.com/enigmaco-docs/enigma-catalyst.jpg +| +Catalyst is a data-driven crypto investment platform. It supports both +backtesting and live-trading in a number of different crypto-exchanges. +Catalyst empowers users to share and curate data and build profitable, +data-driven investment strategies. + +Features +======== + +- Ease of use: Catalyst tries to get out of your way so that you can + focus on algorithm development. See + `examples of trading strategies `_ + provided. +- Support for several of the top crypto-exchanges by trading volume: + `Bitfinex `_, `Bittrex `_, + and `Poloniex `_. +- Secure: You and only you have access to each exchange API keys for your accounts. +- Input of historical pricing data of all crypto-assets by exchange, + with daily and minute resolution. See + `Catalyst Market Coverage Overview `_. +- Backtesting and live-trading functionality, with a seamless transition + between the two modes. +- Output of performance statistics are based on Pandas DataFrames to + integrate nicely into the existing PyData eco-system. +- Statistic and machine learning libraries like matplotlib, scipy, + statsmodels, and sklearn support development, analysis, and + visualization of state-of-the-art trading systems. 
\ No newline at end of file diff --git a/setup.py b/setup.py index a3243545..c358948b 100644 --- a/setup.py +++ b/setup.py @@ -304,7 +304,7 @@ setup( if '__pycache__' not in root}, license='Apache 2.0', classifiers=[ - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', 'Programming Language :: Python', diff --git a/tests/exchange/base.py b/tests/exchange/base.py index 73c43017..b98c3ec1 100644 --- a/tests/exchange/base.py +++ b/tests/exchange/base.py @@ -2,7 +2,7 @@ import unittest from abc import ABCMeta, abstractmethod -class BaseExchangeTestCase(): +class BaseExchangeTestCase: __metaclass__ = ABCMeta @abstractmethod diff --git a/tests/exchange/test_bitfinex.py b/tests/exchange/test_bitfinex.py index 6b9990c3..ded6c8ca 100644 --- a/tests/exchange/test_bitfinex.py +++ b/tests/exchange/test_bitfinex.py @@ -1,12 +1,9 @@ -from catalyst.exchange.bitfinex.bitfinex import Bitfinex -from .base import BaseExchangeTestCase from logbook import Logger -import pandas as pd -from catalyst.finance.execution import (MarketOrder, - LimitOrder, - StopOrder, - StopLimitOrder) + +from base import BaseExchangeTestCase +from catalyst.exchange.bitfinex.bitfinex import Bitfinex from catalyst.exchange.exchange_utils import get_exchange_auth +from catalyst.finance.execution import (LimitOrder) log = Logger('test_bitfinex') @@ -14,7 +11,7 @@ log = Logger('test_bitfinex') class BitfinexTestCase(BaseExchangeTestCase): @classmethod def setup(self): - print ('creating bitfinex object') + log.info('creating bitfinex object') auth = get_exchange_auth('bitfinex') self.exchange = Bitfinex( key=auth['key'], @@ -50,13 +47,17 @@ class BitfinexTestCase(BaseExchangeTestCase): def test_get_candles(self): log.info('retrieving candles') + ohlcv_neo = self.exchange.get_candles( + data_frequency='1m', + assets=self.exchange.get_asset('neo_btc') + ) pass def test_tickers(self): log.info('retrieving 
tickers') tickers = self.exchange.tickers([ - self.exchange.get_asset('eth_usd'), - self.exchange.get_asset('btc_usd') + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') ]) pass @@ -68,3 +69,9 @@ class BitfinexTestCase(BaseExchangeTestCase): log.info('testing exchange balances') balances = self.exchange.get_balances() pass + + def test_orderbook(self): + log.info('testing order book for bitfinex') + asset = self.exchange.get_asset('eth_btc') + orderbook = self.exchange.get_orderbook(asset) + pass diff --git a/tests/exchange/test_bittrex.py b/tests/exchange/test_bittrex.py index 825af970..f1becbcc 100644 --- a/tests/exchange/test_bittrex.py +++ b/tests/exchange/test_bittrex.py @@ -1,6 +1,6 @@ from catalyst.exchange.bittrex.bittrex import Bittrex from catalyst.finance.order import Order -from .base import BaseExchangeTestCase +from base import BaseExchangeTestCase from logbook import Logger from catalyst.exchange.exchange_utils import get_exchange_auth @@ -67,8 +67,8 @@ class BittrexTestCase(BaseExchangeTestCase): def test_tickers(self): log.info('retrieving tickers') tickers = self.exchange.tickers([ - self.exchange.get_asset('ubq_btc'), - self.exchange.get_asset('neo_btc') + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') ]) assert len(tickers) == 2 pass @@ -81,3 +81,9 @@ class BittrexTestCase(BaseExchangeTestCase): def test_get_account(self): log.info('testing account data') pass + + def test_orderbook(self): + log.info('testing order book for bittrex') + asset = self.exchange.get_asset('eth_btc') + orderbook = self.exchange.get_orderbook(asset) + pass diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py new file mode 100644 index 00000000..301f71bb --- /dev/null +++ b/tests/exchange/test_bundle.py @@ -0,0 +1,267 @@ +from logging import Logger + +import pandas as pd + +from catalyst import get_calendar +from catalyst.exchange.bundle_utils import get_bcolz_chunk, get_periods, \ + 
get_periods_range +from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ + BcolzExchangeBarWriter +from catalyst.exchange.exchange_bundle import ExchangeBundle, \ + BUNDLE_NAME_TEMPLATE +from catalyst.exchange.exchange_utils import get_exchange_folder +from catalyst.exchange.init_utils import get_exchange +from catalyst.utils.paths import ensure_directory + +log = Logger('test_exchange_bundle') + + +class ExchangeBundleTestCase: + def test_ingest_minute(self): + data_frequency = 'minute' + exchange_name = 'bitfinex' + + exchange = get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) + assets = [ + exchange.get_asset('neo_eth') + ] + + # start = pd.to_datetime('2017-09-01', utc=True) + start = pd.to_datetime('2017-9-15', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency=data_frequency, + include_symbols=','.join([asset.symbol for asset in assets]), + # include_symbols=None, + exclude_symbols=None, + start=start, + end=end, + show_progress=True + ) + + reader = exchange_bundle.get_reader(data_frequency) + for asset in assets: + arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) + ) + pass + + def test_ingest_minute_all(self): + exchange_name = 'bitfinex' + + # start = pd.to_datetime('2017-09-01', utc=True) + start = pd.to_datetime('2017-10-01', utc=True) + end = pd.to_datetime('2017-10-05', utc=True) + + exchange_bundle = ExchangeBundle(get_exchange(exchange_name)) + + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency='minute', + exclude_symbols=None, + start=start, + end=end, + show_progress=True + ) + pass + + def test_ingest_daily(self): + # exchange_name = 'bitfinex' + # data_frequency = 'daily' + # 
include_symbols = 'neo_btc,bch_btc,eth_btc' + + exchange_name = 'poloniex' + data_frequency = 'daily' + include_symbols = 'btc_usdt' + + start = pd.to_datetime('2016-1-1', utc=True) + end = pd.to_datetime('2017-10-16', utc=True) + periods = get_periods_range(start, end, data_frequency) + + exchange = get_exchange(exchange_name) + exchange_bundle = ExchangeBundle(exchange) + + log.info('ingesting exchange bundle {}'.format(exchange_name)) + exchange_bundle.ingest( + data_frequency=data_frequency, + include_symbols=include_symbols, + exclude_symbols=None, + start=start, + end=end, + show_progress=True + ) + + symbols = include_symbols.split(',') + assets = [] + for pair_symbol in symbols: + assets.append(exchange.get_asset(pair_symbol)) + + reader = exchange_bundle.get_reader(data_frequency) + for asset in assets: + arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) + ) + pass + + def test_merge_ctables(self): + exchange_name = 'bittrex' + + # Switch between daily and minute for testing + # data_frequency = 'daily' + data_frequency = 'daily' + + exchange = get_exchange(exchange_name) + assets = [ + exchange.get_asset('eth_btc'), + exchange.get_asset('etc_btc'), + exchange.get_asset('wings_eth'), + ] + + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + exchange_bundle = ExchangeBundle(exchange) + + writer = exchange_bundle.get_writer(start, end, data_frequency) + + # In the interest of avoiding abstractions, this is writing a chunk + # to the ctable. It does not include the logic which creates chunks. 
+ for asset in assets: + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + # period='2017-9', + period='2017', + # Dont't forget to update if you change your dates + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) + + # In daily mode, this returns an error. It appears that writing + # a second asset in the same date range removed the first asset. + + # In minute mode, the data is there too. This signals that the minute + # writer / reader is more powerful. This explains why I did not + # encounter these problems as I have been focusing on minute data. + reader = exchange_bundle.get_reader(data_frequency) + for asset in assets: + # Since this pair was loaded last. It should be there in daily mode. + arrays = reader.load_raw_arrays( + sids=[asset.sid], + fields=['close'], + start_dt=start, + end_dt=end + ) + print('found {} rows for {} ingestion\n{}'.format( + len(arrays[0]), asset.symbol, arrays[0]) + ) + pass + + def test_daily_data_to_minute_table(self): + exchange_name = 'poloniex' + + # Switch between daily and minute for testing + data_frequency = 'daily' + # data_frequency = 'minute' + + exchange = get_exchange(exchange_name) + assets = [ + exchange.get_asset('eth_btc'), + exchange.get_asset('etc_btc'), + ] + + start = pd.to_datetime('2017-9-1', utc=True) + end = pd.to_datetime('2017-9-30', utc=True) + + # Preparing the bundle folder + root = get_exchange_folder(exchange.name) + path = BUNDLE_NAME_TEMPLATE.format( + root=root, + frequency=data_frequency + ) + ensure_directory(path) + + exchange_bundle = ExchangeBundle(exchange) + calendar = get_calendar('OPEN') + + # We are using a BcolzMinuteBarWriter even though the data is daily + # Each day has a maximum of one bar + + # I tried setting the minutes_per_day to 1 will not create + # unnecessary bars + writer = BcolzExchangeBarWriter( + rootdir=path, + data_frequency=data_frequency, + start_session=start, + end_session=end, + write_metadata=True 
+ ) + + # This will read the daily data in a bundle created by + # the daily writer. It will write to the minute writer which + # we are passing. + + # Ingesting a second asset to ensure that multiple chunks + # don't override each other + for asset in assets: + exchange_bundle.ingest_ctable( + asset=asset, + data_frequency=data_frequency, + period='2017', + start_dt=start, + end_dt=end, + writer=writer, + empty_rows_behavior='strip' + ) + + reader = BcolzExchangeBarReader(rootdir=path, + data_frequency=data_frequency) + + # Reading the two assets to ensure that no data was lost + for asset in assets: + sid = asset.sid + + daily_values = reader.load_raw_arrays( + fields=['open', 'high', 'low', 'close', 'volume'], + start_dt=start, + end_dt=end, + sids=[sid], + ) + + print('found {} rows for last ingestion'.format( + len(daily_values[0])) + ) + pass + + def test_minute_bundle(self): + exchange_name = 'poloniex' + data_frequency = 'minute' + + exchange = get_exchange(exchange_name) + asset = exchange.get_asset('neo_btc') + + path = get_bcolz_chunk( + exchange_name=exchange_name, + symbol=asset.symbol, + data_frequency=data_frequency, + period='2017-5', + ) + + pass diff --git a/tests/exchange/test_clock.py b/tests/exchange/test_clock.py index 94414d27..ff74986b 100644 --- a/tests/exchange/test_clock.py +++ b/tests/exchange/test_clock.py @@ -1,7 +1,7 @@ from unittest import TestCase from logbook import Logger from mock import patch, sentinel -from catalyst.exchange.exchange_clock import ExchangeClock +from catalyst.exchange.simple_clock import SimpleClock from catalyst.utils.calendars.trading_calendar import days_at_time from datetime import time from collections import defaultdict @@ -35,9 +35,9 @@ class ExchangeClockTestCase(TestCase): return self.internal_clock def test_clock(self): - with patch('catalyst.exchange.exchange_clock.pd.to_datetime') as to_dt, \ - patch('catalyst.exchange.exchange_clock.sleep') as sleep: - clock = ExchangeClock(sessions=self.sessions) + 
with patch('catalyst.exchange.simple_clock.pd.to_datetime') as to_dt, \ + patch('catalyst.exchange.simple_clock.sleep') as sleep: + clock = SimpleClock(sessions=self.sessions) to_dt.side_effect = self.get_clock sleep.side_effect = self.advance_clock start_time = pd.Timestamp.utcnow() diff --git a/tests/exchange/test_data_portal.py b/tests/exchange/test_data_portal.py new file mode 100644 index 00000000..80ed5531 --- /dev/null +++ b/tests/exchange/test_data_portal.py @@ -0,0 +1,108 @@ +import pandas as pd +from logbook import Logger + +from catalyst import get_calendar +from catalyst.exchange.asset_finder_exchange import AssetFinderExchange +from catalyst.exchange.bitfinex.bitfinex import Bitfinex +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.data_portal_exchange import DataPortalExchangeBacktest, \ + DataPortalExchangeLive +from catalyst.exchange.exchange_utils import get_exchange_auth + +log = Logger('test_bitfinex') + + +class ExchangeDataPortalTestCase: + @classmethod + def setup(self): + log.info('creating bitfinex exchange') + auth_bitfinex = get_exchange_auth('bitfinex') + self.bitfinex = Bitfinex( + key=auth_bitfinex['key'], + secret=auth_bitfinex['secret'], + base_currency='usd' + ) + + log.info('creating bittrex exchange') + auth_bitfinex = get_exchange_auth('bittrex') + self.bittrex = Bittrex( + key=auth_bitfinex['key'], + secret=auth_bitfinex['secret'], + base_currency='usd' + ) + + open_calendar = get_calendar('OPEN') + asset_finder = AssetFinderExchange() + + self.data_portal_live = DataPortalExchangeLive( + exchanges=dict(bitfinex=self.bitfinex, bittrex=self.bittrex), + asset_finder=asset_finder, + trading_calendar=open_calendar, + first_trading_day=pd.to_datetime('today', utc=True) + ) + self.data_portal_backtest = DataPortalExchangeBacktest( + exchanges=dict(bitfinex=self.bitfinex), + asset_finder=asset_finder, + trading_calendar=open_calendar, + first_trading_day=None # will set dynamically based on assets + ) + + 
def test_get_history_window_live(self): + asset_finder = self.data_portal_live.asset_finder + + assets = [ + asset_finder.lookup_symbol('eth_btc', self.bitfinex), + asset_finder.lookup_symbol('eth_btc', self.bittrex) + ] + now = pd.Timestamp.utcnow() + data = self.data_portal_live.get_history_window( + assets, + now, + 10, + '1m', + 'price') + pass + + def test_get_spot_value_live(self): + asset_finder = self.data_portal_live.asset_finder + + assets = [ + asset_finder.lookup_symbol('eth_btc', self.bitfinex), + asset_finder.lookup_symbol('eth_btc', self.bittrex) + ] + now = pd.Timestamp.utcnow() + value = self.data_portal_live.get_spot_value( + assets, 'price', now, '1m') + pass + + def test_get_history_window_backtest(self): + asset_finder = self.data_portal_live.asset_finder + + assets = [ + asset_finder.lookup_symbol('neo_btc', self.bitfinex), + ] + + date = pd.to_datetime('2017-09-10', utc=True) + data = self.data_portal_backtest.get_history_window( + assets, + date, + 10, + '1m', + 'close', + 'minute') + + log.info('found history window: {}'.format(data)) + pass + + def test_get_spot_value_backtest(self): + asset_finder = self.data_portal_backtest.asset_finder + + assets = [ + asset_finder.lookup_symbol('neo_btc', self.bitfinex), + ] + + date = pd.to_datetime('2017-09-10', utc=True) + value = self.data_portal_backtest.get_spot_value( + assets, 'close', date, 'minute') + log.info('found spot value {}'.format(value)) + pass diff --git a/tests/exchange/test_poloniex.py b/tests/exchange/test_poloniex.py new file mode 100644 index 00000000..4f2f12a7 --- /dev/null +++ b/tests/exchange/test_poloniex.py @@ -0,0 +1,91 @@ +from catalyst.exchange.bittrex.bittrex import Bittrex +from catalyst.exchange.poloniex.poloniex import Poloniex +from catalyst.finance.order import Order +from base import BaseExchangeTestCase +from logbook import Logger +from catalyst.exchange.exchange_utils import get_exchange_auth + +log = Logger('test_poloniex') + + +class 
PoloniexTestCase(BaseExchangeTestCase): + @classmethod + def setup(self): + print ('creating poloniex object') + auth = get_exchange_auth('poloniex') + self.exchange = Poloniex( + key=auth['key'], + secret=auth['secret'], + base_currency='btc' + ) + + def test_order(self): + log.info('creating order') + asset = self.exchange.get_asset('neo_btc') + order_id = self.exchange.order( + asset=asset, + limit_price=0.0005, + amount=1, + ) + log.info('order created {}'.format(order_id)) + assert order_id is not None + pass + + def test_open_orders(self): + log.info('retrieving open orders') + asset = self.exchange.get_asset('neo_btc') + orders = self.exchange.get_open_orders(asset) + pass + + def test_get_order(self): + log.info('retrieving order') + order = self.exchange.get_order( + u'2c584020-9caf-4af5-bde0-332c0bba17e2') + assert isinstance(order, Order) + pass + + def test_cancel_order(self, ): + log.info('cancel order') + self.exchange.cancel_order(u'dc7bcca2-5219-4145-8848-8a593d2a72f9') + pass + + def test_get_candles(self): + log.info('retrieving candles') + ohlcv_neo = self.exchange.get_candles( + data_frequency='5m', + assets=self.exchange.get_asset('neo_btc') + ) + ohlcv_neo_ubq = self.exchange.get_candles( + data_frequency='5m', + assets=[ + self.exchange.get_asset('neo_btc'), + self.exchange.get_asset('ubq_btc') + ], + bar_count=14 + ) + pass + + def test_tickers(self): + log.info('retrieving tickers') + tickers = self.exchange.tickers([ + self.exchange.get_asset('eth_btc'), + self.exchange.get_asset('etc_btc') + ]) + assert len(tickers) == 2 + pass + + def test_get_balances(self): + log.info('testing wallet balances') + balances = self.exchange.get_balances() + pass + + def test_get_account(self): + log.info('testing account data') + pass + + def test_orderbook(self): + log.info('testing order book for poloniex') + asset = self.exchange.get_asset('eth_btc') + + orderbook = self.exchange.get_orderbook(asset) + pass