diff --git a/catalyst/examples/buy_low_sell_high_neo_with_interface.py b/catalyst/examples/buy_low_sell_high_neo_with_interface.py deleted file mode 100644 index a7630ef2..00000000 --- a/catalyst/examples/buy_low_sell_high_neo_with_interface.py +++ /dev/null @@ -1,173 +0,0 @@ -import talib -from logbook import Logger -import pandas as pd - -from catalyst.api import ( - order, - order_target_percent, - symbol, - record, - get_open_orders, -) -from catalyst.exchange.stats_utils import get_pretty_stats -from catalyst.utils.run_algo import run_algorithm - -algo_namespace = 'buy_low_sell_high_neo' -log = Logger(algo_namespace) - - -def initialize(context): - log.info('initializing algo') - context.asset = symbol('neo_btc', 'bitfinex') - - context.TARGET_POSITIONS = 50000 - context.PROFIT_TARGET = 0.1 - context.SLIPPAGE_ALLOWED = 0.02 - - context.retry_check_open_orders = 10 - context.retry_update_portfolio = 10 - context.retry_order = 5 - - context.errors = [] - pass - - -def _handle_data(context, data): - price = data.current(context.asset, 'close') - log.info('got price {price}'.format(price=price)) - - if price is None: - log.warn('no pricing data') - return - - prices = data.history( - context.asset, - fields='price', - bar_count=1, - frequency='1m' - ) - rsi = talib.RSI(prices.values, timeperiod=14)[-1] - log.info('got rsi: {}'.format(rsi)) - - # Buying more when RSI is low, this should lower our cost basis - if rsi <= 30: - buy_increment = 1 - elif rsi <= 40: - buy_increment = 0.5 - elif rsi <= 70: - buy_increment = 0.1 - else: - buy_increment = None - - cash = context.portfolio.cash - log.info('base currency available: {cash}'.format(cash=cash)) - - record(price=price) - - orders = get_open_orders(context.asset) - if len(orders) > 0: - log.info('skipping bar until all open orders execute') - return - - is_buy = False - cost_basis = None - if context.asset in context.portfolio.positions: - position = context.portfolio.positions[context.asset] - - cost_basis = position.cost_basis - log.info( - 'found {amount} positions with cost basis {cost_basis}'.format( - amount=position.amount, - cost_basis=cost_basis - ) - ) - - if position.amount >= context.TARGET_POSITIONS: - log.info('reached positions target: {}'.format(position.amount)) - return - - if price < cost_basis: - is_buy = True - elif position.amount > 0 and \ - price > cost_basis * (1 + context.PROFIT_TARGET): - profit = (price * position.amount) - (cost_basis * position.amount) - - log.info('closing position, taking profit: {}'.format(profit)) - order_target_percent( - asset=context.asset, - target=0, - limit_price=price * (1 - context.SLIPPAGE_ALLOWED), - ) - else: - log.info('no buy or sell opportunity found') - else: - is_buy = True - - if is_buy: - if buy_increment is None: - return - - if price * buy_increment > cash: - log.info('not enough base currency to consider buying') - return - - log.info( - 'buying position cheaper than cost basis {} < {}'.format( - price, - cost_basis - ) - ) - limit_price = price * (1 + context.SLIPPAGE_ALLOWED) - order( - asset=context.asset, - amount=buy_increment, - limit_price=limit_price - ) - pass - - -def handle_data(context, data): - log.info('handling bar {}'.format(data.current_dt)) - # try: - _handle_data(context, data) - # except Exception as e: - # log.warn('aborting the bar on error {}'.format(e)) - # context.errors.append(e) - - log.info('completed bar {}, total execution errors {}'.format( - data.current_dt, - len(context.errors) - )) - - if len(context.errors) > 0: - log.info('the errors:\n{}'.format(context.errors)) - - -def analyze(context, stats): - log.info('the daily stats:\n{}'.format(get_pretty_stats(stats))) - - pass - - -# run_algorithm( -# initialize=initialize, -# handle_data=handle_data, -# analyze=analyze, -# exchange_name='bitfinex', -# live=True, -# algo_namespace=algo_namespace, -# base_currency='btc', -# live_graph=False -# ) - -# Backtest -run_algorithm( - capital_base=250, - data_frequency='minute', - initialize=initialize, - handle_data=handle_data, - analyze=analyze, - exchange_name='bitfinex', - algo_namespace=algo_namespace, - base_currency='btc' -) diff --git a/catalyst/examples/simple_loop.py b/catalyst/examples/simple_loop.py index eefd6ecb..282c304e 100644 --- a/catalyst/examples/simple_loop.py +++ b/catalyst/examples/simple_loop.py @@ -50,4 +50,3 @@ run_algorithm( # algo_namespace='simple_loop', # base_currency='eth', # live_graph=False -# ) \ No newline at end of file diff --git a/catalyst/exchange/bittrex/bittrex.py b/catalyst/exchange/bittrex/bittrex.py index 6c490a46..d50d9bb8 100644 --- a/catalyst/exchange/bittrex/bittrex.py +++ b/catalyst/exchange/bittrex/bittrex.py @@ -1,7 +1,7 @@ import json +import time import pandas as pd -import time from catalyst.assets._assets import TradingPair from logbook import Logger from six.moves import urllib diff --git a/catalyst/exchange/bundle_utils.py b/catalyst/exchange/bundle_utils.py index 4510bce4..2210aa7b 100644 --- a/catalyst/exchange/bundle_utils.py +++ b/catalyst/exchange/bundle_utils.py @@ -223,6 +223,9 @@ def get_month_start_end(dt, first_day=None, last_day=None): dt.year, dt.month, month_range[1], 23, 59, 0, 0 ), utc=True) + if month_end > pd.Timestamp.utcnow(): + month_end = pd.Timestamp.utcnow().floor('1D') + return month_start, month_end @@ -247,6 +250,9 @@ def get_year_start_end(dt, first_day=None, last_day=None): year_end = last_day if last_day \ else pd.to_datetime(date(dt.year, 12, 31), utc=True) + if year_end > pd.Timestamp.utcnow(): + year_end = pd.Timestamp.utcnow().floor('1D') + return year_start, year_end @@ -294,24 +300,17 @@ def range_in_bundle(asset, start_dt, end_dt, reader): """ has_data = True - if has_data and reader is not None: + dates = [start_dt, end_dt] + + while dates and has_data: try: - start_close = \ - reader.get_value(asset.sid, start_dt, 'close') + dt = dates.pop(0) + close = reader.get_value(asset.sid, dt, 'close') - if np.isnan(start_close): + if np.isnan(close): has_data = False - else: - end_close = reader.get_value(asset.sid, end_dt, 'close') - - if np.isnan(end_close): - has_data = False - except Exception as e: has_data = False - else: - has_data = False - return has_data diff --git a/catalyst/exchange/exchange_bundle.py b/catalyst/exchange/exchange_bundle.py index f5df0f8e..a634757d 100644 --- a/catalyst/exchange/exchange_bundle.py +++ b/catalyst/exchange/exchange_bundle.py @@ -1,4 +1,30 @@ import os +import shutil +from itertools import chain + +import pandas as pd +from catalyst.assets._assets import TradingPair +from logbook import Logger +from pandas.tslib import Timestamp +from pytz import UTC +from six import itervalues + +from catalyst import get_calendar +from catalyst.constants import LOG_LEVEL +from catalyst.data.minute_bars import BcolzMinuteOverlappingData, \ + BcolzMinuteBarMetadata +from catalyst.exchange.bundle_utils import range_in_bundle, \ + get_bcolz_chunk, get_delta, get_month_start_end, \ + get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label +from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ + BcolzExchangeBarWriter +from catalyst.exchange.exchange_errors import EmptyValuesInBundleError, \ + TempBundleNotFoundError, \ + NoDataAvailableOnExchange, \ + PricingDataNotLoadedError +from catalyst.exchange.exchange_utils import get_exchange_folder +from catalyst.utils.cli import maybe_show_progress +from catalyst.utils.paths import ensure_directory import os import shutil from itertools import chain @@ -303,7 +329,7 @@ class ExchangeBundle: :return: """ - + # Download and extract the bundle path = get_bcolz_chunk( exchange_name=self.exchange.name, symbol=asset.symbol, @@ -313,6 +339,14 @@ class ExchangeBundle: reader = self.get_reader(data_frequency, path=path) if reader is None: + try: + log.warn('the reader is unable to use bundle: {}, ' + 'deleting it.'.format(path)) + shutil.rmtree(path) + + except Exception as e: + log.warn('unable to remove temp bundle: {}'.format(e)) + raise TempBundleNotFoundError(path=path) start_dt = reader.first_trading_day @@ -335,7 +369,7 @@ class ExchangeBundle: )) if not arrays: - return path + return reader._rootdir periods = self.get_calendar_periods_range( start_dt, end_dt, data_frequency @@ -351,11 +385,12 @@ class ExchangeBundle: if cleanup: log.debug( - 'removing bundle folder following ingestion: {}'.format(path) + 'removing bundle folder following ingestion: {}'.format( + reader._rootdir) ) - shutil.rmtree(path) + shutil.rmtree(reader._rootdir) - return path + return reader._rootdir def get_adj_dates(self, start, end, assets, data_frequency): """ @@ -402,8 +437,8 @@ class ExchangeBundle: if end is None or start is None or start >= end: raise NoDataAvailableOnExchange( - exchange=asset.exchange.title(), - symbol=[asset.symbol], + exchange=[asset.exchange for asset in assets], + symbol=[asset.symbol for asset in assets], data_frequency=data_frequency, ) @@ -429,9 +464,7 @@ class ExchangeBundle: get_start_end = get_month_start_end \ if data_frequency == 'minute' else get_year_start_end - start_dt, _ = get_start_end(start_dt) - _, end_dt = get_start_end(end_dt) - + # Get a reader for the main bundle to verify if data exists reader = self.get_reader(data_frequency) chunks = dict() @@ -462,7 +495,6 @@ class ExchangeBundle: chunks[asset] = [] for index, dt in enumerate(dates): - period_start, period_end = get_start_end( dt=dt, first_day=dt if index == 0 else None, @@ -481,17 +513,17 @@ class ExchangeBundle: asset, range_start, period_end, reader ) if not has_data: - chunks[asset].append( - dict( - asset=asset, - period_start=period_start, - period_end=period_end, - period=get_period_label(dt, data_frequency) - ) + period = get_period_label(dt, data_frequency) + chunk = dict( + asset=asset, + period=period, ) + chunks[asset].append(chunk) # We sort the chunks by end date to ingest most recent data first - chunks[asset].sort(key=lambda chunk: chunk['period_end']) + chunks[asset].sort( + key=lambda chunk: pd.to_datetime(chunk['period']) + ) return chunks @@ -503,20 +535,26 @@ class ExchangeBundle: Parameters ---------- assets: list[TradingPair] + data_frequency: str start_dt: datetime end_dt: datetime + show_progress: bool + asset_chunks: bool """ - if start_dt is None: start_dt = self.calendar.first_session if end_dt is None: end_dt = pd.Timestamp.utcnow() - start_dt, end_dt = self.get_adj_dates( - start_dt, end_dt, assets, data_frequency - ) + get_start_end = get_month_start_end \ + if data_frequency == 'minute' else get_year_start_end + + # Assign the first and last day of the period + start_dt, _ = get_start_end(start_dt) + _, end_dt = get_start_end(end_dt) + chunks = self.prepare_chunks( assets=assets, data_frequency=data_frequency, @@ -524,19 +562,9 @@ class ExchangeBundle: end_dt=end_dt ) - # Since chunks are either monthly or yearly, it is possible that - # our ingestion data range is greater than specified. We adjust - # the boundaries to ensure that the writer can write all data. - all_chunks = list(chain.from_iterable(itervalues(chunks))) - for chunk in all_chunks: - if chunk['period_start'] < start_dt: - start_dt = chunk['period_start'] - - if chunk['period_end'] > end_dt: - end_dt = chunk['period_end'] - + # This is the common writer for the entire exchange bundle + # we want to give an end_date far in time writer = self.get_writer(start_dt, end_dt, data_frequency) - if asset_chunks: for asset in chunks: with maybe_show_progress( @@ -558,6 +586,12 @@ class ExchangeBundle: cleanup=True ) else: + all_chunks = list(chain.from_iterable(itervalues(chunks))) + + # We sort the chunks by end date to ingest most recent data first + all_chunks.sort( + key=lambda chunk: pd.to_datetime(chunk['period']) + ) with maybe_show_progress( all_chunks, show_progress, @@ -597,15 +631,15 @@ class ExchangeBundle: for frequency in data_frequency.split(','): self.ingest_assets(assets, frequency, start, end, - show_progress) + show_progress, True) def get_history_window_series_and_load(self, - assets, # type: List[TradingPair] - end_dt, # type: Timestamp - bar_count, # type: int - field, # type: str - data_frequency, # type: str - algo_end_dt=None # type: Timestamp + assets, + end_dt, + bar_count, + field, + data_frequency, + algo_end_dt=None ): """ Retrieve price data history, ingest missing data. @@ -663,13 +697,12 @@ class ExchangeBundle: return series def get_spot_values(self, - assets, # type: List[TradingPair] - field, # type: str - dt, # type: Timestamp - data_frequency, # type: str - reset_reader=False # type: bool + assets, + field, + dt, + data_frequency, + reset_reader=False ): - # type: (...) -> List[float] """ The spot values for the gives assets, field and date. Reads from the exchange data bundle. @@ -788,6 +821,14 @@ class ExchangeBundle: return series def clean(self, data_frequency): + """ + Removing the bundle data from the catalyst folder. + + Parameters + ---------- + data_frequency: str + + """ log.debug('cleaning exchange {}, frequency {}'.format( self.exchange.name, data_frequency )) diff --git a/catalyst/exchange/exchange_utils.py b/catalyst/exchange/exchange_utils.py index 4db41d9e..b4f828fe 100644 --- a/catalyst/exchange/exchange_utils.py +++ b/catalyst/exchange/exchange_utils.py @@ -284,13 +284,12 @@ def save_algo_df(algo_name, key, df, environ=None, rel_path=None): ---------- algo_name: str key: str - df: DataFrame + df: pd.DataFrame environ: rel_path: str """ folder = get_algo_folder(algo_name, environ) - if rel_path is not None: folder = os.path.join(folder, rel_path) ensure_directory(folder) diff --git a/catalyst/exchange/poloniex/poloniex.py b/catalyst/exchange/poloniex/poloniex.py index 65c397c7..62845845 100644 --- a/catalyst/exchange/poloniex/poloniex.py +++ b/catalyst/exchange/poloniex/poloniex.py @@ -1,4 +1,3 @@ -import calendar import json import json import time diff --git a/docs/source/releases.rst b/docs/source/releases.rst index ee220190..4a01d62c 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -2,24 +2,46 @@ Release Notes ============= -Version 0.3.4 +Version 0.3.5 ^^^^^^^^^^^^^ -**Release Date**: 2017-10-31 +**Release Date**: 2017-11-2 Bug Fixes ~~~~~~~~~ -- Fixed issue with auto-ingestion of minute data +- Added workaround for: KeyError: Timestamp error (:issue:`53`) + +Version 0.3.4 +^^^^^^^^^^^^^ +**Release Date**: 2017-11-2 + +Bug Fixes +~~~~~~~~~ + +- Fixed issue with auto-ingestion of minute data (:issue:`47`) - Fixed issue with sell orders in backtesting - Fixed data frequency issues with data.history() in backtesting - Fixed an issue with can_trade() - +- Reduced the commission and slippage values to account for lower volume transactions Build ~~~~~ - Added more unit tests +Documentation +~~~~~~~~~~~~~ + +- Improved installation notes for Windows C++ compiler and Conda +- Addition of `Jupyter Notebook guide `_ +- Addition of `Live Trading page `_ +- Addition of `Videos page `_ +- Addition of `Resources page `_ +- Addition of `Development Guidelines `_ +- Addition of `Release Notes `_ +- Updated code docstrings + + Version 0.3.3 ^^^^^^^^^^^^^ **Release Date**: 2017-10-26 diff --git a/tests/exchange/test_bundle.py b/tests/exchange/test_bundle.py index c5e5feb9..ba055dca 100644 --- a/tests/exchange/test_bundle.py +++ b/tests/exchange/test_bundle.py @@ -1,14 +1,13 @@ import hashlib +import os import tempfile from logging import getLogger -import os import pandas as pd from catalyst import get_calendar from catalyst.exchange.bundle_utils import get_bcolz_chunk, \ - get_periods_range, get_start_dt, get_month_start_end, get_df_from_arrays, \ - get_year_start_end + get_start_dt, get_df_from_arrays from catalyst.exchange.exchange_bcolz import BcolzExchangeBarReader, \ BcolzExchangeBarWriter from catalyst.exchange.exchange_bundle import ExchangeBundle, \ @@ -122,18 +121,20 @@ class TestExchangeBundle: pass def test_ingest_daily(self): - # exchange_name = 'bitfinex' - # data_frequency = 'daily' - # include_symbols = 'neo_btc,bch_btc,eth_btc' - - exchange_name = 'poloniex' + exchange_name = 'bitfinex' data_frequency = 'daily' - include_symbols = 'eth_btc' + include_symbols = 'btc_usd' - start = pd.to_datetime('2017-1-1', utc=True) - end = pd.to_datetime('2017-10-16', utc=True) - periods = get_periods_range(start, end, data_frequency) + # exchange_name = 'poloniex' + # data_frequency = 'daily' + # include_symbols = 'eth_btc' + # start = pd.to_datetime('2017-1-1', utc=True) + # end = pd.to_datetime('2017-10-16', utc=True) + # periods = get_periods_range(start, end, data_frequency) + + start = None + end = None exchange = get_exchange(exchange_name) exchange_bundle = ExchangeBundle(exchange) @@ -153,12 +154,18 @@ class TestExchangeBundle: assets.append(exchange.get_asset(pair_symbol)) reader = exchange_bundle.get_reader(data_frequency) + start_dt = reader.first_trading_day + end_dt = reader.last_available_dt + + if data_frequency == 'daily': + end_dt = end_dt - pd.Timedelta(hours=23, minutes=59) + for asset in assets: arrays = reader.load_raw_arrays( sids=[asset.sid], fields=['close'], - start_dt=start, - end_dt=end + start_dt=start_dt, + end_dt=end_dt ) print('found {} rows for {} ingestion\n{}'.format( len(arrays[0]), asset.symbol, arrays[0]) @@ -430,14 +437,30 @@ class TestExchangeBundle: print('\n' + df_to_string(df)) pass - def bundle_to_csv(self): + def main_bundle_to_csv(self): exchange_name = 'bitfinex' - data_frequency = 'daily' - period = '2016' + data_frequency = 'minute' exchange = get_exchange(exchange_name) - bundle = ExchangeBundle(exchange) - asset = exchange.get_asset('eth_btc') + asset = exchange.get_asset('neo_usd') + + self._bundle_to_csv( + asset=asset, + exchange=exchange, + data_frequency=data_frequency, + filename='{}_{}_{}'.format( + exchange_name, data_frequency, asset.symbol + ) + ) + + def bundle_to_csv(self): + exchange_name = 'bitfinex' + data_frequency = 'minute' + period = '2017-10' + symbol = 'neo_btc' + + exchange = get_exchange(exchange_name) + asset = exchange.get_asset(symbol) path = get_bcolz_chunk( exchange_name=exchange.name, @@ -445,7 +468,20 @@ class TestExchangeBundle: data_frequency=data_frequency, period=period ) + self._bundle_to_csv( + asset=asset, + exchange=exchange, + data_frequency=data_frequency, + path=path, + filename=period + ) + pass + + def _bundle_to_csv(self, asset, exchange, data_frequency, filename, + path=None): + bundle = ExchangeBundle(exchange) reader = bundle.get_reader(data_frequency, path=path) + start_dt = reader.first_trading_day end_dt = reader.last_available_dt @@ -475,7 +511,7 @@ class TestExchangeBundle: ) ensure_directory(folder) - path = os.path.join(folder, period + '.csv') + path = os.path.join(folder, filename + '.csv') log.info('creating csv file: {}'.format(path)) print('HEAD\n{}'.format(df.head(10)))