From f9fded97ac20ad152f9d5d99ec8c87b78e196b78 Mon Sep 17 00:00:00 2001 From: twiecki Date: Tue, 22 Apr 2014 13:46:11 -0400 Subject: [PATCH] ENH: Implement CLI. Add a CLI that reads in an algorithm, loads data, runs the algorithm, and outputs performance metrics. The examples are adapted to the new zipline API and analyses are split into separate files. Also add config files that run the example algorithms with preset settings. --- scripts/run_algo.py | 189 ++---------------- setup.py | 1 + zipline/__init__.py | 4 +- zipline/algorithm.py | 23 ++- zipline/examples/buyapple.conf | 3 + zipline/examples/buyapple.py | 36 +--- zipline/examples/buyapple_analyze.py | 9 + zipline/examples/dual_moving_average.py | 108 +++------- .../examples/dual_moving_average_analyze.py | 20 ++ zipline/examples/quantopian_buy_apple.py | 2 +- zipline/utils/__init__.py | 18 ++ zipline/utils/cli.py | 121 +++++++++++ 12 files changed, 249 insertions(+), 285 deletions(-) mode change 100644 => 100755 scripts/run_algo.py create mode 100644 zipline/examples/buyapple.conf create mode 100644 zipline/examples/buyapple_analyze.py create mode 100644 zipline/examples/dual_moving_average_analyze.py create mode 100644 zipline/utils/cli.py diff --git a/scripts/run_algo.py b/scripts/run_algo.py old mode 100644 new mode 100755 index 67137f35..3f896860 --- a/scripts/run_algo.py +++ b/scripts/run_algo.py @@ -1,174 +1,25 @@ -import argparse - -import os +#!/usr/bin/env python +# +# Copyright 2014 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. import sys -sys.path.append('qexec') - -import pandas as pd - -import cProfile -from line_profiler import LineProfiler -import datetime - -from functools import partial - -from mem_util import get_memusage_mb - - -def profile_qexec(algo_text, results_file_name, start_date, end_date, - capital_base, granularity, profiler_type=None, - names_to_profile=None, - live_algo=False, - session_start_date=None, - inception_date=None, - data_delay=None): - # Import inside profile function, so that modules that take a while - # to import, e.g. tradingcalendar, don't trigger when there are - # invalid parameters, which should be a quick fail. - # TODO: Fix load time of tradingcalendar. - from algo_profile import run_algo - from qexec.algo.validation import unittest - - results, ok = unittest(algo_text, granularity) - - if not ok: - for res in results: - if not res['passed']: - print res._data - sys.exit('Did not pass validation.') - - algo_runner = partial( - run_algo, - algo_text, - start_date, - end_date, - capital_base, - granularity, - session_start=session_start_date, - inception_date=inception_date, - data_delay=data_delay, - live_algo=live_algo - ) - if profiler_type == 'cProfile': - results_dir = os.path.join('results', 'cprofile') - if not os.path.exists(results_dir): - os.makedirs(results_dir) - results_file = os.path.join(results_dir, results_file_name) - cProfile.runctx('algo_runner()', - globals(), - locals(), - results_file) - print ("Wrote results to: {0}".format(results_file)) - - elif profiler_type == 'line_profiler': - results_dir = os.path.join('results', 'line_profiler') - if not os.path.exists(results_dir): - os.makedirs(results_dir) - results_file = os.path.join(results_dir, results_file_name) - profiler = LineProfiler() - for name_to_profile in names_to_profile: - name_parts = name_to_profile.split('.') - obj = __import__(name_parts[0]) - for name in 
name_parts[1:]: - obj = getattr(obj, name) - profiler.add_function(obj) - profiler.runctx('algo_runner()', - globals(), - locals()) - with open(results_file, 'w') as f: - profiler.print_stats(stream=f) - print ("Wrote results to: {0}".format(results_file)) - - elif not profiler_type: - algo_runner() - - -def create_parser(): - parser = argparse.ArgumentParser() - parser.add_argument('--algofile', '-f', type=argparse.FileType('r'), - required=True) - parser.add_argument('--data-frequency', default='minute', - choices=('minute', 'daily')) - parser.add_argument('--start-date', default='2012-01-01') - parser.add_argument('--end-date', default='2012-12-31') - parser.add_argument('--start-epoch', type=int) - parser.add_argument('--end-epoch', type=int) - parser.add_argument('--capital-base', default='10e6') - parser.add_argument('--live-algo', action='store_true', default=False) - parser.add_argument('--session-start-date', default='2014-01-03') - parser.add_argument('--inception-date', default='2013-12-03') - parser.add_argument('--data-delay', type=int, default=15 * 60) - parser.add_argument('--is-inception', action='store_true', default=False) - parser.add_argument('--profiler-type', choices=('cProfile', - 'line_profiler')), - parser.add_argument( - '--name-to-profile', - action='append', - default=[ - # Good proxy for overall performance/main gen - 'zipline.gens.tradesimulation.AlgorithmSimulator.transform', - # Proxy for network time, will eventually have to change if - # we change data access style. 
- 'pymongo.cursor.Cursor.next' - ]) - - return parser +import zipline +from zipline.utils import parse_args, run_algo if __name__ == "__main__": - - parser = create_parser() - args = parser.parse_args() - - start_date = pd.Timestamp(args.start_date, tz='UTC') - end_date = pd.Timestamp(args.end_date, tz='UTC') - - if args.start_epoch: - start_date = pd.Timestamp(args.start_epoch, tz='UTC') - if args.end_epoch: - end_date = pd.Timestamp(args.end_epoch, tz='UTC') - - algo_text = args.algofile.read() - # Remove the extension - algo_name_base = os.path.splitext(args.algofile.name)[0] - algo_name = os.path.basename(algo_name_base) - - results_file_name = \ - 'qexec-prof-{algo_name}-{commitish}-{granularity}-{time}'.format( - algo_name=algo_name.replace('.', '_'), - commitish='local', - granularity=args.data_frequency, - time=str(datetime.datetime.now()).replace(' ', '-'). - replace(':', '-')) - - if args.live_algo: - session_start_date = pd.Timestamp(args.session_start_date, tz='UTC') - inception_date = pd.Timestamp(args.inception_date, tz='UTC') - - else: - session_start_date = None - inception_date = None - - profile_qexec( - algo_text, - results_file_name, - start_date, - end_date, - float(args.capital_base), - args.data_frequency, - args.profiler_type, - args.name_to_profile, - live_algo=args.live_algo, - session_start_date=session_start_date, - inception_date=inception_date, - data_delay=args.data_delay - ) - - import objgraph - print 'finished running algo, now doing memory inspection' - maxrss_in_mb = get_memusage_mb() - print "Max memory consumption={maxrss_in_mb}MB".format( - maxrss_in_mb=maxrss_in_mb) - print objgraph.show_growth(limit=20) - print objgraph.show_most_common_types(limit=35) + parsed = parse_args(sys.argv[1:]) + run_algo(print_algo=True, **parsed) + sys.exit(0) diff --git a/setup.py b/setup.py index f00f4960..1e058ad1 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ setup( author='Quantopian Inc.', author_email='opensource@quantopian.com', 
packages=find_packages(), + scripts=['scripts/run_algo.py'], long_description=LONG_DESCRIPTION, license='Apache 2.0', classifiers=[ diff --git a/zipline/__init__.py b/zipline/__init__.py index 88f7344e..06e9f1dd 100644 --- a/zipline/__init__.py +++ b/zipline/__init__.py @@ -27,8 +27,8 @@ from . import finance from . import gens from . import utils from . import transforms - from . algorithm import TradingAlgorithm + from . import api __all__ = [ @@ -38,5 +38,5 @@ __all__ = [ 'utils', 'transforms', 'api', - 'TradingAlgorithm' + 'TradingAlgorithm', ] diff --git a/zipline/algorithm.py b/zipline/algorithm.py index 2107ab43..f73014e1 100644 --- a/zipline/algorithm.py +++ b/zipline/algorithm.py @@ -174,19 +174,20 @@ class TradingAlgorithm(object): self.algoscript = kwargs.pop('script', None) self._initialize = None + self._analyze = None if self.algoscript is not None: self.ns = {} exec_(self.algoscript, self.ns) - if 'initialize' not in self.ns: - raise ValueError('You must define an initialze function.') + self._initialize = self.ns.get('initialize', None) if 'handle_data' not in self.ns: raise ValueError('You must define a handle_data function.') - self._initialize = self.ns['initialize'] - self._handle_data = self.ns['handle_data'] + else: + self._handle_data = self.ns['handle_data'] + + # Optional analyze function, gets called after run + self._analyze = self.ns.get('analyze', None) - # If two functions are passed in assume initialize and - # handle_data are passed in. 
elif kwargs.get('initialize', False) and kwargs.get('handle_data'): if self.algoscript is not None: raise ValueError('You can not set script and \ @@ -194,6 +195,7 @@ class TradingAlgorithm(object): self._initialize = kwargs.pop('initialize') self._handle_data = kwargs.pop('handle_data') + # If method not defined, NOOP if self._initialize is None: self._initialize = lambda x: None @@ -216,6 +218,13 @@ class TradingAlgorithm(object): self._handle_data(self, data) + def analyze(self, perf): + if self._analyze is None: + return + + with ZiplineAPI(self): + self._analyze(self, perf) + def __repr__(self): """ N.B. this does not yet represent a string that can be used @@ -420,6 +429,8 @@ class TradingAlgorithm(object): # convert perf dict to pandas dataframe daily_stats = self._create_daily_stats(perfs) + self.analyze(daily_stats) + return daily_stats def _create_daily_stats(self, perfs): diff --git a/zipline/examples/buyapple.conf b/zipline/examples/buyapple.conf new file mode 100644 index 00000000..42ce1cac --- /dev/null +++ b/zipline/examples/buyapple.conf @@ -0,0 +1,3 @@ +[Defaults] +algofile=buyapple.py +symbols=AAPL diff --git a/zipline/examples/buyapple.py b/zipline/examples/buyapple.py index e4ba859d..8922bcd8 100755 --- a/zipline/examples/buyapple.py +++ b/zipline/examples/buyapple.py @@ -14,35 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import matplotlib.pyplot as plt -from datetime import datetime -import pytz +from zipline.api import order, record -from zipline.algorithm import TradingAlgorithm -from zipline.utils.factory import load_from_yahoo +def initialize(context): + pass - -class BuyApple(TradingAlgorithm): # inherit from TradingAlgorithm - """This is the simplest possible algorithm that does nothing but - buy 1 apple share on each event. 
- """ - def initialize(self): - pass - - def handle_data(self, data): # overload handle_data() method - self.order('AAPL', 1) # order SID (=0) and amount (=1 shares) - - -if __name__ == '__main__': - start = datetime(2008, 1, 1, 0, 0, 0, 0, pytz.utc) - end = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc) - data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start, - end=end) - simple_algo = BuyApple() - results = simple_algo.run(data) - - ax1 = plt.subplot(211) - results.portfolio_value.plot(ax=ax1) - ax2 = plt.subplot(212, sharex=ax1) - data.AAPL.plot(ax=ax2) - plt.gcf().set_size_inches(18, 8) +def handle_data(context, data): + order('AAPL', 10) + record(AAPL=data['AAPL'].price) diff --git a/zipline/examples/buyapple_analyze.py b/zipline/examples/buyapple_analyze.py new file mode 100644 index 00000000..a6e6d40f --- /dev/null +++ b/zipline/examples/buyapple_analyze.py @@ -0,0 +1,9 @@ +import matplotlib.pyplot as plt + +def analyze(context, perf): + ax1 = plt.subplot(211) + perf.portfolio_value.plot(ax=ax1) + ax2 = plt.subplot(212, sharex=ax1) + perf.AAPL.plot(ax=ax2) + plt.gcf().set_size_inches(18, 8) + plt.show() diff --git a/zipline/examples/dual_moving_average.py b/zipline/examples/dual_moving_average.py index ac11cbd6..1425c6f3 100755 --- a/zipline/examples/dual_moving_average.py +++ b/zipline/examples/dual_moving_average.py @@ -14,87 +14,41 @@ # See the License for the specific language governing permissions and # limitations under the License. -import matplotlib.pyplot as plt +"""Dual Moving Average Crossover algorithm. -from zipline.algorithm import TradingAlgorithm -from zipline.finance import trading -from zipline.transforms import MovingAverage -from zipline.utils.factory import load_from_yahoo +This algorithm buys apple once its short moving average crosses +its long moving average (indicating upwards momentum) and sells +its shares once the averages cross again (indicating downwards +momentum). 
+""" -from datetime import datetime -import pytz +from zipline.api import order_target, record, symbol +from collections import deque as moving_window +import numpy as np +def initialize(context): + # Add 2 windows, one with a long window, one + # with a short window. + # Note that this is bound to change soon and will be easier. + context.short_window = moving_window(maxlen=100) + context.long_window = moving_window(maxlen=300) -class DualMovingAverage(TradingAlgorithm): - """Dual Moving Average Crossover algorithm. +def handle_data(context, data): + # Save price to window + context.short_window.append(data[symbol('AAPL')].price) + context.long_window.append(data[symbol('AAPL')].price) - This algorithm buys apple once its short moving average crosses - its long moving average (indicating upwards momentum) and sells - its shares once the averages cross again (indicating downwards - momentum). + # Compute averages + short_mavg = np.mean(context.short_window) + long_mavg = np.mean(context.long_window) - """ - def initialize(self, short_window=20, long_window=40): - # Add 2 mavg transforms, one with a long window, one - # with a short window. 
- self.add_transform(MovingAverage, 'short_mavg', ['price'], - window_length=short_window) + # Trading logic + if short_mavg > long_mavg: + order_target(symbol('AAPL'), 100) + elif short_mavg < long_mavg: + order_target(symbol('AAPL'), 0) - self.add_transform(MovingAverage, 'long_mavg', ['price'], - window_length=long_window) - - # To keep track of whether we invested in the stock or not - self.invested = False - - def handle_data(self, data): - self.short_mavg = data['AAPL'].short_mavg['price'] - self.long_mavg = data['AAPL'].long_mavg['price'] - self.buy = False - self.sell = False - - if self.short_mavg > self.long_mavg and not self.invested: - self.order('AAPL', 5000) - self.invested = True - self.buy = True - elif self.short_mavg < self.long_mavg and self.invested: - self.order('AAPL', -5000) - self.invested = False - self.sell = True - - self.record(short_mavg=self.short_mavg, - long_mavg=self.long_mavg, - buy=self.buy, - sell=self.sell) - -if __name__ == '__main__': - start = datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc) - end = datetime(2003, 1, 1, 0, 0, 0, 0, pytz.utc) - data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start, - end=end) - - dma = DualMovingAverage() - results = dma.run(data) - - br = trading.environment.benchmark_returns - bm_returns = br[(br.index >= start) & (br.index <= end)] - results['benchmark_returns'] = (1 + bm_returns).cumprod().values - results['algorithm_returns'] = (1 + results.returns).cumprod() - fig = plt.figure() - ax1 = fig.add_subplot(211, ylabel='cumulative returns') - - results[['algorithm_returns', 'benchmark_returns']].plot(ax=ax1, - sharex=True) - - ax2 = fig.add_subplot(212) - data['AAPL'].plot(ax=ax2, color='r') - results[['short_mavg', 'long_mavg']].plot(ax=ax2) - - ax2.plot(results.ix[results.buy].index, results.short_mavg[results.buy], - '^', markersize=10, color='m') - ax2.plot(results.ix[results.sell].index, results.short_mavg[results.sell], - 'v', markersize=10, color='k') - plt.legend(loc=0) - - sharpe 
= [risk['sharpe'] for risk in dma.risk_report['one_month']] - print("Monthly Sharpe ratios: {0}".format(sharpe)) - plt.gcf().set_size_inches(18, 8) + # Save values for later inspection + record(AAPL=data[symbol('AAPL')].price, + short_mavg=short_mavg, + long_mavg=long_mavg) diff --git a/zipline/examples/dual_moving_average_analyze.py b/zipline/examples/dual_moving_average_analyze.py new file mode 100644 index 00000000..bbb692d9 --- /dev/null +++ b/zipline/examples/dual_moving_average_analyze.py @@ -0,0 +1,20 @@ +import matplotlib.pyplot as plt + +def analyze(context, perf): + fig = plt.figure() + ax1 = fig.add_subplot(211) + perf.portfolio_value.plot(ax=ax1) + + ax2 = fig.add_subplot(212) + perf['AAPL'].plot(ax=ax2) + perf[['short_mavg', 'long_mavg']].plot(ax=ax2) + + perf_trans = perf.ix[[t != [] for t in perf.transactions]] + buys = perf_trans.ix[[t[0]['amount'] > 0 for t in perf_trans.transactions]] + sells = perf_trans.ix[[t[0]['amount'] < 0 for t in perf_trans.transactions]] + ax2.plot(buys.index, perf.short_mavg.ix[buys.index], + '^', markersize=10, color='m') + ax2.plot(sells.index, perf.short_mavg.ix[sells.index], + 'v', markersize=10, color='k') + plt.legend(loc=0) + plt.show() diff --git a/zipline/examples/quantopian_buy_apple.py b/zipline/examples/quantopian_buy_apple.py index bf0d69a2..45beca4c 100644 --- a/zipline/examples/quantopian_buy_apple.py +++ b/zipline/examples/quantopian_buy_apple.py @@ -1,5 +1,5 @@ # -# Copyright 2013 Quantopian, Inc. +# Copyright 2014 Quantopian, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/zipline/utils/__init__.py b/zipline/utils/__init__.py index e69de29b..d3031c30 100644 --- a/zipline/utils/__init__.py +++ b/zipline/utils/__init__.py @@ -0,0 +1,18 @@ +# +# Copyright 2014 Quantopian, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .cli import run_algo, parse_args + +__all__ = ['run_algo', 'parse_args'] \ No newline at end of file diff --git a/zipline/utils/cli.py b/zipline/utils/cli.py new file mode 100644 index 00000000..2a299999 --- /dev/null +++ b/zipline/utils/cli.py @@ -0,0 +1,121 @@ +# +# Copyright 2014 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os +import argparse +import ConfigParser +from copy import copy + +import datetime +import pandas as pd + +try: + from pygments import highlight + from pygments.lexers import PythonLexer + from pygments.formatters import TerminalFormatter + from pygments.styles import STYLE_MAP + PYGMENTS = True +except: + PYGMENTS = False + +import zipline + +DEFAULTS = { + 'start': '2012-01-01', + 'end': '2012-12-31', + 'data_frequency': 'daily', + 'capital_base': '10e6', + 'source': 'yahoo', + 'symbols': 'AAPL' +} + +def parse_args(argv): + # Parse any conf_file specification + # We make this parser with add_help=False so that + # it doesn't parse -h and print help. + conf_parser = argparse.ArgumentParser( + # Don't mess with format of description + formatter_class=argparse.RawDescriptionHelpFormatter, + # Turn off help, so we print all options in response to -h + add_help=False + ) + conf_parser.add_argument("-c", "--conf_file", + help="Specify config file", + metavar="FILE") + args, remaining_argv = conf_parser.parse_known_args(argv) + + defaults = copy(DEFAULTS) + + if args.conf_file: + config = ConfigParser.SafeConfigParser() + config.read([args.conf_file]) + defaults.update(dict(config.items("Defaults"))) + + # Parse rest of arguments + # Don't suppress add_help here so it will handle -h + parser = argparse.ArgumentParser( + # Inherit options from config_parser + description="Zipline version %s." 
% zipline.__version__, + parents=[conf_parser] + ) + + parser.set_defaults(**defaults) + + parser.add_argument('--algofile', '-f') + parser.add_argument('--data-frequency', + choices=('minute', 'daily')) + parser.add_argument('--start', '-s') + parser.add_argument('--end', '-e') + parser.add_argument('--capital_base') + parser.add_argument('--source', choices=('yahoo',)) + parser.add_argument('--symbols') + args = parser.parse_args(remaining_argv) + + return(vars(args)) + +def run_algo(print_algo=True, **kwargs): + start = pd.Timestamp(kwargs['start'], tz='UTC') + end = pd.Timestamp(kwargs['end'], tz='UTC') + + symbols = kwargs['symbols'].split(',') + + if kwargs['source'] == 'yahoo': + source = zipline.data.load_bars_from_yahoo(stocks=symbols, start=start, end=end) + else: + raise NotImplementedError('Source %s not implemented.' % kwargs['source']) + + algo_fname = kwargs['algofile'] + with open(algo_fname, 'r') as fd: + algo_text = fd.read() + + analyze_fname = os.path.splitext(algo_fname)[0] + '_analyze.py' + if os.path.exists(analyze_fname): + with open(analyze_fname, 'r') as fd: + # Simply append + algo_text += fd.read() + + if print_algo: + if PYGMENTS: + highlight(algo_text, PythonLexer(), TerminalFormatter(), outfile=sys.stdout) + else: + print algo_text + + algo = zipline.TradingAlgorithm(script=algo_text, + capital_base=float(kwargs['capital_base'])) + + perf = algo.run(source) + + return perf