ENH: Implement CLI.

Add a CLI that reads in an algorithm, loads data, runs the algorithm, and outputs performance metrics. The examples are adapted to the new zipline API, and their analyses are split into separate files. Also add config files that run the example algorithms with preset settings.
2026-06-27 17:47:56 +08:00 · 2014-04-22 13:46:11 -04:00
parent fde40192cf
commit f9fded97ac
12 changed files with 249 additions and 285 deletions
@@ -1,174 +1,25 @@
-import argparse
-
-import os
+#!/usr/bin/env python
+#
+# Copyright 2014 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import sys
-sys.path.append('qexec')
-
-import pandas as pd
-
-import cProfile
-from line_profiler import LineProfiler
-import datetime
-
-from functools import partial
-
-from mem_util import get_memusage_mb
-
-
-def profile_qexec(algo_text, results_file_name, start_date, end_date,
-                  capital_base, granularity, profiler_type=None,
-                  names_to_profile=None,
-                  live_algo=False,
-                  session_start_date=None,
-                  inception_date=None,
-                  data_delay=None):
-    # Import inside profile function, so that modules that take a while
-    # to import, e.g. tradingcalendar, don't trigger when there are
-    # invalid parameters, which should be a quick fail.
-    # TODO: Fix load time of tradingcalendar.
-    from algo_profile import run_algo
-    from qexec.algo.validation import unittest
-
-    results, ok = unittest(algo_text, granularity)
-
-    if not ok:
-        for res in results:
-            if not res['passed']:
-                print res._data
-        sys.exit('Did not pass validation.')
-
-    algo_runner = partial(
-        run_algo,
-        algo_text,
-        start_date,
-        end_date,
-        capital_base,
-        granularity,
-        session_start=session_start_date,
-        inception_date=inception_date,
-        data_delay=data_delay,
-        live_algo=live_algo
-    )
-    if profiler_type == 'cProfile':
-        results_dir = os.path.join('results', 'cprofile')
-        if not os.path.exists(results_dir):
-            os.makedirs(results_dir)
-        results_file = os.path.join(results_dir, results_file_name)
-        cProfile.runctx('algo_runner()',
-                        globals(),
-                        locals(),
-                        results_file)
-        print ("Wrote results to: {0}".format(results_file))
-
-    elif profiler_type == 'line_profiler':
-        results_dir = os.path.join('results', 'line_profiler')
-        if not os.path.exists(results_dir):
-            os.makedirs(results_dir)
-        results_file = os.path.join(results_dir, results_file_name)
-        profiler = LineProfiler()
-        for name_to_profile in names_to_profile:
-            name_parts = name_to_profile.split('.')
-            obj = __import__(name_parts[0])
-            for name in name_parts[1:]:
-                obj = getattr(obj, name)
-            profiler.add_function(obj)
-        profiler.runctx('algo_runner()',
-                        globals(),
-                        locals())
-        with open(results_file, 'w') as f:
-            profiler.print_stats(stream=f)
-        print ("Wrote results to: {0}".format(results_file))
-
-    elif not profiler_type:
-        algo_runner()
-
-
-def create_parser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--algofile', '-f', type=argparse.FileType('r'),
-                        required=True)
-    parser.add_argument('--data-frequency', default='minute',
-                        choices=('minute', 'daily'))
-    parser.add_argument('--start-date', default='2012-01-01')
-    parser.add_argument('--end-date', default='2012-12-31')
-    parser.add_argument('--start-epoch', type=int)
-    parser.add_argument('--end-epoch', type=int)
-    parser.add_argument('--capital-base', default='10e6')
-    parser.add_argument('--live-algo', action='store_true', default=False)
-    parser.add_argument('--session-start-date', default='2014-01-03')
-    parser.add_argument('--inception-date', default='2013-12-03')
-    parser.add_argument('--data-delay', type=int, default=15 * 60)
-    parser.add_argument('--is-inception', action='store_true', default=False)
-    parser.add_argument('--profiler-type', choices=('cProfile',
-                                                    'line_profiler')),
-    parser.add_argument(
-        '--name-to-profile',
-        action='append',
-        default=[
-            # Good proxy for overall performance/main gen
-            'zipline.gens.tradesimulation.AlgorithmSimulator.transform',
-            # Proxy for network time, will eventually have to change if
-            # we change data access style.
-            'pymongo.cursor.Cursor.next'
-        ])
-
-    return parser

+import zipline
+from zipline.utils import parse_args, run_algo

 if __name__ == "__main__":
-
-    parser = create_parser()
-    args = parser.parse_args()
-
-    start_date = pd.Timestamp(args.start_date, tz='UTC')
-    end_date = pd.Timestamp(args.end_date, tz='UTC')
-
-    if args.start_epoch:
-        start_date = pd.Timestamp(args.start_epoch, tz='UTC')
-    if args.end_epoch:
-        end_date = pd.Timestamp(args.end_epoch, tz='UTC')
-
-    algo_text = args.algofile.read()
-    # Remove the extension
-    algo_name_base = os.path.splitext(args.algofile.name)[0]
-    algo_name = os.path.basename(algo_name_base)
-
-    results_file_name = \
-        'qexec-prof-{algo_name}-{commitish}-{granularity}-{time}'.format(
-            algo_name=algo_name.replace('.', '_'),
-            commitish='local',
-            granularity=args.data_frequency,
-            time=str(datetime.datetime.now()).replace(' ', '-').
-            replace(':', '-'))
-
-    if args.live_algo:
-        session_start_date = pd.Timestamp(args.session_start_date, tz='UTC')
-        inception_date = pd.Timestamp(args.inception_date, tz='UTC')
-
-    else:
-        session_start_date = None
-        inception_date = None
-
-    profile_qexec(
-        algo_text,
-        results_file_name,
-        start_date,
-        end_date,
-        float(args.capital_base),
-        args.data_frequency,
-        args.profiler_type,
-        args.name_to_profile,
-        live_algo=args.live_algo,
-        session_start_date=session_start_date,
-        inception_date=inception_date,
-        data_delay=args.data_delay
-    )
-
-    import objgraph
-    print 'finished running algo, now doing memory inspection'
-    maxrss_in_mb = get_memusage_mb()
-    print "Max memory consumption={maxrss_in_mb}MB".format(
-        maxrss_in_mb=maxrss_in_mb)
-    print objgraph.show_growth(limit=20)
-    print objgraph.show_most_common_types(limit=35)
+    parsed = parse_args(sys.argv[1:])
+    run_algo(print_algo=True, **parsed)
+    sys.exit(0)
@@ -47,6 +47,7 @@ setup(
    author='Quantopian Inc.',
    author_email='opensource@quantopian.com',
    packages=find_packages(),
+    scripts=['scripts/run_algo.py'],
    long_description=LONG_DESCRIPTION,
    license='Apache 2.0',
    classifiers=[
@@ -27,8 +27,8 @@ from . import finance
 from . import gens
 from . import utils
 from . import transforms
-
 from . algorithm import TradingAlgorithm
+
 from . import api

 __all__ = [
@@ -38,5 +38,5 @@ __all__ = [
    'utils',
    'transforms',
    'api',
-    'TradingAlgorithm'
+    'TradingAlgorithm',
 ]
@@ -174,19 +174,20 @@ class TradingAlgorithm(object):
        self.algoscript = kwargs.pop('script', None)

        self._initialize = None
+        self._analyze = None

        if self.algoscript is not None:
            self.ns = {}
            exec_(self.algoscript, self.ns)
-            if 'initialize' not in self.ns:
-                raise ValueError('You must define an initialze function.')
+            self._initialize = self.ns.get('initialize', None)
            if 'handle_data' not in self.ns:
                raise ValueError('You must define a handle_data function.')
-            self._initialize = self.ns['initialize']
-            self._handle_data = self.ns['handle_data']
+            else:
+                self._handle_data = self.ns['handle_data']
+
+            # Optional analyze function, gets called after run
+            self._analyze = self.ns.get('analyze', None)

-        # If two functions are passed in assume initialize and
-        # handle_data are passed in.
        elif kwargs.get('initialize', False) and kwargs.get('handle_data'):
            if self.algoscript is not None:
                raise ValueError('You can not set script and \
@@ -194,6 +195,7 @@ class TradingAlgorithm(object):
            self._initialize = kwargs.pop('initialize')
            self._handle_data = kwargs.pop('handle_data')

+        # If method not defined, NOOP
        if self._initialize is None:
            self._initialize = lambda x: None

@@ -216,6 +218,13 @@ class TradingAlgorithm(object):

        self._handle_data(self, data)

+    def analyze(self, perf):
+        if self._analyze is None:
+            return
+
+        with ZiplineAPI(self):
+            self._analyze(self, perf)
+
    def __repr__(self):
        """
        N.B. this does not yet represent a string that can be used
@@ -420,6 +429,8 @@ class TradingAlgorithm(object):
            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

+        self.analyze(daily_stats)
+
        return daily_stats

    def _create_daily_stats(self, perfs):
@@ -0,0 +1,3 @@
+[Defaults]
+algofile=buyapple.py
+symbols=AAPL
@@ -14,35 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import matplotlib.pyplot as plt
-from datetime import datetime
-import pytz
+from zipline.api import order, record

-from zipline.algorithm import TradingAlgorithm
-from zipline.utils.factory import load_from_yahoo
+def initialize(context):
+    pass

-
-class BuyApple(TradingAlgorithm):  # inherit from TradingAlgorithm
-    """This is the simplest possible algorithm that does nothing but
-    buy 1 apple share on each event.
-    """
-    def initialize(self):
-        pass
-
-    def handle_data(self, data):  # overload handle_data() method
-        self.order('AAPL', 1)  # order SID (=0) and amount (=1 shares)
-
-
-if __name__ == '__main__':
-    start = datetime(2008, 1, 1, 0, 0, 0, 0, pytz.utc)
-    end = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
-    data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start,
-                           end=end)
-    simple_algo = BuyApple()
-    results = simple_algo.run(data)
-
-    ax1 = plt.subplot(211)
-    results.portfolio_value.plot(ax=ax1)
-    ax2 = plt.subplot(212, sharex=ax1)
-    data.AAPL.plot(ax=ax2)
-    plt.gcf().set_size_inches(18, 8)
+def handle_data(context, data):
+    order('AAPL', 10)
+    record(AAPL=data['AAPL'].price)
@@ -0,0 +1,9 @@
+import matplotlib.pyplot as plt
+
+def analyze(context, perf):
+    ax1 = plt.subplot(211)
+    perf.portfolio_value.plot(ax=ax1)
+    ax2 = plt.subplot(212, sharex=ax1)
+    perf.AAPL.plot(ax=ax2)
+    plt.gcf().set_size_inches(18, 8)
+    plt.show()
@@ -14,87 +14,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import matplotlib.pyplot as plt
+"""Dual Moving Average Crossover algorithm.

-from zipline.algorithm import TradingAlgorithm
-from zipline.finance import trading
-from zipline.transforms import MovingAverage
-from zipline.utils.factory import load_from_yahoo
+This algorithm buys apple once its short moving average crosses
+its long moving average (indicating upwards momentum) and sells
+its shares once the averages cross again (indicating downwards
+momentum).
+"""

-from datetime import datetime
-import pytz
+from zipline.api import order_target, record, symbol
+from collections import deque as moving_window
+import numpy as np

+def initialize(context):
+    # Add 2 windows, one with a long window, one
+    # with a short window.
+    # Note that this is bound to change soon and will be easier.
+    context.short_window = moving_window(maxlen=100)
+    context.long_window = moving_window(maxlen=300)

-class DualMovingAverage(TradingAlgorithm):
-    """Dual Moving Average Crossover algorithm.
+def handle_data(context, data):
+    # Save price to window
+    context.short_window.append(data[symbol('AAPL')].price)
+    context.long_window.append(data[symbol('AAPL')].price)

-    This algorithm buys apple once its short moving average crosses
-    its long moving average (indicating upwards momentum) and sells
-    its shares once the averages cross again (indicating downwards
-    momentum).
+    # Compute averages
+    short_mavg = np.mean(context.short_window)
+    long_mavg = np.mean(context.long_window)

-    """
-    def initialize(self, short_window=20, long_window=40):
-        # Add 2 mavg transforms, one with a long window, one
-        # with a short window.
-        self.add_transform(MovingAverage, 'short_mavg', ['price'],
-                           window_length=short_window)
+    # Trading logic
+    if short_mavg > long_mavg:
+        order_target(symbol('AAPL'), 100)
+    elif short_mavg < long_mavg:
+        order_target(symbol('AAPL'), 0)

-        self.add_transform(MovingAverage, 'long_mavg', ['price'],
-                           window_length=long_window)
-
-        # To keep track of whether we invested in the stock or not
-        self.invested = False
-
-    def handle_data(self, data):
-        self.short_mavg = data['AAPL'].short_mavg['price']
-        self.long_mavg = data['AAPL'].long_mavg['price']
-        self.buy = False
-        self.sell = False
-
-        if self.short_mavg > self.long_mavg and not self.invested:
-            self.order('AAPL', 5000)
-            self.invested = True
-            self.buy = True
-        elif self.short_mavg < self.long_mavg and self.invested:
-            self.order('AAPL', -5000)
-            self.invested = False
-            self.sell = True
-
-        self.record(short_mavg=self.short_mavg,
-                    long_mavg=self.long_mavg,
-                    buy=self.buy,
-                    sell=self.sell)
-
-if __name__ == '__main__':
-    start = datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
-    end = datetime(2003, 1, 1, 0, 0, 0, 0, pytz.utc)
-    data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start,
-                           end=end)
-
-    dma = DualMovingAverage()
-    results = dma.run(data)
-
-    br = trading.environment.benchmark_returns
-    bm_returns = br[(br.index >= start) & (br.index <= end)]
-    results['benchmark_returns'] = (1 + bm_returns).cumprod().values
-    results['algorithm_returns'] = (1 + results.returns).cumprod()
-    fig = plt.figure()
-    ax1 = fig.add_subplot(211, ylabel='cumulative returns')
-
-    results[['algorithm_returns', 'benchmark_returns']].plot(ax=ax1,
-                                                             sharex=True)
-
-    ax2 = fig.add_subplot(212)
-    data['AAPL'].plot(ax=ax2, color='r')
-    results[['short_mavg', 'long_mavg']].plot(ax=ax2)
-
-    ax2.plot(results.ix[results.buy].index, results.short_mavg[results.buy],
-             '^', markersize=10, color='m')
-    ax2.plot(results.ix[results.sell].index, results.short_mavg[results.sell],
-             'v', markersize=10, color='k')
-    plt.legend(loc=0)
-
-    sharpe = [risk['sharpe'] for risk in dma.risk_report['one_month']]
-    print("Monthly Sharpe ratios: {0}".format(sharpe))
-    plt.gcf().set_size_inches(18, 8)
+    # Save values for later inspection
+    record(AAPL=data[symbol('AAPL')].price,
+           short_mavg=short_mavg,
+           long_mavg=long_mavg)
@@ -0,0 +1,20 @@
+import matplotlib.pyplot as plt
+
+def analyze(context, perf):
+    fig = plt.figure()
+    ax1 = fig.add_subplot(211)
+    perf.portfolio_value.plot(ax=ax1)
+
+    ax2 = fig.add_subplot(212)
+    perf['AAPL'].plot(ax=ax2)
+    perf[['short_mavg', 'long_mavg']].plot(ax=ax2)
+
+    perf_trans = perf.ix[[t != [] for t in perf.transactions]]
+    buys = perf_trans.ix[[t[0]['amount'] > 0 for t in perf_trans.transactions]]
+    sells = perf_trans.ix[[t[0]['amount'] < 0 for t in perf_trans.transactions]]
+    ax2.plot(buys.index, perf.short_mavg.ix[buys.index],
+             '^', markersize=10, color='m')
+    ax2.plot(sells.index, perf.short_mavg.ix[sells.index],
+             'v', markersize=10, color='k')
+    plt.legend(loc=0)
+    plt.show()
@@ -1,5 +1,5 @@
 #
-# Copyright 2013 Quantopian, Inc.
+# Copyright 2014 Quantopian, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -0,0 +1,18 @@
+#
+# Copyright 2014 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .cli import run_algo, parse_args
+
+__all__ = ['run_algo', 'parse_args']
@@ -0,0 +1,121 @@
+#
+# Copyright 2014 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import argparse
+import ConfigParser
+from copy import copy
+
+import datetime
+import pandas as pd
+
+try:
+    from pygments import highlight
+    from pygments.lexers import PythonLexer
+    from pygments.formatters import TerminalFormatter
+    from pygments.styles import STYLE_MAP
+    PYGMENTS = True
+except:
+    PYGMENTS = False
+
+import zipline
+
+DEFAULTS = {
+    'start': '2012-01-01',
+    'end': '2012-12-31',
+    'data_frequency': 'daily',
+    'capital_base': '10e6',
+    'source': 'yahoo',
+    'symbols': 'AAPL'
+}
+
+def parse_args(argv):
+    # Parse any conf_file specification
+    # We make this parser with add_help=False so that
+    # it doesn't parse -h and print help.
+    conf_parser = argparse.ArgumentParser(
+        # Don't mess with format of description
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        # Turn off help, so we print all options in response to -h
+        add_help=False
+        )
+    conf_parser.add_argument("-c", "--conf_file",
+                             help="Specify config file",
+                             metavar="FILE")
+    args, remaining_argv = conf_parser.parse_known_args(argv)
+
+    defaults = copy(DEFAULTS)
+
+    if args.conf_file:
+        config = ConfigParser.SafeConfigParser()
+        config.read([args.conf_file])
+        defaults.update(dict(config.items("Defaults")))
+
+    # Parse rest of arguments
+    # Don't suppress add_help here so it will handle -h
+    parser = argparse.ArgumentParser(
+        # Inherit options from config_parser
+        description="Zipline version %s." % zipline.__version__,
+        parents=[conf_parser]
+    )
+
+    parser.set_defaults(**defaults)
+
+    parser.add_argument('--algofile', '-f')
+    parser.add_argument('--data-frequency',
+                        choices=('minute', 'daily'))
+    parser.add_argument('--start', '-s')
+    parser.add_argument('--end', '-e')
+    parser.add_argument('--capital_base')
+    parser.add_argument('--source', choices=('yahoo',))
+    parser.add_argument('--symbols')
+    args = parser.parse_args(remaining_argv)
+
+    return(vars(args))
+
+def run_algo(print_algo=True, **kwargs):
+    start = pd.Timestamp(kwargs['start'], tz='UTC')
+    end = pd.Timestamp(kwargs['end'], tz='UTC')
+
+    symbols = kwargs['symbols'].split(',')
+
+    if kwargs['source'] == 'yahoo':
+        source = zipline.data.load_bars_from_yahoo(stocks=symbols, start=start, end=end)
+    else:
+        raise NotImplementedError('Source %s not implemented.' % kwargs['source'])
+
+    algo_fname = kwargs['algofile']
+    with open(algo_fname, 'r') as fd:
+        algo_text = fd.read()
+
+    analyze_fname = os.path.splitext(algo_fname)[0] + '_analyze.py'
+    if os.path.exists(analyze_fname):
+        with open(analyze_fname, 'r') as fd:
+            # Simply append
+            algo_text += fd.read()
+
+    if print_algo:
+        if PYGMENTS:
+            highlight(algo_text, PythonLexer(), TerminalFormatter(), outfile=sys.stdout)
+        else:
+            print algo_text
+
+    algo = zipline.TradingAlgorithm(script=algo_text,
+                                    capital_base=float(kwargs['capital_base']))
+
+    perf = algo.run(source)
+
+    return perf