ENH: Implement CLI.

Add a CLI that reads in an algorithm, loads data,
runs the algorithm, and outputs performance metrics.

The examples are adapted to the new zipline API and
analyses are split into separate files.

Also add config files that run the example
algorithms with preset settings.
This commit is contained in:
twiecki
2014-04-22 13:46:11 -04:00
parent fde40192cf
commit f9fded97ac
12 changed files with 249 additions and 285 deletions
Regular → Executable
+20 -169
View File
@@ -1,174 +1,25 @@
import argparse
import os
#!/usr/bin/env python
#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append('qexec')
import pandas as pd
import cProfile
from line_profiler import LineProfiler
import datetime
from functools import partial
from mem_util import get_memusage_mb
def _prepare_results_file(subdir, results_file_name):
    """Return ``results/<subdir>/<results_file_name>``, creating the directory
    if it does not exist yet."""
    results_dir = os.path.join('results', subdir)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    return os.path.join(results_dir, results_file_name)


def _resolve_dotted_name(dotted_name):
    """Import the top-level package of ``dotted_name`` and walk the remaining
    components with ``getattr``, returning the final object."""
    name_parts = dotted_name.split('.')
    obj = __import__(name_parts[0])
    for name in name_parts[1:]:
        obj = getattr(obj, name)
    return obj


def profile_qexec(algo_text, results_file_name, start_date, end_date,
                  capital_base, granularity, profiler_type=None,
                  names_to_profile=None,
                  live_algo=False,
                  session_start_date=None,
                  inception_date=None,
                  data_delay=None):
    """Validate an algorithm and run it, optionally under a profiler.

    Parameters
    ----------
    algo_text : source text of the algorithm; passed to validation and to
        ``run_algo``.
    results_file_name : file name (without directory) for profiler output.
    start_date, end_date, capital_base, granularity : forwarded positionally
        to ``run_algo``.
    profiler_type : ``'cProfile'``, ``'line_profiler'`` or a falsy value
        (run without profiling). Any other value runs nothing.
    names_to_profile : dotted names of the functions to register with the
        line profiler. ``None`` is treated as "no functions".
    live_algo, session_start_date, inception_date, data_delay : forwarded to
        ``run_algo`` as keyword arguments.

    Exits the process via ``sys.exit`` if the algorithm fails validation.
    """
    # Import inside profile function, so that modules that take a while
    # to import, e.g. tradingcalendar, don't trigger when there are
    # invalid parameters, which should be a quick fail.
    # TODO: Fix load time of tradingcalendar.
    from algo_profile import run_algo
    from qexec.algo.validation import unittest

    results, ok = unittest(algo_text, granularity)
    if not ok:
        # Show every failed validation result before bailing out.
        for res in results:
            if not res['passed']:
                print(res._data)
        sys.exit('Did not pass validation.')

    algo_runner = partial(
        run_algo,
        algo_text,
        start_date,
        end_date,
        capital_base,
        granularity,
        session_start=session_start_date,
        inception_date=inception_date,
        data_delay=data_delay,
        live_algo=live_algo
    )

    if profiler_type == 'cProfile':
        results_file = _prepare_results_file('cprofile', results_file_name)
        # runctx evaluates the statement string, so ``algo_runner`` must be
        # visible through locals().
        cProfile.runctx('algo_runner()',
                        globals(),
                        locals(),
                        results_file)
        print("Wrote results to: {0}".format(results_file))
    elif profiler_type == 'line_profiler':
        results_file = _prepare_results_file('line_profiler',
                                             results_file_name)
        profiler = LineProfiler()
        # names_to_profile defaults to None; treat that as an empty list
        # instead of failing with a TypeError.
        for name_to_profile in names_to_profile or ():
            profiler.add_function(_resolve_dotted_name(name_to_profile))
        profiler.runctx('algo_runner()',
                        globals(),
                        locals())
        with open(results_file, 'w') as f:
            profiler.print_stats(stream=f)
        print("Wrote results to: {0}".format(results_file))
    elif not profiler_type:
        algo_runner()
def create_parser():
    """Build the command-line parser for the profiling script.

    Returns an ``argparse.ArgumentParser``. ``--algofile`` is required and is
    opened for reading by argparse; every other option has a default (or
    ``None`` when no default is given).
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # Algorithm source and simulation resolution.
    add('--algofile', '-f', type=argparse.FileType('r'), required=True)
    add('--data-frequency', default='minute', choices=('minute', 'daily'))

    # Simulation window, as date strings or integer epochs.
    add('--start-date', default='2012-01-01')
    add('--end-date', default='2012-12-31')
    add('--start-epoch', type=int)
    add('--end-epoch', type=int)

    add('--capital-base', default='10e6')

    # Live-trading related settings.
    add('--live-algo', action='store_true', default=False)
    add('--session-start-date', default='2014-01-03')
    add('--inception-date', default='2013-12-03')
    add('--data-delay', type=int, default=15 * 60)
    add('--is-inception', action='store_true', default=False)

    # Profiling settings.
    add('--profiler-type', choices=('cProfile', 'line_profiler'))
    default_profile_targets = [
        # Good proxy for overall performance/main gen
        'zipline.gens.tradesimulation.AlgorithmSimulator.transform',
        # Proxy for network time, will eventually have to change if
        # we change data access style.
        'pymongo.cursor.Cursor.next'
    ]
    add('--name-to-profile', action='append',
        default=default_profile_targets)

    return cli
import zipline
from zipline.utils import parse_args, run_algo
if __name__ == "__main__":
parser = create_parser()
args = parser.parse_args()
start_date = pd.Timestamp(args.start_date, tz='UTC')
end_date = pd.Timestamp(args.end_date, tz='UTC')
if args.start_epoch:
start_date = pd.Timestamp(args.start_epoch, tz='UTC')
if args.end_epoch:
end_date = pd.Timestamp(args.end_epoch, tz='UTC')
algo_text = args.algofile.read()
# Remove the extension
algo_name_base = os.path.splitext(args.algofile.name)[0]
algo_name = os.path.basename(algo_name_base)
results_file_name = \
'qexec-prof-{algo_name}-{commitish}-{granularity}-{time}'.format(
algo_name=algo_name.replace('.', '_'),
commitish='local',
granularity=args.data_frequency,
time=str(datetime.datetime.now()).replace(' ', '-').
replace(':', '-'))
if args.live_algo:
session_start_date = pd.Timestamp(args.session_start_date, tz='UTC')
inception_date = pd.Timestamp(args.inception_date, tz='UTC')
else:
session_start_date = None
inception_date = None
profile_qexec(
algo_text,
results_file_name,
start_date,
end_date,
float(args.capital_base),
args.data_frequency,
args.profiler_type,
args.name_to_profile,
live_algo=args.live_algo,
session_start_date=session_start_date,
inception_date=inception_date,
data_delay=args.data_delay
)
import objgraph
print 'finished running algo, now doing memory inspection'
maxrss_in_mb = get_memusage_mb()
print "Max memory consumption={maxrss_in_mb}MB".format(
maxrss_in_mb=maxrss_in_mb)
print objgraph.show_growth(limit=20)
print objgraph.show_most_common_types(limit=35)
parsed = parse_args(sys.argv[1:])
run_algo(print_algo=True, **parsed)
sys.exit(0)
+1
View File
@@ -47,6 +47,7 @@ setup(
author='Quantopian Inc.',
author_email='opensource@quantopian.com',
packages=find_packages(),
scripts=['scripts/run_algo.py'],
long_description=LONG_DESCRIPTION,
license='Apache 2.0',
classifiers=[
+2 -2
View File
@@ -27,8 +27,8 @@ from . import finance
from . import gens
from . import utils
from . import transforms
from . algorithm import TradingAlgorithm
from . import api
__all__ = [
@@ -38,5 +38,5 @@ __all__ = [
'utils',
'transforms',
'api',
'TradingAlgorithm'
'TradingAlgorithm',
]
+17 -6
View File
@@ -174,19 +174,20 @@ class TradingAlgorithm(object):
self.algoscript = kwargs.pop('script', None)
self._initialize = None
self._analyze = None
if self.algoscript is not None:
self.ns = {}
exec_(self.algoscript, self.ns)
if 'initialize' not in self.ns:
raise ValueError('You must define an initialze function.')
self._initialize = self.ns.get('initialize', None)
if 'handle_data' not in self.ns:
raise ValueError('You must define a handle_data function.')
self._initialize = self.ns['initialize']
self._handle_data = self.ns['handle_data']
else:
self._handle_data = self.ns['handle_data']
# Optional analyze function, gets called after run
self._analyze = self.ns.get('analyze', None)
# If two functions are passed in assume initialize and
# handle_data are passed in.
elif kwargs.get('initialize', False) and kwargs.get('handle_data'):
if self.algoscript is not None:
raise ValueError('You can not set script and \
@@ -194,6 +195,7 @@ class TradingAlgorithm(object):
self._initialize = kwargs.pop('initialize')
self._handle_data = kwargs.pop('handle_data')
# If method not defined, NOOP
if self._initialize is None:
self._initialize = lambda x: None
@@ -216,6 +218,13 @@ class TradingAlgorithm(object):
self._handle_data(self, data)
def analyze(self, perf):
    """Invoke the optional user-supplied analyze hook on ``perf``.

    Does nothing when no hook was registered (``self._analyze`` is None).
    Otherwise the hook is called as ``hook(self, perf)`` inside a
    ``ZiplineAPI(self)`` context.
    """
    if self._analyze is not None:
        with ZiplineAPI(self):
            self._analyze(self, perf)
def __repr__(self):
"""
N.B. this does not yet represent a string that can be used
@@ -420,6 +429,8 @@ class TradingAlgorithm(object):
# convert perf dict to pandas dataframe
daily_stats = self._create_daily_stats(perfs)
self.analyze(daily_stats)
return daily_stats
def _create_daily_stats(self, perfs):
+3
View File
@@ -0,0 +1,3 @@
[Defaults]
algofile=buyapple.py
symbols=AAPL
+6 -30
View File
@@ -14,35 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import matplotlib.pyplot as plt
from datetime import datetime
import pytz
from zipline.api import order, record
from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import load_from_yahoo
def initialize(context):
    """Algorithm setup hook; this example needs no setup, so it does nothing."""
    return None
class BuyApple(TradingAlgorithm):
    """Minimal TradingAlgorithm subclass that does nothing but order one
    AAPL share each time ``handle_data`` is called.
    """

    def initialize(self):
        """No setup is needed for this algorithm."""

    def handle_data(self, data):
        """Override of ``handle_data``: order 'AAPL' with an amount of 1."""
        self.order('AAPL', 1)
if __name__ == '__main__':
start = datetime(2008, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start,
end=end)
simple_algo = BuyApple()
results = simple_algo.run(data)
ax1 = plt.subplot(211)
results.portfolio_value.plot(ax=ax1)
ax2 = plt.subplot(212, sharex=ax1)
data.AAPL.plot(ax=ax2)
plt.gcf().set_size_inches(18, 8)
def handle_data(context, data):
    """Order 10 shares of AAPL, then record the AAPL price from ``data``."""
    order('AAPL', 10)
    aapl_price = data['AAPL'].price
    record(AAPL=aapl_price)
+9
View File
@@ -0,0 +1,9 @@
import matplotlib.pyplot as plt
def analyze(context, perf):
    """Plot ``perf.portfolio_value`` above ``perf.AAPL`` and show the figure.

    The two stacked subplots share an x axis; the current figure is resized
    to 18x8 inches before being displayed.
    """
    top_axis = plt.subplot(211)
    perf.portfolio_value.plot(ax=top_axis)

    bottom_axis = plt.subplot(212, sharex=top_axis)
    perf.AAPL.plot(ax=bottom_axis)

    current_figure = plt.gcf()
    current_figure.set_size_inches(18, 8)
    plt.show()
+31 -77
View File
@@ -14,87 +14,41 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import matplotlib.pyplot as plt
"""Dual Moving Average Crossover algorithm.
from zipline.algorithm import TradingAlgorithm
from zipline.finance import trading
from zipline.transforms import MovingAverage
from zipline.utils.factory import load_from_yahoo
This algorithm buys apple once its short moving average crosses
its long moving average (indicating upwards momentum) and sells
its shares once the averages cross again (indicating downwards
momentum).
"""
from datetime import datetime
import pytz
from zipline.api import order_target, record, symbol
from collections import deque as moving_window
import numpy as np
def initialize(context):
    """Attach two empty bounded windows to ``context``.

    ``context.short_window`` holds at most 100 entries and
    ``context.long_window`` at most 300.
    """
    # Add 2 windows, one with a long window, one
    # with a short window.
    # Note that this is bound to change soon and will be easier.
    window_sizes = (('short_window', 100), ('long_window', 300))
    for attr_name, max_entries in window_sizes:
        setattr(context, attr_name, moving_window(maxlen=max_entries))
class DualMovingAverage(TradingAlgorithm):
"""Dual Moving Average Crossover algorithm.
def handle_data(context, data):
# Save price to window
context.short_window.append(data[symbol('AAPL')].price)
context.long_window.append(data[symbol('AAPL')].price)
This algorithm buys apple once its short moving average crosses
its long moving average (indicating upwards momentum) and sells
its shares once the averages cross again (indicating downwards
momentum).
# Compute averages
short_mavg = np.mean(context.short_window)
long_mavg = np.mean(context.long_window)
"""
def initialize(self, short_window=20, long_window=40):
# Add 2 mavg transforms, one with a long window, one
# with a short window.
self.add_transform(MovingAverage, 'short_mavg', ['price'],
window_length=short_window)
# Trading logic
if short_mavg > long_mavg:
order_target(symbol('AAPL'), 100)
elif short_mavg < long_mavg:
order_target(symbol('AAPL'), 0)
self.add_transform(MovingAverage, 'long_mavg', ['price'],
window_length=long_window)
# To keep track of whether we invested in the stock or not
self.invested = False
def handle_data(self, data):
self.short_mavg = data['AAPL'].short_mavg['price']
self.long_mavg = data['AAPL'].long_mavg['price']
self.buy = False
self.sell = False
if self.short_mavg > self.long_mavg and not self.invested:
self.order('AAPL', 5000)
self.invested = True
self.buy = True
elif self.short_mavg < self.long_mavg and self.invested:
self.order('AAPL', -5000)
self.invested = False
self.sell = True
self.record(short_mavg=self.short_mavg,
long_mavg=self.long_mavg,
buy=self.buy,
sell=self.sell)
if __name__ == '__main__':
start = datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2003, 1, 1, 0, 0, 0, 0, pytz.utc)
data = load_from_yahoo(stocks=['AAPL'], indexes={}, start=start,
end=end)
dma = DualMovingAverage()
results = dma.run(data)
br = trading.environment.benchmark_returns
bm_returns = br[(br.index >= start) & (br.index <= end)]
results['benchmark_returns'] = (1 + bm_returns).cumprod().values
results['algorithm_returns'] = (1 + results.returns).cumprod()
fig = plt.figure()
ax1 = fig.add_subplot(211, ylabel='cumulative returns')
results[['algorithm_returns', 'benchmark_returns']].plot(ax=ax1,
sharex=True)
ax2 = fig.add_subplot(212)
data['AAPL'].plot(ax=ax2, color='r')
results[['short_mavg', 'long_mavg']].plot(ax=ax2)
ax2.plot(results.ix[results.buy].index, results.short_mavg[results.buy],
'^', markersize=10, color='m')
ax2.plot(results.ix[results.sell].index, results.short_mavg[results.sell],
'v', markersize=10, color='k')
plt.legend(loc=0)
sharpe = [risk['sharpe'] for risk in dma.risk_report['one_month']]
print("Monthly Sharpe ratios: {0}".format(sharpe))
plt.gcf().set_size_inches(18, 8)
# Save values for later inspection
record(AAPL=data[symbol('AAPL')].price,
short_mavg=short_mavg,
long_mavg=long_mavg)
@@ -0,0 +1,20 @@
import matplotlib.pyplot as plt
def analyze(context, perf):
    """Plot portfolio value and the AAPL/moving-average series with
    buy and sell markers, then show the figure.

    Rows of ``perf`` whose ``transactions`` entry is non-empty are classified
    by the sign of the first transaction's ``'amount'``: positive rows are
    drawn as '^' markers, negative rows as 'v' markers, both placed on the
    short moving average.
    """
    figure = plt.figure()

    value_axis = figure.add_subplot(211)
    perf.portfolio_value.plot(ax=value_axis)

    price_axis = figure.add_subplot(212)
    perf['AAPL'].plot(ax=price_axis)
    perf[['short_mavg', 'long_mavg']].plot(ax=price_axis)

    # Keep only the rows on which at least one transaction happened.
    has_transactions = [t != [] for t in perf.transactions]
    perf_trans = perf.ix[has_transactions]

    # Only the first transaction of each row decides buy vs. sell.
    is_buy = [t[0]['amount'] > 0 for t in perf_trans.transactions]
    buys = perf_trans.ix[is_buy]
    is_sell = [t[0]['amount'] < 0 for t in perf_trans.transactions]
    sells = perf_trans.ix[is_sell]

    price_axis.plot(buys.index, perf.short_mavg.ix[buys.index],
                    '^', markersize=10, color='m')
    price_axis.plot(sells.index, perf.short_mavg.ix[sells.index],
                    'v', markersize=10, color='k')

    plt.legend(loc=0)
    plt.show()
+1 -1
View File
@@ -1,5 +1,5 @@
#
# Copyright 2013 Quantopian, Inc.
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+18
View File
@@ -0,0 +1,18 @@
#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cli import run_algo, parse_args
__all__ = ['run_algo', 'parse_args']
+121
View File
@@ -0,0 +1,121 @@
#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import ConfigParser
from copy import copy
import datetime
import pandas as pd
# Pygments is an optional dependency, used only to syntax-highlight the
# algorithm source when it is printed. PYGMENTS records whether the import
# succeeded. Only ImportError is caught, so unrelated failures (and
# KeyboardInterrupt/SystemExit) are not swallowed.
try:
    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import TerminalFormatter
    from pygments.styles import STYLE_MAP
    PYGMENTS = True
except ImportError:
    PYGMENTS = False
import zipline
# Built-in option defaults for the CLI. All values are strings, matching
# what a config file or the command line would supply.
DEFAULTS = dict(
    start='2012-01-01',
    end='2012-12-31',
    data_frequency='daily',
    capital_base='10e6',
    source='yahoo',
    symbols='AAPL',
)
def parse_args(argv):
    """Parse command-line arguments into a dict of run settings.

    Values are layered, lowest priority first: the module-level ``DEFAULTS``,
    then the ``[Defaults]`` section of the config file given with
    ``-c/--conf_file`` (if any), then options given explicitly in ``argv``.

    Parameters
    ----------
    argv : list of str
        Arguments to parse, without the program name.

    Returns
    -------
    dict
        ``vars()`` of the parsed namespace (includes a ``conf_file`` key).

    A config file that cannot be read is reported through the parser's
    ``error`` method, which prints a usage message and exits.
    """
    # Parse any conf_file specification
    # We make this parser with add_help=False so that
    # it doesn't parse -h and print help.
    conf_parser = argparse.ArgumentParser(
        # Don't mess with format of description
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Turn off help, so we print all options in response to -h
        add_help=False
    )
    conf_parser.add_argument("-c", "--conf_file",
                             help="Specify config file",
                             metavar="FILE")
    args, remaining_argv = conf_parser.parse_known_args(argv)

    defaults = copy(DEFAULTS)

    if args.conf_file:
        config = ConfigParser.SafeConfigParser()
        # read() silently skips files it cannot open and returns the list of
        # files it parsed; fail here instead of with a NoSectionError below.
        if not config.read([args.conf_file]):
            conf_parser.error(
                "Could not read config file: %s" % args.conf_file)
        defaults.update(dict(config.items("Defaults")))

    # Parse rest of arguments
    # Don't suppress add_help here so it will handle -h
    parser = argparse.ArgumentParser(
        # Inherit options from config_parser
        description="Zipline version %s." % zipline.__version__,
        parents=[conf_parser]
    )

    # Must be set before the add_argument calls so each option picks up
    # its layered default.
    parser.set_defaults(**defaults)

    parser.add_argument('--algofile', '-f')
    parser.add_argument('--data-frequency',
                        choices=('minute', 'daily'))
    parser.add_argument('--start', '-s')
    parser.add_argument('--end', '-e')
    parser.add_argument('--capital_base')
    parser.add_argument('--source', choices=('yahoo',))
    parser.add_argument('--symbols')

    args = parser.parse_args(remaining_argv)

    return vars(args)
def run_algo(print_algo=True, **kwargs):
    """Load data, run the algorithm script and return its performance.

    Parameters
    ----------
    print_algo : bool
        When true, print the algorithm source (syntax-highlighted if
        Pygments was importable) before running it.
    **kwargs
        Run settings as produced by ``parse_args``. Keys read here:
        ``start``, ``end``, ``symbols`` (comma-separated string), ``source``,
        ``algofile`` and ``capital_base``.

    Returns
    -------
    The object returned by ``TradingAlgorithm.run`` for the loaded data.

    Raises
    ------
    ValueError
        If no algorithm file was given.
    NotImplementedError
        If ``source`` is anything other than ``'yahoo'``.

    If a file named ``<algofile without extension>_analyze.py`` exists, its
    contents are appended to the algorithm source before execution.
    """
    algo_fname = kwargs['algofile']
    if not algo_fname:
        # Fail before downloading any data rather than inside open().
        raise ValueError('No algorithm file specified; pass --algofile '
                         'or set algofile in the config file.')

    start = pd.Timestamp(kwargs['start'], tz='UTC')
    end = pd.Timestamp(kwargs['end'], tz='UTC')

    # Tolerate whitespace around symbols ("AAPL, MSFT") and stray commas.
    symbols = [sym.strip() for sym in kwargs['symbols'].split(',')
               if sym.strip()]

    if kwargs['source'] == 'yahoo':
        source = zipline.data.load_bars_from_yahoo(stocks=symbols,
                                                   start=start, end=end)
    else:
        raise NotImplementedError(
            'Source %s not implemented.' % kwargs['source'])

    with open(algo_fname, 'r') as fd:
        algo_text = fd.read()

    analyze_fname = os.path.splitext(algo_fname)[0] + '_analyze.py'
    if os.path.exists(analyze_fname):
        with open(analyze_fname, 'r') as fd:
            # Simply append, but make sure the last line of the algorithm
            # and the first line of the analyze file cannot fuse together.
            if algo_text and not algo_text.endswith('\n'):
                algo_text += '\n'
            algo_text += fd.read()

    if print_algo:
        if PYGMENTS:
            highlight(algo_text, PythonLexer(), TerminalFormatter(),
                      outfile=sys.stdout)
        else:
            print(algo_text)

    algo = zipline.TradingAlgorithm(script=algo_text,
                                    capital_base=float(kwargs['capital_base']))

    perf = algo.run(source)

    return perf