diff --git a/etc/jenkins.sh b/etc/jenkins.sh index 5b304092..96d4745f 100755 --- a/etc/jenkins.sh +++ b/etc/jenkins.sh @@ -29,7 +29,7 @@ pip freeze paver apidocs html #run all the tests in test. see setup.cfg for flags. -nosetests +nosetests --config=jenkins_setup.cfg #run pylint checks cp ./pylint.rcfile /mnt/jenkins/.pylintrc #default location for config file... diff --git a/jenkins_setup.cfg b/jenkins_setup.cfg new file mode 100644 index 00000000..a6f24289 --- /dev/null +++ b/jenkins_setup.cfg @@ -0,0 +1,12 @@ +[nosetests] +verbosity=2 +detailed-errors=1 + +with-xcoverage=1 +cover-package=zipline +cover-erase=1 +cover-html=1 +cover-html-dir=docs/_build/html/cover +with-xunit=1 + + diff --git a/notebooks/Experimenting with Frames.ipynb b/notebooks/Experimenting with Frames.ipynb new file mode 100644 index 00000000..50e5b5a5 --- /dev/null +++ b/notebooks/Experimenting with Frames.ipynb @@ -0,0 +1,352 @@ +{ + "metadata": { + "name": "Experimenting with Frames" + }, + "nbformat": 3, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "source": [ + "Performance Tracking" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import datetime", + "import pandas", + "import pytz", + "", + "import zipline.test.factory as factory", + "import zipline.finance.performance as perf", + "import zipline.protocol as zp", + "import zipline.finance.risk as risk", + "import zipline.finance.trading as trading" + ], + "language": "python", + "outputs": [], + "prompt_number": 38 + }, + { + "cell_type": "heading", + "source": [ + "Create a simulated trade history using the test factory" + ] + }, + { + "cell_type": "markdown", + "source": [ + "For any backtesting, zipline relies on a TradingEnvironment object. Trading environment holds essential facts: ", + " ", + " - start and end times for the simulation.", + " - historical daily returns for your benchmark.", + " - historical treasury curves", + " - an assumed capital base for your portfolio", + " - a calendar of trading days based on your benchmark", + "", + "zipline ships with a compressed archives of the S&P daily returns, and US treasury curves to facilitate standalone development and testing. In the next cell we instantiate the environment using these defaults. You can see more of this in zipline/test/test_perf_tracking.py" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "benchmark_returns, treasury_curves = factory.load_market_data()", + " ", + "trading_environment = risk.TradingEnvironment(benchmark_returns, treasury_curves)" + ], + "language": "python", + "outputs": [], + "prompt_number": 39 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "trade_count = 100", + "sid = 133", + "price = 10.1 ", + "price_list = [price] * trade_count", + "volume = [100] * trade_count", + "start_date = datetime.datetime.strptime(\"01/01/2011\",\"%m/%d/%Y\")", + "start_date = start_date.replace(tzinfo=pytz.utc)", + "trade_time_increment = datetime.timedelta(days=1)", + "", + "trade_history = factory.create_trade_history( ", + " sid, ", + " price_list, ", + " volume, ", + " start_date, ", + " trade_time_increment, ", + " trading_environment ", + ")", + "", + "sid2 = 134", + "price2 = 12.12", + "price2_list = [price2] * trade_count ", + "trade_history2 = factory.create_trade_history( ", + " sid2, ", + " price2_list, ", + " volume, ", + " start_date, ", + " trade_time_increment, ", + " trading_environment ", + ")", + " ", + "trade_history.extend(trade_history2) ", + "trade_history = sorted(trade_history, key=lambda x: x.dt)" + ], + "language": "python", + "outputs": [], + "prompt_number": 40 + }, + { + "cell_type": "markdown", + "source": [ + "Now that we have a simulated history of trades for two companies and a corresponding trading environment, we can create a dataframe of trades." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pandas.DataFrame(index = ['price', 'volume', 'dt'])", + "for event in trade_history:", + " series = event.as_series()", + " #df.index = df.index.tolist().append(event.sid)", + " #series.name = event.sid", + " df[event.sid] = series" + ], + "language": "python", + "outputs": [], + "prompt_number": 92 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 93, + "text": [ + " 133 134", + "price 10.1 12.12", + "volume 100 100", + "dt 2011-04-08 00:00:00+00:00 2011-04-08 00:00:00+00:00" + ] + } + ], + "prompt_number": 93 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t = df.transpose()", + "df_t" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 94, + "text": [ + " price volume dt", + "133 10.1 100 2011-04-08 00:00:00+00:00", + "134 12.12 100 2011-04-08 00:00:00+00:00" + ] + } + ], + "prompt_number": 94 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df[133]" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 56, + "text": [ + "sid 133", + "volume 100", + "dt 2011-04-08 00:00:00+00:00", + "price 10.1", + "changed NaN", + "Name: 133" + ] + } + ], + "prompt_number": 56 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['price']" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 57, + "text": [ + "133 10.1", + "134 12.12", + "Name: price" + ] + } + ], + "prompt_number": 57 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['price'].max()" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 50, + "text": [ + "12.12" + ] + } + ], + "prompt_number": 50 + }, + { + "cell_type": "code", + "collapsed": true, + "input": [ + "last = trade_history[23].dt" + ], + "language": "python", + "outputs": [], + "prompt_number": 51 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['changed'] = df_t['dt'] > last" + ], + "language": "python", + "outputs": [], + "prompt_number": 53 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 54, + "text": [ + " sid volume dt price changed", + "133 133 100 2011-04-08 00:00:00+00:00 10.1 True", + "134 134 100 2011-04-08 00:00:00+00:00 12.12 True" + ] + } + ], + "prompt_number": 54 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t.index" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 59, + "text": [ + "Int64Index([133, 134])" + ] + } + ], + "prompt_number": 59 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df.index" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 60, + "text": [ + "Index([sid, volume, dt, price, changed], dtype=object)" + ] + } + ], + "prompt_number": 60 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df.columns" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 61, + "text": [ + "Int64Index([133, 134])" + ] + } + ], + "prompt_number": 61 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t.columns" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 62, + "text": [ + "Index([sid, volume, dt, price, changed], dtype=object)" + ] + } + ], + "prompt_number": 62 + }, + { + "cell_type": "code", + "collapsed": true, + "input": [], + "language": "python", + "outputs": [] + } + ] + } + ] +} \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index d14c258b..740ef396 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,15 +2,6 @@ verbosity=2 detailed-errors=1 -with-xcoverage=1 -cover-package=zipline -cover-erase=1 -cover-html=1 -cover-html-dir=docs/_build/html/cover -with-xunit=1 - - # Drop into debugger on failure #pdb=0 #pdb-failures=0 - diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index 6039d584..9a46f6bb 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -79,7 +79,7 @@ Position Tracking | last_sale_date | datetime of the last trade of the position's | | | security on the exchange | +-----------------+----------------------------------------------------+ - | timestamp | System time evevent occurs in zipilne | + | timestamp | System time event occurs in zipilne | +-----------------+----------------------------------------------------+ Performance Period @@ -133,13 +133,15 @@ class PerformanceTracker(): """ - def __init__(self, period_start, period_end, capital_base, trading_environment): - + def __init__(self, trading_environment): + + + self.trading_environment = trading_environment self.trading_day = datetime.timedelta(hours = 6, minutes = 30) self.calendar_day = datetime.timedelta(hours = 24) - self.period_start = period_start - self.period_end = period_end + self.period_start = self.trading_environment.period_start + self.period_end = self.trading_environment.period_end self.market_open = self.period_start self.market_close = self.market_open + self.trading_day self.progress = 0.0 @@ -147,25 +149,27 @@ class PerformanceTracker(): self.day_count = 0 self.cumulative_capital_used = 0.0 self.max_capital_used = 0.0 - self.capital_base = capital_base - self.trading_environment = trading_environment + self.capital_base = self.trading_environment.capital_base self.returns = [] self.txn_count = 0 self.event_count = 0 self.result_stream = None + self.last_dict = None self.cumulative_performance = PerformancePeriod( {}, - capital_base, - starting_cash = capital_base + self.capital_base, + starting_cash = self.capital_base ) self.todays_performance = PerformancePeriod( {}, - capital_base, - starting_cash = capital_base + self.capital_base, + starting_cash = self.capital_base ) + def get_portfolio(self): + return self.cumulative_performance.to_namedict() def publish_to(self, zmq_socket, context=None): """ @@ -201,17 +205,8 @@ class PerformanceTracker(): 'cumulative_risk_metrics' : self.cumulative_risk_metrics.to_dict(), 'timestamp' : datetime.datetime.now(), } - - def update(self, event_frame): - for dt, event_series in event_frame.iteritems(): - data = {} - data.update(event_series) - event = zp.namedict(data) - self.process_event(event) - + def process_event(self, event): - qutil.LOGGER.debug("series is " + str(event)) - self.event_count += 1 if(event.dt >= self.market_close): @@ -407,10 +402,10 @@ class PerformancePeriod(): def to_dict(self): """ - Creates a dictionary representing the state of this performance period - Returns a dict object of the form: - + Creates a dictionary representing the state of this performance + period. See header comments for a detailed description. """ + positions = self.get_positions() return { 'ending_value' : self.ending_value, @@ -418,6 +413,41 @@ class PerformancePeriod(): 'starting_value' : self.starting_value, 'starting_cash' : self.starting_cash, 'ending_cash' : self.ending_cash, - 'positions' : self.positions, + 'positions' : positions, 'timestamp' : datetime.datetime.now(), } + + def to_namedict(self): + """ + Creates a namedict representing the state of this perfomance period. + Properties are the same as the results of to_dict. See header comments + for a detailed description. + + """ + positions = self.get_positions(namedicted=True) + + positions = zp.namedict(positions) + + return zp.namedict({ + 'ending_value' : self.ending_value, + 'capital_used' : self.period_capital_used, + 'starting_value' : self.starting_value, + 'starting_cash' : self.starting_cash, + 'ending_cash' : self.ending_cash, + 'positions' : positions + }) + + def get_positions(self, namedicted=False): + positions = {} + for sid, pos in self.positions.iteritems(): + cur = pos.to_dict() + if namedicted: + positions[sid] = zp.namedict(cur) + else: + positions[sid] = cur + + return positions + + + + diff --git a/zipline/finance/risk.py b/zipline/finance/risk.py index 1a149f3a..e9ea7780 100644 --- a/zipline/finance/risk.py +++ b/zipline/finance/risk.py @@ -182,7 +182,6 @@ class RiskMetrics(): return period_returns, returns def calculate_volatility(self, daily_returns): - #qutil.LOGGER.debug("trading days {td}".format(td=self.trading_days)) return np.std(daily_returns, ddof=1) * math.sqrt(self.trading_days) def calculate_sharpe(self): @@ -379,11 +378,24 @@ class RiskReport(): class TradingEnvironment(object): - def __init__(self, benchmark_returns, treasury_curves): + def __init__( + self, + benchmark_returns, + treasury_curves, + period_start=None, + period_end=None, + capital_base=None, + frame_index=None + ): + self.trading_days = [] self.trading_day_map = {} self.treasury_curves = treasury_curves self.benchmark_returns = benchmark_returns + self.frame_index = frame_index + self.period_start = period_start + self.period_end = period_end + self.capital_base = capital_base for bm in benchmark_returns: self.trading_days.append(bm.date) self.trading_day_map[bm.date] = bm diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 0dd77da4..6c3d1318 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -8,19 +8,27 @@ from zmq.core.poll import select import zipline.messaging as qmsg import zipline.util as qutil import zipline.protocol as zp +import zipline.finance.performance as perf class TradeSimulationClient(qmsg.Component): - def __init__(self, simulation_dt): + def __init__(self, trading_environment): qmsg.Component.__init__(self) - self.received_count = 0 - self.prev_dt = None - self.event_queue = None - self.event_callbacks = [] - self.txn_count = 0 - self.current_dt = simulation_dt - self.last_iteration_duration = datetime.timedelta(seconds=0) - self.event_frame = None + self.received_count = 0 + self.prev_dt = None + self.event_queue = None + self.event_callbacks = [] + self.txn_count = 0 + self.trading_environment = trading_environment + self.current_dt = trading_environment.period_start + self.last_iteration_dur = datetime.timedelta(seconds=0) + + assert self.trading_environment.frame_index != None + self.event_frame = pandas.DataFrame( + index=self.trading_environment.frame_index + ) + + self.perf = perf.PerformanceTracker(self.trading_environment) @property def get_id(self): @@ -67,9 +75,9 @@ class TradeSimulationClient(qmsg.Component): self.run_callbacks() #update time based on receipt of the order - self.last_iteration_duration = datetime.datetime.utcnow() - event_start + self.last_iteration_dur = datetime.datetime.utcnow() - event_start - self.current_dt = self.current_dt + self.last_iteration_duration + self.current_dt = self.current_dt + self.last_iteration_dur #signal done to order source. self.order_socket.send(str(zp.ORDER_PROTOCOL.BREAK)) @@ -95,15 +103,16 @@ class TradeSimulationClient(qmsg.Component): self.order_socket.send(str(zp.ORDER_PROTOCOL.DONE)) def queue_event(self, event): + self.perf.process_event(event) if self.event_queue == None: - self.event_queue = {} + self.event_queue = [] series = event.as_series() - self.event_queue[event.dt] = series + self.event_queue.append(series) def get_frame(self): - frame = pandas.DataFrame(self.event_queue) - self.event_queue = None - return frame + for event in self.event_queue: + self.event_frame[event['sid']] = event + return self.event_frame class OrderDataSource(qmsg.DataSource): """DataSource that relays orders from the client""" diff --git a/zipline/protocol.py b/zipline/protocol.py index 9fc98238..e60177aa 100644 --- a/zipline/protocol.py +++ b/zipline/protocol.py @@ -119,7 +119,6 @@ import numbers import datetime import pytz import copy -import pandas from collections import namedtuple from protocol_utils import Enum, FrameExceptionFactory, namedict diff --git a/zipline/protocol_utils.py b/zipline/protocol_utils.py index 4b38fdda..ba805b3b 100644 --- a/zipline/protocol_utils.py +++ b/zipline/protocol_utils.py @@ -1,8 +1,7 @@ import copy +import pandas from ctypes import Structure, c_ubyte -from pandas import Series - def Enum(*options): """ Fast enums are very important when we want really tight zmq @@ -87,6 +86,8 @@ class namedict(object): def has_attr(self, name): return self.__dict__.has_key(name) - + def as_series(self): - return Series(self.__dict__, self.__dict__.keys()) + s = pandas.Series(self.__dict__) + s.name = self.sid + return s diff --git a/zipline/test/client.py b/zipline/test/client.py index 0167bf7b..7ec121c9 100644 --- a/zipline/test/client.py +++ b/zipline/test/client.py @@ -103,9 +103,8 @@ class TestAlgorithm(): event = zp.namedict(data) #place an order for 100 shares of sid:133 if self.incr < self.count: - if event.source_id != zp.FINANCE_COMPONENT.ORDER_SOURCE: - self.trading_client.order(self.sid, self.amount) - self.incr += 1 + self.trading_client.order(self.sid, self.amount) + self.incr += 1 elif not self.done: self.trading_client.signal_order_done() self.done = True diff --git a/zipline/test/test_finance.py b/zipline/test/test_finance.py index 2542aa2e..b1413800 100644 --- a/zipline/test/test_finance.py +++ b/zipline/test/test_finance.py @@ -207,7 +207,13 @@ class FinanceTestCase(TestCase): set1 = SpecificEquityTrades("flat-133", trade_history) - trading_client = TradeSimulationClient(start_date) + self.trading_environment.period_start = trade_history[0].dt + self.trading_environment.period_end = trade_history[-1].dt + self.trading_environment.capital_base = 10000 + self.trading_environment.frame_index = ['sid', 'volume', 'dt', \ + 'price', 'changed'] + + trading_client = TradeSimulationClient(self.trading_environment) #client will send 10 orders for 100 shares of 133 test_algo = TestAlgorithm(133, 100, 10, trading_client) @@ -280,25 +286,25 @@ class FinanceTestCase(TestCase): volume, start_date, trade_time_increment, - self.trading_environment ) - + self.trading_environment + ) + + + self.trading_environment.period_start = trade_history[0].dt + self.trading_environment.period_end = trade_history[-1].dt + self.trading_environment.capital_base = 10000 + self.trading_environment.frame_index = ['sid', 'volume', 'dt', \ + 'price', 'changed'] + set1 = SpecificEquityTrades("flat-133", trade_history) #client sill send 10 orders for 100 shares of 133 - trading_client = TradeSimulationClient(start_date) + trading_client = TradeSimulationClient(self.trading_environment) test_algo = TestAlgorithm(133, 100, 10, trading_client) order_source = OrderDataSource() transaction_sim = TransactionSimulator() - perf_tracker = perf.PerformanceTracker( - trade_history[0]['dt'], - trade_history[-1]['dt'], - 1000000.0, - self.trading_environment) - #register perf_tracker to receive callbacks from the client. - trading_client.add_event_callback(perf_tracker.update) - sim.register_components([ trading_client, order_source, @@ -339,19 +345,19 @@ class FinanceTestCase(TestCase): self.assertEqual( transaction_sim.txn_count, - perf_tracker.txn_count, + trading_client.perf.txn_count, "The perf tracker should handle the same number of transactions \ as the simulator emits." ) self.assertEqual( - len(perf_tracker.cumulative_performance.positions), + len(trading_client.perf.cumulative_performance.positions), 1, "Portfolio should have one position." ) self.assertEqual( - perf_tracker.cumulative_performance.positions[133].sid, + trading_client.perf.cumulative_performance.positions[133].sid, 133, "Portfolio should have one position in 133." ) diff --git a/zipline/test/test_perf_tracking.py b/zipline/test/test_perf_tracking.py index 218f3113..36a99ae8 100644 --- a/zipline/test/test_perf_tracking.py +++ b/zipline/test/test_perf_tracking.py @@ -506,34 +506,46 @@ shares in position" trade_count = 100 sid = 133 - price = [10.1] * trade_count + price = 10.1 + price_list = [price] * trade_count volume = [100] * trade_count start_date = datetime.datetime.strptime("01/01/2011","%m/%d/%Y") start_date = start_date.replace(tzinfo=pytz.utc) trade_time_increment = datetime.timedelta(days=1) trade_history = factory.create_trade_history( sid, - price, + price_list, volume, start_date, trade_time_increment, self.trading_environment ) - trade_client = TradeSimulationClient(start_date) - start = trade_history[0].dt - end = trade_history[-1].dt - tracker = perf.PerformanceTracker( - start, - end, - 1000.0, - self.trading_environment + sid2 = 134 + price2 = 12.12 + price2_list = [price2] * trade_count + trade_history2 = factory.create_trade_history( + sid2, + price2_list, + volume, + start_date, + trade_time_increment, + self.trading_environment ) + trade_history.extend(trade_history2) + + self.trading_environment.period_start = trade_history[0].dt + self.trading_environment.period_end = trade_history[-1].dt + self.trading_environment.capital_base = 1000.0 + self.trading_environment.frame_index = ['sid', 'volume', 'dt', \ + 'price', 'changed'] + client = TradeSimulationClient(self.trading_environment) + for event in trade_history: #create a transaction for all but - #one trade, to simulate None transaction - if(event.dt != start): + #first trade in each sid, to simulate None transaction + if(event.dt != self.trading_environment.period_start): txn = zp.namedict({ 'sid' : event.sid, 'amount' : -25, @@ -543,17 +555,19 @@ shares in position" }) else: txn = None - event[zp.TRANSFORM_TYPE.TRANSACTION] = txn - trade_client.queue_event(event) + event[zp.TRANSFORM_TYPE.TRANSACTION] = txn + client.queue_event(event) - df = trade_client.get_frame() - tracker.update(df) + df = client.get_frame() - #we skip one trade, to test case of None transaction - txn_count = len(trade_history) - 1 - self.assertEqual(tracker.txn_count, txn_count) + self.assertEqual(df[133]['price'], price) + self.assertEqual(df[134]['price'], price2) - cumulative_pos = tracker.cumulative_performance.positions[sid] - expected_size = txn_count * -25 + #we skip two trades, to test case of None transaction + txn_count = len(trade_history) - 2 + self.assertEqual(client.perf.txn_count, txn_count) + + cumulative_pos = client.perf.cumulative_performance.positions[sid] + expected_size = txn_count / 2 * -25 self.assertEqual(cumulative_pos.amount, expected_size) \ No newline at end of file diff --git a/zipline/test/test_risk.py b/zipline/test/test_risk.py index 0d293b5e..57264ad6 100644 --- a/zipline/test/test_risk.py +++ b/zipline/test/test_risk.py @@ -11,6 +11,9 @@ class Risk(unittest.TestCase): def setUp(self): qutil.configure_logging() + start_date = datetime.datetime(year=2006, month=1, day=1, tzinfo=pytz.utc) + end_date = datetime.datetime(year=2006, month=12, day=31, tzinfo=pytz.utc) + self.benchmark_returns, self.treasury_curves = \ factory.load_market_data() @@ -23,9 +26,9 @@ class Risk(unittest.TestCase): self.oneday = datetime.timedelta(days=1) self.tradingday = datetime.timedelta(hours=6, minutes=30) self.dt = datetime.datetime.utcnow() - start_date = datetime.datetime(year=2006, month=1, day=1, tzinfo=pytz.utc) + self.algo_returns_06 = factory.create_returns_from_list(RETURNS, start_date, self.trading_calendar) - end_date = datetime.datetime(year=2006, month=12, day=31, tzinfo=pytz.utc) + self.metrics_06 = risk.RiskReport(self.algo_returns_06, self.trading_calendar) def tearDown(self):