From 62d0422eb4c4566e3b8813a05e529dfd0e0261ff Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 15 Mar 2012 17:38:22 -0400 Subject: [PATCH] a notebook to help explore the dataframe produced for the algo client. --- notebooks/Experimenting with Frames.ipynb | 352 ++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 notebooks/Experimenting with Frames.ipynb diff --git a/notebooks/Experimenting with Frames.ipynb b/notebooks/Experimenting with Frames.ipynb new file mode 100644 index 00000000..50e5b5a5 --- /dev/null +++ b/notebooks/Experimenting with Frames.ipynb @@ -0,0 +1,352 @@ +{ + "metadata": { + "name": "Experimenting with Frames" + }, + "nbformat": 3, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "source": [ + "Performance Tracking" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import datetime", + "import pandas", + "import pytz", + "", + "import zipline.test.factory as factory", + "import zipline.finance.performance as perf", + "import zipline.protocol as zp", + "import zipline.finance.risk as risk", + "import zipline.finance.trading as trading" + ], + "language": "python", + "outputs": [], + "prompt_number": 38 + }, + { + "cell_type": "heading", + "source": [ + "Create a simulated trade history using the test factory" + ] + }, + { + "cell_type": "markdown", + "source": [ + "For any backtesting, zipline relies on a TradingEnvironment object. The trading environment holds essential facts: ", + " ", + " - start and end times for the simulation.", + " - historical daily returns for your benchmark.", + " - historical treasury curves", + " - an assumed capital base for your portfolio", + " - a calendar of trading days based on your benchmark", + "", + "zipline ships with compressed archives of the S&P daily returns and US treasury curves to facilitate standalone development and testing. In the next cell we instantiate the environment using these defaults. 
You can see more of this in zipline/test/test_perf_tracking.py" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "benchmark_returns, treasury_curves = factory.load_market_data()", + " ", + "trading_environment = risk.TradingEnvironment(benchmark_returns, treasury_curves)" + ], + "language": "python", + "outputs": [], + "prompt_number": 39 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "trade_count = 100", + "sid = 133", + "price = 10.1 ", + "price_list = [price] * trade_count", + "volume = [100] * trade_count", + "start_date = datetime.datetime.strptime(\"01/01/2011\",\"%m/%d/%Y\")", + "start_date = start_date.replace(tzinfo=pytz.utc)", + "trade_time_increment = datetime.timedelta(days=1)", + "", + "trade_history = factory.create_trade_history( ", + " sid, ", + " price_list, ", + " volume, ", + " start_date, ", + " trade_time_increment, ", + " trading_environment ", + ")", + "", + "sid2 = 134", + "price2 = 12.12", + "price2_list = [price2] * trade_count ", + "trade_history2 = factory.create_trade_history( ", + " sid2, ", + " price2_list, ", + " volume, ", + " start_date, ", + " trade_time_increment, ", + " trading_environment ", + ")", + " ", + "trade_history.extend(trade_history2) ", + "trade_history = sorted(trade_history, key=lambda x: x.dt)" + ], + "language": "python", + "outputs": [], + "prompt_number": 40 + }, + { + "cell_type": "markdown", + "source": [ + "Now that we have a simulated history of trades for two companies and a corresponding trading environment, we can create a dataframe of trades." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df = pandas.DataFrame(index = ['price', 'volume', 'dt'])", + "for event in trade_history:", + " series = event.as_series()", + " #df.index = df.index.tolist().append(event.sid)", + " #series.name = event.sid", + " df[event.sid] = series" + ], + "language": "python", + "outputs": [], + "prompt_number": 92 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 93, + "text": [ + " 133 134", + "price 10.1 12.12", + "volume 100 100", + "dt 2011-04-08 00:00:00+00:00 2011-04-08 00:00:00+00:00" + ] + } + ], + "prompt_number": 93 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t = df.transpose()", + "df_t" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 94, + "text": [ + " price volume dt", + "133 10.1 100 2011-04-08 00:00:00+00:00", + "134 12.12 100 2011-04-08 00:00:00+00:00" + ] + } + ], + "prompt_number": 94 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df[133]" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 56, + "text": [ + "sid 133", + "volume 100", + "dt 2011-04-08 00:00:00+00:00", + "price 10.1", + "changed NaN", + "Name: 133" + ] + } + ], + "prompt_number": 56 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['price']" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 57, + "text": [ + "133 10.1", + "134 12.12", + "Name: price" + ] + } + ], + "prompt_number": 57 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['price'].max()" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 50, + "text": [ + "12.12" + ] + } + ], + "prompt_number": 50 + }, + { + "cell_type": "code", + "collapsed": true, + "input": [ + "last = 
trade_history[23].dt" + ], + "language": "python", + "outputs": [], + "prompt_number": 51 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t['changed'] = df_t['dt'] > last" + ], + "language": "python", + "outputs": [], + "prompt_number": 53 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 54, + "text": [ + " sid volume dt price changed", + "133 133 100 2011-04-08 00:00:00+00:00 10.1 True", + "134 134 100 2011-04-08 00:00:00+00:00 12.12 True" + ] + } + ], + "prompt_number": 54 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t.index" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 59, + "text": [ + "Int64Index([133, 134])" + ] + } + ], + "prompt_number": 59 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df.index" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 60, + "text": [ + "Index([sid, volume, dt, price, changed], dtype=object)" + ] + } + ], + "prompt_number": 60 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df.columns" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 61, + "text": [ + "Int64Index([133, 134])" + ] + } + ], + "prompt_number": 61 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_t.columns" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 62, + "text": [ + "Index([sid, volume, dt, price, changed], dtype=object)" + ] + } + ], + "prompt_number": 62 + }, + { + "cell_type": "code", + "collapsed": true, + "input": [], + "language": "python", + "outputs": [] + } + ] + } + ] +} \ No newline at end of file