Merge branch 'master' into dataflow

Conflicts:
	zipline/finance/risk.py
	zipline/protocol_utils.py
This commit is contained in:
Stephen Diehl
2012-03-16 15:04:23 -04:00
13 changed files with 527 additions and 99 deletions
+1 -1
View File
@@ -29,7 +29,7 @@ pip freeze
paver apidocs html
#run all the tests in test. see setup.cfg for flags.
nosetests
nosetests --config=jenkins_setup.cfg
#run pylint checks
cp ./pylint.rcfile /mnt/jenkins/.pylintrc #default location for config file...
+12
View File
@@ -0,0 +1,12 @@
[nosetests]
verbosity=2
detailed-errors=1
with-xcoverage=1
cover-package=zipline
cover-erase=1
cover-html=1
cover-html-dir=docs/_build/html/cover
with-xunit=1
+352
View File
@@ -0,0 +1,352 @@
{
"metadata": {
"name": "Experimenting with Frames"
},
"nbformat": 3,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"source": [
"Performance Tracking"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import datetime",
"import pandas",
"import pytz",
"",
"import zipline.test.factory as factory",
"import zipline.finance.performance as perf",
"import zipline.protocol as zp",
"import zipline.finance.risk as risk",
"import zipline.finance.trading as trading"
],
"language": "python",
"outputs": [],
"prompt_number": 38
},
{
"cell_type": "heading",
"source": [
"Create a simulated trade history using the test factory"
]
},
{
"cell_type": "markdown",
"source": [
"For any backtesting, zipline relies on a TradingEnvironment object. Trading environment holds essential facts: ",
" ",
" - start and end times for the simulation.",
" - historical daily returns for your benchmark.",
" - historical treasury curves",
" - an assumed capital base for your portfolio",
" - a calendar of trading days based on your benchmark",
"",
"zipline ships with a compressed archives of the S&P daily returns, and US treasury curves to facilitate standalone development and testing. In the next cell we instantiate the environment using these defaults. You can see more of this in zipline/test/test_perf_tracking.py"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"benchmark_returns, treasury_curves = factory.load_market_data()",
" ",
"trading_environment = risk.TradingEnvironment(benchmark_returns, treasury_curves)"
],
"language": "python",
"outputs": [],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"trade_count = 100",
"sid = 133",
"price = 10.1 ",
"price_list = [price] * trade_count",
"volume = [100] * trade_count",
"start_date = datetime.datetime.strptime(\"01/01/2011\",\"%m/%d/%Y\")",
"start_date = start_date.replace(tzinfo=pytz.utc)",
"trade_time_increment = datetime.timedelta(days=1)",
"",
"trade_history = factory.create_trade_history( ",
" sid, ",
" price_list, ",
" volume, ",
" start_date, ",
" trade_time_increment, ",
" trading_environment ",
")",
"",
"sid2 = 134",
"price2 = 12.12",
"price2_list = [price2] * trade_count ",
"trade_history2 = factory.create_trade_history( ",
" sid2, ",
" price2_list, ",
" volume, ",
" start_date, ",
" trade_time_increment, ",
" trading_environment ",
")",
" ",
"trade_history.extend(trade_history2) ",
"trade_history = sorted(trade_history, key=lambda x: x.dt)"
],
"language": "python",
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "markdown",
"source": [
"Now that we have a simulated history of trades for two companies and a corresponding trading environment, we can create a dataframe of trades."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pandas.DataFrame(index = ['price', 'volume', 'dt'])",
"for event in trade_history:",
" series = event.as_series()",
" #df.index = df.index.tolist().append(event.sid)",
" #series.name = event.sid",
" df[event.sid] = series"
],
"language": "python",
"outputs": [],
"prompt_number": 92
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 93,
"text": [
" 133 134",
"price 10.1 12.12",
"volume 100 100",
"dt 2011-04-08 00:00:00+00:00 2011-04-08 00:00:00+00:00"
]
}
],
"prompt_number": 93
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t = df.transpose()",
"df_t"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 94,
"text": [
" price volume dt",
"133 10.1 100 2011-04-08 00:00:00+00:00",
"134 12.12 100 2011-04-08 00:00:00+00:00"
]
}
],
"prompt_number": 94
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[133]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 56,
"text": [
"sid 133",
"volume 100",
"dt 2011-04-08 00:00:00+00:00",
"price 10.1",
"changed NaN",
"Name: 133"
]
}
],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t['price']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 57,
"text": [
"133 10.1",
"134 12.12",
"Name: price"
]
}
],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t['price'].max()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 50,
"text": [
"12.12"
]
}
],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": true,
"input": [
"last = trade_history[23].dt"
],
"language": "python",
"outputs": [],
"prompt_number": 51
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t['changed'] = df_t['dt'] > last"
],
"language": "python",
"outputs": [],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 54,
"text": [
" sid volume dt price changed",
"133 133 100 2011-04-08 00:00:00+00:00 10.1 True",
"134 134 100 2011-04-08 00:00:00+00:00 12.12 True"
]
}
],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t.index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 59,
"text": [
"Int64Index([133, 134])"
]
}
],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 60,
"text": [
"Index([sid, volume, dt, price, changed], dtype=object)"
]
}
],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.columns"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 61,
"text": [
"Int64Index([133, 134])"
]
}
],
"prompt_number": 61
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_t.columns"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 62,
"text": [
"Index([sid, volume, dt, price, changed], dtype=object)"
]
}
],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": true,
"input": [],
"language": "python",
"outputs": []
}
]
}
]
}
-9
View File
@@ -2,15 +2,6 @@
verbosity=2
detailed-errors=1
with-xcoverage=1
cover-package=zipline
cover-erase=1
cover-html=1
cover-html-dir=docs/_build/html/cover
with-xunit=1
# Drop into debugger on failure
#pdb=0
#pdb-failures=0
+55 -25
View File
@@ -79,7 +79,7 @@ Position Tracking
| last_sale_date | datetime of the last trade of the position's |
| | security on the exchange |
+-----------------+----------------------------------------------------+
| timestamp | System time evevent occurs in zipilne |
| timestamp | System time event occurs in zipilne |
+-----------------+----------------------------------------------------+
Performance Period
@@ -133,13 +133,15 @@ class PerformanceTracker():
"""
def __init__(self, period_start, period_end, capital_base, trading_environment):
def __init__(self, trading_environment):
self.trading_environment = trading_environment
self.trading_day = datetime.timedelta(hours = 6, minutes = 30)
self.calendar_day = datetime.timedelta(hours = 24)
self.period_start = period_start
self.period_end = period_end
self.period_start = self.trading_environment.period_start
self.period_end = self.trading_environment.period_end
self.market_open = self.period_start
self.market_close = self.market_open + self.trading_day
self.progress = 0.0
@@ -147,25 +149,27 @@ class PerformanceTracker():
self.day_count = 0
self.cumulative_capital_used = 0.0
self.max_capital_used = 0.0
self.capital_base = capital_base
self.trading_environment = trading_environment
self.capital_base = self.trading_environment.capital_base
self.returns = []
self.txn_count = 0
self.event_count = 0
self.result_stream = None
self.last_dict = None
self.cumulative_performance = PerformancePeriod(
{},
capital_base,
starting_cash = capital_base
self.capital_base,
starting_cash = self.capital_base
)
self.todays_performance = PerformancePeriod(
{},
capital_base,
starting_cash = capital_base
self.capital_base,
starting_cash = self.capital_base
)
def get_portfolio(self):
return self.cumulative_performance.to_namedict()
def publish_to(self, zmq_socket, context=None):
"""
@@ -201,17 +205,8 @@ class PerformanceTracker():
'cumulative_risk_metrics' : self.cumulative_risk_metrics.to_dict(),
'timestamp' : datetime.datetime.now(),
}
def update(self, event_frame):
for dt, event_series in event_frame.iteritems():
data = {}
data.update(event_series)
event = zp.namedict(data)
self.process_event(event)
def process_event(self, event):
qutil.LOGGER.debug("series is " + str(event))
self.event_count += 1
if(event.dt >= self.market_close):
@@ -407,10 +402,10 @@ class PerformancePeriod():
def to_dict(self):
"""
Creates a dictionary representing the state of this performance period
Returns a dict object of the form:
Creates a dictionary representing the state of this performance
period. See header comments for a detailed description.
"""
positions = self.get_positions()
return {
'ending_value' : self.ending_value,
@@ -418,6 +413,41 @@ class PerformancePeriod():
'starting_value' : self.starting_value,
'starting_cash' : self.starting_cash,
'ending_cash' : self.ending_cash,
'positions' : self.positions,
'positions' : positions,
'timestamp' : datetime.datetime.now(),
}
def to_namedict(self):
"""
Creates a namedict representing the state of this perfomance period.
Properties are the same as the results of to_dict. See header comments
for a detailed description.
"""
positions = self.get_positions(namedicted=True)
positions = zp.namedict(positions)
return zp.namedict({
'ending_value' : self.ending_value,
'capital_used' : self.period_capital_used,
'starting_value' : self.starting_value,
'starting_cash' : self.starting_cash,
'ending_cash' : self.ending_cash,
'positions' : positions
})
def get_positions(self, namedicted=False):
positions = {}
for sid, pos in self.positions.iteritems():
cur = pos.to_dict()
if namedicted:
positions[sid] = zp.namedict(cur)
else:
positions[sid] = cur
return positions
+14 -2
View File
@@ -182,7 +182,6 @@ class RiskMetrics():
return period_returns, returns
def calculate_volatility(self, daily_returns):
#qutil.LOGGER.debug("trading days {td}".format(td=self.trading_days))
return np.std(daily_returns, ddof=1) * math.sqrt(self.trading_days)
def calculate_sharpe(self):
@@ -379,11 +378,24 @@ class RiskReport():
class TradingEnvironment(object):
def __init__(self, benchmark_returns, treasury_curves):
def __init__(
self,
benchmark_returns,
treasury_curves,
period_start=None,
period_end=None,
capital_base=None,
frame_index=None
):
self.trading_days = []
self.trading_day_map = {}
self.treasury_curves = treasury_curves
self.benchmark_returns = benchmark_returns
self.frame_index = frame_index
self.period_start = period_start
self.period_end = period_end
self.capital_base = capital_base
for bm in benchmark_returns:
self.trading_days.append(bm.date)
self.trading_day_map[bm.date] = bm
+25 -16
View File
@@ -8,19 +8,27 @@ from zmq.core.poll import select
import zipline.messaging as qmsg
import zipline.util as qutil
import zipline.protocol as zp
import zipline.finance.performance as perf
class TradeSimulationClient(qmsg.Component):
def __init__(self, simulation_dt):
def __init__(self, trading_environment):
qmsg.Component.__init__(self)
self.received_count = 0
self.prev_dt = None
self.event_queue = None
self.event_callbacks = []
self.txn_count = 0
self.current_dt = simulation_dt
self.last_iteration_duration = datetime.timedelta(seconds=0)
self.event_frame = None
self.received_count = 0
self.prev_dt = None
self.event_queue = None
self.event_callbacks = []
self.txn_count = 0
self.trading_environment = trading_environment
self.current_dt = trading_environment.period_start
self.last_iteration_dur = datetime.timedelta(seconds=0)
assert self.trading_environment.frame_index != None
self.event_frame = pandas.DataFrame(
index=self.trading_environment.frame_index
)
self.perf = perf.PerformanceTracker(self.trading_environment)
@property
def get_id(self):
@@ -67,9 +75,9 @@ class TradeSimulationClient(qmsg.Component):
self.run_callbacks()
#update time based on receipt of the order
self.last_iteration_duration = datetime.datetime.utcnow() - event_start
self.last_iteration_dur = datetime.datetime.utcnow() - event_start
self.current_dt = self.current_dt + self.last_iteration_duration
self.current_dt = self.current_dt + self.last_iteration_dur
#signal done to order source.
self.order_socket.send(str(zp.ORDER_PROTOCOL.BREAK))
@@ -95,15 +103,16 @@ class TradeSimulationClient(qmsg.Component):
self.order_socket.send(str(zp.ORDER_PROTOCOL.DONE))
def queue_event(self, event):
self.perf.process_event(event)
if self.event_queue == None:
self.event_queue = {}
self.event_queue = []
series = event.as_series()
self.event_queue[event.dt] = series
self.event_queue.append(series)
def get_frame(self):
frame = pandas.DataFrame(self.event_queue)
self.event_queue = None
return frame
for event in self.event_queue:
self.event_frame[event['sid']] = event
return self.event_frame
class OrderDataSource(qmsg.DataSource):
"""DataSource that relays orders from the client"""
-1
View File
@@ -119,7 +119,6 @@ import numbers
import datetime
import pytz
import copy
import pandas
from collections import namedtuple
from protocol_utils import Enum, FrameExceptionFactory, namedict
+5 -4
View File
@@ -1,8 +1,7 @@
import copy
import pandas
from ctypes import Structure, c_ubyte
from pandas import Series
def Enum(*options):
"""
Fast enums are very important when we want really tight zmq
@@ -87,6 +86,8 @@ class namedict(object):
def has_attr(self, name):
return self.__dict__.has_key(name)
def as_series(self):
return Series(self.__dict__, self.__dict__.keys())
s = pandas.Series(self.__dict__)
s.name = self.sid
return s
+2 -3
View File
@@ -103,9 +103,8 @@ class TestAlgorithm():
event = zp.namedict(data)
#place an order for 100 shares of sid:133
if self.incr < self.count:
if event.source_id != zp.FINANCE_COMPONENT.ORDER_SOURCE:
self.trading_client.order(self.sid, self.amount)
self.incr += 1
self.trading_client.order(self.sid, self.amount)
self.incr += 1
elif not self.done:
self.trading_client.signal_order_done()
self.done = True
+21 -15
View File
@@ -207,7 +207,13 @@ class FinanceTestCase(TestCase):
set1 = SpecificEquityTrades("flat-133", trade_history)
trading_client = TradeSimulationClient(start_date)
self.trading_environment.period_start = trade_history[0].dt
self.trading_environment.period_end = trade_history[-1].dt
self.trading_environment.capital_base = 10000
self.trading_environment.frame_index = ['sid', 'volume', 'dt', \
'price', 'changed']
trading_client = TradeSimulationClient(self.trading_environment)
#client will send 10 orders for 100 shares of 133
test_algo = TestAlgorithm(133, 100, 10, trading_client)
@@ -280,25 +286,25 @@ class FinanceTestCase(TestCase):
volume,
start_date,
trade_time_increment,
self.trading_environment )
self.trading_environment
)
self.trading_environment.period_start = trade_history[0].dt
self.trading_environment.period_end = trade_history[-1].dt
self.trading_environment.capital_base = 10000
self.trading_environment.frame_index = ['sid', 'volume', 'dt', \
'price', 'changed']
set1 = SpecificEquityTrades("flat-133", trade_history)
#client sill send 10 orders for 100 shares of 133
trading_client = TradeSimulationClient(start_date)
trading_client = TradeSimulationClient(self.trading_environment)
test_algo = TestAlgorithm(133, 100, 10, trading_client)
order_source = OrderDataSource()
transaction_sim = TransactionSimulator()
perf_tracker = perf.PerformanceTracker(
trade_history[0]['dt'],
trade_history[-1]['dt'],
1000000.0,
self.trading_environment)
#register perf_tracker to receive callbacks from the client.
trading_client.add_event_callback(perf_tracker.update)
sim.register_components([
trading_client,
order_source,
@@ -339,19 +345,19 @@ class FinanceTestCase(TestCase):
self.assertEqual(
transaction_sim.txn_count,
perf_tracker.txn_count,
trading_client.perf.txn_count,
"The perf tracker should handle the same number of transactions \
as the simulator emits."
)
self.assertEqual(
len(perf_tracker.cumulative_performance.positions),
len(trading_client.perf.cumulative_performance.positions),
1,
"Portfolio should have one position."
)
self.assertEqual(
perf_tracker.cumulative_performance.positions[133].sid,
trading_client.perf.cumulative_performance.positions[133].sid,
133,
"Portfolio should have one position in 133."
)
+35 -21
View File
@@ -506,34 +506,46 @@ shares in position"
trade_count = 100
sid = 133
price = [10.1] * trade_count
price = 10.1
price_list = [price] * trade_count
volume = [100] * trade_count
start_date = datetime.datetime.strptime("01/01/2011","%m/%d/%Y")
start_date = start_date.replace(tzinfo=pytz.utc)
trade_time_increment = datetime.timedelta(days=1)
trade_history = factory.create_trade_history(
sid,
price,
price_list,
volume,
start_date,
trade_time_increment,
self.trading_environment
)
trade_client = TradeSimulationClient(start_date)
start = trade_history[0].dt
end = trade_history[-1].dt
tracker = perf.PerformanceTracker(
start,
end,
1000.0,
self.trading_environment
sid2 = 134
price2 = 12.12
price2_list = [price2] * trade_count
trade_history2 = factory.create_trade_history(
sid2,
price2_list,
volume,
start_date,
trade_time_increment,
self.trading_environment
)
trade_history.extend(trade_history2)
self.trading_environment.period_start = trade_history[0].dt
self.trading_environment.period_end = trade_history[-1].dt
self.trading_environment.capital_base = 1000.0
self.trading_environment.frame_index = ['sid', 'volume', 'dt', \
'price', 'changed']
client = TradeSimulationClient(self.trading_environment)
for event in trade_history:
#create a transaction for all but
#one trade, to simulate None transaction
if(event.dt != start):
#first trade in each sid, to simulate None transaction
if(event.dt != self.trading_environment.period_start):
txn = zp.namedict({
'sid' : event.sid,
'amount' : -25,
@@ -543,17 +555,19 @@ shares in position"
})
else:
txn = None
event[zp.TRANSFORM_TYPE.TRANSACTION] = txn
trade_client.queue_event(event)
event[zp.TRANSFORM_TYPE.TRANSACTION] = txn
client.queue_event(event)
df = trade_client.get_frame()
tracker.update(df)
df = client.get_frame()
#we skip one trade, to test case of None transaction
txn_count = len(trade_history) - 1
self.assertEqual(tracker.txn_count, txn_count)
self.assertEqual(df[133]['price'], price)
self.assertEqual(df[134]['price'], price2)
cumulative_pos = tracker.cumulative_performance.positions[sid]
expected_size = txn_count * -25
#we skip two trades, to test case of None transaction
txn_count = len(trade_history) - 2
self.assertEqual(client.perf.txn_count, txn_count)
cumulative_pos = client.perf.cumulative_performance.positions[sid]
expected_size = txn_count / 2 * -25
self.assertEqual(cumulative_pos.amount, expected_size)
+5 -2
View File
@@ -11,6 +11,9 @@ class Risk(unittest.TestCase):
def setUp(self):
qutil.configure_logging()
start_date = datetime.datetime(year=2006, month=1, day=1, tzinfo=pytz.utc)
end_date = datetime.datetime(year=2006, month=12, day=31, tzinfo=pytz.utc)
self.benchmark_returns, self.treasury_curves = \
factory.load_market_data()
@@ -23,9 +26,9 @@ class Risk(unittest.TestCase):
self.oneday = datetime.timedelta(days=1)
self.tradingday = datetime.timedelta(hours=6, minutes=30)
self.dt = datetime.datetime.utcnow()
start_date = datetime.datetime(year=2006, month=1, day=1, tzinfo=pytz.utc)
self.algo_returns_06 = factory.create_returns_from_list(RETURNS, start_date, self.trading_calendar)
end_date = datetime.datetime(year=2006, month=12, day=31, tzinfo=pytz.utc)
self.metrics_06 = risk.RiskReport(self.algo_returns_06, self.trading_calendar)
def tearDown(self):