Files
catalyst/zipline/finance/performance.py
T
fawce a4a4d38a73 TradingEnvironment allows the specification of a benchmark index and a local timezone for the exchange. This commit adds tests to verify the TradingEnvironment properly handles London Stock Exchange index, FTSE.
- added LSE reference rrules calendar (thanks to Edward Johns)
    - added tests to verify LSE environment matches rrule calendar
    - added a test to verify global environment behavior can be set.
    - moved DailyReturn class to trading to eliminate circularity from
    risk <-> trading.
    - updated TradingEnvironment to be a context manager. This allows users
    to run algorithms in individually isolated environments in one python
    process. This is useful for managing multiple algorithms in a single
    ipython notebook.
    - added comments to explain behavior and usage of the global environment
2013-02-18 10:24:32 -05:00

688 lines
28 KiB
Python

#
# Copyright 2012 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Performance Tracking
====================
+-----------------+----------------------------------------------------+
| key | value |
+=================+====================================================+
| period_start | The beginning of the period to be tracked. datetime|
| | in pytz.utc timezone. Will always be 0:00 on the |
| | date in UTC. The fact that the time may be on the |
| | prior day in the exchange's local time is ignored |
+-----------------+----------------------------------------------------+
| period_end | The end of the period to be tracked. datetime |
| | in pytz.utc timezone. Will always be 23:59 on the |
| | date in UTC. The fact that the time may be on the |
| | next day in the exchange's local time is ignored |
+-----------------+----------------------------------------------------+
| progress | fraction of the test's trading days completed |
+-----------------+----------------------------------------------------+
| started_at | datetime in utc marking the start of this test |
+-----------------+----------------------------------------------------+
| capital_base | The initial capital assumed for this tracker. |
+-----------------+----------------------------------------------------+
| cumulative_perf | A dictionary representing the cumulative |
| | performance through all the events delivered to |
| | this tracker. For details see the comments on |
| | :py:meth:`PerformancePeriod.to_dict` |
+-----------------+----------------------------------------------------+
| daily_perf | A dictionary representing the cumulative |
| | performance through all the events delivered to |
| | this tracker with datetime stamps between last_open|
| | and last_close. For details see the comments on |
| | :py:meth:`PerformancePeriod.to_dict` |
| | TODO: adding this because we calculate it. May be |
| | overkill. |
+-----------------+----------------------------------------------------+
| cumulative_risk | A dictionary representing the risk metrics |
| _metrics | calculated based on the positions aggregated |
| | through all the events delivered to this tracker. |
| | For details look at the comments for |
| | :py:meth:`zipline.finance.risk.RiskMetrics.to_dict`|
+-----------------+----------------------------------------------------+
Position Tracking
=================
+-----------------+----------------------------------------------------+
| key | value |
+=================+====================================================+
| sid | the identifier for the security held in this |
| | position. |
+-----------------+----------------------------------------------------+
| amount | whole number of shares in the position |
+-----------------+----------------------------------------------------+
| last_sale_price | price at last sale of the security on the exchange |
+-----------------+----------------------------------------------------+
| cost_basis | the volume weighted average price paid per share |
+-----------------+----------------------------------------------------+
Performance Period
==================
Performance Periods are updated with every trade. When calling
code needs a portfolio object that fulfills the algorithm
protocol, use the PerformancePeriod.as_portfolio method. See that
method for comments on the specific fields provided (and
omitted).
+---------------+------------------------------------------------------+
| key | value |
+===============+======================================================+
| ending_value | the total market value of the positions held at the |
| | end of the period |
+---------------+------------------------------------------------------+
| cash_flow | the cash flow in the period (negative means spent) |
| | from buying and selling securities in the period. |
| | Includes dividend payments in the period as well. |
+---------------+------------------------------------------------------+
| starting_value| the total market value of the positions held at the |
| | start of the period |
+---------------+------------------------------------------------------+
| starting_cash | cash on hand at the beginning of the period |
+---------------+------------------------------------------------------+
| ending_cash | cash on hand at the end of the period |
+---------------+------------------------------------------------------+
| positions | a list of dicts representing positions, see |
| | :py:meth:`Position.to_dict()` |
| | for details on the contents of the dict |
+---------------+------------------------------------------------------+
| pnl | Dollar value profit and loss, for both realized and |
| | unrealized gains. |
+---------------+------------------------------------------------------+
| returns | percentage returns for the entire portfolio over the |
| | period |
+---------------+------------------------------------------------------+
| cumulative\ | The net capital used (positive is spent) during |
| _capital_used | the period |
+---------------+------------------------------------------------------+
| max_capital\ | The maximum amount of capital deployed during the |
| _used | period. |
+---------------+------------------------------------------------------+
| max_leverage | The maximum leverage used during the period. |
+---------------+------------------------------------------------------+
| period_close | The last close of the market in period. datetime in |
| | pytz.utc timezone. |
+---------------+------------------------------------------------------+
| period_open | The first open of the market in period. datetime in |
| | pytz.utc timezone. |
+---------------+------------------------------------------------------+
| transactions | all the transactions that were accrued during this |
| | period. Unset/missing for cumulative periods. |
+---------------+------------------------------------------------------+
"""
import logbook
import datetime
import pytz
import math
import numpy as np
import zipline.protocol as zp
import zipline.finance.risk as risk
import zipline.finance.trading as trading
log = logbook.Logger('Performance')
class PerformanceTracker(object):
    """
    Tracks the performance of the zipline as it is running in
    the simulator, relays this out to the Deluge broker and then
    to the client. Visually:

        +--------------------+   Result Stream   +--------+
        | PerformanceTracker | ----------------> | Deluge |
        +--------------------+                   +--------+

    Two PerformancePeriod objects are maintained side by side: one
    spanning the whole simulation (cumulative_performance) and one
    spanning only the current market day (todays_performance).
    """

    def __init__(self, sim_params):
        """
        Build a tracker from simulation parameters.

        sim_params must expose period_start, period_end, first_open,
        last_close, days_in_period and capital_base.
        """
        self.sim_params = sim_params

        # timezone-aware (UTC) wall-clock time at which tracking began
        self.started_at = datetime.datetime.now(pytz.utc)

        self.period_start = self.sim_params.period_start
        self.period_end = self.sim_params.period_end
        self.last_close = self.sim_params.last_close

        first_day = self.sim_params.first_open
        self.market_open, self.market_close = \
            trading.environment.get_open_and_close(first_day)

        self.progress = 0.0
        self.total_days = self.sim_params.days_in_period

        # number of market closes handled so far; it is incremented in
        # handle_market_close *before* progress is computed.
        self.day_count = 0.0

        self.capital_base = self.sim_params.capital_base
        self.returns = []
        self.txn_count = 0
        self.event_count = 0
        self.last_dict = None

        self.cumulative_risk_metrics = \
            risk.RiskMetricsIterative(self.period_start)

        # this performance period will span the entire simulation.
        self.cumulative_performance = PerformancePeriod(
            # initial cash is your capital base.
            self.capital_base,
            # the cumulative period will be calculated over the entire test.
            self.period_start,
            self.period_end
        )

        # this performance period will span just the current market day
        self.todays_performance = PerformancePeriod(
            # initial cash is your capital base.
            self.capital_base,
            # the daily period will be calculated for the market day
            self.market_open,
            self.market_close,
            # save the transactions for the daily periods
            keep_transactions=True
        )

    def __repr__(self):
        return "%s(%r)" % (
            self.__class__.__name__,
            {'simulation parameters': self.sim_params})

    @staticmethod
    def _midnight(dt):
        """
        Return dt truncated to 0:00:00.000000 of the same day, keeping
        its tzinfo. Microseconds are cleared as well so the result
        compares equal to date-only dividend timestamps.
        """
        return dt.replace(hour=0, minute=0, second=0, microsecond=0)

    def transform(self, stream_in):
        """
        Main generator work loop.

        Consumes (date, snapshot) pairs, runs every event through
        process_event, and yields (date, events) for each snapshot that
        still has events to forward.
        """
        # NOTE(review): events for which process_event returns None
        # (i.e. non-trade events) are treated as consumed here and are
        # not forwarded downstream -- confirm against the callers.
        for date, snapshot in stream_in:
            new_snapshot = []
            for event in snapshot:
                messages = self.process_event(event)
                if messages is not None:
                    event.perf_messages = messages
                    event.portfolio = self.get_portfolio()
                    new_snapshot.append(event)

            if new_snapshot:
                yield date, new_snapshot

    def get_portfolio(self):
        """Return the cumulative period rendered as a portfolio object."""
        return self.cumulative_performance.as_portfolio()

    def to_dict(self):
        """
        Creates a dictionary representing the state of this tracker.
        Returns a dict object of the form described in header comments.
        """
        return {
            'started_at': self.started_at,
            'period_start': self.period_start,
            'period_end': self.period_end,
            'progress': self.progress,
            'capital_base': self.capital_base,
            'cumulative_perf': self.cumulative_performance.to_dict(),
            'daily_perf': self.todays_performance.to_dict(),
            'cumulative_risk_metrics': self.cumulative_risk_metrics.to_dict()
        }

    def process_event(self, event):
        """
        Apply a single event to both performance periods.

        Returns a list of daily performance messages (one per market
        close crossed by this event, possibly empty) for trade events,
        and None for every other event type.
        """
        messages = None
        self.event_count += 1

        if event.type == zp.DATASOURCE_TYPE.TRADE:
            messages = []

            # close out every market day that ended before this trade
            while event.dt > self.market_close and \
                    event.dt < self.last_close:
                messages.append(self.handle_market_close())

            if event.TRANSACTION:
                self.txn_count += 1
                self.cumulative_performance.execute_transaction(
                    event.TRANSACTION
                )
                self.todays_performance.execute_transaction(event.TRANSACTION)

            # update last sale
            self.cumulative_performance.update_last_sale(event)
            self.todays_performance.update_last_sale(event)

            # the transaction has been applied; strip it from the event
            del event['TRANSACTION']

        elif event.type == zp.DATASOURCE_TYPE.DIVIDEND:
            self.cumulative_performance.add_dividend(event)
            self.todays_performance.add_dividend(event)

        # calculate performance as of last trade
        self.cumulative_performance.calculate_performance()
        self.todays_performance.calculate_performance()

        return messages

    def handle_market_close(self):
        """
        Close out the current market day.

        Records the day's return, updates the cumulative risk metrics
        and progress, and (unless this was the final day) advances the
        market day markers and rolls the daily period over.

        Returns the tracker state (see to_dict) as of the close.
        """
        # add the return results from today to the list of DailyReturn objects.
        todays_date = self._midnight(self.market_close)
        self.cumulative_performance.update_dividends(todays_date)
        self.todays_performance.update_dividends(todays_date)

        todays_return_obj = trading.DailyReturn(
            todays_date,
            self.todays_performance.returns
        )
        self.returns.append(todays_return_obj)

        # update risk metrics for cumulative performance
        self.cumulative_risk_metrics.update(
            self.market_close,
            self.todays_performance.returns)

        # increment the day counter before we move markers forward.
        self.day_count += 1.0
        # calculate progress of test
        self.progress = self.day_count / self.total_days

        # Take a snapshot of our current performance to return to the
        # browser.
        daily_update = self.to_dict()

        # On the last day of the test, don't create tomorrow's performance
        # period. We may not be able to find the next trading day if we're
        # at the end of our historical data
        if self.market_close >= self.last_close:
            return daily_update

        # move the market day markers forward
        self.market_open, self.market_close = \
            trading.environment.next_open_and_close(self.market_open)

        # Roll over positions to current day.
        self.todays_performance.rollover()
        self.todays_performance.period_open = self.market_open
        self.todays_performance.period_close = self.market_close

        # The dividend calculation for the daily needs to be made
        # after the rollover. midnight_between is the last midnight
        # hour between the close of markets and the next open. To
        # make sure midnight_between matches identically with
        # dividend data dates, it is in UTC.
        midnight_between = self._midnight(self.market_open)
        self.cumulative_performance.update_dividends(midnight_between)
        self.todays_performance.update_dividends(midnight_between)

        return daily_update

    def handle_simulation_end(self):
        """
        When the simulation is complete, run the full period risk report
        and send it out on the results socket.

        Returns a (perf_messages, risk_dict) tuple: the daily messages
        for every market close that had not been emitted yet, and the
        risk report as a dict.
        """
        # the stream will end on the last trading day, but will
        # not trigger an end of day, so we trigger the final
        # market close(s) here
        perf_messages = []
        while self.last_close > self.market_close:
            perf_messages.append(self.handle_market_close())

        # the last day itself still needs to be closed out
        perf_messages.append(self.handle_market_close())

        log_msg = "Simulated {n} trading days out of {m}."
        log.info(log_msg.format(n=int(self.day_count), m=self.total_days))
        log.info("first open: {d}".format(
            d=self.sim_params.first_open))
        log.info("last close: {d}".format(
            d=self.sim_params.last_close))

        self.risk_report = risk.RiskReport(self.returns, self.sim_params)

        risk_dict = self.risk_report.to_dict()
        return perf_messages, risk_dict
class Position(object):
    """
    Holding in a single security: share count, per-share cost basis,
    last sale information and any dividends still awaiting payment.
    """

    def __init__(self, sid):
        self.sid = sid
        self.amount = 0
        self.cost_basis = 0.0  # per share
        self.last_sale_price = 0.0
        self.last_sale_date = 0.0
        self.dividends = []

    def update_dividends(self, midnight_utc):
        """
        midnight_utc is the 0 hour for the current (not yet open) trading day.
        This method will be invoked at the end of the market
        close handling, before the next market open.

        Returns the total cash paid out on midnight_utc. Dividends whose
        pay_date matches are dropped from the pending list; all others
        are retained.
        """
        payment = 0.0
        unpaid_dividends = []

        for dividend in self.dividends:
            if midnight_utc == dividend.ex_date:
                # if we own shares at midnight of the div_ex date
                # we are entitled to the dividend.
                dividend.amount_on_ex_date = self.amount
                # prefer the net amount, fall back to gross when the
                # net amount is missing/zero
                if dividend.net_amount:
                    dividend.payment = self.amount * dividend.net_amount
                else:
                    dividend.payment = self.amount * dividend.gross_amount

            if midnight_utc == dividend.pay_date:
                # if it is the payment date, include this
                # dividend's actual payment (calculated on
                # ex_date)
                payment += dividend.payment
            else:
                unpaid_dividends.append(dividend)

        self.dividends = unpaid_dividends
        return payment

    def add_dividend(self, dividend):
        """Queue a dividend to be evaluated by update_dividends."""
        self.dividends.append(dividend)

    def update(self, txn):
        """
        Apply a transaction to this position.

        txn must expose sid, amount (signed share count) and price.
        Raises NameError when txn.sid does not match this position.

        Cost basis rules:
          * position ends flat            -> basis resets to 0.0
          * opening or adding to a side   -> volume weighted average
          * partially reducing a side     -> basis is unchanged
          * flipping through zero         -> basis is the txn price,
            because every remaining share was acquired in this txn
        """
        if self.sid != txn.sid:
            raise NameError('updating position with txn for a different sid')

        held = self.amount
        traded = txn.amount
        total_shares = held + traded

        if total_shares == 0:
            # we're covering a short or closing a position
            self.cost_basis = 0.0
        elif held == 0 or (held > 0) == (traded > 0):
            # same direction as the existing holding (or a fresh one)
            total_cost = self.cost_basis * held + traded * txn.price
            self.cost_basis = float(total_cost) / total_shares
        elif abs(traded) > abs(held):
            # closed the old side and opened the opposite one
            self.cost_basis = float(txn.price)
        # otherwise: a partial reduction, the average cost paid for the
        # remaining shares has not changed.

        self.amount = total_shares

    def currentValue(self):
        """Market value of the position at the last sale price."""
        return self.amount * self.last_sale_price

    def __repr__(self):
        template = (
            "sid: {sid}, amount: {amount}, cost_basis: {cost_basis}, "
            "last_sale_price: {last_sale_price}"
        )
        return template.format(
            sid=self.sid,
            amount=self.amount,
            cost_basis=self.cost_basis,
            last_sale_price=self.last_sale_price
        )

    def to_dict(self):
        """
        Creates a dictionary representing the state of this position.
        Returns a dict with the keys sid, amount, cost_basis and
        last_sale_price.
        """
        return {
            'sid': self.sid,
            'amount': self.amount,
            'cost_basis': self.cost_basis,
            'last_sale_price': self.last_sale_price
        }
class PerformancePeriod(object):
    """
    Accumulates positions, cash flow and returns over one period
    (either a single market day or a whole simulation).

    Position amounts and last sale prices are mirrored into two numpy
    arrays so that the total position value is a single dot product.
    """

    def __init__(
            self,
            starting_cash,
            period_open=None,
            period_close=None,
            keep_transactions=False):
        """
        starting_cash     -- cash balance at the start of the period
        period_open       -- start of the period
        period_close      -- end of the period
        keep_transactions -- when True, every executed transaction is
                             retained and reported by to_dict
        """
        self.period_open = period_open
        self.period_close = period_close

        self.ending_value = 0.0
        self.period_cash_flow = 0.0
        self.pnl = 0.0
        # sid => position object
        self.positions = positiondict()
        self.starting_value = 0.0
        # cash balance at start of period
        self.starting_cash = starting_cash
        self.ending_cash = starting_cash
        self.keep_transactions = keep_transactions
        self.processed_transactions = []
        self.cumulative_capital_used = 0.0
        self.max_capital_used = 0.0
        self.max_leverage = 0.0

        # Maps position to following array indexes
        self._position_index_map = {}
        # Arrays for quick calculations of positions value
        self._position_amounts = np.array([])
        self._position_last_sale_prices = np.array([])

        # also initialises self.returns
        self.calculate_performance()

        # Objects that are recycled, by assigning new values, when
        # returning portfolio information. This avoids creating a new
        # object for each event.
        self._portfolio_store = zp.Portfolio()
        self._positions_store = zp.Positions()

    def rollover(self):
        """
        Start a new period from where this one ended: ending balances
        become starting balances and per-period accumulators reset.
        """
        self.starting_value = self.ending_value
        self.starting_cash = self.ending_cash
        self.period_cash_flow = 0.0
        self.pnl = 0.0
        self.processed_transactions = []
        self.cumulative_capital_used = 0.0
        self.max_capital_used = 0.0
        self.max_leverage = 0.0

    def index_for_position(self, sid):
        """
        Return the slot of sid in the amount/price arrays, allocating a
        zero-filled slot at the end the first time a sid is seen.
        """
        try:
            index = self._position_index_map[sid]
        except KeyError:
            index = len(self._position_index_map)
            self._position_index_map[sid] = index
            self._position_amounts = np.append(self._position_amounts, [0])
            self._position_last_sale_prices = np.append(
                self._position_last_sale_prices, [0])
        return index

    def add_dividend(self, div):
        # The dividend is received on midnight of the dividend
        # declared date. We calculate the dividends based on the amount of
        # stock owned on midnight of the ex dividend date. However, the cash
        # is not dispersed until the payment date, which is
        # included in the event.
        self.positions[div.sid].add_dividend(div)

    def update_dividends(self, todays_date):
        """
        Check the payment date and ex date against today's date
        to determine if we are owed a dividend payment or if the
        payment has been disbursed.
        """
        cash_payments = 0.0
        # .values() (rather than iteritems) works on Python 2 and 3
        for pos in self.positions.values():
            cash_payments += pos.update_dividends(todays_date)

        # credit our cash balance with the dividend payments, or
        # if we are short, debit our cash balance with the
        # payments.
        self.period_cash_flow += cash_payments

        # debit our cumulative cash spent with the dividend
        # payments, or credit our cumulative cash spent if we are
        # short the stock.
        self.cumulative_capital_used -= cash_payments

        # recalculate performance, including the dividend
        # payments
        self.calculate_performance()

    def calculate_performance(self):
        """
        Recompute ending_value, ending_cash, pnl and returns from the
        current positions and the period cash flow. returns is 0.0 when
        the period started with nothing.
        """
        self.ending_value = self.calculate_positions_value()

        total_at_start = self.starting_cash + self.starting_value
        self.ending_cash = self.starting_cash + self.period_cash_flow
        total_at_end = self.ending_cash + self.ending_value

        self.pnl = total_at_end - total_at_start
        if total_at_start != 0:
            self.returns = self.pnl / total_at_start
        else:
            self.returns = 0.0

    def execute_transaction(self, txn):
        """
        Apply a transaction: update the position, the cash flow and the
        capital-used / leverage high-water marks.

        txn must expose sid, amount (signed share count) and price.
        """
        # Update Position
        # ----------------
        position = self.positions[txn.sid]
        position.update(txn)
        index = self.index_for_position(txn.sid)
        self._position_amounts[index] = position.amount

        self.period_cash_flow += -1 * txn.price * txn.amount

        # Max Leverage
        # ---------------
        # Calculate the maximum capital used and maximum leverage
        transaction_cost = txn.price * txn.amount
        self.cumulative_capital_used += transaction_cost

        if math.fabs(self.cumulative_capital_used) > self.max_capital_used:
            self.max_capital_used = math.fabs(self.cumulative_capital_used)

            # We want to convey a level, rather than a precise figure.
            # round to the nearest 5,000 to keep the number easy on the eyes
            self.max_capital_used = self.round_to_nearest(
                self.max_capital_used,
                base=5000
            )

            # we're adding a 10% cushion to the capital used.
            # A period that starts with exactly zero cash has no
            # defined leverage ratio; keep the previous value rather
            # than raising ZeroDivisionError mid-simulation.
            if self.starting_cash != 0:
                self.max_leverage = 1.1 * \
                    self.max_capital_used / self.starting_cash

        # add transaction to the list of processed transactions
        if self.keep_transactions:
            self.processed_transactions.append(txn)

    def round_to_nearest(self, x, base=5):
        """Round x to the nearest multiple of base, returned as an int."""
        return int(base * round(float(x) / base))

    def calculate_positions_value(self):
        """Total market value of all positions (amounts . last prices)."""
        return np.dot(self._position_amounts, self._position_last_sale_prices)

    def update_last_sale(self, event):
        """
        Record the price and timestamp of a trade event, but only for
        sids in which a position already exists.
        """
        is_trade = event.type == zp.DATASOURCE_TYPE.TRADE
        if event.sid in self.positions and is_trade:
            self.positions[event.sid].last_sale_price = event.price
            index = self.index_for_position(event.sid)
            self._position_last_sale_prices[index] = event.price
            self.positions[event.sid].last_sale_date = event.dt

    def __core_dict(self):
        rval = {
            'ending_value': self.ending_value,
            # this field is renamed to capital_used for backward
            # compatibility.
            'capital_used': self.period_cash_flow,
            'starting_value': self.starting_value,
            'starting_cash': self.starting_cash,
            'ending_cash': self.ending_cash,
            'portfolio_value': self.ending_cash + self.ending_value,
            'cumulative_capital_used': self.cumulative_capital_used,
            'max_capital_used': self.max_capital_used,
            'max_leverage': self.max_leverage,
            'pnl': self.pnl,
            'returns': self.returns,
            'period_open': self.period_open,
            'period_close': self.period_close
        }

        return rval

    def to_dict(self):
        """
        Creates a dictionary representing the state of this performance
        period. See header comments for a detailed description.
        """
        rval = self.__core_dict()
        positions = self.get_positions_list()
        rval['positions'] = positions

        # we want the key to be absent, not just empty
        if self.keep_transactions:
            transactions = [x.__dict__ for x in self.processed_transactions]
            rval['transactions'] = transactions

        return rval

    def as_portfolio(self):
        """
        The purpose of this method is to provide a portfolio
        object to algorithms running inside the same trading
        client. The data needed is captured raw in a
        PerformancePeriod, and in this method we rename some
        fields for usability and remove extraneous fields.

        The same Portfolio object is returned (and overwritten) on
        every call.
        """
        # Recycles containing objects' Portfolio object
        # which is used for returning values.
        # as_portfolio is called in an inner loop,
        # so repeated object creation becomes too expensive
        portfolio = self._portfolio_store
        # maintaining the old name for the portfolio field for
        # backward compatibility
        portfolio.capital_used = self.period_cash_flow
        portfolio.starting_cash = self.starting_cash
        portfolio.portfolio_value = self.ending_cash + self.ending_value
        portfolio.pnl = self.pnl
        portfolio.returns = self.returns
        portfolio.cash = self.ending_cash
        portfolio.start_date = self.period_open
        portfolio.positions = self.get_positions()
        portfolio.positions_value = self.ending_value
        return portfolio

    def get_positions(self):
        """
        Refresh and return the recycled positions container, creating a
        protocol Position for any sid not yet present in it.
        """
        positions = self._positions_store

        # .items() (rather than iteritems) works on Python 2 and 3
        for sid, pos in self.positions.items():
            if sid not in positions:
                positions[sid] = zp.Position(sid)
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price

        return positions

    def get_positions_list(self):
        """Return to_dict() of every position with a non-zero amount."""
        return [
            pos.to_dict()
            for pos in self.positions.values()
            if pos.amount != 0
        ]
class positiondict(dict):
    """
    dict of sid => Position that creates, stores and returns an empty
    Position the first time an unknown sid is looked up by subscript.
    """

    def __missing__(self, key):
        # invoked by dict.__getitem__ when key is absent
        self[key] = created = Position(key)
        return created