MAINT: Use pd.Series for benchmarks and algorithm returns in risk.

Use pd.Series instead of lists so that storage for the returns is preallocated, indexed by the trading days between the start and end dates.
This commit is contained in:
Eddie Hebert
2013-04-04 16:32:23 -04:00
parent 2dbafd5162
commit 6210467bec
5 changed files with 81 additions and 41 deletions
-3
View File
@@ -1019,9 +1019,6 @@ class TestPerformanceTracker(unittest.TestCase):
expected_size = len(txns) / 2 * -25
self.assertEqual(cumulative_pos.amount, expected_size)
self.assertEqual(perf_tracker.last_close,
perf_tracker.cumulative_risk_metrics.end_date)
self.assertEqual(len(perf_messages),
sim_params.days_in_period)
+4 -4
View File
@@ -135,9 +135,9 @@ class TestRisk(unittest.TestCase):
def test_trading_days_06(self):
returns = factory.create_returns_from_range(self.sim_params)
metrics = risk.RiskReport(returns, self.sim_params)
self.assertEqual([x.trading_days for x in metrics.year_periods],
self.assertEqual([x.num_trading_days for x in metrics.year_periods],
[251])
self.assertEqual([x.trading_days for x in metrics.month_periods],
self.assertEqual([x.num_trading_days for x in metrics.month_periods],
[20, 19, 23, 19, 22, 22, 20, 23, 20, 22, 21, 20])
def test_benchmark_volatility_06(self):
@@ -625,10 +625,10 @@ class TestRisk(unittest.TestCase):
def test_trading_days_08(self):
returns = factory.create_returns_from_range(self.sim_params08)
metrics = risk.RiskReport(returns, self.sim_params08)
self.assertEqual([x.trading_days for x in metrics.year_periods],
self.assertEqual([x.num_trading_days for x in metrics.year_periods],
[253])
self.assertEqual([x.trading_days for x in metrics.month_periods],
self.assertEqual([x.num_trading_days for x in metrics.month_periods],
[21, 20, 20, 22, 21, 21, 22, 21, 21, 23, 19, 22])
def test_benchmark_volatility_08(self):
+22 -4
View File
@@ -19,6 +19,7 @@ import datetime
import pytz
import numpy as np
import pandas as pd
import zipline.finance.risk as risk
import zipline.finance.trading as trading
@@ -52,7 +53,17 @@ class RiskCompareIterativeToBatch(unittest.TestCase):
else:
start_date = trading.environment.next_trading_day(self.start_date)
risk_metrics_refactor = risk.RiskMetricsIterative(start_date)
self.all_benchmark_returns = pd.Series({
x.date: x.returns
for x in trading.environment.benchmark_returns
if x.date >= self.start_date
})
start_index = trading.environment.trading_days.searchsorted(start_date)
end_date = trading.environment.trading_days[
start_index + len(RETURNS)]
risk_metrics_refactor = risk.RiskMetricsIterative(start_date, end_date)
todays_date = start_date
cur_returns = []
@@ -77,17 +88,24 @@ class RiskCompareIterativeToBatch(unittest.TestCase):
#assert that when original raises exception, same
#exception is raised by risk_metrics_refactor
np.testing.assert_raises(
type(e), risk_metrics_refactor.update, todays_date, ret)
type(e),
risk_metrics_refactor.update,
todays_date,
self.all_benchmark_returns[todays_return_obj.date]
)
continue
risk_metrics_refactor.update(todays_date, ret)
risk_metrics_refactor.update(
todays_date,
ret,
self.all_benchmark_returns[todays_return_obj.date])
self.assertEqual(
risk_metrics_original.start_date,
risk_metrics_refactor.start_date)
self.assertEqual(
risk_metrics_original.end_date,
risk_metrics_refactor.end_date)
risk_metrics_refactor.algorithm_returns.index[-1])
self.assertEqual(
risk_metrics_original.treasury_period_return,
risk_metrics_refactor.treasury_period_return)
+18 -4
View File
@@ -134,6 +134,7 @@ import logbook
import math
import numpy as np
import pandas as pd
import zipline.protocol as zp
import zipline.finance.risk as risk
@@ -160,9 +161,16 @@ class PerformanceTracker(object):
self.total_days = self.sim_params.days_in_period
self.capital_base = self.sim_params.capital_base
self.cumulative_risk_metrics = \
risk.RiskMetricsIterative(self.period_start)
risk.RiskMetricsIterative(self.period_start, self.period_end)
self.emission_rate = sim_params.emission_rate
# Temporarily hold these here as we work on streaming benchmarks.
self.all_benchmark_returns = pd.Series({
x.date: x.returns
for x in trading.environment.benchmark_returns
if x.date >= self.period_start
})
# this performance period will span the entire simulation.
self.cumulative_performance = PerformancePeriod(
# initial cash is your capital base.
@@ -337,9 +345,15 @@ class PerformanceTracker(object):
self.returns.append(todays_return_obj)
#update risk metrics for cumulative performance
self.cumulative_risk_metrics.update(
self.market_close,
self.todays_performance.returns)
algorithm_returns = pd.Series({todays_return_obj.date:
todays_return_obj.returns})
benchmark_returns = pd.Series({
todays_return_obj.date:
self.all_benchmark_returns[todays_return_obj.date]})
self.cumulative_risk_metrics.update(todays_return_obj.date,
algorithm_returns,
benchmark_returns)
# increment the day counter before we move markers forward.
self.day_count += 1.0
+37 -26
View File
@@ -311,7 +311,7 @@ class RiskMetricsBase(object):
)
raise Exception(message)
self.trading_days = len(self.benchmark_returns)
self.num_trading_days = len(self.benchmark_returns)
self.benchmark_volatility = self.calculate_volatility(
self.benchmark_returns)
self.algorithm_volatility = self.calculate_volatility(
@@ -338,7 +338,7 @@ class RiskMetricsBase(object):
"""
period_label = self.end_date.strftime("%Y-%m")
rval = {
'trading_days': self.trading_days,
'trading_days': self.num_trading_days,
'benchmark_volatility': self.benchmark_volatility,
'algo_volatility': self.algorithm_volatility,
'treasury_period_return': self.treasury_period_return,
@@ -410,7 +410,7 @@ class RiskMetricsBase(object):
return period_returns, returns
def calculate_volatility(self, daily_returns):
return np.std(daily_returns, ddof=1) * math.sqrt(self.trading_days)
return np.std(daily_returns, ddof=1) * math.sqrt(self.num_trading_days)
def calculate_sharpe(self):
"""
@@ -447,7 +447,6 @@ class RiskMetricsBase(object):
http://en.wikipedia.org/wiki/Beta_(finance)
"""
#it doesn't make much sense to calculate beta for less than two days,
#so return none.
if len(self.algorithm_returns) < 2:
@@ -518,16 +517,28 @@ class RiskMetricsIterative(RiskMetricsBase):
Call update() method on each dt to update the metrics.
"""
def __init__(self, start_date):
def __init__(self, start_date, end_date):
self.treasury_curves = trading.environment.treasury_curves
self.start_date = start_date
self.end_date = start_date
self.start_date = start_date.replace(hour=0, minute=0, second=0,
microsecond=0)
self.end_date = end_date.replace(hour=0, minute=0, second=0,
microsecond=0)
all_trading_days = trading.environment.trading_days
mask = ((all_trading_days >= self.start_date) &
(all_trading_days <= self.end_date))
self.trading_days = all_trading_days[mask]
self.algorithm_returns_cont = pd.Series(index=self.trading_days)
self.benchmark_returns_cont = pd.Series(index=self.trading_days)
self.algorithm_returns = None
self.benchmark_returns = None
self.compounded_log_returns = []
self.moving_avg = []
self.algorithm_returns = []
self.benchmark_returns = []
self.algorithm_volatility = []
self.benchmark_volatility = []
self.algorithm_period_returns = []
@@ -541,22 +552,21 @@ class RiskMetricsIterative(RiskMetricsBase):
self.current_max = -np.inf
self.excess_returns = []
self.last_dt = start_date
self.trading_days = 0
self.all_benchmark_returns = [
x for x in trading.environment.benchmark_returns
if x.date >= self.start_date
]
@property
def last_return_date(self):
return self.algorithm_returns.index[-1]
def update(self, market_close, returns_in_period):
if trading.environment.is_trading_day(self.end_date):
self.algorithm_returns.append(returns_in_period)
self.benchmark_returns.append(
self.all_benchmark_returns.pop(0).returns)
self.trading_days += 1
self.update_compounded_log_returns()
def update(self, dt, algorithm_returns, benchmark_returns):
self.algorithm_returns_cont[dt] = algorithm_returns
self.algorithm_returns = self.algorithm_returns_cont.valid()
self.end_date = market_close
self.benchmark_returns_cont[dt] = benchmark_returns
self.benchmark_returns = self.benchmark_returns_cont.valid()
self.num_trading_days = len(self.algorithm_returns)
self.update_compounded_log_returns()
self.algorithm_period_returns.append(
self.calculate_period_returns(self.algorithm_returns))
@@ -582,7 +592,7 @@ algorithm_returns ({algo_count}) in range {start} : {end}"
self.treasury_period_return = choose_treasury(
self.treasury_curves,
self.start_date,
self.end_date
self.algorithm_returns.index[-1]
)
self.excess_returns.append(
self.algorithm_period_returns[-1] - self.treasury_period_return)
@@ -598,9 +608,9 @@ algorithm_returns ({algo_count}) in range {start} : {end}"
Creates a dictionary representing the state of the risk report.
Returns a dict object of the form:
"""
period_label = self.end_date.strftime("%Y-%m")
period_label = self.last_return_date.strftime("%Y-%m")
rval = {
'trading_days': self.trading_days,
'trading_days': len(self.algorithm_returns.valid()),
'benchmark_volatility': self.benchmark_volatility[-1],
'algo_volatility': self.algorithm_volatility[-1],
'treasury_period_return': self.treasury_period_return,
@@ -667,7 +677,8 @@ algorithm_returns ({algo_count}) in range {start} : {end}"
return
try:
compound = math.log(1 + self.algorithm_returns[-1])
compound = math.log(1 + self.algorithm_returns[
self.algorithm_returns.last_valid_index()])
except ValueError:
compound = 0.0
# BUG? Shouldn't this be set to log(1.0 + 0) ?