# catalyst/zipline/finance/risk.py

"""

Risk Report
===========

    +-----------------+----------------------------------------------------+
    | key             | value                                              |
    +=================+====================================================+
    | trading_days    | The number of trading days between self.start_date |
    |                 | and self.end_date                                  |
    +-----------------+----------------------------------------------------+
    | benchmark_volat\| The volatility of the benchmark between            |
    | ility           | self.start_date and self.end_date.                 |
    +-----------------+----------------------------------------------------+
    | algo_volatility | The volatility of the algo between self.start_date |
    |                 | and self.end_date.                                 |
    +-----------------+----------------------------------------------------+
    | treasury_period\| The return of treasuries over the period. Treasury |
    | _return         | maturity is chosen to match the duration of the    |
    |                 | test period.                                       |
    +-----------------+----------------------------------------------------+
    | sharpe          | The Sharpe ratio based on the _algorithm_ (rather  |
    |                 | than the static portfolio) returns.                |
    +-----------------+----------------------------------------------------+
    | beta            | The _algorithm_ beta to the benchmark.             |
    +-----------------+----------------------------------------------------+
    | alpha           | The _algorithm_ alpha to the benchmark.            |
    +-----------------+----------------------------------------------------+
    | excess_return   | The excess return of the algorithm over the        |
    |                 | treasuries.                                        |
    +-----------------+----------------------------------------------------+
    | max_drawdown    | The largest relative peak to relative trough move  |
    |                 | for the portfolio returns between self.start_date  |
    |                 | and self.end_date.                                 |
    +-----------------+----------------------------------------------------+

"""

import logbook
import datetime
import math
import numpy as np
import numpy.linalg as la
from zipline.utils.date_utils import epoch_now

# Module-level logbook logger, created with the name 'Risk'.
log = logbook.Logger('Risk')

def advance_by_months(dt, jump_in_months):
    """
    Return a copy of ``dt`` moved ``jump_in_months`` calendar months away.

    Only the year and month fields are changed; every other field of ``dt``
    is carried over untouched by ``dt.replace``.

    :param dt: object exposing ``year``, ``month`` and ``replace`` (for
        example a ``datetime.datetime``).
    :param jump_in_months: whole number of months to move by; may be zero
        or negative.
    :returns: the value of ``dt.replace`` for the computed year and month.
    :raises ValueError: propagated from ``dt.replace`` when the day of
        ``dt`` does not exist in the target month or the year is out of
        range.
    """
    # Work with zero-based months (january == 0): divmod then yields the
    # year carry and the month directly, with no special case for
    # december. divmod floors, so the carry stays an int on Python 2 and 3
    # alike (true division would hand dt.replace a float year) and negative
    # jumps roll back into the previous year correctly.
    years, month_index = divmod(dt.month - 1 + jump_in_months, 12)
    return dt.replace(year=dt.year + years, month=month_index + 1)


class DailyReturn():
    """
    One day's return value paired with the date it belongs to.

    The date is stored truncated to midnight so that two instances created
    at different times on the same day carry equal ``date`` values.
    """

    def __init__(self, date, returns):
        """
        :param date: ``datetime.datetime`` falling on the day of the return.
        :param returns: the return value for that day; stored as given.
        :raises AssertionError: if ``date`` is not a ``datetime.datetime``
            (only when assertions are enabled).
        """
        assert isinstance(date, datetime.datetime)
        # Clear every time-of-day field. microsecond has to be reset as
        # well, otherwise a stamp such as 15:30:45.123456 would normalise
        # to 00:00:00.123456 and fail to compare equal to midnight.
        self.date = date.replace(hour=0, minute=0, second=0, microsecond=0)
        self.returns = returns

    def to_dict(self):
        """Return ``{'dt': <date>, 'returns': <returns>}``."""
        return {
            'dt'      : self.date,
            'returns' : self.returns
        }

    def __repr__(self):
        return str(self.date) + " - " + str(self.returns)


class RiskMetrics():
    """
    Risk statistics for a series of daily returns over one date window.

    All metrics are computed eagerly in the constructor and stored as
    attributes; :py:meth:`to_dict` exposes a subset of them.
    """

    def __init__(self, start_date, end_date, returns, trading_environment):
        """
        :param start_date: first date (inclusive) of the window.
        :param end_date: last date (inclusive) of the window.
        :param returns: sequence of objects with ``date`` and ``returns``
            attributes. Its first and last entries bound the benchmark
            returns that are considered.
        :param trading_environment: object providing ``treasury_curves``,
            ``benchmark_returns`` and ``is_trading_day(date)``.
        :raises Exception: if the number of benchmark returns in the window
            differs from the number of algorithm returns, or if no treasury
            rate can be found (see :py:meth:`choose_treasury`).
        """
        self.treasury_curves = trading_environment.treasury_curves
        self.start_date = start_date
        self.end_date = end_date
        self.trading_environment = trading_environment
        self.algorithm_period_returns, self.algorithm_returns = \
            self.calculate_period_returns(returns)

        # Only benchmark entries inside the span covered by the supplied
        # returns are considered; calculate_period_returns narrows them
        # further to the [start_date, end_date] window.
        benchmark_returns = [
            x for x in self.trading_environment.benchmark_returns
            if returns[0].date <= x.date <= returns[-1].date
        ]

        self.benchmark_period_returns, self.benchmark_returns = \
            self.calculate_period_returns(benchmark_returns)

        if len(self.benchmark_returns) != len(self.algorithm_returns):
            # NOTE: the backslash continuation embeds the next line's
            # leading whitespace in the message; the literal is left as is
            # so the emitted text does not change.
            message = "Mismatch between benchmark_returns ({bm_count}) and \
            algorithm_returns ({algo_count}) in range {start} : {end}"
            message = message.format(
                bm_count=len(self.benchmark_returns),
                algo_count=len(self.algorithm_returns),
                start=start_date,
                end=end_date
            )
            raise Exception(message)

        # Order matters below: volatility needs trading_days, sharpe needs
        # volatility and the treasury return, alpha needs beta.
        self.trading_days = len(self.benchmark_returns)
        self.benchmark_volatility = self.calculate_volatility(
            self.benchmark_returns)
        self.algorithm_volatility = self.calculate_volatility(
            self.algorithm_returns)
        self.treasury_period_return = self.choose_treasury()
        self.sharpe = self.calculate_sharpe()
        self.beta, self.algorithm_covariance, self.benchmark_variance, \
            self.condition_number, self.eigen_values = self.calculate_beta()
        self.alpha = self.calculate_alpha()
        self.excess_return = (
            self.algorithm_period_returns - self.treasury_period_return)
        self.max_drawdown = self.calculate_max_drawdown()

    def to_dict(self):
        """
        Creates a dictionary representing the state of the risk report.

        The keys are ``trading_days``, ``benchmark_volatility``,
        ``algo_volatility``, ``treasury_period_return``,
        ``algorithm_period_return``, ``benchmark_period_return``,
        ``sharpe``, ``beta``, ``alpha``, ``excess_return``,
        ``max_drawdown`` and ``period_label`` (``end_date`` formatted as
        ``%Y-%m``). Any numeric value that is NaN is replaced with None.
        """
        period_label = self.end_date.strftime("%Y-%m")
        rval = {
            'trading_days'          : self.trading_days,
            'benchmark_volatility'  : self.benchmark_volatility,
            'algo_volatility'       : self.algorithm_volatility,
            'treasury_period_return': self.treasury_period_return,
            'algorithm_period_return' : self.algorithm_period_returns,
            'benchmark_period_return' : self.benchmark_period_returns,
            'sharpe'                : self.sharpe,
            'beta'                  : self.beta,
            'alpha'                 : self.alpha,
            'excess_return'         : self.excess_return,
            'max_drawdown'          : self.max_drawdown,
            'period_label'          : period_label
        }

        # period_label is a string, so it is never passed to np.isnan.
        # items() works on Python 2 and 3 (iteritems exists only on 2).
        return {
            key: None if key != 'period_label' and np.isnan(value) else value
            for key, value in rval.items()
        }

    def __repr__(self):
        metrics = [
            "algorithm_period_returns" ,
            "benchmark_period_returns" ,
            "excess_return"            ,
            "trading_days"             ,
            "benchmark_volatility"     ,
            "algorithm_volatility"     ,
            "sharpe"                   ,
            "algorithm_covariance"     ,
            "benchmark_variance"       ,
            "beta"                     ,
            "alpha"                    ,
            "max_drawdown"             ,
            "algorithm_returns"        ,
            "benchmark_returns"        ,
            "condition_number"         ,
            "eigen_values"
        ]

        # One "name:value" line per metric, in the order listed above.
        return '\n'.join(
            "{m}:{v}".format(m=metric, v=getattr(self, metric))
            for metric in metrics
        )

    def calculate_period_returns(self, daily_returns):
        """
        Compound the daily returns that fall inside the window.

        :param daily_returns: iterable of objects with ``date`` and
            ``returns`` attributes.
        :returns: ``(period_return, returns)`` where ``returns`` is the list
            of daily values dated within ``[start_date, end_date]`` on
            trading days, and ``period_return`` is the product of
            ``1 + r`` over that list, minus one.
        """
        #TODO: replace this with pandas.
        returns = [
            x.returns for x in daily_returns
            if self.start_date <= x.date <= self.end_date and
               self.trading_environment.is_trading_day(x.date)
        ]

        period_returns = 1.0
        for r in returns:
            period_returns = period_returns * (1.0 + r)

        return period_returns - 1.0, returns

    def calculate_volatility(self, daily_returns):
        """
        Sample standard deviation (``ddof=1``) of ``daily_returns`` scaled
        by the square root of ``self.trading_days``.
        """
        # TODO: we should be using an annualized number for the
        # square root, not the days in the period.
        return np.std(daily_returns, ddof=1) * math.sqrt(self.trading_days)

    def calculate_sharpe(self):
        """
        Excess return over the treasury return divided by the algorithm
        volatility; 0.0 when the volatility is exactly zero.

        http://en.wikipedia.org/wiki/Sharpe_ratio
        """
        if self.algorithm_volatility == 0:
            return 0.0

        excess = self.algorithm_period_returns - self.treasury_period_return
        return excess / self.algorithm_volatility

    def calculate_beta(self):
        r"""
        Beta of the algorithm returns against the benchmark returns.

        .. math::
            \beta_a = \frac {\mathrm{Cov}(r_a,r_p)}{\mathrm{Var}(r_p)}

        http://en.wikipedia.org/wiki/Beta_(finance)

        :returns: tuple ``(beta, algorithm_covariance, benchmark_variance,
            condition_number, eigen_values)``. The last two are the ratio of
            the largest to the smallest eigenvalue of the 2x2 covariance
            matrix, and those eigenvalues.
        """
        # The docstring is raw so the backslashes of the formula are kept
        # literally instead of being read as escape sequences.

        # It doesn't make much sense to calculate beta for less than two
        # days, so return zeros and an empty eigenvalue list.
        if len(self.algorithm_returns) < 2:
            return 0.0, 0.0, 0.0, 0.0, []

        returns_matrix = np.vstack(
            [self.algorithm_returns, self.benchmark_returns])
        C = np.cov(returns_matrix)
        eigen_values = la.eigvals(C)
        condition_number = max(eigen_values) / min(eigen_values)
        algorithm_covariance = C[0][1]
        benchmark_variance = C[1][1]
        beta = algorithm_covariance / benchmark_variance

        return (
            beta,
            algorithm_covariance,
            benchmark_variance,
            condition_number,
            eigen_values
        )

    def calculate_alpha(self):
        """
        Algorithm period return minus the return predicted from beta, the
        benchmark period return and the treasury period return.

        http://en.wikipedia.org/wiki/Alpha_(investment)
        """
        benchmark_excess = (
            self.benchmark_period_returns - self.treasury_period_return)
        return self.algorithm_period_returns - (
            self.treasury_period_return + self.beta * benchmark_excess)

    def calculate_max_drawdown(self):
        """
        Largest peak-to-trough decline of the compounded algorithm returns.

        :returns: ``1 - exp(d)`` where ``d`` is the most negative difference
            between the cumulative log return and its running maximum;
            0.0 when there are no returns.
        """
        compounded_returns = []
        cur_return = 0.0
        for r in self.algorithm_returns:
            try:
                cur_return = math.log(1.0 + r) + cur_return
            #this is a guard for a single day returning -100%
            except ValueError:
                log.debug("{cur} return, zeroing the returns".format(cur=cur_return))
                cur_return = 0.0
            compounded_returns.append(cur_return)

        cur_max = None
        max_drawdown = None
        for cur in compounded_returns:
            # Track the running peak, then measure how far below it we are.
            if cur_max is None or cur > cur_max:
                cur_max = cur

            drawdown = cur - cur_max
            if max_drawdown is None or drawdown < max_drawdown:
                max_drawdown = drawdown

        if max_drawdown is None:
            return 0.0

        return 1.0 - math.exp(max_drawdown)

    def choose_treasury(self):
        """
        Pick the treasury duration matching the window length and return
        the treasury rate prorated over the window.

        Sets ``self.treasury_duration`` and, when a curve is found,
        ``self.treasury_curve``.

        :returns: ``rate * (days_in_window + 1) / 365``.
        :raises Exception: if no curve exists for ``end_date`` or any of the
            six following days, or if the curve has no rate for the chosen
            duration.
        """
        td = self.end_date - self.start_date

        # (longest window in days, duration label), shortest first; windows
        # longer than every limit fall back to the 30 year rate.
        duration_limits = (
            (31, '1month'),
            (93, '3month'),
            (186, '6month'),
            (366, '1year'),
            (365 * 2 + 1, '2year'),
            (365 * 3 + 1, '3year'),
            (365 * 5 + 2, '5year'),
            (365 * 7 + 2, '7year'),
            (365 * 10 + 2, '10year'),
        )
        self.treasury_duration = '30year'
        for max_days, label in duration_limits:
            if td.days <= max_days:
                self.treasury_duration = label
                break

        one_day = datetime.timedelta(days=1)

        curve = None
        # in case end date is not a trading day, search for the next market
        # day for an interest rate
        for i in range(7):
            candidate = self.end_date + i * one_day
            if candidate in self.treasury_curves:
                curve = self.treasury_curves[candidate]
                break

        if curve:
            self.treasury_curve = curve
            rate = curve[self.treasury_duration]
            #1month note data begins in 8/2001, so we can use 3month instead.
            if rate is None and self.treasury_duration == '1month':
                rate = curve['3month']
            if rate is not None:
                # 365.0 forces true division whatever the type of rate.
                return rate * (td.days + 1) / 365.0

        # Reached when no curve was found at all as well as when the curve
        # lacks the rate: raising in both cases avoids handing None back to
        # the arithmetic in the constructor.
        # NOTE: the backslash continuation embeds the next line's leading
        # whitespace in the message; the literal is left as is so the
        # emitted text does not change.
        message = "no rate for end date = {dt} and term = {term}. Check \
            that date doesn't exceed treasury history range."
        message = message.format(
            dt=self.end_date,
            term=self.treasury_duration
        )
        raise Exception(message)


class RiskReport():
    """
    Collection of RiskMetrics computed over rolling 1, 3, 6 and 12 month
    windows of an algorithm's daily returns.
    """

    def __init__(
        self,
        algorithm_returns,
        trading_environment,
        exceeded_max_loss=False):
        """
        algorithm_returns needs to be a list of daily_return objects
        sorted in date ascending order
        """

        self.algorithm_returns = algorithm_returns
        self.trading_environment = trading_environment
        self.exceeded_max_loss = exceeded_max_loss
        self.created = epoch_now()

        # With no returns at all, fall back to the dates configured on the
        # trading environment; otherwise span the returns themselves.
        if len(self.algorithm_returns) == 0:
            start_date = self.trading_environment.period_start
            end_date = self.trading_environment.period_end
        else:
            first_return = self.algorithm_returns[0]
            last_return = self.algorithm_returns[-1]
            start_date = first_return.date
            end_date = last_return.date

        self.month_periods = self.periodsInRange(1, start_date, end_date)
        self.three_month_periods = self.periodsInRange(3, start_date, end_date)
        self.six_month_periods = self.periodsInRange(6, start_date, end_date)
        self.year_periods = self.periodsInRange(12, start_date, end_date)

    def to_dict(self):
        """
        RiskMetrics are calculated for rolling windows in four lengths,
        reported under these keys::
            - one_month
            - three_month
            - six_month
            - twelve_month

        The value of each of those entries is a list of RiskMetrics dicts
        for windows of that length. The result also carries
        ``exceeded_max_loss`` and ``created``.

        See :py:meth:`RiskMetrics.to_dict` for the detailed list of fields
        provided for each period.
        """
        report = {}
        report['one_month'] = [p.to_dict() for p in self.month_periods]
        report['three_month'] = [
            p.to_dict() for p in self.three_month_periods
        ]
        report['six_month'] = [p.to_dict() for p in self.six_month_periods]
        report['twelve_month'] = [p.to_dict() for p in self.year_periods]
        report['exceeded_max_loss'] = self.exceeded_max_loss
        report['created'] = self.created
        return report

    def periodsInRange(self, months_per, start, end):
        """
        Build RiskMetrics for every window of ``months_per`` calendar
        months, stepping one month at a time, starting at the first day of
        the month of ``start``. Windows ending after the last day of the
        month of ``end`` are not produced.

        Returns an empty list when there are no algorithm returns.
        """
        one_day = datetime.timedelta(days = 1)
        window_start = start.replace(day=1)
        periods = []

        # in edge cases (all sids filtered out, start/end are adjacent)
        # a test will not generate any returns data
        if len(self.algorithm_returns) == 0:
            return periods

        # the limit is the last day of the calendar month holding `end`,
        # so a return series that stops mid-month still gets that month.
        month_of_end = end.replace(day=1)
        last_allowed_end = advance_by_months(month_of_end, 1) - one_day

        window_end = advance_by_months(window_start, months_per) - one_day
        while window_end <= last_allowed_end:
            periods.append(
                RiskMetrics(
                    start_date=window_start,
                    end_date=window_end,
                    returns=self.algorithm_returns,
                    trading_environment=self.trading_environment
                )
            )
            window_start = advance_by_months(window_start, 1)
            window_end = advance_by_months(window_start, months_per) - one_day

        return periods

    def find_metric_by_end(self, end_date, duration, metric):
        """
        Look up attribute ``metric`` on the period of the collection named
        ``<duration>_periods`` whose ``end_date`` equals ``end_date``.

        Returns None unless exactly one period matches.
        """
        periods = getattr(self, duration + "_periods")
        matches = [
            getattr(period, metric)
            for period in periods
            if period.end_date == end_date
        ]
        if len(matches) != 1:
            return None
        return matches[0]