From ae97e75388a1f0af3d372e3490b44874d9cea86e Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Fri, 18 Sep 2015 11:18:42 -0400 Subject: [PATCH 1/4] MAINT: Only calc position values once per packet. Instead of calculating the position values for each stat result (e.g. gross_exposure, net_liquidation, etc.), get the positions upfront and then calculate the period and position stats in order, passing each value explicitly to the ones that follow it in the dependency chain. For example, gross_value depends on long_value and short_value, and calculating each of those called the position_values property. Removing the repeated calls to position_values (and position_exposures) removes the need for caching the last sale prices and position amounts in separate vectors, since it is inexpensive enough to read those values off of the positions dictionary held in the position tracker. This patch gives a small gain for portfolios of size ~500, but the main intent is to clear the path to not storing last_sale_prices on the position objects at all. Removing the entire caching layer in this class makes that change easier to apply. Removing the extra calls to position_values also made this class easier to step through/reason about when splicing in the new last sale price access. 
--- tests/test_perf_tracking.py | 46 ++-- zipline/finance/performance/period.py | 98 +++++--- .../finance/performance/position_tracker.py | 230 +++++++++++------- 3 files changed, 229 insertions(+), 145 deletions(-) diff --git a/tests/test_perf_tracking.py b/tests/test_perf_tracking.py index 23e27835..72319220 100644 --- a/tests/test_perf_tracking.py +++ b/tests/test_perf_tracking.py @@ -35,6 +35,7 @@ from six.moves import range, zip import zipline.utils.factory as factory import zipline.finance.performance as perf +from zipline.finance.performance import position_tracker from zipline.finance.slippage import Transaction, create_transaction import zipline.utils.math_utils as zp_math @@ -2181,22 +2182,22 @@ class TestPositionTracker(unittest.TestCase): np.bool_(False) """ pt = perf.PositionTracker(self.env.asset_finder) + pos_stats = position_tracker.calc_position_stats(pt) stats = [ - 'calculate_positions_value', - '_net_exposure', - '_gross_value', - '_gross_exposure', - '_short_value', - '_short_exposure', - '_shorts_count', - '_long_value', - '_long_exposure', - '_longs_count', + 'net_value', + 'net_exposure', + 'gross_value', + 'gross_exposure', + 'short_value', + 'short_exposure', + 'shorts_count', + 'long_value', + 'long_exposure', + 'longs_count', ] for name in stats: - meth = getattr(pt, name) - val = meth() + val = getattr(pos_stats, name) self.assertEquals(val, 0) self.assertNotIsInstance(val, (bool, np.bool_)) @@ -2234,20 +2235,22 @@ class TestPositionTracker(unittest.TestCase): pt.update_positions({1: pos1, 2: pos2, 3: pos3, 4: pos4}) # Test long-only methods - self.assertEqual(100, pt._long_value()) - self.assertEqual(100 + 300000, pt._long_exposure()) + + pos_stats = position_tracker.calc_position_stats(pt) + self.assertEqual(100, pos_stats.long_value) + self.assertEqual(100 + 300000, pos_stats.long_exposure) # Test short-only methods - self.assertEqual(-200, pt._short_value()) - self.assertEqual(-200 - 400000, pt._short_exposure()) + 
self.assertEqual(-200, pos_stats.short_value) + self.assertEqual(-200 - 400000, pos_stats.short_exposure) # Test gross and net values - self.assertEqual(100 + 200, pt._gross_value()) - self.assertEqual(100 - 200, pt._net_value()) + self.assertEqual(100 + 200, pos_stats.gross_value) + self.assertEqual(100 - 200, pos_stats.net_value) # Test gross and net exposures - self.assertEqual(100 + 200 + 300000 + 400000, pt._gross_exposure()) - self.assertEqual(100 - 200 + 300000 - 400000, pt._net_exposure()) + self.assertEqual(100 + 200 + 300000 + 400000, pos_stats.gross_exposure) + self.assertEqual(100 - 200 + 300000 - 400000, pos_stats.net_exposure) def test_serialization(self): pt = perf.PositionTracker(self.env.asset_finder) @@ -2260,9 +2263,6 @@ class TestPositionTracker(unittest.TestCase): pt.update_positions({1: pos1, 3: pos3}) p_string = dumps_with_persistent_ids(pt) test = loads_with_persistent_ids(p_string, env=self.env) - nt.assert_dict_equal(test._position_amounts, pt._position_amounts) - nt.assert_dict_equal(test._position_last_sale_prices, - pt._position_last_sale_prices) nt.assert_count_equal(test.positions.keys(), pt.positions.keys()) for sid in pt.positions: nt.assert_dict_equal(test.positions[sid].__dict__, diff --git a/zipline/finance/performance/period.py b/zipline/finance/performance/period.py index 6721cc9a..aa348aa5 100644 --- a/zipline/finance/performance/period.py +++ b/zipline/finance/performance/period.py @@ -75,6 +75,7 @@ import logbook import numpy as np +from collections import namedtuple from zipline.assets import Future try: @@ -90,11 +91,49 @@ import zipline.protocol as zp from zipline.utils.serialization_utils import ( VERSION_LABEL ) +from zipline.finance.performance.position_tracker import calc_position_stats log = logbook.Logger('Performance') TRADE_TYPE = zp.DATASOURCE_TYPE.TRADE +PeriodStats = namedtuple('PeriodStats', + ['net_liquidation', + 'gross_leverage', + 'net_leverage']) + + +def calc_net_liquidation(ending_cash, long_value, 
short_value): + return ending_cash + long_value + short_value + + +def calc_gross_leverage(gross_exposure, net_liq): + if net_liq != 0: + return gross_exposure / net_liq + + return np.inf + + +def calc_net_leverage(net_exposure, net_liq): + if net_liq != 0: + return net_exposure / net_liq + + return np.inf + + +def calc_period_stats(pos_stats, ending_cash): + net_liq = calc_net_liquidation(ending_cash, + pos_stats.long_value, + pos_stats.short_value) + gross_leverage = calc_gross_leverage(pos_stats.gross_exposure, net_liq) + net_leverage = calc_net_leverage(pos_stats.net_exposure, net_liq) + + return PeriodStats( + net_liquidation=net_liq, + gross_leverage=gross_leverage, + net_leverage=net_leverage) + + class PerformancePeriod(object): def __init__( @@ -178,8 +217,9 @@ class PerformancePeriod(object): def calculate_performance(self): pt = self.position_tracker - self.ending_value = pt.calculate_positions_value() - self.ending_exposure = pt.calculate_positions_exposure() + pos_stats = calc_position_stats(pt) + self.ending_value = pos_stats.net_value + self.ending_exposure = pos_stats.net_exposure total_at_start = self.starting_cash + self.starting_value self.ending_cash = self.starting_cash + self.period_cash_flow @@ -245,27 +285,10 @@ class PerformancePeriod(object): def position_amounts(self): return self.position_tracker.position_amounts - @property - def _net_liquidation_value(self): - pt = self.position_tracker - return self.ending_cash + pt._long_value() + pt._short_value() - - def _gross_leverage(self): - net_liq = self._net_liquidation_value - if net_liq != 0: - return self.position_tracker._gross_exposure() / net_liq - - return np.inf - - def _net_leverage(self): - net_liq = self._net_liquidation_value - if net_liq != 0: - return self.position_tracker._net_exposure() / net_liq - - return np.inf - def __core_dict(self): - pt = self.position_tracker + pos_stats = calc_position_stats(self.position_tracker) + period_stats = calc_period_stats(pos_stats, 
self.ending_cash) + rval = { 'ending_value': self.ending_value, 'ending_exposure': self.ending_exposure, @@ -281,14 +304,14 @@ class PerformancePeriod(object): 'returns': self.returns, 'period_open': self.period_open, 'period_close': self.period_close, - 'gross_leverage': self._gross_leverage(), - 'net_leverage': self._net_leverage(), - 'short_exposure': pt._short_exposure(), - 'long_exposure': pt._long_exposure(), - 'short_value': pt._short_value(), - 'long_value': pt._long_value(), - 'longs_count': pt._longs_count(), - 'shorts_count': pt._shorts_count() + 'gross_leverage': period_stats.gross_leverage, + 'net_leverage': period_stats.net_leverage, + 'short_exposure': pos_stats.short_exposure, + 'long_exposure': pos_stats.long_exposure, + 'short_value': pos_stats.short_value, + 'long_value': pos_stats.long_value, + 'longs_count': pos_stats.longs_count, + 'shorts_count': pos_stats.shorts_count, } return rval @@ -367,6 +390,10 @@ class PerformancePeriod(object): def as_account(self): account = self._account_store + pt = self.position_tracker + pos_stats = calc_position_stats(pt) + period_stats = calc_period_stats(pos_stats, self.ending_cash) + # If no attribute is found on the PerformancePeriod resort to the # following default values. If an attribute is found use the existing # value. 
For instance, a broker may provide updates to these @@ -402,11 +429,12 @@ class PerformancePeriod(object): self.ending_cash / (self.ending_cash + self.ending_value)) account.day_trades_remaining = \ getattr(self, 'day_trades_remaining', float('inf')) - account.leverage = \ - getattr(self, 'leverage', self._gross_leverage()) - account.net_leverage = self._net_leverage() - account.net_liquidation = \ - getattr(self, 'net_liquidation', self._net_liquidation_value) + account.leverage = getattr(self, 'leverage', + period_stats.gross_leverage) + account.net_leverage = period_stats.net_leverage + + account.net_liquidation = getattr(self, 'net_liquidation', + period_stats.net_liquidation) return account def __getstate__(self): diff --git a/zipline/finance/performance/position_tracker.py b/zipline/finance/performance/position_tracker.py index 04e66a68..4abe65b4 100644 --- a/zipline/finance/performance/position_tracker.py +++ b/zipline/finance/performance/position_tracker.py @@ -4,6 +4,7 @@ import logbook import numpy as np import pandas as pd from pandas.lib import checknull +from collections import namedtuple try: # optional cython based OrderedDict from cyordereddict import OrderedDict @@ -27,6 +28,140 @@ from . 
position import positiondict log = logbook.Logger('Performance') +PositionStats = namedtuple('PositionStats', + ['net_exposure', + 'gross_value', + 'gross_exposure', + 'short_value', + 'short_exposure', + 'shorts_count', + 'long_value', + 'long_exposure', + 'longs_count', + 'net_value']) + + +def calc_position_values(amounts, + last_sale_prices, + value_multipliers): + iter_amount_price_multiplier = zip( + amounts, + last_sale_prices, + itervalues(value_multipliers), + ) + return [ + price * amount * multiplier for + price, amount, multiplier in iter_amount_price_multiplier + ] + + +def calc_net_value(position_values): + if len(position_values) == 0: + return np.float64(0) + + return sum(position_values) + + +def calc_position_exposures(amounts, + last_sale_prices, + exposure_multipliers): + iter_amount_price_multiplier = zip( + amounts, + last_sale_prices, + itervalues(exposure_multipliers), + ) + return [ + price * amount * multiplier for + price, amount, multiplier in iter_amount_price_multiplier + ] + + +def calc_long_value(position_values): + return sum(i for i in position_values if i > 0) + + +def calc_short_value(position_values): + return sum(i for i in position_values if i < 0) + + +def calc_long_exposure(position_exposures): + return sum(i for i in position_exposures if i > 0) + + +def calc_short_exposure(position_exposures): + return sum(i for i in position_exposures if i < 0) + + +def calc_longs_count(position_exposures): + return sum(1 for i in position_exposures if i > 0) + + +def calc_shorts_count(position_exposures): + return sum(1 for i in position_exposures if i < 0) + + +def calc_gross_exposure(long_exposure, short_exposure): + return long_exposure + abs(short_exposure) + + +def calc_gross_value(long_value, short_value): + return long_value + abs(short_value) + + +def calc_net_exposure(position_exposures): + if len(position_exposures) == 0: + return np.float64(0) + + return sum(position_exposures) + + +def calc_position_stats(pt): + amounts = [] 
+ last_sale_prices = [] + for pos in itervalues(pt.positions): + amounts.append(pos.amount) + last_sale_prices.append(pos.last_sale_price) + + position_value_multipliers = pt._position_value_multipliers + position_exposure_multipliers = pt._position_exposure_multipliers + + position_values = calc_position_values( + amounts, + last_sale_prices, + position_value_multipliers + ) + + position_exposures = calc_position_exposures( + amounts, + last_sale_prices, + position_exposure_multipliers + ) + + long_value = calc_long_value(position_values) + short_value = calc_short_value(position_values) + gross_value = calc_gross_value(long_value, short_value) + long_exposure = calc_long_exposure(position_exposures) + short_exposure = calc_short_exposure(position_exposures) + gross_exposure = calc_gross_exposure(long_exposure, short_exposure) + net_exposure = calc_net_exposure(position_exposures) + longs_count = calc_longs_count(position_exposures) + shorts_count = calc_shorts_count(position_exposures) + net_value = calc_net_value(position_values) + + return PositionStats( + long_value=long_value, + gross_value=gross_value, + short_value=short_value, + long_exposure=long_exposure, + short_exposure=short_exposure, + gross_exposure=gross_exposure, + net_exposure=net_exposure, + longs_count=longs_count, + shorts_count=shorts_count, + net_value=net_value + ) + + class PositionTracker(object): def __init__(self, asset_finder): @@ -35,8 +170,6 @@ class PositionTracker(object): # sid => position object self.positions = positiondict() # Arrays for quick calculations of positions value - self._position_amounts = OrderedDict() - self._position_last_sale_prices = OrderedDict() self._position_value_multipliers = OrderedDict() self._position_exposure_multipliers = OrderedDict() self._position_payout_multipliers = OrderedDict() @@ -145,7 +278,6 @@ class PositionTracker(object): old_price = pos.last_sale_price pos.last_sale_date = event.dt pos.last_sale_price = price - 
self._position_last_sale_prices[sid] = price # Calculate cash adjustment on assets with multipliers return ((price - old_price) * self._position_payout_multipliers[sid] @@ -155,8 +287,6 @@ class PositionTracker(object): # update positions in batch self.positions.update(positions) for sid, pos in iteritems(positions): - self._position_amounts[sid] = pos.amount - self._position_last_sale_prices[sid] = pos.last_sale_price self._update_asset(sid) def update_position(self, sid, amount=None, last_sale_price=None, @@ -165,13 +295,9 @@ class PositionTracker(object): if amount is not None: pos.amount = amount - self._position_amounts[sid] = amount - self._position_values = None # invalidate cache self._update_asset(sid=sid) if last_sale_price is not None: pos.last_sale_price = last_sale_price - self._position_last_sale_prices[sid] = last_sale_price - self._position_values = None # invalidate cache if last_sale_date is not None: pos.last_sale_date = last_sale_date if cost_basis is not None: @@ -183,8 +309,6 @@ class PositionTracker(object): sid = txn.sid position = self.positions[sid] position.update(txn) - self._position_amounts[sid] = position.amount - self._position_last_sale_prices[sid] = position.last_sale_price self._update_asset(sid) def handle_commission(self, commission): @@ -193,81 +317,12 @@ class PositionTracker(object): self.positions[commission.sid].\ adjust_commission_cost_basis(commission) - @property - def position_values(self): - iter_amount_price_multiplier = zip( - itervalues(self._position_amounts), - itervalues(self._position_last_sale_prices), - itervalues(self._position_value_multipliers), - ) - return [ - price * amount * multiplier for - price, amount, multiplier in iter_amount_price_multiplier - ] - - @property - def position_exposures(self): - iter_amount_price_multiplier = zip( - itervalues(self._position_amounts), - itervalues(self._position_last_sale_prices), - itervalues(self._position_exposure_multipliers), - ) - return [ - price * amount * 
multiplier for - price, amount, multiplier in iter_amount_price_multiplier - ] - - def calculate_positions_value(self): - if len(self.position_values) == 0: - return np.float64(0) - - return sum(self.position_values) - - def calculate_positions_exposure(self): - if len(self.position_exposures) == 0: - return np.float64(0) - - return sum(self.position_exposures) - - def _longs_count(self): - return sum(1 for i in self.position_exposures if i > 0) - - def _long_exposure(self): - return sum(i for i in self.position_exposures if i > 0) - - def _long_value(self): - return sum(i for i in self.position_values if i > 0) - - def _shorts_count(self): - return sum(1 for i in self.position_exposures if i < 0) - - def _short_exposure(self): - return sum(i for i in self.position_exposures if i < 0) - - def _short_value(self): - return sum(i for i in self.position_values if i < 0) - - def _gross_exposure(self): - return self._long_exposure() + abs(self._short_exposure()) - - def _gross_value(self): - return self._long_value() + abs(self._short_value()) - - def _net_exposure(self): - return self.calculate_positions_exposure() - - def _net_value(self): - return self.calculate_positions_value() - def handle_split(self, split): if split.sid in self.positions: # Make the position object handle the split. It returns the # leftover cash from a fractional share, if there is any. position = self.positions[split.sid] leftover_cash = position.handle_split(split) - self._position_amounts[split.sid] = position.amount - self._position_last_sale_prices[split.sid] = \ - position.last_sale_price self._update_asset(split.sid) return leftover_cash @@ -333,8 +388,6 @@ class PositionTracker(object): position = self.positions[stock] position.amount += share_count - self._position_amounts[stock] = position.amount - self._position_last_sale_prices[stock] = position.last_sale_price self._update_asset(stock) # Add cash equal to the net cash payed from all dividends. 
Note that @@ -345,15 +398,20 @@ class PositionTracker(object): return net_cash_payment def maybe_create_close_position_transaction(self, event): - if not self._position_amounts.get(event.sid): + try: + pos = self.positions[event.sid] + amount = pos.amount + if amount == 0: + return None + except KeyError: return None if 'price' in event: price = event.price else: - price = self._position_last_sale_prices[event.sid] + price = pos.last_sale_price txn = Transaction( sid=event.sid, - amount=(-1 * self._position_amounts[event.sid]), + amount=(-1 * pos.amount), dt=event.dt, price=price, commission=0, @@ -422,8 +480,6 @@ class PositionTracker(object): self._auto_close_position_sids = state['auto_close_position_sids'] # Arrays for quick calculations of positions value - self._position_amounts = OrderedDict() - self._position_last_sale_prices = OrderedDict() self._position_value_multipliers = OrderedDict() self._position_exposure_multipliers = OrderedDict() self._position_payout_multipliers = OrderedDict() From 31d214d8ff81d6d01d0adab826d324135f63cd9e Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Fri, 25 Sep 2015 13:58:44 -0400 Subject: [PATCH 2/4] MAINT: Combine leverage calculations. Instead of having two leverage functions, whose differences were the parameter names, add a `calc_leverage` function, with the calling code determining whether it is gross or net by the type of exposure passed in. 
--- zipline/finance/performance/period.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/zipline/finance/performance/period.py b/zipline/finance/performance/period.py index aa348aa5..c292b191 100644 --- a/zipline/finance/performance/period.py +++ b/zipline/finance/performance/period.py @@ -107,16 +107,9 @@ def calc_net_liquidation(ending_cash, long_value, short_value): return ending_cash + long_value + short_value -def calc_gross_leverage(gross_exposure, net_liq): +def calc_leverage(exposure, net_liq): if net_liq != 0: - return gross_exposure / net_liq - - return np.inf - - -def calc_net_leverage(net_exposure, net_liq): - if net_liq != 0: - return net_exposure / net_liq + return exposure / net_liq return np.inf @@ -125,8 +118,8 @@ def calc_period_stats(pos_stats, ending_cash): net_liq = calc_net_liquidation(ending_cash, pos_stats.long_value, pos_stats.short_value) - gross_leverage = calc_gross_leverage(pos_stats.gross_exposure, net_liq) - net_leverage = calc_net_leverage(pos_stats.net_exposure, net_liq) + gross_leverage = calc_leverage(pos_stats.gross_exposure, net_liq) + net_leverage = calc_leverage(pos_stats.net_exposure, net_liq) return PeriodStats( net_liquidation=net_liq, From c88412e3f1ac28eb6affb8ffc4caa5b9f8d710e7 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Fri, 25 Sep 2015 16:42:25 -0400 Subject: [PATCH 3/4] TST: Add coverage for long and short count. Make basic checks of position stats test all fields, by adding coverage for `shorts_count` and `longs_count`. 
--- tests/test_perf_tracking.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_perf_tracking.py b/tests/test_perf_tracking.py index 72319220..dcb0bab9 100644 --- a/tests/test_perf_tracking.py +++ b/tests/test_perf_tracking.py @@ -2239,10 +2239,12 @@ class TestPositionTracker(unittest.TestCase): pos_stats = position_tracker.calc_position_stats(pt) self.assertEqual(100, pos_stats.long_value) self.assertEqual(100 + 300000, pos_stats.long_exposure) + self.assertEqual(2, pos_stats.longs_count) # Test short-only methods self.assertEqual(-200, pos_stats.short_value) self.assertEqual(-200 - 400000, pos_stats.short_exposure) + self.assertEqual(2, pos_stats.shorts_count) # Test gross and net values self.assertEqual(100 + 200, pos_stats.gross_value) self.assertEqual(100 - 200, pos_stats.net_value) From 20c64b591f3050ff5eb1c65112f55368e9db5bfe Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Fri, 25 Sep 2015 16:43:34 -0400 Subject: [PATCH 4/4] MAINT: Clean up net calculations. - Combine the net value and exposure functions into `calc_net` since they use the same logic. - Change the handling of an empty list to use a start value of 0.0. More concise, and reduces the number of return points from the function to one. --- .../finance/performance/position_tracker.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/zipline/finance/performance/position_tracker.py b/zipline/finance/performance/position_tracker.py index 4abe65b4..4fe30982 100644 --- a/zipline/finance/performance/position_tracker.py +++ b/zipline/finance/performance/position_tracker.py @@ -55,11 +55,9 @@ def calc_position_values(amounts, ] -def calc_net_value(position_values): - if len(position_values) == 0: - return np.float64(0) - - return sum(position_values) +def calc_net(values): + # Returns 0.0 if there are no values. 
+ return sum(values, np.float64()) def calc_position_exposures(amounts, @@ -108,13 +106,6 @@ def calc_gross_value(long_value, short_value): return long_value + abs(short_value) -def calc_net_exposure(position_exposures): - if len(position_exposures) == 0: - return np.float64(0) - - return sum(position_exposures) - - def calc_position_stats(pt): amounts = [] last_sale_prices = [] @@ -143,10 +134,10 @@ def calc_position_stats(pt): long_exposure = calc_long_exposure(position_exposures) short_exposure = calc_short_exposure(position_exposures) gross_exposure = calc_gross_exposure(long_exposure, short_exposure) - net_exposure = calc_net_exposure(position_exposures) + net_exposure = calc_net(position_exposures) longs_count = calc_longs_count(position_exposures) shorts_count = calc_shorts_count(position_exposures) - net_value = calc_net_value(position_values) + net_value = calc_net(position_values) return PositionStats( long_value=long_value,