From 37c56b9aa4f49160fe56e37cb51ca3cb27c8b30f Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Sat, 19 Oct 2013 22:31:47 -0400 Subject: [PATCH] MAINT: Use Series throughout for daily returns. Remove the lists of DailyReturn objects in favor of using pd.Series to store the return values. Should make it easier to inspect the values when stepping through, make the windowing of data to a certain range easier by using index-based selection, and have some performance increases due to removing object creation and member access. --- tests/risk/test_risk_period.py | 6 ++--- tests/test_finance.py | 10 +++---- tests/test_perf_tracking.py | 10 +++---- zipline/algorithm.py | 10 +++---- zipline/data/benchmarks.py | 3 ++- zipline/data/loader.py | 8 +----- zipline/examples/dual_moving_average.py | 6 ++--- zipline/finance/performance/tracker.py | 2 +- zipline/finance/risk/period.py | 8 +++--- zipline/finance/risk/report.py | 9 ++----- zipline/finance/trading.py | 16 +++++------ zipline/protocol.py | 19 -------------- zipline/utils/factory.py | 35 +++++++------------------ 13 files changed, 44 insertions(+), 98 deletions(-) diff --git a/tests/risk/test_risk_period.py b/tests/risk/test_risk_period.py index 5ea9c392..b8a6a113 100644 --- a/tests/risk/test_risk_period.py +++ b/tests/risk/test_risk_period.py @@ -84,7 +84,7 @@ class TestRisk(unittest.TestCase): def test_factory(self): returns = [0.1] * 100 r_objects = factory.create_returns_from_list(returns, self.sim_params) - self.assertTrue(r_objects[-1].date <= + self.assertTrue(r_objects.index[-1] <= datetime.datetime( year=2006, month=12, day=31, tzinfo=pytz.utc)) @@ -92,8 +92,8 @@ class TestRisk(unittest.TestCase): returns = factory.create_returns_from_list( [1.0, -0.5, 0.8, .17, 1.0, -0.1, -0.45], self.sim_params) # 200, 100, 180, 210.6, 421.2, 379.8, 208.494 - metrics = risk.RiskMetricsPeriod(returns[0].date, - returns[-1].date, + metrics = risk.RiskMetricsPeriod(returns.index[0], + returns.index[-1], returns) 
self.assertEqual(metrics.max_drawdown, 0.505) diff --git a/tests/test_finance.py b/tests/test_finance.py index 075df882..09b144f3 100644 --- a/tests/test_finance.py +++ b/tests/test_finance.py @@ -342,14 +342,14 @@ class FinanceTestCase(TestCase): tracker = PerformanceTracker(sim_params) benchmark_returns = [ - Event({'dt': ret.date, - 'returns': ret.returns, + Event({'dt': dt, + 'returns': ret, 'type': zipline.protocol.DATASOURCE_TYPE.BENCHMARK, 'source_id': 'benchmarks'}) - for ret in trading.environment.benchmark_returns - if ret.date.date() >= sim_params.period_start.date() - and ret.date.date() <= sim_params.period_end.date() + for dt, ret in trading.environment.benchmark_returns.iterkv() + if dt.date() >= sim_params.period_start.date() + and dt.date() <= sim_params.period_end.date() ] generated_events = date_sorted_sources(generated_trades, diff --git a/tests/test_perf_tracking.py b/tests/test_perf_tracking.py index c48d0352..bc9eb373 100644 --- a/tests/test_perf_tracking.py +++ b/tests/test_perf_tracking.py @@ -52,14 +52,14 @@ def create_txn(event, price, amount): def benchmark_events_in_range(sim_params): return [ - Event({'dt': ret.date, - 'returns': ret.returns, + Event({'dt': dt, + 'returns': ret, 'type': zipline.protocol.DATASOURCE_TYPE.BENCHMARK, 'source_id': 'benchmarks'}) - for ret in trading.environment.benchmark_returns - if ret.date.date() >= sim_params.period_start.date() - and ret.date.date() <= sim_params.period_end.date() + for dt, ret in trading.environment.benchmark_returns.iterkv() + if dt.date() >= sim_params.period_start.date() + and dt.date() <= sim_params.period_end.date() ] diff --git a/zipline/algorithm.py b/zipline/algorithm.py index a12715ae..158b83df 100644 --- a/zipline/algorithm.py +++ b/zipline/algorithm.py @@ -174,13 +174,13 @@ class TradingAlgorithm(object): """ if self.benchmark_return_source is None: benchmark_return_source = [ - Event({'dt': ret.date, - 'returns': ret.returns, + Event({'dt': dt, + 'returns': ret, 'type': 
zipline.protocol.DATASOURCE_TYPE.BENCHMARK, 'source_id': 'benchmarks'}) - for ret in trading.environment.benchmark_returns - if ret.date.date() >= sim_params.period_start.date() - and ret.date.date() <= sim_params.period_end.date() + for dt, ret in trading.environment.benchmark_returns.iterkv() + if dt.date() >= sim_params.period_start.date() + and dt.date() <= sim_params.period_end.date() ] else: benchmark_return_source = self.benchmark_return_source diff --git a/zipline/data/benchmarks.py b/zipline/data/benchmarks.py index 6f32a667..091346c7 100644 --- a/zipline/data/benchmarks.py +++ b/zipline/data/benchmarks.py @@ -27,7 +27,8 @@ from . loader_utils import ( source_to_records, Mapping ) -from zipline.protocol import DailyReturn + +DailyReturn = collections.namedtuple('DailyReturn', ['date', 'returns']) class BenchmarkDataNotFoundError(Exception): diff --git a/zipline/data/loader.py b/zipline/data/loader.py index 6e0ea68e..703d85c5 100644 --- a/zipline/data/loader.py +++ b/zipline/data/loader.py @@ -29,7 +29,6 @@ from . treasuries import get_treasury_data from . import benchmarks from . benchmarks import get_benchmark_returns -from zipline.protocol import DailyReturn from zipline.utils.tradingcalendar import trading_days logger = logbook.Logger('Loader') @@ -166,11 +165,6 @@ Fetching data from Yahoo Finance. 
benchmark_returns = saved_benchmarks benchmark_returns = benchmark_returns.tz_localize('UTC') - bm_returns = [] - for dt, returns in benchmark_returns.iterkv(): - daily_return = DailyReturn(date=dt, returns=returns) - bm_returns.append(daily_return) - try: fp_tr = get_datafile('treasury_curves.csv', "rb") except IOError: @@ -208,7 +202,7 @@ Fetching data from data.treasury.gov ((dt, c) for dt, c in tr_curves.iteritems()), key=lambda t: t[0])) - return bm_returns, tr_curves + return benchmark_returns, tr_curves def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None): diff --git a/zipline/examples/dual_moving_average.py b/zipline/examples/dual_moving_average.py index 503fc49b..8ad234c3 100755 --- a/zipline/examples/dual_moving_average.py +++ b/zipline/examples/dual_moving_average.py @@ -15,7 +15,6 @@ # limitations under the License. import matplotlib.pyplot as plt -import pandas as pd from zipline.algorithm import TradingAlgorithm import zipline.finance.trading as trading @@ -76,9 +75,8 @@ if __name__ == '__main__': dma = DualMovingAverage() results = dma.run(data) - index = [br.date for br in trading.environment.benchmark_returns] - rets = [br.returns for br in trading.environment.benchmark_returns] - bm_returns = pd.Series(rets, index=index).ix[start:end] + br = trading.environment.benchmark_returns + bm_returns = br[(br.index >= start) & (br.index <= end)] results['benchmark_returns'] = (1 + bm_returns).cumprod().values results['algorithm_returns'] = (1 + results.returns).cumprod() fig = plt.figure() diff --git a/zipline/finance/performance/tracker.py b/zipline/finance/performance/tracker.py index 154ec88d..95b300b0 100644 --- a/zipline/finance/performance/tracker.py +++ b/zipline/finance/performance/tracker.py @@ -316,7 +316,7 @@ class PerformanceTracker(object): self.market_close = self.sim_params.last_close def handle_market_close(self): - # add the return results from today to the list of DailyReturn objects. 
+ # add the return results from today to the returns series todays_date = normalize_date(self.market_close) self.cumulative_performance.update_dividends(todays_date) self.todays_performance.update_dividends(todays_date) diff --git a/zipline/finance/risk/period.py b/zipline/finance/risk/period.py index 91a8d46c..f07dec7a 100644 --- a/zipline/finance/risk/period.py +++ b/zipline/finance/risk/period.py @@ -58,11 +58,9 @@ class RiskMetricsPeriod(object): self.end_date = end_date if benchmark_returns is None: - benchmark_returns = [ - x for x in trading.environment.benchmark_returns - if x.date >= returns[0].date and - x.date <= returns[-1].date - ] + br = trading.environment.benchmark_returns + benchmark_returns = br[(br.index >= returns.index[0]) & + (br.index <= returns.index[-1])] self.algorithm_returns = self.mask_returns_to_period(returns) self.benchmark_returns = self.mask_returns_to_period(benchmark_returns) diff --git a/zipline/finance/risk/report.py b/zipline/finance/risk/report.py index 7ffde9f9..b965a1a1 100644 --- a/zipline/finance/risk/report.py +++ b/zipline/finance/risk/report.py @@ -79,13 +79,8 @@ class RiskReport(object): start_date = self.sim_params.period_start end_date = self.sim_params.period_end else: - # FIXME: Papering over multiple algorithm_return types - if isinstance(self.algorithm_returns, list): - start_date = self.algorithm_returns[0].date - end_date = self.algorithm_returns[-1].date - else: - start_date = self.algorithm_returns.index[0] - end_date = self.algorithm_returns.index[-1] + start_date = self.algorithm_returns.index[0] + end_date = self.algorithm_returns.index[-1] self.month_periods = self.periods_in_range(1, start_date, end_date) self.three_month_periods = self.periods_in_range(3, start_date, diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index cef7908d..b0c0eb68 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -91,17 +91,13 @@ class TradingEnvironment(object): 
self.early_close_trading_day = datetime.timedelta(hours=3, minutes=30) self.exchange_tz = exchange_tz - bm = None + bi = self.benchmark_returns.index + if max_date: + self.trading_days = bi[bi <= max_date].copy() + else: + self.trading_days = bi.copy() - trading_days_list = [] - for bm in self.benchmark_returns: - if max_date and bm.date > max_date: - break - trading_days_list.append(bm.date) - - self.trading_days = pd.DatetimeIndex(trading_days_list) - - if bm and extra_dates: + if len(self.benchmark_returns) and extra_dates: for extra_date in extra_dates: extra_date = extra_date.replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/zipline/protocol.py b/zipline/protocol.py index a6cc493d..f037b34c 100644 --- a/zipline/protocol.py +++ b/zipline/protocol.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import datetime from . utils.protocol_utils import Enum @@ -182,21 +181,3 @@ class BarData(object): def items(self): return self._data.items() - - -class DailyReturn(object): - - def __init__(self, date, returns): - - assert isinstance(date, datetime.datetime) - self.date = date.replace(hour=0, minute=0, second=0, microsecond=0) - self.returns = returns - - def to_dict(self): - return { - 'dt': self.date, - 'returns': self.returns - } - - def __repr__(self): - return str(self.date) + " - " + str(self.returns) diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index db79ab7e..56fe65a8 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -26,7 +26,7 @@ import pandas as pd import numpy as np from datetime import datetime, timedelta -from zipline.protocol import DailyReturn, Event, DATASOURCE_TYPE +from zipline.protocol import Event, DATASOURCE_TYPE from zipline.sources import (SpecificEquityTrades, DataFrameSource, DataPanelSource) @@ -72,11 +72,10 @@ def 
create_noop_environment(): oneday = timedelta(days=1) start = datetime(2006, 1, 1, tzinfo=pytz.utc) - bm_returns = [] + days = [] tr_curves = OrderedDict() for day in date_gen(start=start, delta=oneday, count=252): - dr = DailyReturn(day, 0.01) - bm_returns.append(dr) + days.append(day) curve = { '10year': 0.0799, '1month': 0.0799, @@ -93,6 +92,8 @@ def create_noop_environment(): } tr_curves[day] = curve + bm_returns = pd.Series(index=days, data=0.1) + load_nodata = lambda x: (bm_returns, tr_curves) return trading.TradingEnvironment(load=load_nodata) @@ -223,31 +224,13 @@ def create_txn_history(sid, priceList, amtList, interval, sim_params): def create_returns_from_range(sim_params): - current = sim_params.first_open - end = sim_params.last_close - test_range = [] - while current <= end: - r = DailyReturn(current, random.random()) - test_range.append(r) - current = trading.environment.next_trading_day(current) - - return test_range + return pd.Series(index=sim_params.trading_days, + data=np.random.rand(len(sim_params.trading_days))) def create_returns_from_list(returns, sim_params): - current = sim_params.first_open - test_range = [] - - # sometimes the range starts with a non-trading day. - if not trading.environment.is_trading_day(current): - current = trading.environment.next_trading_day(current) - - for return_val in returns: - r = DailyReturn(current, return_val) - test_range.append(r) - current = trading.environment.next_trading_day(current) - - return test_range + return pd.Series(index=sim_params.trading_days[:len(returns)], + data=returns) def create_daily_trade_source(sids, trade_count, sim_params,