diff --git a/zipline/data/benchmarks.py b/zipline/data/benchmarks.py index 38b4badd..c7f0861e 100644 --- a/zipline/data/benchmarks.py +++ b/zipline/data/benchmarks.py @@ -76,12 +76,14 @@ def get_raw_benchmark_data(start_date, end_date, symbol): return csv.DictReader(StringIO(res.content)) -def get_benchmark_data(symbol): +def get_benchmark_data(symbol, start_date=None, end_date=None): """ Benchmarks from Yahoo. """ - start_date = datetime(year=1950, month=1, day=3) - end_date = datetime.utcnow() + if start_date is None: + start_date = datetime(year=1950, month=1, day=3) + if end_date is None: + end_date = datetime.utcnow() raw_benchmark_data = get_raw_benchmark_data(start_date, end_date, symbol) # Reverse data so we can load it in reverse chron order. @@ -92,10 +94,15 @@ def get_benchmark_data(symbol): return source_to_records(mappings, benchmarks_source) -def get_benchmark_returns(symbol): +def get_benchmark_returns(symbol, start_date=None, end_date=None): + if start_date is None: + start_date = datetime(year=1950, month=1, day=3) + if end_date is None: + end_date = datetime.utcnow() + benchmark_returns = [] - for data_point in get_benchmark_data(symbol): + for data_point in get_benchmark_data(symbol, start_date, end_date): returns = (data_point['close'] - data_point['open']) / \ data_point['open'] daily_return = DailyReturn(date=data_point['date'], returns=returns) diff --git a/zipline/data/loader.py b/zipline/data/loader.py index 0becf432..f70abd59 100644 --- a/zipline/data/loader.py +++ b/zipline/data/loader.py @@ -18,13 +18,14 @@ import os from os.path import expanduser import msgpack from collections import OrderedDict - +from datetime import timedelta from treasuries import get_treasury_data from benchmarks import get_benchmark_returns from zipline.protocol import DailyReturn from zipline.utils.date_utils import tuple_to_date +from zipline.utils.tradingcalendar import trading_days from operator import attrgetter @@ -87,6 +88,58 @@ def dump_benchmarks(symbol): bmark_fp.write(msgpack.dumps(benchmark_data)) +def update_treasury_curves(last_date): + """ + Updates data in the zipline treasury curves message pack + + last_date should be a datetime object of the most recent data + + Puts source treasury and data into zipline. + """ + tr_data = [] + with get_datafile('treasury_curves.msgpack', mode='rb') as tr_fp: + tr_list = msgpack.loads(tr_fp.read()) + for packed_date, curve in tr_list: + tr_data.append((packed_date, curve)) + + for curve in get_treasury_data(): + date_as_tuple = curve['date'].timetuple()[0:6] + \ + (curve['date'].microsecond,) + # Not ideal but massaging data into expected format + del curve['date'] + tr = (date_as_tuple, curve) + tr_data.append(tr) + + with get_datafile('treasury_curves.msgpack', mode='wb') as tr_fp: + tr_fp.write(msgpack.dumps(tr_data)) + + +def update_benchmarks(symbol, last_date): + """ + Updates data in the zipline message pack + + last_date should be a datetime object of the most recent data + + Puts source benchmark into zipline. + """ + benchmark_data = [] + with get_datafile(get_benchmark_filename(symbol), mode='rb') as bmark_fp: + bm_list = msgpack.loads(bmark_fp.read()) + for packed_date, returns in bm_list: + benchmark_data.append((packed_date, returns)) + + start = last_date + timedelta(days=1) + for daily_return in get_benchmark_returns(symbol, start_date=start): + date_as_tuple = daily_return.date.timetuple()[0:6] + \ + (daily_return.date.microsecond,) + # Not ideal but massaging data into expected format + benchmark = (date_as_tuple, daily_return.returns) + benchmark_data.append(benchmark) + + with get_datafile(get_benchmark_filename(symbol), mode='wb') as bmark_fp: + bmark_fp.write(msgpack.dumps(benchmark_data)) + + def get_benchmark_filename(symbol): return "%s_benchmark.msgpack" % symbol @@ -103,6 +156,20 @@ Fetching data from Yahoo Finance. fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb") bm_list = msgpack.loads(fp_bm.read()) + + # Find the offset of the last date for which we have trading data in our + # list of valid trading days + last_bm_date = tuple_to_date(bm_list[-1][0]) + last_bm_date_offset = trading_days.get_loc( + last_bm_date.strftime('%Y/%m/%d')) + + # If more than 1 trading days has elapsed since the last day where + # we have data,then we need to update + if len(trading_days) - last_bm_date_offset > 1: + update_benchmarks(bm_symbol, last_bm_date) + fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb") + bm_list = msgpack.loads(fp_bm.read()) + bm_returns = [] for packed_date, returns in bm_list: event_dt = tuple_to_date(packed_date) @@ -125,6 +192,20 @@ Fetching data from data.treasury.gov fp_tr = get_datafile('treasury_curves.msgpack', "rb") tr_list = msgpack.loads(fp_tr.read()) + + # Find the offset of the last date for which we have trading data in our + # list of valid trading days + last_tr_date = tuple_to_date(tr_list[-1][0]) + last_tr_date_offset = trading_days.get_loc( + last_tr_date.strftime('%Y/%m/%d')) + + # If more than 1 trading days has elapsed since the last day where + # we have data,then we need to update + if len(trading_days) - last_tr_date_offset > 1: + update_benchmarks(last_tr_date) + fp_tr = get_datafile('treasury_curves.msgpack', "rb") + tr_list = msgpack.loads(fp_tr.read()) + tr_curves = {} for packed_date, curve in tr_list: tr_dt = tuple_to_date(packed_date)