ENH: Update msgpack files if new data is available.

This commit is contained in:
Ryan Day
2013-02-18 13:27:19 -05:00
committed by Eddie Hebert
parent 3e21ea52bc
commit 4170c05fb6
2 changed files with 94 additions and 6 deletions
+12 -5
View File
@@ -76,12 +76,14 @@ def get_raw_benchmark_data(start_date, end_date, symbol):
return csv.DictReader(StringIO(res.content))
def get_benchmark_data(symbol):
def get_benchmark_data(symbol, start_date=None, end_date=None):
"""
Benchmarks from Yahoo.
"""
start_date = datetime(year=1950, month=1, day=3)
end_date = datetime.utcnow()
if start_date is None:
start_date = datetime(year=1950, month=1, day=3)
if end_date is None:
end_date = datetime.utcnow()
raw_benchmark_data = get_raw_benchmark_data(start_date, end_date, symbol)
# Reverse data so we can load it in reverse chron order.
@@ -92,10 +94,15 @@ def get_benchmark_data(symbol):
return source_to_records(mappings, benchmarks_source)
def get_benchmark_returns(symbol):
def get_benchmark_returns(symbol, start_date=None, end_date=None):
if start_date is None:
start_date = datetime(year=1950, month=1, day=3)
if end_date is None:
end_date = datetime.utcnow()
benchmark_returns = []
for data_point in get_benchmark_data(symbol):
for data_point in get_benchmark_data(symbol, start_date, end_date):
returns = (data_point['close'] - data_point['open']) / \
data_point['open']
daily_return = DailyReturn(date=data_point['date'], returns=returns)
+82 -1
View File
@@ -18,13 +18,14 @@ import os
from os.path import expanduser
import msgpack
from collections import OrderedDict
from datetime import timedelta
from treasuries import get_treasury_data
from benchmarks import get_benchmark_returns
from zipline.protocol import DailyReturn
from zipline.utils.date_utils import tuple_to_date
from zipline.utils.tradingcalendar import trading_days
from operator import attrgetter
@@ -87,6 +88,58 @@ def dump_benchmarks(symbol):
bmark_fp.write(msgpack.dumps(benchmark_data))
def update_treasury_curves(last_date):
"""
Updates data in the zipline treasury curves message pack
last_date should be a datetime object of the most recent data
Puts source treasury and data into zipline.
"""
tr_data = []
with get_datafile('treasury_curves.msgpack', mode='rb') as tr_fp:
tr_list = msgpack.loads(tr_fp.read())
for packed_date, curve in tr_list:
tr_data.append((packed_date, curve))
for curve in get_treasury_data():
date_as_tuple = curve['date'].timetuple()[0:6] + \
(curve['date'].microsecond,)
# Not ideal but massaging data into expected format
del curve['date']
tr = (date_as_tuple, curve)
tr_data.append(tr)
with get_datafile('treasury_curves.msgpack', mode='wb') as tr_fp:
tr_fp.write(msgpack.dumps(tr_data))
def update_benchmarks(symbol, last_date):
"""
Updates data in the zipline message pack
last_date should be a datetime object of the most recent data
Puts source benchmark into zipline.
"""
benchmark_data = []
with get_datafile(get_benchmark_filename(symbol), mode='rb') as bmark_fp:
bm_list = msgpack.loads(bmark_fp.read())
for packed_date, returns in bm_list:
benchmark_data.append((packed_date, returns))
start = last_date + timedelta(days=1)
for daily_return in get_benchmark_returns(symbol, start_date=start):
date_as_tuple = daily_return.date.timetuple()[0:6] + \
(daily_return.date.microsecond,)
# Not ideal but massaging data into expected format
benchmark = (date_as_tuple, daily_return.returns)
benchmark_data.append(benchmark)
with get_datafile(get_benchmark_filename(symbol), mode='wb') as bmark_fp:
bmark_fp.write(msgpack.dumps(benchmark_data))
def get_benchmark_filename(symbol):
return "%s_benchmark.msgpack" % symbol
@@ -103,6 +156,20 @@ Fetching data from Yahoo Finance.
fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")
bm_list = msgpack.loads(fp_bm.read())
# Find the offset of the last date for which we have trading data in our
# list of valid trading days
last_bm_date = tuple_to_date(bm_list[-1][0])
last_bm_date_offset = trading_days.get_loc(
last_bm_date.strftime('%Y/%m/%d'))
# If more than 1 trading days has elapsed since the last day where
# we have data,then we need to update
if len(trading_days) - last_bm_date_offset > 1:
update_benchmarks(bm_symbol, last_bm_date)
fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")
bm_list = msgpack.loads(fp_bm.read())
bm_returns = []
for packed_date, returns in bm_list:
event_dt = tuple_to_date(packed_date)
@@ -125,6 +192,20 @@ Fetching data from data.treasury.gov
fp_tr = get_datafile('treasury_curves.msgpack', "rb")
tr_list = msgpack.loads(fp_tr.read())
# Find the offset of the last date for which we have trading data in our
# list of valid trading days
last_tr_date = tuple_to_date(tr_list[-1][0])
last_tr_date_offset = trading_days.get_loc(
last_tr_date.strftime('%Y/%m/%d'))
# If more than 1 trading days has elapsed since the last day where
# we have data,then we need to update
if len(trading_days) - last_tr_date_offset > 1:
update_benchmarks(last_tr_date)
fp_tr = get_datafile('treasury_curves.msgpack', "rb")
tr_list = msgpack.loads(fp_tr.read())
tr_curves = {}
for packed_date, curve in tr_list:
tr_dt = tuple_to_date(packed_date)