ENH: Added functionality to download Canadian treasury curves.

Added automatic selection of the treasury curve data (module, cache file and source) based on the benchmark index symbol sent to the environment.
This commit is contained in:
David Stephens
2013-12-10 17:25:07 -08:00
committed by Thomas Wiecki
parent e5786b2593
commit e45528458f
3 changed files with 161 additions and 15 deletions
+33 -12
View File
@@ -14,6 +14,7 @@
# limitations under the License.
import importlib
import os
from os.path import expanduser
from collections import OrderedDict
@@ -25,7 +26,6 @@ import pandas as pd
from pandas.io.data import DataReader
import pytz
from . treasuries import get_treasury_data
from . import benchmarks
from . benchmarks import get_benchmark_returns
@@ -49,6 +49,16 @@ CACHE_PATH = os.path.join(
'cache'
)
# Maps a benchmark index symbol to the bond data used alongside it, as a
# (loader module name, cache filename, data source shown to the user) tuple.
_US_TREASURIES = ('treasuries', 'treasury_curves.csv', 'data.treasury.gov')
_CA_TREASURIES = (
    'treasuries_can', 'treasury_curves_can.csv', 'bankofcanada.ca')

INDEX_MAPPING = {
    '^GSPC': _US_TREASURIES,
    '^GSPTSE': _CA_TREASURIES,
    # use US treasuries until UK bonds implemented
    '^FTSE': _US_TREASURIES,
}
def get_datafile(name, mode='r'):
"""
@@ -70,21 +80,27 @@ def get_cache_filepath(name):
return os.path.join(CACHE_PATH, name)
def dump_treasury_curves():
def dump_treasury_curves(module='treasuries', filename='treasury_curves.csv'):
"""
Dumps data to be used with zipline.
Puts source treasury and data into zipline.
"""
try:
m = importlib.import_module("." + module, package='zipline.data')
except ImportError:
raise NotImplementedError(
'Treasury curve {0} module not implemented'.format(module))
tr_data = {}
for curve in get_treasury_data():
for curve in m.get_treasury_data():
# Not ideal but massaging data into expected format
tr_data[curve['date']] = curve
curves = pd.DataFrame(tr_data).T
datafile = get_datafile('treasury_curves.csv', mode='wb')
datafile = get_datafile(filename, mode='wb')
curves.to_csv(datafile)
datafile.close()
@@ -184,15 +200,20 @@ Fetching data from Yahoo Finance.
):
benchmark_returns = benchmark_returns.tz_localize('UTC')
#Get treasury curve module, filename & source from mapping.
#Default to USA.
module, filename, source = INDEX_MAPPING.get(
bm_symbol, INDEX_MAPPING['^GSPC'])
try:
fp_tr = get_datafile('treasury_curves.csv', "rb")
fp_tr = get_datafile(filename, "rb")
except IOError:
print("""
data files aren't distributed with source.
Fetching data from data.treasury.gov
""").strip()
dump_treasury_curves()
fp_tr = get_datafile('treasury_curves.csv', "rb")
Fetching data from {0}
""").format(source).strip()
dump_treasury_curves(module, filename)
fp_tr = get_datafile(filename, "rb")
saved_curves = pd.DataFrame.from_csv(fp_tr)
@@ -205,7 +226,7 @@ Fetching data from data.treasury.gov
# If more than 1 trading day has elapsed since the last day where
# we have data, then we need to update
if len(days_up_to_now) - last_tr_date_offset > 1:
treasury_curves = dump_treasury_curves()
treasury_curves = dump_treasury_curves(module, filename)
else:
treasury_curves = saved_curves.tz_localize('UTC')
@@ -218,8 +239,8 @@ Fetching data from data.treasury.gov
fp_tr.close()
tr_curves = OrderedDict(sorted(
((dt, c) for dt, c in tr_curves.iteritems()),
key=lambda t: t[0]))
((dt, c) for dt, c in tr_curves.iteritems()),
key=lambda t: t[0]))
return benchmark_returns, tr_curves
+3 -3
View File
@@ -58,10 +58,10 @@ _CURVE_MAPPINGS = {
}
def treasury_mappings():
def treasury_mappings(mappings):
return {key: Mapping(*value)
for key, value
in _CURVE_MAPPINGS.iteritems()}
in mappings.iteritems()}
class iter_to_stream(object):
@@ -133,7 +133,7 @@ http://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData\
def get_treasury_data():
mappings = treasury_mappings()
mappings = treasury_mappings(_CURVE_MAPPINGS)
source = get_treasury_source()
return source_to_records(mappings, source)
+125
View File
@@ -0,0 +1,125 @@
#
# Copyright 2013 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import requests
from . loader_utils import (
source_to_records
)
from zipline.data.treasuries import (
treasury_mappings, get_treasury_date, get_treasury_rate
)
# Bank of Canada series identifiers, paired with the curve tenor each one
# feeds.  Bill and bond yields are requested from separate CSV exports, so
# the two groups are listed separately.
_BILL_SERIES = (
    ('1month', "V39063"),
    ('3month', "V39065"),
    ('6month', "V39066"),
    ('1year', "V39067"),
)
_BOND_SERIES = (
    ('2year', "V39051"),
    ('3year', "V39052"),
    ('5year', "V39053"),
    ('7year', "V39054"),
    ('10year', "V39055"),
    # Bank of Canada refers to this as 'Long' Rate, approximately 30 years.
    ('30year', "V39056"),
)

# Output field name -> (parser function, CSV column name).
_CURVE_MAPPINGS = {'date': (get_treasury_date, "Date")}
_CURVE_MAPPINGS.update(
    (tenor, (get_treasury_rate, series_id))
    for tenor, series_id in _BILL_SERIES + _BOND_SERIES
)

# Column names of the bill and bond exports, in the order they are requested.
BILLS = list(series_id for _, series_id in _BILL_SERIES)
BONDS = list(series_id for _, series_id in _BOND_SERIES)
def _years_before(day, years):
    """Return ``day`` moved back by ``years`` calendar years.

    29 February is clamped to 28 February when the target year is not a
    leap year, rather than raising ``ValueError``.
    """
    try:
        return day.replace(year=day.year - years)
    except ValueError:
        return day.replace(year=day.year - years, day=28)


def _boc_csv_url(lookup, series, restriction, start_date, end_date):
    """Build the Bank of Canada CSV export URL for the given series ids."""
    return (
        "http://www.bankofcanada.ca/stats/results/csv?"
        "lP={lookup}&sR={restrict}&se={series}&dF={start}&dT={end}"
        .format(lookup=lookup,
                restrict=restriction.strftime("%Y-%m-%d"),
                series="-".join("L_" + name for name in series),
                start=start_date.strftime("%Y-%m-%d"),
                end=end_date.strftime("%Y-%m-%d"))
    )


def _read_csv_preamble(lines, series):
    """Consume ``lines`` up to and including the CSV header row.

    Returns a ``(header, end_date)`` tuple: the header row split on commas
    and the end date parsed from the preceding 'Daily series:' line.

    Raises ``ValueError`` if the stream ends before the header row, or if
    no 'Daily series:' line was seen before it.
    """
    header_marker = ",".join(series)
    end_date = None
    for row in lines:
        if 'Daily series:' in row:
            end_date = datetime.datetime.strptime(
                row.split(' - ')[1].strip(),
                "%Y-%m-%d").date()
        if header_marker in row:
            if end_date is None:
                raise ValueError(
                    "No 'Daily series:' line before header {0!r}"
                    .format(header_marker))
            return row.split(","), end_date
    raise ValueError(
        "Header row {0!r} not found in Bank of Canada response"
        .format(header_marker))


def get_treasury_source(start_date=None, end_date=None):
    """
    Yield one dict per day of Bank of Canada bill and bond yield data.

    Two CSV exports are fetched (treasury bills and bonds) and read in
    lockstep.  Each yielded dict maps the CSV column names to the raw string
    cells of that row; bond columns overwrite bill columns where names
    collide.  Rows where either export contains a ' Bank holiday' or
    ' Not available' cell are skipped.

    start_date and end_date are date-like objects supporting ``strftime``.
    end_date defaults to today and start_date to ten years before today.

    Raises ``ValueError`` when an export does not contain the expected
    preamble, and whatever ``raise_for_status`` raises on an HTTP error.

    As this is a generator, nothing is fetched until iteration starts.
    """
    today = datetime.date.today()
    #Bank of Canada only has 10 years of data and has this in the URL.
    restriction = _years_before(today, 10)

    if not end_date:
        end_date = today
    if not start_date:
        start_date = restriction

    bill_url = _boc_csv_url(
        "lookup_tbill_yields.php", BILLS, restriction, start_date, end_date)
    bond_url = _boc_csv_url(
        "lookup_bond_yields.php", BONDS, restriction, start_date, end_date)

    res_bill = requests.get(bill_url, stream=True)
    res_bond = requests.get(bond_url, stream=True)
    try:
        # Fail with the HTTP error itself rather than a parse error later.
        res_bill.raise_for_status()
        res_bond.raise_for_status()

        # NOTE(review): the parsing below compares lines against text
        # literals; on Python 3 iter_lines() presumably yields bytes and
        # would need decoding first -- confirm before porting.
        bill_iter = res_bill.iter_lines()
        bond_iter = res_bond.iter_lines()

        bill_header, bill_end_date = _read_csv_preamble(bill_iter, BILLS)
        bond_header, bond_end_date = _read_csv_preamble(bond_iter, BONDS)

        #Line up the two dates
        # NOTE(review): only a single row is dropped from the export with
        # the later end date; this presumably relies on the exports being
        # at most one row apart -- confirm against the data.
        if bill_end_date > bond_end_date:
            next(bill_iter, None)
        elif bond_end_date > bill_end_date:
            next(bond_iter, None)

        for bill_row in bill_iter:
            bond_row = next(bond_iter, None)
            if bond_row is None:
                # Bond export ran out first: end the generator explicitly
                # instead of letting StopIteration escape the body.
                return

            bill_cells = bill_row.split(",")
            bond_cells = bond_row.split(",")
            cells = bond_cells + bill_cells
            if ' Bank holiday' in cells or ' Not available' in cells:
                continue

            record = dict(zip(bill_header, bill_cells))
            record.update(zip(bond_header, bond_cells))
            yield record
    finally:
        # Streamed responses hold their connection until closed.
        res_bill.close()
        res_bond.close()
def get_treasury_data():
    """
    Return the result of running the Bank of Canada source rows through
    source_to_records, using the mappings built from _CURVE_MAPPINGS.
    """
    return source_to_records(
        treasury_mappings(_CURVE_MAPPINGS),
        get_treasury_source(),
    )