From e45528458f1b28f870cb296e26db4e059cafa439 Mon Sep 17 00:00:00 2001 From: David Stephens Date: Tue, 10 Dec 2013 17:25:07 -0800 Subject: [PATCH] ENH: Added functionality to download Canadian treasury curves. Added automatic switching of treasury curves based on index sent to environment. --- zipline/data/loader.py | 45 ++++++++---- zipline/data/treasuries.py | 6 +- zipline/data/treasuries_can.py | 125 +++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 15 deletions(-) create mode 100644 zipline/data/treasuries_can.py diff --git a/zipline/data/loader.py b/zipline/data/loader.py index 98d573df..8e8ed5a8 100644 --- a/zipline/data/loader.py +++ b/zipline/data/loader.py @@ -14,6 +14,7 @@ # limitations under the License. +import importlib import os from os.path import expanduser from collections import OrderedDict @@ -25,7 +26,6 @@ import pandas as pd from pandas.io.data import DataReader import pytz -from . treasuries import get_treasury_data from . import benchmarks from . benchmarks import get_benchmark_returns @@ -49,6 +49,16 @@ CACHE_PATH = os.path.join( 'cache' ) +#Mapping from index symbol to appropriate bond data +INDEX_MAPPING = { + '^GSPC': + ('treasuries', 'treasury_curves.csv', 'data.treasury.gov'), + '^GSPTSE': + ('treasuries_can', 'treasury_curves_can.csv', 'bankofcanada.ca'), + '^FTSE': # use US treasuries until UK bonds implemented + ('treasuries', 'treasury_curves.csv', 'data.treasury.gov'), +} + def get_datafile(name, mode='r'): """ @@ -70,21 +80,27 @@ def get_cache_filepath(name): return os.path.join(CACHE_PATH, name) -def dump_treasury_curves(): +def dump_treasury_curves(module='treasuries', filename='treasury_curves.csv'): """ Dumps data to be used with zipline. Puts source treasury and data into zipline. """ + try: + m = importlib.import_module("." + module, package='zipline.data') + except ImportError: + raise NotImplementedError( + 'Treasury curve {0} module not implemented'.format(module)) + tr_data = {} - for curve in get_treasury_data(): + for curve in m.get_treasury_data(): # Not ideal but massaging data into expected format tr_data[curve['date']] = curve curves = pd.DataFrame(tr_data).T - datafile = get_datafile('treasury_curves.csv', mode='wb') + datafile = get_datafile(filename, mode='wb') curves.to_csv(datafile) datafile.close() @@ -184,15 +200,20 @@ Fetching data from Yahoo Finance. ): benchmark_returns = benchmark_returns.tz_localize('UTC') + #Get treasury curve module, filename & source from mapping. + #Default to USA. + module, filename, source = INDEX_MAPPING.get( + bm_symbol, INDEX_MAPPING['^GSPC']) + try: - fp_tr = get_datafile('treasury_curves.csv', "rb") + fp_tr = get_datafile(filename, "rb") except IOError: print(""" data files aren't distributed with source. -Fetching data from data.treasury.gov -""").strip() - dump_treasury_curves() - fp_tr = get_datafile('treasury_curves.csv', "rb") +Fetching data from {0} +""").format(source).strip() + dump_treasury_curves(module, filename) + fp_tr = get_datafile(filename, "rb") saved_curves = pd.DataFrame.from_csv(fp_tr) @@ -205,7 +226,7 @@ Fetching data from data.treasury.gov # If more than 1 trading days has elapsed since the last day where # we have data,then we need to update if len(days_up_to_now) - last_tr_date_offset > 1: - treasury_curves = dump_treasury_curves() + treasury_curves = dump_treasury_curves(module, filename) else: treasury_curves = saved_curves.tz_localize('UTC') @@ -218,8 +239,8 @@ Fetching data from data.treasury.gov fp_tr.close() tr_curves = OrderedDict(sorted( - ((dt, c) for dt, c in tr_curves.iteritems()), - key=lambda t: t[0])) + ((dt, c) for dt, c in tr_curves.iteritems()), + key=lambda t: t[0])) return benchmark_returns, tr_curves diff --git a/zipline/data/treasuries.py b/zipline/data/treasuries.py index a819a578..a2b325e7 100644 --- a/zipline/data/treasuries.py +++ b/zipline/data/treasuries.py @@ -58,10 +58,10 @@ _CURVE_MAPPINGS = { } -def treasury_mappings(): +def treasury_mappings(mappings): return {key: Mapping(*value) for key, value - in _CURVE_MAPPINGS.iteritems()} + in mappings.iteritems()} class iter_to_stream(object): @@ -133,7 +133,7 @@ http://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData\ def get_treasury_data(): - mappings = treasury_mappings() + mappings = treasury_mappings(_CURVE_MAPPINGS) source = get_treasury_source() return source_to_records(mappings, source) diff --git a/zipline/data/treasuries_can.py b/zipline/data/treasuries_can.py new file mode 100644 index 00000000..a4355442 --- /dev/null +++ b/zipline/data/treasuries_can.py @@ -0,0 +1,125 @@ +# +# Copyright 2013 Quantopian, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import requests + +from . loader_utils import ( + source_to_records +) + +from zipline.data.treasuries import ( + treasury_mappings, get_treasury_date, get_treasury_rate +) + + +_CURVE_MAPPINGS = { + 'date': (get_treasury_date, "Date"), + '1month': (get_treasury_rate, "V39063"), + '3month': (get_treasury_rate, "V39065"), + '6month': (get_treasury_rate, "V39066"), + '1year': (get_treasury_rate, "V39067"), + '2year': (get_treasury_rate, "V39051"), + '3year': (get_treasury_rate, "V39052"), + '5year': (get_treasury_rate, "V39053"), + '7year': (get_treasury_rate, "V39054"), + '10year': (get_treasury_rate, "V39055"), + #Bank of Canada refers to this as 'Long' Rate, approximately 30 years. + '30year': (get_treasury_rate, "V39056"), +} + +BILLS = ['V39063', 'V39065', 'V39066', 'V39067'] +BONDS = ['V39051', 'V39052', 'V39053', 'V39054', 'V39055', 'V39056'] + + +def get_treasury_source(start_date=None, end_date=None): + + today = datetime.date.today() + #Bank of Canada only has 10 years of data and has this in the URL. + restriction = datetime.date(today.year-10, today.month, today.day) + + if not end_date: + end_date = today + + if not start_date: + start_date = restriction + + bill_url = ( + "http://www.bankofcanada.ca/stats/results/csv?" + "lP=lookup_tbill_yields.php&sR={restrict}&se=" + "L_V39063-L_V39065-L_V39066-L_V39067&dF={start}&dT={end}" + .format(restrict=restriction.strftime("%Y-%m-%d"), + start=start_date.strftime("%Y-%m-%d"), + end=end_date.strftime("%Y-%m-%d"), + ) + ) + + bond_url = ( + "http://www.bankofcanada.ca/stats/results/csv?" + "lP=lookup_bond_yields.php&sR={restrict}&se=" + "L_V39051-L_V39052-L_V39053-L_V39054-L_V39055-L_V39056" + "&dF={start}&dT={end}" + .format(restrict=restriction.strftime("%Y-%m-%d"), + start=start_date.strftime("%Y-%m-%d"), + end=end_date.strftime("%Y-%m-%d") + ) + ) + + res_bill = requests.get(bill_url, stream=True) + res_bond = requests.get(bond_url, stream=True) + bill_iter = res_bill.iter_lines() + bond_iter = res_bond.iter_lines() + + bill_row = "" + while ",".join(BILLS) not in bill_row: + bill_row = bill_iter.next() + if 'Daily series:' in bill_row: + bill_end_date = datetime.datetime.strptime( + bill_row.split(' - ')[1].strip(), + "%Y-%m-%d").date() + bill_header = bill_row.split(",") + + bond_row = "" + while ",".join(BONDS) not in bond_row: + bond_row = bond_iter.next() + if 'Daily series:' in bond_row: + bond_end_date = datetime.datetime.strptime( + bond_row.split(' - ')[1].strip(), + "%Y-%m-%d").date() + bond_header = bond_row.split(",") + + #Line up the two dates + if bill_end_date > bond_end_date: + bill_iter.next() + elif bond_end_date > bill_end_date: + bond_iter.next() + + for bill_row in bill_iter: + bond_row = bond_iter.next() + bill_dict = dict(zip(bill_header, bill_row.split(","))) + bond_dict = dict(zip(bond_header, bond_row.split(","))) + if ' Bank holiday' in bond_row.split(",") + bill_row.split(","): + continue + if ' Not available' in bond_row.split(",") + bill_row.split(","): + continue + + bill_dict.update(bond_dict) + yield bill_dict + + +def get_treasury_data(): + mappings = treasury_mappings(_CURVE_MAPPINGS) + source = get_treasury_source() + return source_to_records(mappings, source)