From e45528458f1b28f870cb296e26db4e059cafa439 Mon Sep 17 00:00:00 2001
From: David Stephens <dstephens99@gmail.com>
Date: Tue, 10 Dec 2013 17:25:07 -0800
Subject: [PATCH] ENH: Added functionality to download Canadian treasury
 curves.

Added automatic switching of treasury curves based on the benchmark index
symbol passed to the environment.
---
 zipline/data/loader.py         |  45 ++++++++----
 zipline/data/treasuries.py     |   6 +-
 zipline/data/treasuries_can.py | 125 +++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 15 deletions(-)
 create mode 100644 zipline/data/treasuries_can.py

diff --git a/zipline/data/loader.py b/zipline/data/loader.py
index 98d573df..8e8ed5a8 100644
--- a/zipline/data/loader.py
+++ b/zipline/data/loader.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 
+import importlib
 import os
 from os.path import expanduser
 from collections import OrderedDict
@@ -25,7 +26,6 @@ import pandas as pd
 from pandas.io.data import DataReader
 import pytz
 
-from . treasuries import get_treasury_data
 from . import benchmarks
 from . benchmarks import get_benchmark_returns
 
@@ -49,6 +49,16 @@ CACHE_PATH = os.path.join(
     'cache'
 )
 
+# Maps a benchmark index symbol to (treasury module, cache file, source).
+INDEX_MAPPING = {
+    '^GSPC':
+    ('treasuries', 'treasury_curves.csv', 'data.treasury.gov'),
+    '^GSPTSE':
+    ('treasuries_can', 'treasury_curves_can.csv', 'bankofcanada.ca'),
+    '^FTSE':  # use US treasuries until UK bonds implemented
+    ('treasuries', 'treasury_curves.csv', 'data.treasury.gov'),
+}
+
 
 def get_datafile(name, mode='r'):
     """
@@ -70,21 +80,27 @@ def get_cache_filepath(name):
     return os.path.join(CACHE_PATH, name)
 
 
-def dump_treasury_curves():
+def dump_treasury_curves(module='treasuries', filename='treasury_curves.csv'):
     """
     Dumps data to be used with zipline.
 
     Puts source treasury and data into zipline.
     """
+    try:
+        m = importlib.import_module("." + module, package='zipline.data')
+    except ImportError:
+        raise NotImplementedError(
+            'Treasury curve {0} module not implemented'.format(module))
+
     tr_data = {}
 
-    for curve in get_treasury_data():
+    for curve in m.get_treasury_data():
         # Not ideal but massaging data into expected format
         tr_data[curve['date']] = curve
 
     curves = pd.DataFrame(tr_data).T
 
-    datafile = get_datafile('treasury_curves.csv', mode='wb')
+    datafile = get_datafile(filename, mode='wb')
     curves.to_csv(datafile)
     datafile.close()
 
@@ -184,15 +200,20 @@ Fetching data from Yahoo Finance.
         ):
             benchmark_returns = benchmark_returns.tz_localize('UTC')
 
+    # Look up the treasury module, cache filename and source for bm_symbol,
+    # defaulting to the US (^GSPC) entry for unmapped symbols.
+    module, filename, source = INDEX_MAPPING.get(
+        bm_symbol, INDEX_MAPPING['^GSPC'])
+
     try:
-        fp_tr = get_datafile('treasury_curves.csv', "rb")
+        fp_tr = get_datafile(filename, "rb")
     except IOError:
         print("""
 data files aren't distributed with source.
-Fetching data from data.treasury.gov
-""").strip()
-        dump_treasury_curves()
-        fp_tr = get_datafile('treasury_curves.csv', "rb")
+Fetching data from {0}
+""").format(source).strip()
+        dump_treasury_curves(module, filename)
+        fp_tr = get_datafile(filename, "rb")
 
     saved_curves = pd.DataFrame.from_csv(fp_tr)
 
@@ -205,7 +226,7 @@ Fetching data from data.treasury.gov
     # If more than 1 trading days has elapsed since the last day where
     # we have data,then we need to update
     if len(days_up_to_now) - last_tr_date_offset > 1:
-        treasury_curves = dump_treasury_curves()
+        treasury_curves = dump_treasury_curves(module, filename)
     else:
         treasury_curves = saved_curves.tz_localize('UTC')
 
@@ -218,8 +239,8 @@ Fetching data from data.treasury.gov
     fp_tr.close()
 
     tr_curves = OrderedDict(sorted(
-                            ((dt, c) for dt, c in tr_curves.iteritems()),
-                            key=lambda t: t[0]))
+        ((dt, c) for dt, c in tr_curves.iteritems()),
+        key=lambda t: t[0]))
 
     return benchmark_returns, tr_curves
 
diff --git a/zipline/data/treasuries.py b/zipline/data/treasuries.py
index a819a578..a2b325e7 100644
--- a/zipline/data/treasuries.py
+++ b/zipline/data/treasuries.py
@@ -58,10 +58,10 @@ _CURVE_MAPPINGS = {
 }
 
 
-def treasury_mappings():
+def treasury_mappings(mappings):
     return {key: Mapping(*value)
             for key, value
-            in _CURVE_MAPPINGS.iteritems()}
+            in mappings.iteritems()}
 
 
 class iter_to_stream(object):
@@ -133,7 +133,7 @@ http://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData\
 
 
 def get_treasury_data():
-    mappings = treasury_mappings()
+    mappings = treasury_mappings(_CURVE_MAPPINGS)
     source = get_treasury_source()
     return source_to_records(mappings, source)
 
diff --git a/zipline/data/treasuries_can.py b/zipline/data/treasuries_can.py
new file mode 100644
index 00000000..a4355442
--- /dev/null
+++ b/zipline/data/treasuries_can.py
@@ -0,0 +1,125 @@
+#
+# Copyright 2013 Quantopian, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import requests
+
+from . loader_utils import (
+    source_to_records
+)
+
+from zipline.data.treasuries import (
+    treasury_mappings, get_treasury_date, get_treasury_rate
+)
+
+
+_CURVE_MAPPINGS = {
+    'date': (get_treasury_date, "Date"),
+    '1month': (get_treasury_rate, "V39063"),
+    '3month': (get_treasury_rate, "V39065"),
+    '6month': (get_treasury_rate, "V39066"),
+    '1year': (get_treasury_rate, "V39067"),
+    '2year': (get_treasury_rate, "V39051"),
+    '3year': (get_treasury_rate, "V39052"),
+    '5year': (get_treasury_rate, "V39053"),
+    '7year': (get_treasury_rate, "V39054"),
+    '10year': (get_treasury_rate, "V39055"),
+    # Bank of Canada calls this its 'Long' rate, approximately 30 years.
+    '30year': (get_treasury_rate, "V39056"),
+}
+# Series ids in the column order get_treasury_source expects in CSV headers.
+BILLS = ['V39063', 'V39065', 'V39066', 'V39067']
+BONDS = ['V39051', 'V39052', 'V39053', 'V39054', 'V39055', 'V39056']
+
+
+def _skip_to_header(lines, series):
+    """Advance ``lines`` to the CSV header row naming ``series``.
+
+    Returns (header cells, end date from 'Daily series:' line or None).
+    """
+    end_date = None
+    row = ""
+    while ",".join(series) not in row:
+        row = next(lines)
+        if 'Daily series:' in row:
+            end_date = datetime.datetime.strptime(
+                row.split(' - ')[1].strip(), "%Y-%m-%d").date()
+    return row.split(","), end_date
+
+
+def get_treasury_source(start_date=None, end_date=None):
+    """Yield dicts merging paired Bank of Canada bill and bond CSV rows.
+
+    start_date and end_date default to ten years ago and today; rows marked
+    ' Bank holiday' or ' Not available' in either feed are skipped.
+    """
+    today = datetime.date.today()
+    # Bank of Canada only has 10 years of data and has this in the URL.
+    try:
+        restriction = today.replace(year=today.year - 10)
+    except ValueError:  # Feb 29 has no counterpart ten years back
+        restriction = datetime.date(today.year - 10, 3, 1)
+
+    end_date = end_date or today
+    start_date = start_date or restriction
+    dates = dict(restrict=restriction.strftime("%Y-%m-%d"),
+                 start=start_date.strftime("%Y-%m-%d"),
+                 end=end_date.strftime("%Y-%m-%d"))
+
+    bill_url = (
+        "http://www.bankofcanada.ca/stats/results/csv?"
+        "lP=lookup_tbill_yields.php&sR={restrict}&se="
+        "L_V39063-L_V39065-L_V39066-L_V39067&dF={start}&dT={end}"
+    ).format(**dates)
+    bond_url = (
+        "http://www.bankofcanada.ca/stats/results/csv?"
+        "lP=lookup_bond_yields.php&sR={restrict}&se="
+        "L_V39051-L_V39052-L_V39053-L_V39054-L_V39055-L_V39056"
+        "&dF={start}&dT={end}"
+    ).format(**dates)
+
+    bill_iter = requests.get(bill_url, stream=True).iter_lines()
+    bond_iter = requests.get(bond_url, stream=True).iter_lines()
+
+    bill_header, bill_end = _skip_to_header(bill_iter, BILLS)
+    bond_header, bond_end = _skip_to_header(bond_iter, BONDS)
+
+    # Line up the two feeds when one reports a later final date than the
+    # other (presumably rows arrive newest-first -- TODO confirm).
+    if bill_end is not None and bond_end is not None:
+        if bill_end > bond_end:
+            next(bill_iter, None)
+        elif bond_end > bill_end:
+            next(bond_iter, None)
+
+    for bill_row in bill_iter:
+        bond_row = next(bond_iter, None)
+        if bond_row is None:  # bond feed ran out first
+            break
+        bill_cells = bill_row.split(",")
+        bond_cells = bond_row.split(",")
+        cells = bond_cells + bill_cells
+        # Skip rows where either feed has no usable quote.
+        if ' Bank holiday' in cells or ' Not available' in cells:
+            continue
+        record = dict(zip(bill_header, bill_cells))
+        record.update(zip(bond_header, bond_cells))
+        yield record
+
+
+def get_treasury_data():
+    """Return source_to_records() of the Canadian mappings and source."""
+    return source_to_records(
+        treasury_mappings(_CURVE_MAPPINGS), get_treasury_source())