MAINT: Skip more rows to match change in treasury data format

I'm not sure what the raw CSV pulled from the Federal Reserve looked like before, but when trying to download fresh treasury data (data not stored in `./zipline`), there is an error that says "Time Period not in list". After checking the raw CSV as it is now, it looks like there are 5 header rows rather than just 1, so skipping those rows removes that error.
This commit is contained in:
Freddie Vargus
2017-06-01 13:37:49 -04:00
parent 7a6f45b971
commit a12c34c39c
6 changed files with 14 additions and 6872 deletions
Binary file not shown.
+6
View File
@@ -25,6 +25,8 @@ from zipline.testing import test_resource_path
from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase
from zipline.testing.predicates import assert_equal
from zipline.utils.cache import dataframe_cache
from zipline.utils.paths import ensure_file
# Otherwise the next line sometimes complains about being run too late.
_multiprocess_can_split_ = False
@@ -53,6 +55,10 @@ class ExamplesTests(WithTmpDir, ZiplineTestCase):
serialization='pickle',
)
market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
for data in market_data:
ensure_file(cls.tmpdir.getpath('example_data/root/data/' + data))
@parameterized.expand(sorted(examples.EXAMPLE_MODULES))
def test_example(self, example_name):
actual_perf = examples.run_example(
+4 -2
View File
@@ -32,11 +32,13 @@ def get_benchmark_returns(symbol, first_date, last_date):
last_date : pd.Timestamp
Last date for which we want to get data.
The furthest date that Google goes back to is 2001-06-26. It has missing
The furthest date that Google goes back to is 1993-02-01. It has missing
data for 2008-12-15, 2009-08-11, and 2012-02-02, so we add data for the
dates for which Google is missing data.
We're also limited to the last 4000 days worth of data.
We're also limited to 4000 days worth of data per request. If we make a
request for data that extends past 4000 trading days, we'll still only
receive 4000 days of data.
first_date is **not** included because we need the close from day N - 1 to
compute the returns for day N.
+2 -4
View File
@@ -31,6 +31,7 @@ from ..utils.paths import (
from ..utils.deprecate import deprecated
from zipline.utils.calendars import get_calendar
logger = logbook.Logger('Loader')
# Mapping from index symbol to appropriate bond data
@@ -136,10 +137,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
if trading_days is None:
trading_days = get_calendar('NYSE').all_sessions
# We want the latest 4000 trading days
# because Google Finance only allows downloading data
# up to the 4000 latest trading days
first_date = trading_days[-4000]
first_date = trading_days[0]
now = pd.Timestamp.utcnow()
# We expect to have benchmark and treasury data that's current up until
+2 -1
View File
@@ -67,9 +67,10 @@ def get_treasury_data(start_date, end_date):
"&from=" # An unbounded query is ~2x faster than specifying dates.
"&to="
"&filetype=csv"
"&label=include"
"&layout=seriescolumn"
"&type=package",
skiprows=1, # First row is a useless header.
skiprows=5, # First 5 rows are useless headers.
parse_dates=['Time Period'],
na_values=['ND'], # Presumably this stands for "No Data".
index_col=0,
File diff suppressed because it is too large Load Diff