mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-27 20:20:55 +08:00
MAINT: Skip more rows to match change in treasury data format
I'm not sure what the raw CSV pulled from the Federal Reserve looked like before, but when trying to download fresh treasury data (data not already stored in `./zipline`), there is an error that says "Time Period not in list". After checking the raw CSV now, it looks like there are 5 header rows rather than just 1, so skipping those rows removes that error.
This commit is contained in:
Binary file not shown.
@@ -25,6 +25,8 @@ from zipline.testing import test_resource_path
|
||||
from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase
|
||||
from zipline.testing.predicates import assert_equal
|
||||
from zipline.utils.cache import dataframe_cache
|
||||
from zipline.utils.paths import ensure_file
|
||||
|
||||
|
||||
# Otherwise the next line sometimes complains about being run too late.
|
||||
_multiprocess_can_split_ = False
|
||||
@@ -53,6 +55,10 @@ class ExamplesTests(WithTmpDir, ZiplineTestCase):
|
||||
serialization='pickle',
|
||||
)
|
||||
|
||||
market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
|
||||
for data in market_data:
|
||||
ensure_file(cls.tmpdir.getpath('example_data/root/data/' + data))
|
||||
|
||||
@parameterized.expand(sorted(examples.EXAMPLE_MODULES))
|
||||
def test_example(self, example_name):
|
||||
actual_perf = examples.run_example(
|
||||
|
||||
@@ -32,11 +32,13 @@ def get_benchmark_returns(symbol, first_date, last_date):
|
||||
last_date : pd.Timestamp
|
||||
Last date for which we want to get data.
|
||||
|
||||
The furthest date that Google goes back to is 2001-06-26. It has missing
|
||||
The furthest date that Google goes back to is 1993-02-01. It has missing
|
||||
data for 2008-12-15, 2009-08-11, and 2012-02-02, so we add data for the
|
||||
dates for which Google is missing data.
|
||||
|
||||
We're also limited to the last 4000 days worth of data.
|
||||
We're also limited to 4000 days worth of data per request. If we make a
|
||||
request for data that extends past 4000 trading days, we'll still only
|
||||
receive 4000 days of data.
|
||||
|
||||
first_date is **not** included because we need the close from day N - 1 to
|
||||
compute the returns for day N.
|
||||
|
||||
@@ -31,6 +31,7 @@ from ..utils.paths import (
|
||||
from ..utils.deprecate import deprecated
|
||||
from zipline.utils.calendars import get_calendar
|
||||
|
||||
|
||||
logger = logbook.Logger('Loader')
|
||||
|
||||
# Mapping from index symbol to appropriate bond data
|
||||
@@ -136,10 +137,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
|
||||
if trading_days is None:
|
||||
trading_days = get_calendar('NYSE').all_sessions
|
||||
|
||||
# We want the latest 4000 trading days
|
||||
# because Google Finance only allows downloading data
|
||||
# up to the 4000 latest trading days
|
||||
first_date = trading_days[-4000]
|
||||
first_date = trading_days[0]
|
||||
now = pd.Timestamp.utcnow()
|
||||
|
||||
# We expect to have benchmark and treasury data that's current up until
|
||||
|
||||
@@ -67,9 +67,10 @@ def get_treasury_data(start_date, end_date):
|
||||
"&from=" # An unbounded query is ~2x faster than specifying dates.
|
||||
"&to="
|
||||
"&filetype=csv"
|
||||
"&label=include"
|
||||
"&layout=seriescolumn"
|
||||
"&type=package",
|
||||
skiprows=1, # First row is a useless header.
|
||||
skiprows=5, # First 5 rows are useless headers.
|
||||
parse_dates=['Time Period'],
|
||||
na_values=['ND'], # Presumably this stands for "No Data".
|
||||
index_col=0,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user