From 4f24a32c454c2fe7c01bcf41c49c3a20941bcf4f Mon Sep 17 00:00:00 2001 From: dmichalowicz Date: Fri, 18 Dec 2015 10:33:47 -0500 Subject: [PATCH 1/2] BUG: Benchmark and treasury curves data missing on first download --- zipline/data/loader.py | 88 +++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/zipline/data/loader.py b/zipline/data/loader.py index 30e81dd8..1380e95c 100644 --- a/zipline/data/loader.py +++ b/zipline/data/loader.py @@ -204,29 +204,33 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day): path. """ path = get_data_filepath(get_benchmark_filename(symbol)) - try: - data = pd.Series.from_csv(path).tz_localize('UTC') - if has_data_for_dates(data, first_date, last_date): - return data - # Don't re-download if we've successfully downloaded and written a file - # in the last hour. - last_download_time = last_modified_time(path) - if (now - last_download_time) <= ONE_HOUR: - logger.warn( - "Refusing to download new benchmark " - "data because a download succeeded at %s." % last_download_time - ) - return data + # If the path does not exist, it means the first download has not happened + # yet, so don't try to read from 'path'. + if os.path.exists(path): + try: + data = pd.Series.from_csv(path).tz_localize('UTC') + if has_data_for_dates(data, first_date, last_date): + return data - except (OSError, IOError, ValueError) as e: - # These can all be raised by various versions of pandas on various - # classes of malformed input. Treat them all as cache misses. - logger.info( - "Loading data for {path} failed with error [{error}].".format( - path=path, error=e, + # Don't re-download if we've successfully downloaded and written a + # file in the last hour. + last_download_time = last_modified_time(path) + if (now - last_download_time) <= ONE_HOUR: + logger.warn( + "Refusing to download new benchmark data because a " + "download succeeded at %s." 
% last_download_time + ) + return data + + except (OSError, IOError, ValueError) as e: + # These can all be raised by various versions of pandas on various + # classes of malformed input. Treat them all as cache misses. + logger.info( + "Loading data for {path} failed with error [{error}].".format( + path=path, error=e, + ) ) - ) logger.info( "Cache at {path} does not have data from {start} to {end}.\n" "Downloading benchmark data for '{symbol}'.", @@ -275,29 +279,33 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now): ) first_date = max(first_date, loader_module.earliest_possible_date()) path = get_data_filepath(filename) - try: - data = pd.DataFrame.from_csv(path).tz_localize('UTC') - if has_data_for_dates(data, first_date, last_date): - return data - # Don't re-download if we've successfully downloaded and written a file - # in the last hour. - last_download_time = last_modified_time(path) - if (now - last_download_time) <= ONE_HOUR: - logger.warn( - "Refusing to download new treasury " - "data because a download succeeded at %s." % last_download_time - ) - return data + # If the path does not exist, it means the first download has not happened + # yet, so don't try to read from 'path'. + if os.path.exists(path): + try: + data = pd.DataFrame.from_csv(path).tz_localize('UTC') + if has_data_for_dates(data, first_date, last_date): + return data - except (OSError, IOError, ValueError) as e: - # These can all be raised by various versions of pandas on various - # classes of malformed input. Treat them all as cache misses. - logger.info( - "Loading data for {path} failed with error [{error}].".format( - path=path, error=e, + # Don't re-download if we've successfully downloaded and written a + # file in the last hour. + last_download_time = last_modified_time(path) + if (now - last_download_time) <= ONE_HOUR: + logger.warn( + "Refusing to download new treasury data because a " + "download succeeded at %s." 
% last_download_time + ) + return data + + except (OSError, IOError, ValueError) as e: + # These can all be raised by various versions of pandas on various + # classes of malformed input. Treat them all as cache misses. + logger.info( + "Loading data for {path} failed with error [{error}].".format( + path=path, error=e, + ) ) - ) data = loader_module.get_treasury_data(first_date, last_date) data.to_csv(path) From e2d088c2be39a0de78da8a9ff4e2a867b1da8e40 Mon Sep 17 00:00:00 2001 From: dmichalowicz Date: Mon, 21 Dec 2015 16:49:01 -0500 Subject: [PATCH 2/2] DOC: Update whatsnew --- docs/source/whatsnew/0.8.4.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/whatsnew/0.8.4.txt b/docs/source/whatsnew/0.8.4.txt index 8eaa3522..0689149e 100644 --- a/docs/source/whatsnew/0.8.4.txt +++ b/docs/source/whatsnew/0.8.4.txt @@ -85,6 +85,8 @@ Bug Fixes * Fixed an issue pickling :func:`~zipline.utils.sentinel.sentinel` objects (:issue:`872`). +* Fixed spurious warnings on first download of treasury data (:issue:`922`). + Performance ~~~~~~~~~~~