Merge pull request #922 from quantopian/log-noise

Benchmark and treasury curves data missing on first download
This commit is contained in:
Scott Sanderson
2015-12-21 16:58:05 -05:00
2 changed files with 50 additions and 40 deletions
+2
View File
@@ -85,6 +85,8 @@ Bug Fixes
* Fixed an issue pickling :func:`~zipline.utils.sentinel.sentinel` objects
(:issue:`872`).
* Fixed spurious warnings on first download of treasury data (:issue:`922`).
Performance
~~~~~~~~~~~
+48 -40
View File
@@ -204,29 +204,33 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day):
path.
"""
path = get_data_filepath(get_benchmark_filename(symbol))
try:
data = pd.Series.from_csv(path).tz_localize('UTC')
if has_data_for_dates(data, first_date, last_date):
return data
# Don't re-download if we've successfully downloaded and written a file
# in the last hour.
last_download_time = last_modified_time(path)
if (now - last_download_time) <= ONE_HOUR:
logger.warn(
"Refusing to download new benchmark "
"data because a download succeeded at %s." % last_download_time
)
return data
# If the path does not exist, it means the first download has not happened
# yet, so don't try to read from 'path'.
if os.path.exists(path):
try:
data = pd.Series.from_csv(path).tz_localize('UTC')
if has_data_for_dates(data, first_date, last_date):
return data
except (OSError, IOError, ValueError) as e:
# These can all be raised by various versions of pandas on various
# classes of malformed input. Treat them all as cache misses.
logger.info(
"Loading data for {path} failed with error [{error}].".format(
path=path, error=e,
# Don't re-download if we've successfully downloaded and written a
# file in the last hour.
last_download_time = last_modified_time(path)
if (now - last_download_time) <= ONE_HOUR:
logger.warn(
"Refusing to download new benchmark data because a "
"download succeeded at %s." % last_download_time
)
return data
except (OSError, IOError, ValueError) as e:
# These can all be raised by various versions of pandas on various
# classes of malformed input. Treat them all as cache misses.
logger.info(
"Loading data for {path} failed with error [{error}].".format(
path=path, error=e,
)
)
)
logger.info(
"Cache at {path} does not have data from {start} to {end}.\n"
"Downloading benchmark data for '{symbol}'.",
@@ -275,29 +279,33 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now):
)
first_date = max(first_date, loader_module.earliest_possible_date())
path = get_data_filepath(filename)
try:
data = pd.DataFrame.from_csv(path).tz_localize('UTC')
if has_data_for_dates(data, first_date, last_date):
return data
# Don't re-download if we've successfully downloaded and written a file
# in the last hour.
last_download_time = last_modified_time(path)
if (now - last_download_time) <= ONE_HOUR:
logger.warn(
"Refusing to download new treasury "
"data because a download succeeded at %s." % last_download_time
)
return data
# If the path does not exist, it means the first download has not happened
# yet, so don't try to read from 'path'.
if os.path.exists(path):
try:
data = pd.DataFrame.from_csv(path).tz_localize('UTC')
if has_data_for_dates(data, first_date, last_date):
return data
except (OSError, IOError, ValueError) as e:
# These can all be raised by various versions of pandas on various
# classes of malformed input. Treat them all as cache misses.
logger.info(
"Loading data for {path} failed with error [{error}].".format(
path=path, error=e,
# Don't re-download if we've successfully downloaded and written a
# file in the last hour.
last_download_time = last_modified_time(path)
if (now - last_download_time) <= ONE_HOUR:
logger.warn(
"Refusing to download new treasury data because a "
"download succeeded at %s." % last_download_time
)
return data
except (OSError, IOError, ValueError) as e:
# These can all be raised by various versions of pandas on various
# classes of malformed input. Treat them all as cache misses.
logger.info(
"Loading data for {path} failed with error [{error}].".format(
path=path, error=e,
)
)
)
data = loader_module.get_treasury_data(first_date, last_date)
data.to_csv(path)