Merge pull request #922 from quantopian/log-noise

Benchmark and treasury curves data missing on first download
2026-06-28 23:55:55 +08:00 · 2015-12-21 16:58:05 -05:00
parent c4f3f51ea5 e2d088c2be
commit 2b4ffedd09
2 changed files with 50 additions and 40 deletions
@@ -85,6 +85,8 @@ Bug Fixes
 * Fixed an issue pickling :func:`~zipline.utils.sentinel.sentinel` objects
  (:issue:`872`).

+* Fixed spurious warnings on first download of benchmark and treasury data
+  (:issue:`922`).
+
 Performance
 ~~~~~~~~~~~

@@ -204,29 +204,33 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day):
    path.
    """
    path = get_data_filepath(get_benchmark_filename(symbol))
-    try:
-        data = pd.Series.from_csv(path).tz_localize('UTC')
-        if has_data_for_dates(data, first_date, last_date):
-            return data

-        # Don't re-download if we've successfully downloaded and written a file
-        # in the last hour.
-        last_download_time = last_modified_time(path)
-        if (now - last_download_time) <= ONE_HOUR:
-            logger.warn(
-                "Refusing to download new benchmark "
-                "data because a download succeeded at %s." % last_download_time
-            )
-            return data
+    # If the path does not exist, it means the first download has not happened
+    # yet, so don't try to read from 'path'.
+    if os.path.exists(path):
+        try:
+            data = pd.Series.from_csv(path).tz_localize('UTC')
+            if has_data_for_dates(data, first_date, last_date):
+                return data

-    except (OSError, IOError, ValueError) as e:
-        # These can all be raised by various versions of pandas on various
-        # classes of malformed input.  Treat them all as cache misses.
-        logger.info(
-            "Loading data for {path} failed with error [{error}].".format(
-                path=path, error=e,
+            # Don't re-download if we've successfully downloaded and written a
+            # file in the last hour.
+            last_download_time = last_modified_time(path)
+            if (now - last_download_time) <= ONE_HOUR:
+                logger.warn(
+                    "Refusing to download new benchmark data because a "
+                    "download succeeded at %s." % last_download_time
+                )
+                return data
+
+        except (OSError, IOError, ValueError) as e:
+            # These can all be raised by various versions of pandas on various
+            # classes of malformed input.  Treat them all as cache misses.
+            logger.info(
+                "Loading data for {path} failed with error [{error}].".format(
+                    path=path, error=e,
+                )
            )
-        )
    logger.info(
        "Cache at {path} does not have data from {start} to {end}.\n"
        "Downloading benchmark data for '{symbol}'.",
@@ -275,29 +279,33 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now):
    )
    first_date = max(first_date, loader_module.earliest_possible_date())
    path = get_data_filepath(filename)
-    try:
-        data = pd.DataFrame.from_csv(path).tz_localize('UTC')
-        if has_data_for_dates(data, first_date, last_date):
-            return data

-        # Don't re-download if we've successfully downloaded and written a file
-        # in the last hour.
-        last_download_time = last_modified_time(path)
-        if (now - last_download_time) <= ONE_HOUR:
-            logger.warn(
-                "Refusing to download new treasury "
-                "data because a download succeeded at %s." % last_download_time
-            )
-            return data
+    # If the path does not exist, it means the first download has not happened
+    # yet, so don't try to read from 'path'.
+    if os.path.exists(path):
+        try:
+            data = pd.DataFrame.from_csv(path).tz_localize('UTC')
+            if has_data_for_dates(data, first_date, last_date):
+                return data

-    except (OSError, IOError, ValueError) as e:
-        # These can all be raised by various versions of pandas on various
-        # classes of malformed input.  Treat them all as cache misses.
-        logger.info(
-            "Loading data for {path} failed with error [{error}].".format(
-                path=path, error=e,
+            # Don't re-download if we've successfully downloaded and written a
+            # file in the last hour.
+            last_download_time = last_modified_time(path)
+            if (now - last_download_time) <= ONE_HOUR:
+                logger.warn(
+                    "Refusing to download new treasury data because a "
+                    "download succeeded at %s." % last_download_time
+                )
+                return data
+
+        except (OSError, IOError, ValueError) as e:
+            # These can all be raised by various versions of pandas on various
+            # classes of malformed input.  Treat them all as cache misses.
+            logger.info(
+                "Loading data for {path} failed with error [{error}].".format(
+                    path=path, error=e,
+                )
            )
-        )

    data = loader_module.get_treasury_data(first_date, last_date)
    data.to_csv(path)