ENH: Cache daily data to eliminate repeat network calls.

Both unit tests and repeated runs while developing an algorithm benefit from having a local copy of the Yahoo data, instead of making a network call each time. Store the web request results as a CSV file in a cache directory (~/.zipline/cache), named by symbol and date range.
2026-06-30 17:05:35 +08:00 · 2013-10-01 15:04:02 -04:00
parent b44fc20e4e
commit 5ddc134379
1 changed files with 23 additions and 1 deletions
@@ -41,6 +41,12 @@ DATA_PATH = os.path.join(
    'data'
 )

+CACHE_PATH = os.path.join(
+    expanduser("~"),
+    '.zipline',
+    'cache'
+)
+

 def get_datafile(name, mode='r'):
    """
@@ -55,6 +61,13 @@ def get_datafile(name, mode='r'):
    return open(os.path.join(DATA_PATH, name), mode)


+def get_cache_filepath(name):
+    if not os.path.exists(CACHE_PATH):
+        os.makedirs(CACHE_PATH)
+
+    return os.path.join(CACHE_PATH, name)
+
+
 def dump_treasury_curves():
    """
    Dumps data to be used with zipline.
@@ -256,7 +269,16 @@ must specify stocks or indexes"""
    if stocks is not None:
        for stock in stocks:
            print stock
-            stkd = DataReader(stock, 'yahoo', start, end).sort_index()
+            cache_filename = "{stock}-{start}-{end}.csv".format(
+                stock=stock,
+                start=start,
+                end=end)
+            cache_filepath = get_cache_filepath(cache_filename)
+            if os.path.exists(cache_filepath):
+                stkd = pd.DataFrame.from_csv(cache_filepath)
+            else:
+                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
+                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None: