ENH: Cache daily data to eliminate repeat network calls.

Both unit tests and repeated runs while developing an algorithm
can benefit from having a local copy of the Yahoo data, instead
of doing a network call each time.

Store the results of each web request as a CSV file in a cache directory
(~/.zipline/cache), with each file named by symbol and date range.
This commit is contained in:
Eddie Hebert
2013-10-01 15:04:02 -04:00
parent b44fc20e4e
commit 5ddc134379
+23 -1
View File
@@ -41,6 +41,12 @@ DATA_PATH = os.path.join(
'data'
)
# Directory under the user's home where cached data files are stored.
CACHE_PATH = os.path.join(expanduser("~"), '.zipline', 'cache')
def get_datafile(name, mode='r'):
"""
@@ -55,6 +61,13 @@ def get_datafile(name, mode='r'):
return open(os.path.join(DATA_PATH, name), mode)
def get_cache_filepath(name):
    """
    Return the path of the file called `name` inside the cache directory.

    The cache directory (CACHE_PATH) is created first if it does not
    already exist.

    Raises OSError if the directory cannot be created for any reason other
    than it already existing.
    """
    # Function-scope import so this block does not depend on a new
    # module-level import.
    import errno

    # Attempt the creation and tolerate "already exists" instead of checking
    # with os.path.exists() first: another process could create the
    # directory between the check and os.makedirs(), which would then raise.
    try:
        os.makedirs(CACHE_PATH)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
    return os.path.join(CACHE_PATH, name)
def dump_treasury_curves():
"""
Dumps data to be used with zipline.
@@ -256,7 +269,16 @@ must specify stocks or indexes"""
if stocks is not None:
for stock in stocks:
print stock
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
cache_filename = "{stock}-{start}-{end}.csv".format(
stock=stock,
start=start,
end=end)
cache_filepath = get_cache_filepath(cache_filename)
if os.path.exists(cache_filepath):
stkd = pd.DataFrame.from_csv(cache_filepath)
else:
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
stkd.to_csv(cache_filepath)
data[stock] = stkd
if indexes is not None: