mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 17:05:35 +08:00
ENH: Cache daily data to eliminate repeat network calls.
Both unit tests and repeated runs while developing an algorithm can benefit from having a local copy of the Yahoo data, instead of doing a network call each time. Store the web request results as a csv file in a cache directory, named by symbol and date range.
This commit is contained in:
+23
-1
@@ -41,6 +41,12 @@ DATA_PATH = os.path.join(
|
||||
'data'
|
||||
)
|
||||
|
||||
# Cache directory under the user's home: ~/.zipline/cache
CACHE_PATH = os.path.join(expanduser("~"), '.zipline', 'cache')
|
||||
|
||||
|
||||
def get_datafile(name, mode='r'):
|
||||
"""
|
||||
@@ -55,6 +61,13 @@ def get_datafile(name, mode='r'):
|
||||
return open(os.path.join(DATA_PATH, name), mode)
|
||||
|
||||
|
||||
def get_cache_filepath(name):
    """
    Return the path of the file called `name` inside CACHE_PATH.

    The CACHE_PATH directory is created if it does not exist yet. Only
    the directory is created here; the file itself is not touched.
    """
    # Function-scope import so the block does not rely on the module's
    # import list.
    import errno

    # Create the directory unconditionally rather than testing for it
    # first: with a check-then-create sequence another process can create
    # the directory in between, which makes os.makedirs raise.
    try:
        os.makedirs(CACHE_PATH)
    except OSError as exc:
        # "Already exists" is the expected case on every call but the
        # first; any other failure (e.g. permissions) is propagated.
        if exc.errno != errno.EEXIST:
            raise

    return os.path.join(CACHE_PATH, name)
|
||||
|
||||
|
||||
def dump_treasury_curves():
|
||||
"""
|
||||
Dumps data to be used with zipline.
|
||||
@@ -256,7 +269,16 @@ must specify stocks or indexes"""
|
||||
if stocks is not None:
|
||||
for stock in stocks:
|
||||
print stock
|
||||
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
|
||||
cache_filename = "{stock}-{start}-{end}.csv".format(
|
||||
stock=stock,
|
||||
start=start,
|
||||
end=end)
|
||||
cache_filepath = get_cache_filepath(cache_filename)
|
||||
if os.path.exists(cache_filepath):
|
||||
stkd = pd.DataFrame.from_csv(cache_filepath)
|
||||
else:
|
||||
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
|
||||
stkd.to_csv(cache_filepath)
|
||||
data[stock] = stkd
|
||||
|
||||
if indexes is not None:
|
||||
|
||||
Reference in New Issue
Block a user