Files
options_backtester/backtester/test/create_test_data.py
2019-05-13 15:35:59 -03:00

89 lines
2.6 KiB
Python

import os
from datetime import date, timedelta
import pandas as pd
import pandas_datareader.data as web
from backtester.utils import get_data_dir
def create_test_data(data_dir, filename="SPX_2008-2018.csv"):
"""Create test data set with 10 years of SPX"""
spx_dir = os.path.join(data_dir, "allspx")
test_file = os.path.join(data_dir, filename)
with open(test_file, "w+") as f:
f.write("date,price\n")
for year in range(2008, 2019):
filename = "SPX_{}.csv".format(year)
year_df = pd.read_csv(os.path.join(spx_dir, filename))
grouped = year_df.groupby("quotedate").first()
grouped.to_csv(
test_file, mode="a", columns=["underlying_last"], header=False)
def create_synthetic_data(data_dir, filename="synthetic_data.csv"):
"""Create an synthetic data set with known statistics.
Price goes from 1 to 2000.
Mean = 1000.5
% Price = 1999"""
synth_file = os.path.join(data_dir, filename)
day = date(1970, 1, 1)
with open(synth_file, "w+") as f:
f.write("date,price\n")
for i in range(1, 2001):
line = "{},{}\n".format(day.strftime("%m/%d/%Y"), i)
f.write(line)
day += timedelta(days=1)
def fetch_balanced_data(data_dir,
filename="balanced_2015.csv",
start=None,
end=None):
"""Downloads daily data from `start` til `end` from IEX.
Symbols
-------
VOO: VANGUARD IX FUN/S&P 500 ETF
GLD: SPDR Gold Trust
VNQ: VANGUARD IX FUN/RL EST IX FD ETF
VNQI: VANGUARD INTL E/GLB EX-US RL EST IX
TLT: iShares Barclays 20+ Yr Treas.Bond
TIP: iShares TIPS Bond ETF
BNDX: VANGUARD CHARLO/TOTAL INTL BD ETF
EEM: iShares MSCI Emerging Markets Indx
RJI: Rogers International Commodity Index
"""
if not start or not end:
start = date(2015, 1, 1)
end = date(2015, 12, 31)
symbols = ["VOO", "GLD", "VNQ", "VNQI", "TLT", "TIP", "BNDX", "EEM", "RJI"]
# Write headers
full_path = os.path.join(data_dir, filename)
with open(full_path, "w+") as f:
f.write("date,symbol,open,high,low,close,volume\n")
columns = ["symbol", "open", "high", "low", "close", "volume"]
for symbol in symbols:
data = web.DataReader(symbol, "iex", start, end)
data["symbol"] = symbol
data.to_csv(
full_path,
mode="a",
index_label="date",
columns=columns,
header=False)
if __name__ == "__main__":
data_dir = get_data_dir()
create_test_data(data_dir)
create_synthetic_data(data_dir)
fetch_balanced_data(data_dir)