# options_backtester/backtester/test/create_test_data.py

import os
from datetime import date, timedelta
import pandas as pd
import pandas_datareader.data as web
from backtester.utils import get_data_dir


def create_test_data(data_dir, filename="SPX_2008-2018.csv"):
    """Build a single date/price CSV from the yearly SPX files.

    Reads ``SPX_<year>.csv`` for every year from 2008 through 2018
    (inclusive) out of ``<data_dir>/allspx``, keeps the first
    ``underlying_last`` value seen for each distinct ``quotedate`` and
    appends those rows to ``<data_dir>/<filename>``.

    Parameters
    ----------
    data_dir : directory that contains the ``allspx`` folder and that
        receives the output file.
    filename : name of the output CSV; any existing file is overwritten.
    """

    source_dir = os.path.join(data_dir, "allspx")
    out_path = os.path.join(data_dir, filename)

    # Start from a fresh file holding only the header row; the yearly
    # frames below are appended without headers of their own.
    with open(out_path, "w+") as out:
        out.write("date,price\n")

    yearly_paths = (
        os.path.join(source_dir, "SPX_{}.csv".format(year))
        for year in range(2008, 2019)
    )
    for csv_path in yearly_paths:
        # groupby(...).first() leaves one row per quote date, indexed
        # (and sorted) by that date.
        per_day = pd.read_csv(csv_path).groupby("quotedate").first()
        per_day.to_csv(
            out_path, mode="a", columns=["underlying_last"], header=False)


def create_synthetic_data(data_dir, filename="synthetic_data.csv"):
    """Write a synthetic price series with known statistics.

    The file ``<data_dir>/<filename>`` gets a ``date,price`` header
    followed by 2000 rows: one per consecutive calendar day starting on
    01/01/1970, with the price rising by 1 each day from 1 to 2000.

    Known statistics of the series:
    Mean price = 1000.5
    Relative price change, first to last = (2000 - 1) / 1 = 1999
    """

    first_day = date(1970, 1, 1)
    # Price n falls on the (n - 1)-th day after the start date.
    rows = (
        "{},{}\n".format(
            (first_day + timedelta(days=price - 1)).strftime("%m/%d/%Y"),
            price)
        for price in range(1, 2001)
    )

    with open(os.path.join(data_dir, filename), "w+") as out:
        out.write("date,price\n")
        out.writelines(rows)


def fetch_balanced_data(data_dir,
                        filename="balanced_2015.csv",
                        start=None,
                        end=None):
    """Download daily data from `start` until `end` from IEX into one CSV.

    Each symbol is fetched through pandas-datareader's ``"iex"`` source
    and appended to ``<data_dir>/<filename>`` under the header
    ``date,symbol,open,high,low,close,volume``.

    `start` and `end` must be given together: if either one is missing,
    BOTH are replaced by the calendar year 2015 (2015-01-01 to
    2015-12-31), so a lone `start` or `end` is ignored.

    Symbols
    -------
    VOO: VANGUARD IX FUN/S&P 500 ETF
    GLD: SPDR Gold Trust
    VNQ: VANGUARD IX FUN/RL EST IX FD ETF
    VNQI: VANGUARD INTL E/GLB EX-US RL EST IX
    TLT: iShares Barclays 20+ Yr Treas.Bond
    TIP: iShares TIPS Bond ETF
    BNDX: VANGUARD CHARLO/TOTAL INTL BD ETF
    EEM: iShares MSCI Emerging Markets Indx
    RJI: Rogers International Commodity Index
    """

    if not (start and end):
        start, end = date(2015, 1, 1), date(2015, 12, 31)

    tickers = ("VOO", "GLD", "VNQ", "VNQI", "TLT", "TIP", "BNDX", "EEM",
               "RJI")
    out_path = os.path.join(data_dir, filename)

    # Recreate the file with just the header; per-symbol frames are
    # appended below without repeating it.
    with open(out_path, "w+") as out:
        out.write("date,symbol,open,high,low,close,volume\n")

    # Column order after the index, matching the header written above.
    field_order = ["symbol", "open", "high", "low", "close", "volume"]
    for ticker in tickers:
        quotes = web.DataReader(ticker, "iex", start, end)
        quotes["symbol"] = ticker
        quotes.to_csv(
            out_path,
            mode="a",
            index_label="date",
            columns=field_order,
            header=False)


if __name__ == "__main__":
    # When run as a script, regenerate every data set in the project's
    # data directory, in the same order each time.
    target_dir = get_data_dir()
    for build in (create_test_data, create_synthetic_data,
                  fetch_balanced_data):
        build(target_dir)