# Mirror of https://github.com/wassname/options_backtester.git
# Synced 2026-06-27 19:31:36 +08:00 (250 lines, 8.8 KiB, Python)
import logging
|
|
import os
|
|
from datetime import date
|
|
from io import StringIO
|
|
from itertools import groupby
|
|
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
import pandas as pd
|
|
|
|
from . import utils, validation
|
|
from .notifications import slack_notification, Status
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Quote-table page: fetched by _form_data() and POSTed to (and sent as the
# Referer header) by fetch_data().
url = "http://www.cboe.com/delayedquote/quote-table-download"
|
|
|
|
|
|
def fetch_data(symbols=None):
    """Fetch options data for a list of symbols and save it per symbol.

    The validation form fields are read once with `_form_data()`. Then, for
    every symbol, the form is POSTed to `url` with the ticker filled in and
    the data file is downloaded using the cookies of that response. A
    successful download is handed to `_save_data`; a download that raises,
    is empty, or starts with " <!DOCTYPE" marks the symbol as failed and the
    loop moves on to the next symbol. Slack notifications summarising the
    scraped and failed symbols are sent at the end.

    Args:
        symbols: Iterable of ticker strings (upper-cased before use). When
            falsy, the result of `_get_all_listed_symbols()` is used.

    Raises:
        requests.ConnectionError: `_form_data()` could not reach `url`.
        Exception: Any other error raised by `_form_data()` is logged,
            notified and re-raised unchanged. Errors raised by `_save_data`
            are not caught here and propagate to the caller.
    """
    symbols = symbols or _get_all_listed_symbols()
    options = utils.get_module_config("cboe")
    mute_notifications = options.get("mute_notifications", [])

    try:
        form_data = _form_data()
    except requests.ConnectionError:
        msg = "Connection error trying to reach {}".format(url)
        logger.error(msg)
        slack_notification(msg, __name__)
        # Bare `raise` re-raises the active exception with its traceback.
        raise
    except Exception:
        msg = "Error parsing response"
        logger.error(msg, exc_info=True)
        slack_notification(msg, __name__)
        raise

    headers = {"Referer": url}
    file_url = "http://www.cboe.com/delayedquote/quotedata.dat"

    symbols = [symbol.upper() for symbol in symbols]
    done, failed = [], []

    for symbol in symbols:
        form_data["ctl00$ContentTop$C005$txtTicker"] = symbol
        try:
            response = requests.post(url,
                                     data=form_data,
                                     headers=headers,
                                     allow_redirects=False)
            # The data file is requested with the cookies set by the POST.
            symbol_req = requests.get(file_url,
                                      cookies=response.cookies,
                                      headers=headers)
            symbol_data = symbol_req.text
            if symbol_data == "" or symbol_data.startswith(" <!DOCTYPE"):
                # Give the failure a message so the traceback logged below
                # says why the symbol was rejected.
                raise ValueError(
                    "Empty or HTML response received for symbol {}".format(
                        symbol))
        except Exception:
            failed.append(symbol)
            msg = "Error fetching symbol {} data".format(symbol)
            logger.error(msg, exc_info=True)
            if symbol not in mute_notifications:
                slack_notification(msg, __name__)
        else:
            _save_data(symbol, symbol_data)
            done.append(symbol)

    if done:
        msg = "Successfully scraped symbols: " + ", ".join(done)
        slack_notification(msg, __name__, status=Status.Success)
    if failed:
        msg = "Failed to scrape symbols: " + ", ".join(failed)
        slack_notification(msg, __name__, status=Status.Warning)
|
|
|
|
|
|
def aggregate_monthly_data(symbols=None):
    """Aggregate daily snapshots into monthly files and validate data.

    For each symbol, the `.csv` files in `<save path>/cboe/{symbol}_daily`
    are grouped by month, concatenated and written to
    `<save path>/cboe/{symbol}/`. The daily files of a month are removed
    only after the monthly file has passed
    `validation.validate_aggregate_file`.

    Args:
        symbols: Iterable of ticker strings (upper-cased before use). When
            falsy, the result of `_get_all_listed_symbols()` is used.
    """
    symbols = symbols or _get_all_listed_symbols()

    save_data_path = utils.get_save_data_path()
    scraper_dir = os.path.join(save_data_path, "cboe")

    symbols = [symbol.upper() for symbol in symbols]

    for symbol in symbols:
        daily_dir = os.path.join(scraper_dir, symbol + "_daily")
        if not os.path.exists(daily_dir):
            msg = "Error aggregating data. Dir {} not found.".format(daily_dir)
            logger.error(msg)
            slack_notification(msg, __name__)
            continue

        monthly_dir = os.path.join(scraper_dir, symbol)

        # itertools.groupby only merges *adjacent* items with equal keys and
        # os.listdir returns entries in arbitrary order, so the listing must
        # be sorted by the grouping key or a month could be split into
        # several partial groups.
        symbol_files = sorted(
            (file for file in os.listdir(daily_dir) if file.endswith(".csv")),
            key=_monthly_grouper)

        for month, files in groupby(symbol_files, _monthly_grouper):
            file_names = list(files)
            daily_files = [
                os.path.join(daily_dir, name) for name in file_names
            ]
            try:
                symbol_df = concatenate_files(daily_files)
            except Exception:
                msg = "Error concatenating daily files for period " + month
                logger.error(msg, exc_info=True)
                slack_notification(msg, __name__)
                continue

            date_range = pd.to_datetime(symbol_df["quotedate"].unique())
            if not validation.validate_dates_in_month(symbol, date_range):
                # The month is skipped whenever validation fails; a
                # notification is only sent when the data is not from the
                # current calendar month.
                today = pd.Timestamp.today()
                first_date = date_range[0]
                is_current_month = (first_date.year == today.year
                                    and first_date.month == today.month)
                if not is_current_month:
                    msg = "Some trading dates where missing for symbol {}".format(
                        symbol)
                    slack_notification(msg, __name__)
                continue

            if not os.path.exists(monthly_dir):
                os.makedirs(monthly_dir)
                logger.debug("Symbol dir %s created", monthly_dir)

            file_name = _monthly_filename(file_names)
            monthly_file = os.path.join(monthly_dir, file_name)
            symbol_df.to_csv(monthly_file, index=False)

            if not validation.validate_aggregate_file(monthly_file,
                                                      daily_files):
                # Discard the aggregate and keep the daily files.
                utils.remove_file(monthly_file)
                msg = "Data in {} differs from the daily files".format(
                    monthly_file)
                logger.error(msg)
                slack_notification(msg, __name__)
                continue

            logger.debug("Saved monthly data %s", monthly_file)

            for file in daily_files:
                utils.remove_file(file, logger)
|
|
|
|
|
|
def _get_all_listed_symbols():
    """Return the "Stock Symbol" column of the bundled symbol directory.

    The symbols are read from `cboesymboldir2.csv`, located in this module's
    directory; the first line of that file is skipped. Reference listing:
    http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv
    """
    module_dir = os.path.dirname(__file__)
    # Joining with the working directory first keeps a relative __file__
    # resolvable; an absolute one simply overrides it.
    csv_path = os.path.realpath(
        os.path.join(os.getcwd(), module_dir, "cboesymboldir2.csv"))
    listing = pd.read_csv(csv_path, skiprows=1)
    return listing["Stock Symbol"].array
|
|
|
|
|
|
def concatenate_files(files):
    """Read every CSV in `files` (in sorted path order) and return them
    stacked into a single dataframe with a fresh 0..n-1 index.
    """
    frames = [pd.read_csv(path) for path in sorted(files)]
    return pd.concat(frames, ignore_index=True)
|
|
|
|
|
|
def _form_data():
    """Return the validation form fields scraped from the page at `url`.

    The page is downloaded and parsed, and the `value` attribute of the
    elements with ids `__VIEWSTATE` and `__EVENTVALIDATION` is returned in a
    dict keyed by those ids.
    """
    page = requests.get(url)
    parsed = BeautifulSoup(page.content, "lxml")
    field_ids = ("__VIEWSTATE", "__EVENTVALIDATION")
    return {
        field_id: parsed.select_one("#" + field_id)["value"]
        for field_id in field_ids
    }
|
|
|
|
|
|
def _save_data(symbol, symbol_data):
    """Save the contents of `symbol_data` to
    `{save path}/cboe/{symbol}_daily/{symbol}_{YYYYMMDD}.csv`.

    `{save path}` is the value returned by `utils.get_save_data_path()` and
    the date is today's. The symbol directory is created when missing. When
    the target file already exists and
    `validation.file_hash_matches_data` accepts it, nothing is written;
    otherwise `symbol_data` is tidied with `_wrangle_data` and written as
    CSV without the index.

    Args:
        symbol: Ticker the data belongs to.
        symbol_data: Raw text downloaded for `symbol`.
    """
    # Only the date goes through strftime, so a "%" inside `symbol` can never
    # be misread as a format directive.
    filename = "{}_{}.csv".format(symbol, date.today().strftime("%Y%m%d"))

    save_data_path = utils.get_save_data_path()
    symbol_dir = os.path.join(save_data_path, "cboe", symbol + "_daily")

    if not os.path.exists(symbol_dir):
        # exist_ok tolerates the directory appearing between the check and
        # the creation.
        os.makedirs(symbol_dir, exist_ok=True)
        logger.debug("Symbol dir %s created", symbol_dir)
    file_path = os.path.join(symbol_dir, filename)

    if os.path.exists(file_path) and validation.file_hash_matches_data(
            file_path, symbol_data):
        logger.debug("File %s already downloaded", file_path)
    else:
        daily_df = _wrangle_data(symbol, symbol_data)
        daily_df.to_csv(file_path, index=False)
        logger.debug("Saved daily symbol data as %s", file_path)
|
|
|
|
|
|
def _wrangle_data(symbol, symbol_data):
|
|
"""Returns a properly formated (_tidy_) dataframe"""
|
|
string_data = StringIO(symbol_data)
|
|
first_line = string_data.readline()
|
|
spot_price = float(first_line.split(",")[-2])
|
|
quote_date = date.today().strftime("%m/%d/%Y")
|
|
|
|
data = pd.read_csv(string_data, skiprows=1)
|
|
call_columns = [
|
|
"Calls", "Expiration Date", "Strike", "Last Sale", "Net", "Bid", "Ask",
|
|
"Vol", "Open Int", "IV", "Delta", "Gamma"
|
|
]
|
|
calls = data[call_columns]
|
|
|
|
put_columns = [
|
|
"Puts", "Expiration Date", "Strike", "Last Sale.1", "Net.1", "Bid.1",
|
|
"Ask.1", "Vol.1", "Open Int.1", "IV.1", "Delta.1", "Gamma.1"
|
|
]
|
|
puts = data[put_columns]
|
|
|
|
renamed_columns = [
|
|
"optionroot", "expiration", "strike", "last", "net", "bid", "ask",
|
|
"volume", "openinterest", "impliedvol", "delta", "gamma"
|
|
]
|
|
calls.columns = renamed_columns
|
|
calls.insert(loc=1, column="type", value="call")
|
|
puts.columns = renamed_columns
|
|
puts.insert(loc=1, column="type", value="put")
|
|
|
|
merged = pd.concat([calls, puts])
|
|
merged.insert(loc=0, column="underlying", value=symbol)
|
|
merged.insert(loc=1, column="underlying_last", value=spot_price)
|
|
merged.insert(loc=2, column="exchange", value="CBOE")
|
|
merged.insert(loc=6, column="quotedate", value=quote_date)
|
|
|
|
return merged
|
|
|
|
|
|
def _monthly_grouper(filename):
|
|
"""Returns `{year}{month}` string. Used to group files by month."""
|
|
basename = filename.split(".")[0]
|
|
file_date = basename.split("_")[1]
|
|
return file_date[:-2]
|
|
|
|
|
|
def _monthly_filename(filenames):
|
|
"""Returns filename of monthly aggregate file in the form
|
|
`{symbol}_{start_date}_to_{end_date}.csv`
|
|
"""
|
|
sorted_files = list(sorted(filenames))
|
|
first_file = sorted_files[0]
|
|
last_file = sorted_files[-1]
|
|
last_day = last_file.split(".")[0][-8:] # Get only the date
|
|
file_name = first_file.split(".")[0] + "_to_" + last_day + ".csv"
|
|
return file_name
|