mirror of
https://github.com/wassname/options_backtester.git
synced 2026-06-27 18:05:27 +08:00
@@ -1,4 +1,4 @@
|
||||
.PHONY: install env test test_notebook
|
||||
.PHONY: install env test test_notebook lint notebook help
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
install: ## Create environment and install dependencies
|
||||
@@ -7,6 +7,13 @@ install: ## Create environment and install dependencies
|
||||
env: ## Run pipenv shell
|
||||
pipenv shell
|
||||
|
||||
notebook: ## Run Jupyter notebook
|
||||
pipenv run jupyter notebook
|
||||
|
||||
lint: ## Run linter and format checker (flake8 & yapf)
|
||||
pipenv run flake8 backtester
|
||||
pipenv run yapf --diff --recursive backtester/
|
||||
|
||||
test: ## Run tests
|
||||
pipenv run python -m pytest -v backtester
|
||||
|
||||
@@ -14,10 +21,6 @@ test_notebook: ## Run jupyter notebook test
|
||||
pipenv run jupyter nbconvert nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 \
|
||||
backtester/examples/backtester_example.ipynb --stdout > /dev/null
|
||||
|
||||
lint: ## Run linter and format checker (flake8 & yapf)
|
||||
pipenv run flake8 backtester
|
||||
pipenv run yapf --diff --recursive backtester/
|
||||
|
||||
help:
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |\
|
||||
awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
[](https://travis-ci.com/lambdaclass/options_backtester)
|
||||
|
||||
Options Backtester
|
||||
==============================
|
||||
|
||||
@@ -13,73 +15,87 @@ Simple backtester to evaluate and analyse options strategies over historical pri
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python >= 3.5
|
||||
- Python >= 3.6
|
||||
- pipenv
|
||||
|
||||
|
||||
## Setup
|
||||
|
||||
For backtesting, set `$OPTIONS_DATA_PATH` to the appropriate directory where the data is located. All file paths parsed by the backtester will be relative to this directory.
|
||||
|
||||
To use the data scraper the following environment variables need to be set:
|
||||
- `$SAVE_DATA_PATH`: where the data will be saved to (default is `./data/scraped`)
|
||||
- `$TIINGO_API_KEY`: used to fetch data from [Tiingo](https://api.tiingo.com)
|
||||
- `$S3_BUCKET`: name of the S3 bucket to backup data
|
||||
- `$AWS_ACCESS_KEY_ID`: AWS acces key id
|
||||
- `$AWS_SECRET_ACCESS_KEY`: AWS secret key
|
||||
|
||||
You can configure the data scraper by editing the configuration file `data_scraper.conf` (json-formated).
|
||||
|
||||
Sample file:
|
||||
|
||||
```json
|
||||
{
|
||||
"cboe": {
|
||||
"mute_notifications": ["BFB", "CBSA"]
|
||||
},
|
||||
"notifications": {
|
||||
"slack_webhook": "https://hooks.slack.com/services/MY_WORKSPACE_WEBHOOK"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**HINT**: store environment variables in an `.env` file and pipenv will load them automatically when using `make env`.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
### Create environment and download dependencies
|
||||
Install [pipenv](https://pipenv.pypa.io/en/latest/)
|
||||
|
||||
```shell
|
||||
$> make init
|
||||
$> pip install pipenv
|
||||
```
|
||||
|
||||
### Activate environment
|
||||
Create environment and download dependencies
|
||||
|
||||
```shell
|
||||
$> make install
|
||||
```
|
||||
|
||||
Activate environment
|
||||
|
||||
```shell
|
||||
$> make env
|
||||
```
|
||||
|
||||
### Run tests
|
||||
Run [Jupyter](https://jupyter.org) notebook
|
||||
|
||||
```shell
|
||||
$> make notebook
|
||||
```
|
||||
|
||||
Run tests
|
||||
|
||||
```shell
|
||||
$> make test
|
||||
```
|
||||
|
||||
### Scrape data (supported scrapers: CBOE, Tiingo)
|
||||
## Usage
|
||||
|
||||
```shell
|
||||
$> make scrape scraper=cboe
|
||||
### Example:
|
||||
|
||||
$> make scrape scraper=tiingo
|
||||
```
|
||||
|
||||
### Run backtester with benchmark strategy
|
||||
|
||||
```shell
|
||||
$> make bench
|
||||
We'll run a backtest of a stock portfolio holding `$AAPL` and `$GOOG`, and simultaneously buying 10% OTM calls and puts on `$SPX` ([long strangle](https://www.investopedia.com/terms/s/strangle.asp)).
|
||||
We'll allocate 97% of our capital to stocks and the rest to options, and do a rebalance every month.
|
||||
|
||||
```python
|
||||
from backtester import Backtest, Type, Direction, Stock
|
||||
from backtester.strategy import Strategy, StrategyLeg
|
||||
from backtester.datahandler import HistoricalOptionsData, TiingoData
|
||||
|
||||
# Stocks data
|
||||
stocks_data = TiingoData('stocks.csv')
|
||||
stocks = [Stock(symbol='AAPL', percentage=0.5), Stock(symbol='GOOG', percentage=0.5)]
|
||||
|
||||
# Options data
|
||||
options_data = HistoricalOptionsData('options.h5', key='/SPX')
|
||||
schema = options_data.schema
|
||||
|
||||
# Long strangle
|
||||
leg_1 = StrategyLeg('leg_1', schema, option_type=Type.PUT, direction=Direction.BUY)
|
||||
leg_1.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last <=
|
||||
1.1 * schema.strike)
|
||||
leg_1.exit_filter = (schema.dte <= 30)
|
||||
|
||||
leg_2 = StrategyLeg('leg_2', schema, option_type=Type.CALL, direction=Direction.BUY)
|
||||
leg_2.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last >=
|
||||
0.9 * schema.strike)
|
||||
leg_2.exit_filter = (schema.dte <= 30)
|
||||
|
||||
strategy = Strategy(schema)
|
||||
strategy.add_legs([leg_1, leg_2])
|
||||
|
||||
allocation = {'stocks': .97, 'options': .03}
|
||||
initial_capital = 1_000_000
|
||||
bt = Backtest(allocation, initial_capital)
|
||||
bt.stocks = stocks
|
||||
bt.stocks_data = stocks_data
|
||||
bt.options_data = options_data
|
||||
bt.options_strategy = strategy
|
||||
|
||||
bt.run(rebalance_freq=1)
|
||||
```
|
||||
|
||||
You can explore more usage examples in the Jupyter [notebooks](backtester/examples/).
|
||||
|
||||
## Recommended reading
|
||||
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
.env
|
||||
target/
|
||||
Generated
-1865
File diff suppressed because it is too large
Load Diff
@@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "alert"
|
||||
version = "0.1.0"
|
||||
authors = ["Amin Arria <arria.amin@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
fantoccini = "0.11.6"
|
||||
futures = "0.1.25"
|
||||
tokio = "0.1.17"
|
||||
webdriver = "0.39.0"
|
||||
serde_json = "1.0.39"
|
||||
select = "0.4.2"
|
||||
reqwest = "0.9.12"
|
||||
diesel = { version = "1.4.2", features = ["sqlite", "chrono"] }
|
||||
dotenv = "0.10"
|
||||
chrono = "0.4"
|
||||
@@ -1,26 +0,0 @@
|
||||
FROM debian:latest
|
||||
MAINTAINER Amin Arria <amin.arria@lambdaclass.com>
|
||||
|
||||
WORKDIR /root
|
||||
RUN apt-get update
|
||||
RUN apt-get install -y curl make build-essential pkg-config openssl libssl-dev
|
||||
|
||||
RUN apt-get -y install firefox-esr
|
||||
RUN curl -L -O 'https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz'
|
||||
RUN tar -vxf geckodriver-v0.24.0-linux64.tar.gz
|
||||
|
||||
RUN apt-get install -y sqlite3 libsqlite3-dev
|
||||
|
||||
RUN curl https://sh.rustup.rs -sSf > rustup
|
||||
RUN sh rustup -y
|
||||
ENV PATH=/root/.cargo/bin:$PATH
|
||||
|
||||
COPY . watchdog/
|
||||
WORKDIR watchdog
|
||||
RUN make install_diesel_cli
|
||||
RUN make migration
|
||||
RUN make release
|
||||
|
||||
RUN apt-get clean
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
CMD (/root/geckodriver &) && ./target/release/alert
|
||||
@@ -1,13 +0,0 @@
|
||||
.PHONY: migration run install_diesel_cli release
|
||||
|
||||
migration:
|
||||
diesel migration run
|
||||
|
||||
run:
|
||||
cargo run
|
||||
|
||||
install_diesel_cli:
|
||||
cargo install diesel_cli --no-default-features --features sqlite chrono
|
||||
|
||||
release:
|
||||
cargo build --release
|
||||
@@ -1,34 +0,0 @@
|
||||
# Financial Watchdog
|
||||
|
||||
## Requirements
|
||||
- [WebDriver](https://www.w3.org/TR/webdriver1/) process running on port 4444 (e.g., [geckodriver](https://github.com/mozilla/geckodriver/releases), [chromedriver](https://sites.google.com/a/chromium.org/chromedriver/))
|
||||
- SQLite 3.19.3
|
||||
- Rust 1.33
|
||||
- [Diesel CLI 1.4](https://crates.io/crates/diesel_cli)
|
||||
|
||||
When installing `diesel_cli` if you run into any errors try installing with our needed features only:
|
||||
|
||||
```
|
||||
$> make install_diesel_cli
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
Create a `.env` with the following values:
|
||||
|
||||
- `DATABASE_URL`: The filepath where the DB will be stored (SQLite)
|
||||
- `SLACK_WEBHOOK_URL`: URL for Slack's webhook
|
||||
|
||||
Create the database by running the migrations:
|
||||
|
||||
```
|
||||
$> make migration
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
Run it
|
||||
|
||||
```
|
||||
$> make run
|
||||
```
|
||||
@@ -1,5 +0,0 @@
|
||||
# For documentation on how to configure this file,
|
||||
# see diesel.rs/guides/configuring-diesel-cli
|
||||
|
||||
[print_schema]
|
||||
file = "src/schema.rs"
|
||||
@@ -1 +0,0 @@
|
||||
DROP TABLE dollar;
|
||||
@@ -1,16 +0,0 @@
|
||||
CREATE TABLE dollar (
|
||||
id INTEGER PRIMARY KEY,
|
||||
buy DOUBLE NOT NULL,
|
||||
buy_amount INTEGER NOT NULL,
|
||||
sell DOUBLE NOT NULL,
|
||||
sell_amount INTEGER NOT NULL,
|
||||
last DOUBLE NOT NULL,
|
||||
var DOUBLE NOT NULL,
|
||||
varper DOUBLE NOT NULL,
|
||||
volume INTEGER NOT NULL,
|
||||
adjustment DOUBLE NOT NULL,
|
||||
min DOUBLE NOT NULL,
|
||||
max DOUBLE NOT NULL,
|
||||
oin INTEGER NOT NULL,
|
||||
created_at DATETIME NOT NULL
|
||||
);
|
||||
@@ -1 +0,0 @@
|
||||
DROP TABLE alerts;
|
||||
@@ -1,8 +0,0 @@
|
||||
CREATE TABLE alerts (
|
||||
id INTEGER PRIMARY KEY,
|
||||
created_at DATETIME NOT NULL,
|
||||
asset VARCHAR(20) NOT NULL,
|
||||
previous_value DOUBLE NOT NULL,
|
||||
current_value DOUBLE NOT NULL,
|
||||
active BOOLEAN NOT NULL
|
||||
);
|
||||
@@ -1,99 +0,0 @@
|
||||
extern crate reqwest;
|
||||
extern crate serde_json;
|
||||
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde_json::json;
|
||||
use std::env;
|
||||
use crate::models::{Dollar, Alert};
|
||||
use crate::storage;
|
||||
|
||||
pub fn check_dollar(conn: &SqliteConnection, dollar: &Dollar) {
|
||||
let dollar_close = storage::get_dollar_on_close(conn);
|
||||
let closing_prc_change =
|
||||
match &dollar_close {
|
||||
Some(dollar_close) => {
|
||||
(dollar.last - dollar_close.last)/dollar_close.last * 100.0
|
||||
},
|
||||
None => 0.0
|
||||
};
|
||||
|
||||
match storage::get_dollar_alert(conn) {
|
||||
Some(dollar_alert) => {
|
||||
let alert_prc_change = (dollar.last - dollar_alert.current_value)/dollar_alert.current_value * 100.0;
|
||||
// TODO: This value should be set by arguments to binary or by config files
|
||||
if alert_prc_change.abs() > 2.0 {
|
||||
if closing_prc_change.abs() > 3.0 {
|
||||
// If we couldn't unwrap closing_prc_change == 0.0
|
||||
let dollar_close = dollar_close.unwrap();
|
||||
let new_alert = Alert::new("dollar".to_string(), dollar_alert.previous_value, dollar.last);
|
||||
storage::replace_alert(conn, &dollar_alert, &new_alert);
|
||||
send_updated_alert(dollar_close.last, dollar_alert.current_value, dollar.last, alert_prc_change, closing_prc_change);
|
||||
} else {
|
||||
storage::deactivate_alert(conn, &dollar_alert);
|
||||
send_resolved_alert();
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// TODO: This value should be set by arguments to binary or by config files
|
||||
if closing_prc_change.abs() > 3.0 {
|
||||
// If we couldn't unwrap closing_prc_change == 0.0
|
||||
let dollar_close = dollar_close.unwrap();
|
||||
let alert = Alert::new(String::from("dollar"), dollar_close.last, dollar.last);
|
||||
storage::store_alert(conn, &alert);
|
||||
send_new_alert(dollar_close.last, dollar.last, closing_prc_change);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
// payload = {
|
||||
// "username": "Financial Watchdog",
|
||||
// "icon_emoji": ":money_with_wings:",
|
||||
// "attachments": [{
|
||||
// "color": color,
|
||||
// "title": title,
|
||||
// "text": text,
|
||||
// "fallback": text
|
||||
// "footer": "Financial Watchdog",
|
||||
// }]
|
||||
// }
|
||||
fn send_new_alert(price_close: f64, price_current: f64, prc_change: f64) {
|
||||
let msg = format!("Closing price: {:.2}\nCurrent price: {:.2}\nChanged: {:+.2}%", price_close, price_current, prc_change);
|
||||
send_alert_slack("#B22222", "Price jump from closing", &msg);
|
||||
// TODO: send_alert_mail(prc_change);
|
||||
}
|
||||
|
||||
fn send_resolved_alert() {
|
||||
let msg = format!("Dollar price from closing price is back below threshold");
|
||||
send_alert_slack("#3873AD", "Price back to normal", &msg);
|
||||
// TODO: send_alert_mail(prc_change);
|
||||
}
|
||||
|
||||
fn send_updated_alert(price_close: f64, price_previous: f64, price_current: f64, prc_change: f64, prc_change_close: f64) {
|
||||
let msg = format!("Closing price: {:.2}\nPrevious price: {:.2}\nCurrent price: {:.2}\nChange from previous: *{:+.2}%*\nChange from close: {:+.2}%", price_close, price_previous, price_current, prc_change, prc_change_close);
|
||||
send_alert_slack("#f1a031", "[UPDATE] Price jump", &msg);
|
||||
// TODO: send_alert_mail(prc_change);
|
||||
}
|
||||
|
||||
fn send_alert_slack(color: &str, title: &str, msg: &str) {
|
||||
let payload = json!({
|
||||
"username": "Financial Watchdog",
|
||||
"icon_emoji": ":money_with_wings:",
|
||||
"attachments": [{
|
||||
"color": color,
|
||||
"title": title,
|
||||
"text": msg,
|
||||
"fallback": msg,
|
||||
"footer": "Financial Watchdog"
|
||||
}]
|
||||
})
|
||||
.to_string();
|
||||
// We can unwrap safely because env variables are checked in main.rs::init_env()
|
||||
let slack_webhook_url = env::var("SLACK_WEBHOOK_URL").unwrap();
|
||||
let client = reqwest::Client::new();
|
||||
client.post(&slack_webhook_url)
|
||||
.header(reqwest::header::CONTENT_TYPE, "application/json")
|
||||
.body(payload)
|
||||
.send()
|
||||
.expect("Failed sending slack alert");
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
#[macro_use]
|
||||
extern crate diesel;
|
||||
extern crate dotenv;
|
||||
extern crate chrono;
|
||||
|
||||
pub mod schema;
|
||||
pub mod models;
|
||||
|
||||
mod scrape;
|
||||
mod storage;
|
||||
mod alert;
|
||||
|
||||
use dotenv::dotenv;
|
||||
use std::{thread, time, env};
|
||||
|
||||
fn main() {
|
||||
init_env();
|
||||
|
||||
loop {
|
||||
std::thread::spawn(|| {
|
||||
let dbconn = crate::storage::establish_connection();
|
||||
let value = crate::scrape::scrape();
|
||||
crate::alert::check_dollar(&dbconn, &value);
|
||||
crate::storage::store(&dbconn, &value);
|
||||
});
|
||||
|
||||
thread::sleep(time::Duration::from_secs(60));
|
||||
}
|
||||
}
|
||||
|
||||
fn init_env() {
|
||||
dotenv().ok();
|
||||
env::var("DATABASE_URL")
|
||||
.expect("DATABASE_URL must be set");
|
||||
env::var("SLACK_WEBHOOK_URL")
|
||||
.expect("SLACK_WEBHOOK_URL must be set");
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
use chrono::NaiveDateTime;
|
||||
use chrono::offset::Utc;
|
||||
use crate::schema::{dollar, alerts};
|
||||
|
||||
#[derive(Queryable, Insertable, Debug)]
|
||||
#[table_name="dollar"]
|
||||
pub struct Dollar {
|
||||
pub id: Option<i32>,
|
||||
pub buy: f64,
|
||||
pub buy_amount: i32,
|
||||
pub sell: f64,
|
||||
pub sell_amount: i32,
|
||||
pub last: f64,
|
||||
pub var: f64,
|
||||
pub varper: f64,
|
||||
pub volume: i32,
|
||||
pub adjustment: f64,
|
||||
pub min: f64,
|
||||
pub max: f64,
|
||||
pub oin: i32,
|
||||
pub created_at: NaiveDateTime
|
||||
}
|
||||
|
||||
impl Dollar {
|
||||
pub fn new() -> Dollar {
|
||||
let created_at = Utc::now().naive_utc();
|
||||
|
||||
Dollar {
|
||||
id: None,
|
||||
buy: 0.0,
|
||||
buy_amount: 0,
|
||||
sell: 0.0,
|
||||
sell_amount: 0,
|
||||
last: 0.0,
|
||||
var: 0.0,
|
||||
varper: 0.0,
|
||||
volume: 0,
|
||||
adjustment: 0.0,
|
||||
min: 0.0,
|
||||
max: 0.0,
|
||||
oin: 0,
|
||||
created_at: created_at
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Queryable, Insertable, Debug)]
|
||||
#[table_name="alerts"]
|
||||
pub struct Alert {
|
||||
pub id: Option<i32>,
|
||||
pub created_at: NaiveDateTime,
|
||||
pub asset: String,
|
||||
pub previous_value: f64,
|
||||
pub current_value: f64,
|
||||
pub active: bool
|
||||
}
|
||||
|
||||
impl Alert {
|
||||
pub fn new(asset: String, previous_value: f64, current_value: f64) -> Alert {
|
||||
let created_at = Utc::now().naive_utc();
|
||||
|
||||
Alert {
|
||||
id: None,
|
||||
created_at: created_at,
|
||||
asset: asset,
|
||||
previous_value: previous_value,
|
||||
current_value: current_value,
|
||||
active: true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
table! {
|
||||
alerts (id) {
|
||||
id -> Nullable<Integer>,
|
||||
created_at -> Timestamp,
|
||||
asset -> Text,
|
||||
previous_value -> Double,
|
||||
current_value -> Double,
|
||||
active -> Bool,
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
dollar (id) {
|
||||
id -> Nullable<Integer>,
|
||||
buy -> Double,
|
||||
buy_amount -> Integer,
|
||||
sell -> Double,
|
||||
sell_amount -> Integer,
|
||||
last -> Double,
|
||||
var -> Double,
|
||||
varper -> Double,
|
||||
volume -> Integer,
|
||||
adjustment -> Double,
|
||||
min -> Double,
|
||||
max -> Double,
|
||||
oin -> Integer,
|
||||
created_at -> Timestamp,
|
||||
}
|
||||
}
|
||||
|
||||
allow_tables_to_appear_in_same_query!(
|
||||
alerts,
|
||||
dollar,
|
||||
);
|
||||
@@ -1,102 +0,0 @@
|
||||
extern crate fantoccini;
|
||||
extern crate futures;
|
||||
extern crate select;
|
||||
extern crate serde_json;
|
||||
extern crate tokio;
|
||||
extern crate webdriver;
|
||||
|
||||
use fantoccini::{Client, Locator};
|
||||
use futures::future::Future;
|
||||
use futures::sync::oneshot;
|
||||
use select::document::Document;
|
||||
use select::predicate::Class;
|
||||
use serde_json::json;
|
||||
use webdriver::capabilities::Capabilities;
|
||||
|
||||
use crate::models::Dollar;
|
||||
|
||||
pub fn scrape() -> Dollar {
|
||||
let html = fetch_site();
|
||||
|
||||
let document = Document::from(html.as_str());
|
||||
|
||||
let mut value = Dollar::new();
|
||||
|
||||
for node in document.find(Class("PriceCell")) {
|
||||
let full_class =
|
||||
node.attr("class")
|
||||
.unwrap_or_else(|| panic!("Node without class attribute"));
|
||||
let class = full_class.split_whitespace().last();
|
||||
|
||||
match class {
|
||||
Some("bsz") => value.buy_amount = parse_i32(node.text()),
|
||||
Some("bid") => value.buy = parse_f64(node.text()),
|
||||
Some("ask") => value.sell = parse_f64(node.text()),
|
||||
Some("asz") => value.sell_amount = parse_i32(node.text()),
|
||||
Some("lst") => value.last = parse_f64(node.text()),
|
||||
Some("variation") => value.var = parse_f64(node.text()),
|
||||
Some("change") => {
|
||||
value.varper = parse_f64(node.text().trim_end_matches("%").to_string())
|
||||
}
|
||||
Some("von") => value.volume = parse_i32(node.text()),
|
||||
Some("settlementPrice") => value.adjustment = parse_f64(node.text()),
|
||||
Some("low") => value.min = parse_f64(node.text()),
|
||||
Some("hgh") => value.max = parse_f64(node.text()),
|
||||
Some("oin") => value.oin = parse_i32(node.text()),
|
||||
Some("futureImpliedRate") => {}
|
||||
Some(class) => {
|
||||
panic!("Non-matching class: {}", class);
|
||||
}
|
||||
None => {
|
||||
panic!("Node without empty class attribute");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
fn fetch_site() -> String {
|
||||
// TODO: Set headless capabilities for chromedriver.
|
||||
let mut cap = Capabilities::new();
|
||||
let arg = json!({"args": ["-headless"]});
|
||||
cap.insert("moz:firefoxOptions".to_string(), arg);
|
||||
let c = Client::with_capabilities("http://localhost:4444", cap);
|
||||
let (sender, receiver) = oneshot::channel::<String>();
|
||||
|
||||
tokio::run(
|
||||
c.map_err(|e| unimplemented!("failed to connect to WebDriver: {:?}", e))
|
||||
.and_then(|c| c.goto("https://rofex.primary.ventures/rofex/futuros"))
|
||||
.and_then(|mut c| c.current_url().map(move |url| (c, url)))
|
||||
.and_then(|(c, _url)| {
|
||||
c.wait_for_find(Locator::XPath("((//div[@class='PricePanelRow-row'])[1]//div[@class='PriceCell lst'])[not(contains(., '-'))]/.."))
|
||||
})
|
||||
.and_then(|mut e| {
|
||||
e.html(true)
|
||||
})
|
||||
.and_then(|e| match sender.send(e) {
|
||||
Err(err) => panic!("Error sending fetched site: {}", err),
|
||||
Ok(()) => Ok(()),
|
||||
})
|
||||
.map_err(|e| {
|
||||
panic!("a WebDriver command failed: {:?}", e);
|
||||
}),
|
||||
);
|
||||
|
||||
receiver.wait().unwrap()
|
||||
}
|
||||
|
||||
fn parse_i32(text: String) -> i32 {
|
||||
text.replace(".", "")
|
||||
.trim()
|
||||
.parse()
|
||||
.unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
|
||||
}
|
||||
|
||||
fn parse_f64(text: String) -> f64 {
|
||||
text.replace(".", "")
|
||||
.replace(",", ".")
|
||||
.trim()
|
||||
.parse()
|
||||
.unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
extern crate diesel;
|
||||
extern crate dotenv;
|
||||
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use diesel::result::Error::NotFound;
|
||||
use chrono::{Utc, NaiveDateTime, NaiveTime};
|
||||
use std::env;
|
||||
use crate::models::{Dollar, Alert};
|
||||
use crate::schema::{dollar, alerts};
|
||||
|
||||
pub fn establish_connection() -> SqliteConnection {
|
||||
// We can unwrap safely because env variables are checked in main.rs::init_env()
|
||||
let database_url = env::var("DATABASE_URL").unwrap();
|
||||
SqliteConnection::establish(&database_url)
|
||||
.expect(&format!("Error connecting to {}", database_url))
|
||||
}
|
||||
|
||||
pub fn store(conn: &SqliteConnection, new_dollar: &Dollar) {
|
||||
diesel::insert_into(dollar::table)
|
||||
.values(new_dollar)
|
||||
.execute(conn)
|
||||
.expect("Error saving new dollar");
|
||||
}
|
||||
|
||||
pub fn get_dollar_on_close(conn: &SqliteConnection) -> Option<Dollar> {
|
||||
let yesterday = Utc::today().naive_utc().pred();
|
||||
let timestamp = NaiveDateTime::new(yesterday, NaiveTime::from_hms(23, 59, 59));
|
||||
|
||||
let result = dollar::table.filter(dollar::created_at.lt(timestamp))
|
||||
.order(dollar::created_at.desc())
|
||||
.first::<Dollar>(conn);
|
||||
|
||||
match result {
|
||||
Ok(previous) => Some(previous),
|
||||
Err(NotFound) => None,
|
||||
Err(err) => panic!("Error getting previous dollar: \n{}", err)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store_alert(conn: &SqliteConnection, new_alert: &Alert) {
|
||||
diesel::insert_into(alerts::table)
|
||||
.values(new_alert)
|
||||
.execute(conn)
|
||||
.expect("Error saving new alert");
|
||||
}
|
||||
|
||||
pub fn get_dollar_alert(conn: &SqliteConnection) -> Option<Alert> {
|
||||
let result = alerts::table.filter(alerts::asset.eq("dollar").and(alerts::active.eq(true)))
|
||||
.first::<Alert>(conn);
|
||||
|
||||
match result {
|
||||
Ok(alert) => Some(alert),
|
||||
Err(NotFound) => None,
|
||||
Err(err) => panic!("Error getting current alert: \n{}", err)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deactivate_alert(conn: &SqliteConnection, alert: &Alert) {
|
||||
diesel::update(alerts::table.filter(alerts::id.eq(alert.id)))
|
||||
.set(alerts::active.eq(false))
|
||||
.execute(conn)
|
||||
.expect("Error deactivating alert");
|
||||
}
|
||||
|
||||
pub fn replace_alert(conn: &SqliteConnection, old_alert: &Alert, new_alert: &Alert) {
|
||||
// TODO: Do this operations inside a transaction for atomicity
|
||||
store_alert(conn, new_alert);
|
||||
deactivate_alert(conn, old_alert);
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
from . import datahandler, charts
|
||||
from .backtester import Backtest
|
||||
from .portfolio import *
|
||||
@@ -1,121 +0,0 @@
|
||||
import pandas as pd
|
||||
import pyprind
|
||||
from .portfolio import Portfolio
|
||||
|
||||
|
||||
class Backtest:
|
||||
def __init__(self, schema, initial_capital=1_000_000):
|
||||
self.schema = schema
|
||||
self._portfolio = None
|
||||
self._data = None
|
||||
self.initial_capital = initial_capital
|
||||
|
||||
@property
|
||||
def portfolio(self):
|
||||
return self._portfolio
|
||||
|
||||
@portfolio.setter
|
||||
def portfolio(self, portfolio):
|
||||
assert isinstance(portfolio, Portfolio)
|
||||
self._portfolio = portfolio
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
return self._data
|
||||
|
||||
@data.setter
|
||||
def data(self, data):
|
||||
self._data = data
|
||||
|
||||
def run(self, periods=1, sma_days=None):
|
||||
"""Runs a backtest and returns a dataframe with the daily balance"""
|
||||
assert self._data is not None
|
||||
assert self._portfolio is not None
|
||||
|
||||
self.current_capital = 0
|
||||
self.current_cash = self.initial_capital
|
||||
self.inventory = pd.DataFrame(columns=['symbol', 'cost', 'qty'])
|
||||
self.balance = pd.DataFrame()
|
||||
if sma_days:
|
||||
self._data.sma(sma_days)
|
||||
|
||||
data_iterator = self._data.iter_dates()
|
||||
|
||||
first_day = self._data['date'].min()
|
||||
last_day = self._data['date'].max()
|
||||
rebalancing_days = pd.date_range(first_day, last_day, freq=str(periods) +
|
||||
'BMS').to_pydatetime() if periods is not None else []
|
||||
|
||||
bar = pyprind.ProgBar(data_iterator.ngroups, bar_char='█')
|
||||
|
||||
self.balance = pd.DataFrame({
|
||||
'capital': self.current_cash,
|
||||
'cash': self.current_cash
|
||||
},
|
||||
index=[self._data.start_date - pd.Timedelta(1, unit='day')])
|
||||
|
||||
for date, data in data_iterator:
|
||||
|
||||
if date == first_day:
|
||||
self._rebalance_portfolio(data, sma_days)
|
||||
self._update_balance(date, data)
|
||||
if date in rebalancing_days:
|
||||
self._rebalance_portfolio(data, sma_days)
|
||||
|
||||
bar.update()
|
||||
|
||||
self.balance['% change'] = self.balance['capital'].pct_change()
|
||||
self.balance['accumulated return'] = (1.0 + self.balance['% change']).cumprod()
|
||||
|
||||
return self.balance
|
||||
|
||||
def _rebalance_portfolio(self, data, sma_days):
|
||||
"""Rebalances the portfolio so that the total money is allocated according to the given percentages"""
|
||||
money_total = self.current_cash + self.current_capital
|
||||
for asset in self._portfolio.assets:
|
||||
query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
|
||||
asset_current = data.query(query)
|
||||
|
||||
asset_price = asset_current[self.schema['adjClose']].values[0]
|
||||
|
||||
if sma_days is not None:
|
||||
if asset_current['sma'].values[0] < asset_price:
|
||||
qty = (money_total * asset.percentage) // asset_price
|
||||
else:
|
||||
qty = 0
|
||||
|
||||
else:
|
||||
qty = (money_total * asset.percentage) // asset_price
|
||||
|
||||
inventory_entry = self.inventory.query(query)
|
||||
self.inventory.drop(inventory_entry.index, inplace=True)
|
||||
updated_asset = pd.Series([asset.symbol, asset_price, qty])
|
||||
updated_asset.index = self.inventory.columns
|
||||
self.inventory = self.inventory.append(updated_asset, ignore_index=True)
|
||||
# Update current cash
|
||||
invested_capital = sum(self.inventory['cost'] * self.inventory['qty'])
|
||||
self.current_cash = money_total - invested_capital
|
||||
|
||||
def _update_balance(self, date, data):
|
||||
"""Updates self.balance for the given date"""
|
||||
costs = []
|
||||
|
||||
for asset in self._portfolio.assets:
|
||||
query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
|
||||
asset_current = data.query(query)
|
||||
inventory_asset = self.inventory.query(query)
|
||||
|
||||
cost = asset_current[self.schema['adjClose']].values[0]
|
||||
qty = inventory_asset['qty'].values[0]
|
||||
costs.append(cost * qty)
|
||||
|
||||
total_value = sum(costs)
|
||||
self.current_capital = total_value
|
||||
money_total = total_value + self.current_cash
|
||||
|
||||
row = pd.Series({
|
||||
'total value': total_value,
|
||||
'cash': self.current_cash,
|
||||
'capital': money_total,
|
||||
}, name=date)
|
||||
self.balance = self.balance.append(row)
|
||||
@@ -1,78 +0,0 @@
|
||||
"""Generates charts from a portfolio report"""
|
||||
|
||||
import altair as alt
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def returns_chart(report):
|
||||
# Time interval selector
|
||||
time_interval = alt.selection(type='interval', encodings=['x'])
|
||||
|
||||
# Area plot
|
||||
areas = alt.Chart().mark_area(opacity=0.7).encode(x='index:T',
|
||||
y=alt.Y('accumulated return:Q', axis=alt.Axis(format='%')))
|
||||
|
||||
# Nearest point selector
|
||||
nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=['index'], empty='none')
|
||||
|
||||
points = areas.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
|
||||
|
||||
# Transparent date selector
|
||||
selectors = alt.Chart().mark_point().encode(
|
||||
x='index:T',
|
||||
opacity=alt.value(0),
|
||||
).add_selection(nearest)
|
||||
|
||||
text = areas.mark_text(
|
||||
align='left', dx=5,
|
||||
dy=-5).encode(text=alt.condition(nearest, 'accumulated return:Q', alt.value(' '), format='.2%'))
|
||||
|
||||
layered = alt.layer(selectors,
|
||||
points,
|
||||
text,
|
||||
areas.encode(
|
||||
alt.X('index:T', axis=alt.Axis(title='date'), scale=alt.Scale(domain=time_interval))),
|
||||
width=700,
|
||||
height=350,
|
||||
title='Wealth over time')
|
||||
|
||||
lower = areas.properties(width=700, height=70).add_selection(time_interval)
|
||||
|
||||
return alt.vconcat(layered, lower, data=report.reset_index())
|
||||
|
||||
|
||||
def returns_histogram(report):
|
||||
bar = alt.Chart(report).mark_bar().encode(x=alt.X('% change:Q',
|
||||
bin=alt.BinParams(maxbins=100),
|
||||
axis=alt.Axis(format='%')),
|
||||
y='count():Q')
|
||||
return bar
|
||||
|
||||
|
||||
def monthly_returns_heatmap(report):
|
||||
resample = report.resample('M')['capital'].last()
|
||||
monthly_returns = resample.pct_change().reset_index()
|
||||
monthly_returns['capital'].iat[0] = resample.iloc[0] / report.iloc[0]['capital'] - 1
|
||||
monthly_returns.columns = ['date', 'capital']
|
||||
|
||||
chart = alt.Chart(monthly_returns).mark_rect().encode(
|
||||
alt.X('year(date):O', title='Year'), alt.Y('month(date):O', title='Month'),
|
||||
alt.Color('mean(capital)', title='Return', scale=alt.Scale(scheme='redyellowgreen')),
|
||||
alt.Tooltip('mean(capital)', format='.2f')).properties(title='Monthly Returns')
|
||||
|
||||
return chart
|
||||
|
||||
|
||||
def sma_graph(data):
|
||||
|
||||
price_chart = alt.Chart(
|
||||
data,
|
||||
width=700,
|
||||
height=350,
|
||||
).mark_line().encode(x='date:T', y=alt.Y('adjClose:Q'), color='symbol:N', opacity=alt.value(0.3))
|
||||
sma_chart = alt.Chart(
|
||||
data,
|
||||
width=700,
|
||||
height=350,
|
||||
).mark_line(strokeDash=[1, 1]).encode(x='date:T', y=alt.Y('sma:Q'), color='symbol:N')
|
||||
return price_chart + sma_chart
|
||||
@@ -1,2 +0,0 @@
|
||||
from .schema import *
|
||||
from .historical_asset_data import HistoricalAssetData
|
||||
@@ -1,79 +0,0 @@
|
||||
import os
|
||||
from .schema import Schema
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class HistoricalAssetData:
|
||||
"""Historical Asset Data container class."""
|
||||
def __init__(self, file, schema=None, **params):
|
||||
if schema:
|
||||
assert isinstance(schema, Schema)
|
||||
else:
|
||||
self.schema = HistoricalAssetData.default_schema()
|
||||
|
||||
file_extension = os.path.splitext(file)[1]
|
||||
|
||||
if file_extension == '.h5':
|
||||
self._data = pd.read_hdf(file, **params)
|
||||
elif file_extension == '.csv':
|
||||
params['parse_dates'] = [self.schema.date.mapping]
|
||||
self._data = pd.read_csv(file, **params)
|
||||
|
||||
columns = self._data.columns
|
||||
assert all((col in columns for _key, col in self.schema))
|
||||
|
||||
date_col = self.schema['date']
|
||||
|
||||
self.start_date = self._data[date_col].min()
|
||||
self.end_date = self._data[date_col].max()
|
||||
|
||||
def apply_filter(self, f):
|
||||
"""Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
|
||||
return self._data.query(f.query)
|
||||
|
||||
def iter_dates(self):
|
||||
"""Returns `pd.DataFrameGroupBy` that groups contracts by date"""
|
||||
return self._data.groupby(self.schema['date'])
|
||||
|
||||
def __getattr__(self, attr):
|
||||
"""Pass method invocation to `self._data`"""
|
||||
|
||||
method = getattr(self._data, attr)
|
||||
if hasattr(method, '__call__'):
|
||||
|
||||
def df_method(*args, **kwargs):
|
||||
return method(*args, **kwargs)
|
||||
|
||||
return df_method
|
||||
else:
|
||||
return method
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, pd.Series):
|
||||
return self._data[item]
|
||||
else:
|
||||
key = self.schema[item]
|
||||
return self._data[key]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._data[key] = value
|
||||
if key not in self.schema:
|
||||
self.schema.update({key: key})
|
||||
|
||||
def __len__(self):
|
||||
return len(self._data)
|
||||
|
||||
def __repr__(self):
|
||||
return self._data.__repr__()
|
||||
|
||||
def default_schema():
|
||||
"""Returns default schema for Historical Asset Data"""
|
||||
schema = Schema.canonical()
|
||||
return schema
|
||||
|
||||
def sma(self, months):
|
||||
sma = self._data.groupby('symbol').rolling(months)['adjClose'].mean()
|
||||
sma = sma.reset_index('symbol').sort_index()
|
||||
sma = sma.fillna(0)
|
||||
self._data['sma'] = sma['adjClose']
|
||||
self.schema.update({'sma': 'sma'})
|
||||
@@ -1,162 +0,0 @@
|
||||
class Schema:
|
||||
"""Data schema class.
|
||||
Used to run validations and provide uniform access to fields in the data set.
|
||||
"""
|
||||
|
||||
columns = [
|
||||
"symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen",
|
||||
"adjVolume", "divCash", "splitFactor"
|
||||
]
|
||||
|
||||
def canonical():
|
||||
"""Builder method that returns a `Schema` with default mappings"""
|
||||
mappings = {key: key for key in Schema.columns}
|
||||
return Schema(mappings)
|
||||
|
||||
def __init__(self, mappings):
|
||||
assert all((key in mappings for key in Schema.columns))
|
||||
|
||||
self._mappings = mappings
|
||||
|
||||
def update(self, mappings):
|
||||
"""Update schema according to given `mappings`"""
|
||||
self._mappings.update(mappings)
|
||||
return self
|
||||
|
||||
def __contains__(self, key):
|
||||
"""Returns True if key is in schema"""
|
||||
return key in self._mappings.keys()
|
||||
|
||||
def __getattr__(self, key):
|
||||
"""Returns Field object used to build Filters"""
|
||||
return Field(key, self._mappings[key])
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._mappings[key] = value
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""Returns mapping of given `key`"""
|
||||
return self._mappings[key]
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._mappings.items())
|
||||
|
||||
def __repr__(self):
|
||||
return "Schema({})".format([Field(k, m) for k, m in self._mappings.items()])
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._mappings == other._mappings
|
||||
|
||||
|
||||
class Field:
|
||||
"""Encapsulates data fields to build filters used by strategies"""
|
||||
|
||||
__slots__ = ("name", "mapping")
|
||||
|
||||
def __init__(self, name, mapping):
|
||||
self.name = name
|
||||
self.mapping = mapping
|
||||
|
||||
def _create_filter(self, op, other):
|
||||
if isinstance(other, Field):
|
||||
query = Field._format_query(self.mapping, op, other.mapping)
|
||||
else:
|
||||
query = Field._format_query(self.mapping, op, other)
|
||||
return Filter(query)
|
||||
|
||||
def _combine_fields(self, op, other, invert=False):
|
||||
if isinstance(other, Field):
|
||||
name = Field._format_query(self.name, op, other.name, invert)
|
||||
mapping = Field._format_query(self.mapping, op, other.mapping, invert)
|
||||
elif isinstance(other, (int, float)):
|
||||
name = Field._format_query(self.name, op, other, invert)
|
||||
mapping = Field._format_query(self.mapping, op, other, invert)
|
||||
else:
|
||||
raise TypeError
|
||||
|
||||
return Field(name, mapping)
|
||||
|
||||
def _format_query(left, op, right, invert=False):
|
||||
if invert:
|
||||
left, right = right, left
|
||||
query = "{left} {op} {right}".format(left=left, op=op, right=right)
|
||||
return query
|
||||
|
||||
def __add__(self, value):
|
||||
return self._combine_fields("+", value)
|
||||
|
||||
def __radd__(self, value):
|
||||
return self._combine_fields("+", value, invert=True)
|
||||
|
||||
def __sub__(self, value):
|
||||
return self._combine_fields("-", value)
|
||||
|
||||
def __rsub__(self, value):
|
||||
return self._combine_fields("-", value, invert=True)
|
||||
|
||||
def __mul__(self, value):
|
||||
return self._combine_fields("*", value)
|
||||
|
||||
def __rmul__(self, value):
|
||||
return self._combine_fields("*", value, invert=True)
|
||||
|
||||
def __truediv__(self, value):
|
||||
return self._combine_fields("/", value)
|
||||
|
||||
def __rtruediv__(self, value):
|
||||
return self._combine_fields("/", value, invert=True)
|
||||
|
||||
def __lt__(self, value):
|
||||
return self._create_filter("<", value)
|
||||
|
||||
def __le__(self, value):
|
||||
return self._create_filter("<=", value)
|
||||
|
||||
def __gt__(self, value):
|
||||
return self._create_filter(">", value)
|
||||
|
||||
def __ge__(self, value):
|
||||
return self._create_filter(">=", value)
|
||||
|
||||
def __eq__(self, value):
|
||||
if isinstance(value, str):
|
||||
value = "'{}'".format(value)
|
||||
return self._create_filter("==", value)
|
||||
|
||||
def __ne__(self, value):
|
||||
return self._create_filter("!=", value)
|
||||
|
||||
def __repr__(self):
|
||||
return "Field(name='{}', mapping='{}')".format(self.name, self.mapping)
|
||||
|
||||
|
||||
class Filter:
|
||||
"""This class determines entry/exit conditions for strategies"""
|
||||
|
||||
__slots__ = ("query")
|
||||
|
||||
def __init__(self, query):
|
||||
self.query = query
|
||||
|
||||
def __and__(self, other):
|
||||
"""Returns logical *and* between `self` and `other`"""
|
||||
assert isinstance(other, Filter)
|
||||
new_query = "({}) & ({})".format(self.query, other.query)
|
||||
return Filter(query=new_query)
|
||||
|
||||
def __or__(self, other):
|
||||
"""Returns logical *or* between `self` and `other`"""
|
||||
assert isinstance(other, Filter)
|
||||
new_query = "(({}) | ({}))".format(self.query, other.query)
|
||||
return Filter(query=new_query)
|
||||
|
||||
def __invert__(self):
|
||||
"""Negates filter"""
|
||||
return Filter("!({})".format(self.query))
|
||||
|
||||
def __call__(self, data):
|
||||
"""Returns dataframe of filtered data"""
|
||||
return data.eval(self.query)
|
||||
|
||||
def __repr__(self):
|
||||
return "Filter(query='{}')".format(self.query)
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,2 +0,0 @@
|
||||
from .portfolio import Portfolio
|
||||
from .asset import Asset
|
||||
@@ -1,8 +0,0 @@
|
||||
class Asset:
|
||||
"""Asset data class"""
|
||||
def __init__(self, symbol, percentage):
|
||||
self.symbol = symbol
|
||||
self.percentage = percentage
|
||||
|
||||
def __repr__(self):
|
||||
return "Asset(symbol={}, percentage={})".format(self.symbol, self.percentage)
|
||||
@@ -1,28 +0,0 @@
|
||||
from .asset import Asset
|
||||
|
||||
|
||||
class Portfolio:
|
||||
def __init__(self):
|
||||
self.assets = []
|
||||
|
||||
def add_asset(self, asset):
|
||||
"""Adds asset to the Portfolio"""
|
||||
assert isinstance(asset, Asset)
|
||||
self.assets.append(asset)
|
||||
return self
|
||||
|
||||
def add_assets(self, assets):
|
||||
"""Adds assets to the Portfolio"""
|
||||
for asset in assets:
|
||||
self.add_asset(asset)
|
||||
return self
|
||||
|
||||
def remove_asset(self, asset_number):
|
||||
"""Removes asset from the Portfolio"""
|
||||
self.assets.pop(asset_number)
|
||||
return self
|
||||
|
||||
def clear_assets(self):
|
||||
"""Removes *all* assets from the Portfolio"""
|
||||
self.assets = []
|
||||
return self
|
||||
@@ -1,103 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from asset_backtester import Backtest, Portfolio, Asset
|
||||
|
||||
# We use Portfolio Visualizer (https://www.portfoliovisualizer.com/backtest-portfolio)
|
||||
# to find the actual return for the test porfolios.
|
||||
|
||||
|
||||
def test_ivy_portfolio(sample_datahandler):
|
||||
bt = run_backtest(sample_datahandler, ivy_portfolio())
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
|
||||
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
|
||||
|
||||
actual_return = 1.2041
|
||||
return_tolerance = 0.01
|
||||
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
|
||||
|
||||
|
||||
def test_ivy_monthly_rebalance(sample_datahandler):
|
||||
bt = run_backtest(sample_datahandler, ivy_portfolio(), periods=1)
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
|
||||
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
|
||||
|
||||
actual_return = 1.2043
|
||||
return_tolerance = 0.01
|
||||
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
|
||||
|
||||
|
||||
def test_all_weather_portfolio(sample_datahandler):
|
||||
bt = run_backtest(sample_datahandler, all_weather_portfolio())
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
|
||||
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
|
||||
|
||||
actual_return = 1.1874
|
||||
return_tolerance = 0.01
|
||||
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
|
||||
|
||||
|
||||
def test_all_weather_monthly_rebalance(sample_datahandler):
|
||||
bt = run_backtest(sample_datahandler, all_weather_portfolio(), periods=1)
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
|
||||
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
|
||||
|
||||
actual_return = 1.1828
|
||||
return_tolerance = 0.01
|
||||
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
|
||||
|
||||
|
||||
def test_constant_price(constant_price_datahandler):
|
||||
bt = run_backtest(constant_price_datahandler, ivy_portfolio())
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['% change'], 0.0, rtol=tolerance)
|
||||
assert np.allclose(balance['capital'], bt.initial_capital, rtol=tolerance)
|
||||
assert np.allclose(balance['total value'], bt.initial_capital, rtol=tolerance)
|
||||
assert np.allclose(balance['accumulated return'], 1.0, rtol=tolerance)
|
||||
|
||||
|
||||
def test_zero_initial_capital(sample_datahandler):
|
||||
bt = run_backtest(sample_datahandler, ivy_portfolio(), initial_capital=0)
|
||||
|
||||
balance = bt.balance[1:]
|
||||
tolerance = 0.0001
|
||||
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
|
||||
assert np.allclose(balance['cash'], 0.0, rtol=tolerance)
|
||||
assert np.allclose(balance['total value'], 0.0, rtol=tolerance)
|
||||
|
||||
|
||||
# Helpers
|
||||
def run_backtest(data, portfolio, initial_capital=1_000_000, periods=None):
|
||||
bt = Backtest(data.schema, initial_capital=initial_capital)
|
||||
bt.portfolio = portfolio
|
||||
bt.data = data
|
||||
bt.run(periods=periods)
|
||||
|
||||
return bt
|
||||
|
||||
|
||||
def ivy_portfolio():
|
||||
portfolio = Portfolio()
|
||||
assets = [Asset('VTI', 0.2), Asset('VEU', 0.2), Asset('BND', 0.2), Asset('VNQ', 0.2), Asset('DBC', 0.2)]
|
||||
|
||||
return portfolio.add_assets(assets)
|
||||
|
||||
|
||||
def all_weather_portfolio():
|
||||
portfolio = Portfolio()
|
||||
assets = [Asset('VTI', 0.3), Asset('TLT', 0.4), Asset('IEF', 0.15), Asset('GLD', 0.075), Asset('DBC', 0.075)]
|
||||
|
||||
return portfolio.add_assets(assets)
|
||||
@@ -1,23 +0,0 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from asset_backtester.datahandler import HistoricalAssetData
|
||||
|
||||
TEST_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
# 2019 data for TLT, GLD, IEF, VTI, VEU, BND, VNQ and DBC
|
||||
SAMPLE_DATA = os.path.join(TEST_DIR, 'test_data', 'sample_data.csv')
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def sample_datahandler():
|
||||
data = HistoricalAssetData(SAMPLE_DATA)
|
||||
return data
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def constant_price_datahandler():
|
||||
data = HistoricalAssetData(SAMPLE_DATA)
|
||||
data['adjClose'] = data['close'] = 10.0
|
||||
return data
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
import os
|
||||
import backtester as bt
|
||||
from backtester.utils import get_data_dir
|
||||
|
||||
data_dir = get_data_dir()
|
||||
spx_test_data = os.path.join(data_dir, "SPX_2008-2018.csv")
|
||||
|
||||
portfolio = bt.run(spx_test_data)
|
||||
report = portfolio.create_report()
|
||||
|
||||
print("Running Benchmark strategy on SPX data for 2008-2018")
|
||||
print(report.tail(30))
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"cboe": {
|
||||
"mute_notifications": []
|
||||
},
|
||||
"notifications": {
|
||||
"slack_webhook": ""
|
||||
}
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
import logging.config
|
||||
import os
|
||||
import argparse
|
||||
|
||||
from data_scraper import cboe, tiingo, backup
|
||||
|
||||
parser = argparse.ArgumentParser(prog="data_scraper.py")
|
||||
parser.add_argument("-t", "--symbols", nargs="+", help="Symbols to fetch")
|
||||
parser.add_argument(
|
||||
"-s",
|
||||
"--scraper",
|
||||
choices=["cboe", "tiingo"],
|
||||
default="cboe",
|
||||
help="Scraper to use")
|
||||
parser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Enable logging")
|
||||
parser.add_argument(
|
||||
"-a",
|
||||
"--aggregate",
|
||||
action="store_true",
|
||||
help="Aggregate daily data files")
|
||||
parser.add_argument(
|
||||
"-b", "--backup", action="store_true", help="Backup files in S3 bucket")
|
||||
|
||||
args = parser.parse_args()
|
||||
module_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
|
||||
|
||||
if args.verbose:
|
||||
config_file = os.path.realpath(os.path.join(module_dir, "logconfig.ini"))
|
||||
logging.config.fileConfig(fname=config_file)
|
||||
|
||||
if args.aggregate:
|
||||
if args.symbols:
|
||||
cboe.aggregate_monthly_data(args.symbols)
|
||||
else:
|
||||
cboe.aggregate_monthly_data()
|
||||
elif args.backup:
|
||||
backup.backup_data()
|
||||
else:
|
||||
if args.scraper == "tiingo":
|
||||
scraper = tiingo
|
||||
else:
|
||||
scraper = cboe
|
||||
|
||||
if args.symbols:
|
||||
scraper.fetch_data(args.symbols)
|
||||
else:
|
||||
scraper.fetch_data()
|
||||
@@ -1,114 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from data_scraper import utils
|
||||
from data_scraper.notifications import slack_notification, Status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def backup_data():
|
||||
"""Uploads scraped files to S3 bucket.
|
||||
Set bucket name in environment variable $S3_BUCKET
|
||||
"""
|
||||
try:
|
||||
bucket_name = utils.get_environment_var("S3_BUCKET")
|
||||
except EnvironmentError as e:
|
||||
logger.error(str(e))
|
||||
slack_notification("Backup failed. Set $S3_BUCKET env variable",
|
||||
__name__)
|
||||
raise e
|
||||
|
||||
s3 = boto3.resource("s3")
|
||||
bucket = s3.Bucket(bucket_name)
|
||||
|
||||
data_path = utils.get_save_data_path()
|
||||
|
||||
cboe_data = os.path.join(data_path, "cboe")
|
||||
cboe_folders = []
|
||||
if os.path.exists(cboe_data):
|
||||
cboe_folders = [
|
||||
os.path.join(cboe_data, folder) for folder in os.listdir(cboe_data)
|
||||
if not folder.endswith("daily")
|
||||
]
|
||||
|
||||
tiingo_data = os.path.join(data_path, "tiingo")
|
||||
tiingo_folders = []
|
||||
if os.path.exists(tiingo_data):
|
||||
tiingo_folders = [
|
||||
os.path.join(tiingo_data, folder)
|
||||
for folder in os.listdir(tiingo_data)
|
||||
]
|
||||
|
||||
done_cboe, fail_cboe = _upload_folders(
|
||||
bucket, "cboe", cboe_folders, remove_files=False)
|
||||
done_tiingo, fail_tiingo = _upload_folders(
|
||||
bucket, "tiingo", tiingo_folders, remove_files=True)
|
||||
|
||||
done = done_cboe + done_tiingo
|
||||
failed = fail_cboe + fail_tiingo
|
||||
if len(done) > 0:
|
||||
msg = "Successful backup of symbols: " + ", ".join(done)
|
||||
slack_notification(msg, __name__, status=Status.Success)
|
||||
if len(failed) > 0:
|
||||
msg = "Unable to backup symbols: " + ", ".join(done)
|
||||
slack_notification(msg, __name__, status=Status.Warning)
|
||||
|
||||
|
||||
def _upload_folders(bucket, scraper, folders, remove_files=False):
|
||||
"""Uploads folders to S3 bucket and (optionally) removes old files"""
|
||||
data_path = utils.get_save_data_path()
|
||||
done, failed = [], []
|
||||
|
||||
for folder in folders:
|
||||
symbol = os.path.basename(folder)
|
||||
try:
|
||||
if remove_files:
|
||||
_remove_old_files(bucket, prefix=scraper + "/" + symbol)
|
||||
_upload_folder(bucket, folder, data_path)
|
||||
except Exception:
|
||||
failed.append(os.path.basename(folder))
|
||||
else:
|
||||
done.append(os.path.basename(folder))
|
||||
|
||||
return (done, failed)
|
||||
|
||||
|
||||
def _upload_folder(bucket, folder, data_path):
|
||||
"""Uploads folder contents to S3 bucket"""
|
||||
if not os.path.isdir(folder):
|
||||
return
|
||||
|
||||
for root, dirs, files in os.walk(folder):
|
||||
for file in files:
|
||||
file_path = os.path.join(root, file)
|
||||
key = os.path.relpath(file_path, data_path)
|
||||
if _key_exists(bucket, key):
|
||||
logger.debug("File already exists in S3")
|
||||
continue
|
||||
try:
|
||||
bucket.upload_file(file_path, key)
|
||||
logger.debug("Uploaded file %s to S3", file)
|
||||
except Exception as e:
|
||||
msg = "Error uploading data file {} to S3.\nReceived exception message {}".format(
|
||||
file_path, str(e))
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
raise e
|
||||
|
||||
|
||||
def _key_exists(bucket, key):
|
||||
try:
|
||||
bucket.Object(key).load()
|
||||
except ClientError as e:
|
||||
return int(e.response["Error"]["Code"]) != 404
|
||||
return False
|
||||
|
||||
|
||||
def _remove_old_files(bucket, prefix):
|
||||
old_files = bucket.objects.filter(Prefix=prefix)
|
||||
for file in old_files:
|
||||
file.delete()
|
||||
@@ -1,249 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from datetime import date
|
||||
from io import StringIO
|
||||
from itertools import groupby
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import pandas as pd
|
||||
|
||||
from . import utils, validation
|
||||
from .notifications import slack_notification, Status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
url = "http://www.cboe.com/delayedquote/quote-table-download"
|
||||
|
||||
|
||||
def fetch_data(symbols=None):
|
||||
"""Fetches options data for a given list of symbols"""
|
||||
symbols = symbols or _get_all_listed_symbols()
|
||||
options = utils.get_module_config("cboe")
|
||||
mute_notifications = options.get("mute_notifications", [])
|
||||
|
||||
try:
|
||||
form_data = _form_data()
|
||||
except requests.ConnectionError as ce:
|
||||
msg = "Connection error trying to reach {}".format(url)
|
||||
logger.error(msg)
|
||||
slack_notification(msg, __name__)
|
||||
raise ce
|
||||
except Exception as e:
|
||||
msg = "Error parsing response"
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
raise e
|
||||
|
||||
headers = {"Referer": url}
|
||||
file_url = "http://www.cboe.com/delayedquote/quotedata.dat"
|
||||
|
||||
symbols = [symbol.upper() for symbol in symbols]
|
||||
done, failed = [], []
|
||||
|
||||
for symbol in symbols:
|
||||
form_data["ctl00$ContentTop$C005$txtTicker"] = symbol
|
||||
try:
|
||||
response = requests.post(url,
|
||||
data=form_data,
|
||||
headers=headers,
|
||||
allow_redirects=False)
|
||||
symbol_req = requests.get(file_url,
|
||||
cookies=response.cookies,
|
||||
headers=headers)
|
||||
symbol_data = symbol_req.text
|
||||
if symbol_data == "" or symbol_data.startswith(" <!DOCTYPE"):
|
||||
raise Exception
|
||||
except Exception:
|
||||
failed.append(symbol)
|
||||
msg = "Error fetching symbol {} data".format(symbol)
|
||||
logger.error(msg, exc_info=True)
|
||||
if symbol not in mute_notifications:
|
||||
slack_notification(msg, __name__)
|
||||
else:
|
||||
_save_data(symbol, symbol_data)
|
||||
done.append(symbol)
|
||||
|
||||
if len(done) > 0:
|
||||
msg = "Successfully scraped symbols: " + ", ".join(done)
|
||||
slack_notification(msg, __name__, status=Status.Success)
|
||||
if len(failed) > 0:
|
||||
msg = "Failed to scrape symbols: " + ", ".join(failed)
|
||||
slack_notification(msg, __name__, status=Status.Warning)
|
||||
|
||||
|
||||
def aggregate_monthly_data(symbols=None):
|
||||
"""Aggregate daily snapshots into monthly files and validate data"""
|
||||
symbols = symbols or _get_all_listed_symbols()
|
||||
|
||||
save_data_path = utils.get_save_data_path()
|
||||
scraper_dir = os.path.join(save_data_path, "cboe")
|
||||
|
||||
symbols = [symbol.upper() for symbol in symbols]
|
||||
|
||||
for symbol in symbols:
|
||||
daily_dir = os.path.join(scraper_dir, symbol + "_daily")
|
||||
if not os.path.exists(daily_dir):
|
||||
msg = "Error aggregating data. Dir {} not found.".format(daily_dir)
|
||||
logger.error(msg)
|
||||
slack_notification(msg, __name__)
|
||||
continue
|
||||
|
||||
monthly_dir = os.path.join(scraper_dir, symbol)
|
||||
|
||||
symbol_files = [
|
||||
file for file in os.listdir(daily_dir) if file.endswith(".csv")
|
||||
]
|
||||
|
||||
for month, files in groupby(symbol_files, _monthly_grouper):
|
||||
file_names = list(files)
|
||||
daily_files = [
|
||||
os.path.join(daily_dir, name) for name in file_names
|
||||
]
|
||||
try:
|
||||
symbol_df = concatenate_files(daily_files)
|
||||
except Exception:
|
||||
msg = "Error concatenating daily files for period " + month
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
continue
|
||||
|
||||
date_range = pd.to_datetime(symbol_df["quotedate"].unique())
|
||||
if not validation.validate_dates_in_month(symbol, date_range):
|
||||
today = pd.Timestamp.today()
|
||||
first_date = date_range[0]
|
||||
if first_date.year != today.year or first_date.month != today.month:
|
||||
msg = "Some trading dates where missing for symbol {}".format(
|
||||
symbol)
|
||||
slack_notification(msg, __name__)
|
||||
continue
|
||||
|
||||
if not os.path.exists(monthly_dir):
|
||||
os.makedirs(monthly_dir)
|
||||
logger.debug("Symbol dir %s created", monthly_dir)
|
||||
|
||||
file_name = _monthly_filename(file_names)
|
||||
monthly_file = os.path.join(monthly_dir, file_name)
|
||||
symbol_df.to_csv(monthly_file, index=False)
|
||||
|
||||
if not validation.validate_aggregate_file(monthly_file,
|
||||
daily_files):
|
||||
utils.remove_file(monthly_file)
|
||||
msg = "Data in {} differs from the daily files".format(
|
||||
monthly_file)
|
||||
logger.error(msg)
|
||||
slack_notification(msg, __name__)
|
||||
continue
|
||||
|
||||
logger.debug("Saved monthly data %s", monthly_file)
|
||||
|
||||
for file in daily_files:
|
||||
utils.remove_file(file, logger)
|
||||
|
||||
|
||||
def _get_all_listed_symbols():
|
||||
"""Returns array of all listed symbols.
|
||||
http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv
|
||||
"""
|
||||
current_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
|
||||
symbols_file = os.path.realpath(
|
||||
os.path.join(current_dir, "cboesymboldir2.csv"))
|
||||
symbols_df = pd.read_csv(symbols_file, skiprows=1)
|
||||
return symbols_df["Stock Symbol"].array
|
||||
|
||||
|
||||
def concatenate_files(files):
|
||||
"""Returns a dataframe of the concatenated data from `files`."""
|
||||
df_generator = (pd.read_csv(file) for file in sorted(files))
|
||||
return pd.concat(df_generator, ignore_index=True)
|
||||
|
||||
|
||||
def _form_data():
|
||||
"""Return validation form data"""
|
||||
homepage = requests.get(url)
|
||||
soup = BeautifulSoup(homepage.content, "lxml")
|
||||
data = {
|
||||
"__VIEWSTATE": soup.select_one("#__VIEWSTATE")["value"],
|
||||
"__EVENTVALIDATION": soup.select_one("#__EVENTVALIDATION")["value"]
|
||||
}
|
||||
return data
|
||||
|
||||
|
||||
def _save_data(symbol, symbol_data):
|
||||
"""Saves the contents of `symbol_data` to
|
||||
`$SAVE_DATA_PATH/cboe/{symbol}_daily/{symbol}_{%date}.csv`
|
||||
"""
|
||||
filename = date.today().strftime(symbol + "_%Y%m%d.csv")
|
||||
|
||||
save_data_path = utils.get_save_data_path()
|
||||
symbol_dir = os.path.join(save_data_path, "cboe", symbol + "_daily")
|
||||
|
||||
if not os.path.exists(symbol_dir):
|
||||
os.makedirs(symbol_dir)
|
||||
logger.debug("Symbol dir %s created", symbol_dir)
|
||||
file_path = os.path.join(symbol_dir, filename)
|
||||
|
||||
if os.path.exists(file_path) and validation.file_hash_matches_data(
|
||||
file_path, symbol_data):
|
||||
logger.debug("File %s already downloaded", file_path)
|
||||
else:
|
||||
daily_df = _wrangle_data(symbol, symbol_data)
|
||||
daily_df.to_csv(file_path, index=False)
|
||||
logger.debug("Saved daily symbol data as %s", file_path)
|
||||
|
||||
|
||||
def _wrangle_data(symbol, symbol_data):
|
||||
"""Returns a properly formated (_tidy_) dataframe"""
|
||||
string_data = StringIO(symbol_data)
|
||||
first_line = string_data.readline()
|
||||
spot_price = float(first_line.split(",")[-2])
|
||||
quote_date = date.today().strftime("%m/%d/%Y")
|
||||
|
||||
data = pd.read_csv(string_data, skiprows=1)
|
||||
call_columns = [
|
||||
"Calls", "Expiration Date", "Strike", "Last Sale", "Net", "Bid", "Ask",
|
||||
"Vol", "Open Int", "IV", "Delta", "Gamma"
|
||||
]
|
||||
calls = data[call_columns]
|
||||
|
||||
put_columns = [
|
||||
"Puts", "Expiration Date", "Strike", "Last Sale.1", "Net.1", "Bid.1",
|
||||
"Ask.1", "Vol.1", "Open Int.1", "IV.1", "Delta.1", "Gamma.1"
|
||||
]
|
||||
puts = data[put_columns]
|
||||
|
||||
renamed_columns = [
|
||||
"optionroot", "expiration", "strike", "last", "net", "bid", "ask",
|
||||
"volume", "openinterest", "impliedvol", "delta", "gamma"
|
||||
]
|
||||
calls.columns = renamed_columns
|
||||
calls.insert(loc=1, column="type", value="call")
|
||||
puts.columns = renamed_columns
|
||||
puts.insert(loc=1, column="type", value="put")
|
||||
|
||||
merged = pd.concat([calls, puts])
|
||||
merged.insert(loc=0, column="underlying", value=symbol)
|
||||
merged.insert(loc=1, column="underlying_last", value=spot_price)
|
||||
merged.insert(loc=2, column="exchange", value="CBOE")
|
||||
merged.insert(loc=6, column="quotedate", value=quote_date)
|
||||
|
||||
return merged
|
||||
|
||||
|
||||
def _monthly_grouper(filename):
|
||||
"""Returns `{year}{month}` string. Used to group files by month."""
|
||||
basename = filename.split(".")[0]
|
||||
file_date = basename.split("_")[1]
|
||||
return file_date[:-2]
|
||||
|
||||
|
||||
def _monthly_filename(filenames):
|
||||
"""Returns filename of monthly aggregate file in the form
|
||||
`{symbol}_{start_date}_to_{end_date}.csv`
|
||||
"""
|
||||
sorted_files = list(sorted(filenames))
|
||||
first_file = sorted_files[0]
|
||||
last_file = sorted_files[-1]
|
||||
last_day = last_file.split(".")[0][-8:] # Get only the date
|
||||
file_name = first_file.split(".")[0] + "_to_" + last_day + ".csv"
|
||||
return file_name
|
||||
@@ -1,66 +0,0 @@
|
||||
# CBOE data scraper
|
||||
# Requires Selenium and a headless Chrome driver
|
||||
|
||||
import tempfile
|
||||
import time
|
||||
import os
|
||||
import shutil
|
||||
from datetime import date
|
||||
from selenium import webdriver
|
||||
|
||||
|
||||
class CBOE():
|
||||
"""CBOE data downloader."""
|
||||
url = "http://www.cboe.com/delayedquote/quote-table-download"
|
||||
|
||||
def __init__(self):
|
||||
self.data_path = self._get_data_path()
|
||||
self.tmp_dir = tempfile.TemporaryDirectory()
|
||||
self.driver = self._initilize_driver(self.tmp_dir.name)
|
||||
|
||||
def _get_data_path():
|
||||
path = os.getenv("OPTIONS_DATA_PATH")
|
||||
if not path:
|
||||
raise EnvironmentError("Environment variable $OPTIONS_DATA_PATH not set")
|
||||
return os.path.expanduser(path)
|
||||
|
||||
def _initilize_driver(download_dir):
|
||||
"""Initilizes the Chrome driver to silently download files
|
||||
to a temporary directory.
|
||||
"""
|
||||
options = webdriver.ChromeOptions()
|
||||
options.add_argument("headless")
|
||||
options.add_argument("disable-gpu")
|
||||
|
||||
driver = webdriver.Chrome(options=options)
|
||||
driver.command_executor._commands["send_command"] = (
|
||||
"POST",
|
||||
"/session/$sessionId/chromium/send_command"
|
||||
)
|
||||
params = {
|
||||
"cmd": "Page.setDownloadBehavior",
|
||||
"params": {
|
||||
"behavior": "allow",
|
||||
"downloadPath": download_dir
|
||||
}
|
||||
}
|
||||
driver.execute("send_command", params)
|
||||
driver.implicitly_wait(10)
|
||||
return driver
|
||||
|
||||
def fetch_data(self, symbols):
|
||||
"""Fetches options data for a given list of symbols"""
|
||||
self.driver.get(CBOE.url)
|
||||
for symbol in symbols:
|
||||
ticker = self.driver.find_element_by_css_selector("input#txtTicker")
|
||||
ticker.send_keys(symbol)
|
||||
submit = self.driver.find_element_by_css_selector("input#cmdSubmit")
|
||||
submit.click()
|
||||
time.sleep(15) # Horrible hack
|
||||
download_path = os.path.join(self.tmp_dir.name, "quotedata.dat")
|
||||
renamed_file = date.today().strftime(symbol + "_%Y%m%d.csv")
|
||||
full_path = os.path.join(self.data_path, renamed_file)
|
||||
shutil.move(download_path, full_path)
|
||||
|
||||
def __del__(self):
|
||||
self.tmp_dir.cleanup()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,39 +0,0 @@
|
||||
[loggers]
|
||||
keys=root,tiingo,cboe
|
||||
|
||||
[handlers]
|
||||
keys=consoleHandler,fileHandler
|
||||
|
||||
[formatters]
|
||||
keys=simpleFormatter
|
||||
|
||||
[logger_root]
|
||||
level=ERROR
|
||||
handlers=consoleHandler
|
||||
|
||||
[logger_tiingo]
|
||||
level=DEBUG
|
||||
handlers=consoleHandler,fileHandler
|
||||
qualname=data_scraper.tiingo
|
||||
propagate=0
|
||||
|
||||
[logger_cboe]
|
||||
level=DEBUG
|
||||
handlers=consoleHandler,fileHandler
|
||||
qualname=data_scraper.cboe
|
||||
propagate=0
|
||||
|
||||
[handler_consoleHandler]
|
||||
class=StreamHandler
|
||||
level=ERROR
|
||||
formatter=simpleFormatter
|
||||
args=(sys.stdout,)
|
||||
|
||||
[handler_fileHandler]
|
||||
class=handlers.RotatingFileHandler
|
||||
level=DEBUG
|
||||
formatter=simpleFormatter
|
||||
args=("data_scraper.log", "a", 3000000, 10)
|
||||
|
||||
[formatter_simpleFormatter]
|
||||
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
|
||||
@@ -1,59 +0,0 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
import requests
|
||||
|
||||
from .utils import get_module_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
Status = Enum("Status", "Success Warning Error")
|
||||
|
||||
options = get_module_config("notifications")
|
||||
try:
|
||||
webhook = options["slack_webhook"]
|
||||
except KeyError as e:
|
||||
logger.error("Missing slack webhook from configuration file")
|
||||
raise e
|
||||
|
||||
payload = {
|
||||
"channel": "#algotrading",
|
||||
"username": "Talebot",
|
||||
"icon_emoji": ":taleb:",
|
||||
"attachments": [{
|
||||
"footer": "Talebot"
|
||||
}]
|
||||
}
|
||||
|
||||
|
||||
def slack_notification(text, scraper, status=Status.Error):
|
||||
"""Post Slack notification"""
|
||||
|
||||
if status == Status.Error:
|
||||
emoji = ":thumbsdown: "
|
||||
title = "data_scraper error"
|
||||
color = "#B22222"
|
||||
else:
|
||||
title = "data_scraper status report"
|
||||
if status == Status.Success:
|
||||
emoji = ":thumbsup: "
|
||||
color = "#49C39E"
|
||||
else:
|
||||
emoji = ":warning: "
|
||||
color = "#EDB625"
|
||||
msg = emoji + text
|
||||
|
||||
payload["attachments"][0]["fallback"] = msg
|
||||
payload["attachments"][0]["text"] = msg
|
||||
payload["attachments"][0]["color"] = color
|
||||
payload["attachments"][0]["title"] = title
|
||||
payload["attachments"][0]["fields"] = [{"title": scraper}]
|
||||
payload["attachments"][0]["ts"] = datetime.today().timestamp()
|
||||
|
||||
response = requests.post(webhook, json=payload)
|
||||
|
||||
if response.status_code != 200:
|
||||
msg = "Error connecting to Slack {}. Response is:\n{}".format(
|
||||
response.status_code, response.text)
|
||||
logger.error(msg)
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## This script downloads stock options data,
|
||||
## veryfies zipfile integrity, saves md5 signature
|
||||
## and uploads them to S3.
|
||||
## To run, pass a list of files to download:
|
||||
## $> ./backup.sh files.txt
|
||||
|
||||
TMPDIR=tmp
|
||||
NOW=$(date +"%m-%d-%Y-%H%M%S")
|
||||
RETRY="${NOW}.txt"
|
||||
MD5SUMS=md5sums.txt
|
||||
|
||||
mkdir -p $TMPDIR
|
||||
|
||||
while read filename
|
||||
do
|
||||
echo "Downloading file $filename to $TMPDIR"
|
||||
wget --quiet -P $TMPDIR "ftp://l3_hdall:JKNRH7LYXV@ftp.deltaneutral.com/${filename}"
|
||||
|
||||
newpath="$TMPDIR/$filename"
|
||||
echo "Verifying zipfile $newpath"
|
||||
if unzip -t -q $newpath
|
||||
then
|
||||
echo "File check OK"
|
||||
else
|
||||
echo "ERROR: File check failed for $newfile"
|
||||
echo $filename >> $RETRY
|
||||
rm $newpath
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "Appending md5 sum for $f"
|
||||
md5sum $newpath >> $MD5SUMS
|
||||
|
||||
echo "Copying $newpath to S3 bucket"
|
||||
rclone copy -v $newpath longueduree:longueduree
|
||||
|
||||
echo "Deleting $newpath"
|
||||
rm $newpath
|
||||
done <$1
|
||||
@@ -1,104 +0,0 @@
|
||||
import logging
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from requests import ConnectionError
|
||||
import pandas as pd
|
||||
|
||||
from data_scraper import cboe
|
||||
|
||||
logging.disable(level=logging.CRITICAL)
|
||||
|
||||
|
||||
class TestCBOE(unittest.TestCase):
|
||||
"""Tests CBOE data scraper"""
|
||||
|
||||
test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
|
||||
test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
|
||||
cboe_data_path = os.path.join(test_data_path, "cboe")
|
||||
spx_data_path = os.path.join(cboe_data_path, "SPX_March_2019.csv")
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
|
||||
os.environ["SAVE_DATA_PATH"] = cls.test_data_path
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
if cls.save_data_path:
|
||||
os.environ["SAVE_DATA_PATH"] = cls.save_data_path
|
||||
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_fetch_spy(self, mocked_notification):
|
||||
"""Fetch todays SPY quote"""
|
||||
cboe.fetch_data(["SPY"])
|
||||
spy_dir = os.path.join(TestCBOE.cboe_data_path, "SPY_daily")
|
||||
self.addCleanup(TestCBOE.remove_files, spy_dir)
|
||||
|
||||
if self.assertTrue(os.path.exists(spy_dir)):
|
||||
self.assertTrue(mocked_notification.called)
|
||||
file_name = "SPY_" + pd.Timestamp.today().strftime(
|
||||
"%Y%m%d") + ".csv"
|
||||
file_path = os.path.join(spy_dir, file_name)
|
||||
spy_df = pd.read_csv(file_path, parse_dates=["quotedate"])
|
||||
self.assertTrue(all(spy_df["underlying"] == "SPX"))
|
||||
self.assertEqual(spy_df["quotedate"].nunique(), 1)
|
||||
counts = spy_df["type"].value_counts()
|
||||
self.assertEqual(counts["put"] + counts["call"], len(spy_df))
|
||||
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_fetch_invalid_symbol(self, mocked_notification):
|
||||
"""Fetching invalid symbol should send notification"""
|
||||
cboe.fetch_data(["FOOBAR"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
|
||||
@patch("data_scraper.cboe.url", new="http://www.aldkfjaskldfjsa.com")
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_no_connection(self, mocked_notification):
|
||||
"""Raise ConnectionError and send notification when host is unreachable"""
|
||||
with self.assertRaises(ConnectionError):
|
||||
cboe.fetch_data(["SPX"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
|
||||
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_data_aggregation(self, mocked_notification, mocked_remove):
|
||||
"""Test data aggregation happy path"""
|
||||
cboe.aggregate_monthly_data(["SPX"])
|
||||
aggregate_file = os.path.join(TestCBOE.cboe_data_path, "SPX",
|
||||
"SPX_20190301_to_20190329.csv")
|
||||
self.addCleanup(TestCBOE.remove_files, os.path.dirname(aggregate_file))
|
||||
self.assertTrue(mocked_remove.called)
|
||||
self.assertFalse(mocked_notification.called)
|
||||
|
||||
if self.assertTrue(os.path.exists(aggregate_file)):
|
||||
spx_df = pd.read_csv(TestCBOE.spx_data_path)
|
||||
aggregate_df = pd.read_csv(aggregate_file)
|
||||
self.assertTrue(spx_df.equals(aggregate_df))
|
||||
|
||||
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_aggregate_missing_days(self, mocked_notification, mocked_remove):
|
||||
"""Data aggregation should send notification when there are missing days"""
|
||||
cboe.aggregate_monthly_data(["GOOG"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
self.assertFalse(mocked_remove.called)
|
||||
|
||||
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
|
||||
@patch("data_scraper.cboe.slack_notification", return_value=None)
|
||||
def test_aggregate_invalid_symbol(self, mocked_notification,
|
||||
mocked_remove):
|
||||
"""Data aggregation should fail and send notification on invalid symbol"""
|
||||
cboe.aggregate_monthly_data(["FOOBAR"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
self.assertFalse(mocked_remove.called)
|
||||
|
||||
def remove_files(file_path):
|
||||
if os.path.exists(file_path):
|
||||
shutil.rmtree(file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,75 +0,0 @@
|
||||
import logging
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_scraper import tiingo
|
||||
|
||||
logging.disable(level=logging.CRITICAL)
|
||||
|
||||
|
||||
class TestTiingo(unittest.TestCase):
|
||||
"""Tests Tiingo data scraper"""
|
||||
|
||||
test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
|
||||
test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
|
||||
tiingo_data_path = os.path.join(test_data_path, "tiingo")
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
assert "TIINGO_API_KEY" in os.environ, "$TIINGO_API_KEY env variable must be set"
|
||||
cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
|
||||
os.environ["SAVE_DATA_PATH"] = cls.test_data_path
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
if cls.save_data_path:
|
||||
os.environ["SAVE_DATA_PATH"] = cls.save_data_path
|
||||
|
||||
@patch("data_scraper.tiingo.slack_notification", return_value=None)
|
||||
def test_fetch_gld(self, mocked_notification):
|
||||
"""Fetch GLD data"""
|
||||
tiingo.fetch_data(["GLD"])
|
||||
gld_dir = os.path.join(TestTiingo.tiingo_data_path, "GLD")
|
||||
self.addCleanup(TestTiingo.remove_files, gld_dir)
|
||||
|
||||
if self.assertTrue(os.path.exists(gld_dir)):
|
||||
self.assertTrue(mocked_notification.called)
|
||||
file_name = "GLD_" + pd.Timestamp.today().strftime(
|
||||
"%Y%m%d") + ".csv"
|
||||
file_path = os.path.join(gld_dir, file_name)
|
||||
gld_df = pd.read_csv(file_path)
|
||||
self.assertTrue(all(gld_df["symbol"] == "GLD"))
|
||||
expected_columns = [
|
||||
"symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
|
||||
"adjVolume", "close", "divCash", "high", "low", "open",
|
||||
"splitFactor", "volume"
|
||||
]
|
||||
self.assertEqual(gld_df.columns, expected_columns)
|
||||
|
||||
@patch("data_scraper.tiingo.slack_notification", return_value=None)
|
||||
def test_fetch_invalid_symbol(self, mocked_notification):
|
||||
"""Fetching invalid symbol data should send notification"""
|
||||
tiingo.fetch_data(["FOOBAR"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
|
||||
@patch("data_scraper.tiingo.pdr.get_data_tiingo") # mock pandas_datareader
|
||||
@patch("data_scraper.tiingo.slack_notification", return_value=None)
|
||||
def test_no_connection(self, mocked_notification, mocked_pdr):
|
||||
"""Raise ConnectionError and send notification when host is unreachable"""
|
||||
mocked_pdr.side_effect = ConnectionError("This is a test")
|
||||
|
||||
with self.assertRaises(ConnectionError):
|
||||
tiingo.fetch_data(["IBM"])
|
||||
self.assertTrue(mocked_notification.called)
|
||||
|
||||
def remove_files(file_path):
|
||||
if os.path.exists(file_path):
|
||||
shutil.rmtree(file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,122 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from datetime import date
|
||||
|
||||
import pandas as pd
|
||||
import pandas_datareader as pdr
|
||||
|
||||
from . import utils, validation
|
||||
from .notifications import slack_notification, Status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default symbols to fetch
|
||||
assets = [
|
||||
"VTSMX", "VFINX", "VIVAX", "VIGRX", "VIMSX", "VMVIX", "VMGIX", "NAESX",
|
||||
"VISVX", "VISGX", "BRSIX", "VGTSX", "VTMGX", "VFSVX", "EFV", "VEURX",
|
||||
"VPACX", "VEIEX", "VFISX", "VFITX", "IEF", "VUSTX", "VBMFX", "VIPSX",
|
||||
"PIGLX", "PGBIX", "VFSTX", "LQD", "VWESX", "VWEHX", "VWSTX", "VWITX",
|
||||
"VWLTX", "VGSIX", "GLD", "PSAU", "GSG"
|
||||
]
|
||||
|
||||
|
||||
def fetch_data(symbols=assets):
|
||||
"""Fetches historical data for given symbols from Tiingo"""
|
||||
api_key = utils.get_environment_var("TIINGO_API_KEY")
|
||||
|
||||
symbols = [symbol.upper() for symbol in symbols]
|
||||
done, failed = [], []
|
||||
|
||||
for symbol in symbols:
|
||||
try:
|
||||
symbol_data = pdr.get_data_tiingo(symbol, api_key=api_key)
|
||||
except ConnectionError as ce:
|
||||
msg = "Unable to connect to api.tiingo.com when fetching symbol {}".format(
|
||||
symbol)
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
raise ce
|
||||
except TypeError:
|
||||
# pandas_datareader raises TypeError when fetching invalid symbol
|
||||
failed.append(symbol)
|
||||
msg = "Attempted to fetch invalid symbol {}".format(symbol)
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
except Exception:
|
||||
msg = "Error fetching symbol {}".format(symbol)
|
||||
logger.error(msg, exc_info=True)
|
||||
slack_notification(msg, __name__)
|
||||
else:
|
||||
_save_data(symbol, symbol_data.reset_index())
|
||||
done.append(symbol)
|
||||
|
||||
if len(done) > 0:
|
||||
msg = "Successfully scraped symbols: " + ", ".join(done)
|
||||
slack_notification(msg, __name__, status=Status.Success)
|
||||
if len(failed) > 0:
|
||||
msg = "Failed to scrape symbols: " + ", ".join(failed)
|
||||
slack_notification(msg, __name__, status=Status.Warning)
|
||||
|
||||
|
||||
def _save_data(symbol, symbol_df):
|
||||
"""Saves the contents of `symbol_df` to
|
||||
`$SAVE_DATA_PATH/tiingo/{symbol}/{symbol}_{%date}.csv`"""
|
||||
filename = date.today().strftime(symbol + "_%Y%m%d.csv")
|
||||
|
||||
save_data_path = utils.get_save_data_path()
|
||||
symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
|
||||
|
||||
if not os.path.exists(symbol_dir):
|
||||
os.makedirs(symbol_dir)
|
||||
logger.debug("Symbol dir %s created", symbol_dir)
|
||||
file_path = os.path.join(symbol_dir, filename)
|
||||
|
||||
if os.path.exists(file_path) and validation.file_hash_matches_data(
|
||||
file_path, symbol_df.to_csv()):
|
||||
logger.debug("File %s already downloaded", file_path)
|
||||
else:
|
||||
expected_columns = [
|
||||
"symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
|
||||
"adjVolume", "close", "divCash", "high", "low", "open",
|
||||
"splitFactor", "volume"
|
||||
]
|
||||
|
||||
if validation.validate_historical_dates(
|
||||
symbol, symbol_df["date"]) and validation.validate_columns(
|
||||
expected_columns, symbol_df.columns):
|
||||
merged_df = _merge(symbol, symbol_df)
|
||||
pattern = symbol + "_*"
|
||||
utils.remove_files(symbol_dir, pattern, logger)
|
||||
|
||||
merged_df.to_csv(file_path, index=False)
|
||||
logger.debug("Saved symbol data as %s", file_path)
|
||||
|
||||
|
||||
def _merge(symbol, symbol_df):
|
||||
"""Merge `symbol_df` with previous data file."""
|
||||
|
||||
save_data_path = utils.get_save_data_path()
|
||||
symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
|
||||
|
||||
files = os.listdir(symbol_dir)
|
||||
if len(files) == 0:
|
||||
return symbol_df
|
||||
|
||||
last_file = sorted(files)[-1]
|
||||
old_df = pd.read_csv(os.path.join(symbol_dir, last_file),
|
||||
parse_dates=["date"],
|
||||
index_col="date")
|
||||
symbol_df.index = symbol_df["date"]
|
||||
|
||||
diffs = old_df.index.difference(symbol_df.index)
|
||||
|
||||
if diffs.empty:
|
||||
return symbol_df
|
||||
else:
|
||||
msg = """Old data included dates not present in scraped file for symbol {}
|
||||
Merged new data with previous file.""".format(symbol)
|
||||
logger.error(msg)
|
||||
slack_notification(msg, __name__)
|
||||
merged_df = pd.concat([symbol_df, old_df.loc[diffs]])
|
||||
merged_df.sort_index(inplace=True)
|
||||
return merged_df.reset_index()
|
||||
@@ -1,52 +0,0 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
def get_environment_var(variable):
|
||||
"""Returns the value of a given environment variable.
|
||||
Raises `EnvironmentError` if not found.
|
||||
"""
|
||||
if variable not in os.environ:
|
||||
raise EnvironmentError(
|
||||
"Environment variable {} not set".format(variable))
|
||||
|
||||
return os.path.expanduser(os.environ[variable])
|
||||
|
||||
|
||||
def get_save_data_path():
|
||||
"""Reads data path from environment variable `$SAVE_DATA_PATH`.
|
||||
If it is not set, defaults to `./data/scraped`.
|
||||
"""
|
||||
try:
|
||||
data_dir = get_environment_var("SAVE_DATA_PATH")
|
||||
except EnvironmentError:
|
||||
data_dir = "data/scraped"
|
||||
os.makedirs(data_dir)
|
||||
|
||||
return data_dir
|
||||
|
||||
|
||||
def get_module_config(module, config_file="data_scraper.conf"):
|
||||
"""Parses configuration file and returns the configuration options
|
||||
for the chosen `module`.
|
||||
"""
|
||||
options = {}
|
||||
if os.path.exists(config_file):
|
||||
with open(config_file) as file:
|
||||
config = json.load(file)
|
||||
options = config.get(module, {})
|
||||
|
||||
return options
|
||||
|
||||
|
||||
def remove_files(data_dir, pattern, logger=None):
|
||||
"""Removes files in `data_dir` that match `pattern`"""
|
||||
for file in glob.glob(os.path.join(data_dir, pattern)):
|
||||
remove_file(file, logger)
|
||||
|
||||
|
||||
def remove_file(file, logger=None):
|
||||
os.remove(file)
|
||||
if logger:
|
||||
logger.debug("Removed file %s", file)
|
||||
@@ -1,93 +0,0 @@
|
||||
import logging
|
||||
import hashlib
|
||||
|
||||
import pandas as pd
|
||||
import pandas_market_calendars as mcal
|
||||
|
||||
from . import cboe
|
||||
from .notifications import slack_notification
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def file_hash_matches_data(file_path, data):
|
||||
file_hash = file_md5(file_path)
|
||||
data_md5 = hashlib.md5(data.encode()).hexdigest()
|
||||
return file_hash == data_md5
|
||||
|
||||
|
||||
def file_md5(file, chunk_size=4096):
|
||||
md5 = hashlib.md5()
|
||||
with open(file, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(chunk_size), b""):
|
||||
md5.update(chunk)
|
||||
|
||||
return md5.hexdigest()
|
||||
|
||||
|
||||
def validate_dates_in_month(symbol, date_range):
|
||||
"""Compares `date_range` (month) with NYSE trading calendar.
|
||||
Returns `True` if there are no missing days.
|
||||
"""
|
||||
# NYSE and CBOE have the same trading calendar
|
||||
# https://www.nyse.com/markets/hours-calendars
|
||||
# http://cfe.cboe.com/about-cfe/holiday-calendar
|
||||
nyse = mcal.get_calendar("NYSE")
|
||||
first_date = date_range[0]
|
||||
period = pd.Period(year=first_date.year, month=first_date.month, freq="M")
|
||||
trading_days = nyse.valid_days(start_date=period.start_time,
|
||||
end_date=period.end_time)
|
||||
|
||||
# Remove timezone info
|
||||
trading_days = trading_days.tz_convert(tz=None)
|
||||
missing_days = trading_days.difference(date_range)
|
||||
|
||||
if not missing_days.empty:
|
||||
logger.error("Error validating monthly dates. Missing: %s",
|
||||
missing_days)
|
||||
return missing_days.empty
|
||||
|
||||
|
||||
def validate_historical_dates(symbol, date_range):
|
||||
"""Compares `date_range` (any time range) with trading calendar.
|
||||
Returns `True` if there are no missing days.
|
||||
"""
|
||||
nyse = mcal.get_calendar("NYSE")
|
||||
start_date = date_range.min()
|
||||
end_date = date_range.max()
|
||||
trading_days = nyse.valid_days(start_date=start_date, end_date=end_date)
|
||||
|
||||
# Remove timezone info
|
||||
trading_days = trading_days.tz_convert(tz=None)
|
||||
date_range = date_range.dt.tz_convert(tz=None)
|
||||
missing_days = trading_days.difference(date_range)
|
||||
|
||||
if not missing_days.empty:
|
||||
logger.error("Error validating historical dates. Missing: %s",
|
||||
missing_days)
|
||||
|
||||
return missing_days.empty
|
||||
|
||||
|
||||
def validate_columns(expected, received):
|
||||
"""Verify that the `received` columns scraped are equal to `expected`"""
|
||||
valid = all(expected == received)
|
||||
|
||||
if not valid:
|
||||
expected_cols = ", ".join(expected)
|
||||
received_cols = ", ".join(received)
|
||||
msg = """Columns expected differ from those received.
|
||||
Expected: {}
|
||||
Received: {}""".format(expected_cols, received_cols)
|
||||
logger.error(msg)
|
||||
slack_notification(msg, __name__)
|
||||
|
||||
return valid
|
||||
|
||||
|
||||
def validate_aggregate_file(aggregate_file, daily_files):
|
||||
"""Compares `aggregate_file` with the data from `daily_files`."""
|
||||
aggregate_df = pd.read_csv(aggregate_file)
|
||||
recreated_df = cboe.concatenate_files(daily_files)
|
||||
|
||||
return aggregate_df.equals(recreated_df)
|
||||
@@ -1,19 +0,0 @@
|
||||
FROM debian:latest
|
||||
MAINTAINER Juan Pablo Amoroso <jamoroso@lambdaclass.com>
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get install -y python3 python3-pip make cron build-essential pkg-config openssl libssl-dev
|
||||
RUN python3 -m pip install pipenv
|
||||
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
||||
|
||||
COPY . /finance
|
||||
WORKDIR /finance
|
||||
|
||||
RUN make init
|
||||
|
||||
COPY ./docker/data_scraper/crontab /etc/cron.d/scraper-cron
|
||||
COPY ./docker/data_scraper/entrypoint.sh /usr/bin/entrypoint.sh
|
||||
COPY ./docker/data_scraper/run-task.sh /usr/bin/run-task
|
||||
RUN chmod 0644 /etc/cron.d/scraper-cron && crontab /etc/cron.d/scraper-cron
|
||||
ENTRYPOINT ["entrypoint.sh"]
|
||||
CMD ["cron", "-f"]
|
||||
@@ -1,4 +0,0 @@
|
||||
0 19 * * 1-5 root cd /finance && run-task make scrape scraper=cboe
|
||||
0 19 * * 1-5 root cd /finance && run-task make scrape scraper=tiingo
|
||||
0 0 1 * * root cd /finance && run-task make aggregate && run-task make backup
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# cron does not read env, save it here
|
||||
env > /root/env
|
||||
exec "$@"
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# import env vars that were written in entrypoint
|
||||
env - `cat /root/env` $@
|
||||
@@ -1,8 +0,0 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
scraper:
|
||||
image: data_scraper:latest
|
||||
container_name: data_scraper
|
||||
volumes:
|
||||
- ~/finance/data:/finance/data
|
||||
Reference in New Issue
Block a user