Merge pull request #70 from lambdaclass/organize-repo

Organized repo
2026-06-27 18:05:27 +08:00 · 2020-03-20 17:05:07 -03:00
parent 61bcce5a67 97c0ebe6cc
commit f504e1b5ed
56 changed files with 69 additions and 14173 deletions
@@ -1,4 +1,4 @@
-.PHONY: install env test test_notebook
+.PHONY: install env test test_notebook lint notebook help
 .DEFAULT_GOAL := help

 install: ## Create environment and install dependencies
@@ -7,6 +7,13 @@ install: ## Create environment and install dependencies
 env: ## Run pipenv shell
 	pipenv shell

+notebook: ## Run Jupyter notebook
+	pipenv run jupyter notebook
+
+lint: ## Run linter and format checker (flake8 & yapf)
+	pipenv run flake8 backtester
+	pipenv run yapf --diff --recursive backtester/
+
 test: ## Run tests
 	pipenv run python -m pytest -v backtester

@@ -14,10 +21,6 @@ test_notebook: ## Run jupyter notebook test
 	pipenv run jupyter nbconvert nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 \
 	backtester/examples/backtester_example.ipynb --stdout > /dev/null

-lint: ## Run linter and format checker (flake8 & yapf)
-	pipenv run flake8 backtester
-	pipenv run yapf --diff --recursive backtester/
-
 help:
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |\
 	awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
@@ -1,3 +1,5 @@
+[![Build Status](https://travis-ci.com/lambdaclass/options_backtester.svg?branch=master)](https://travis-ci.com/lambdaclass/options_backtester)
+
 Options Backtester
 ==============================

@@ -13,73 +15,87 @@ Simple backtester to evaluate and analyse options strategies over historical pri

 ## Requirements

- Python >= 3.5
+- Python >= 3.6
 - pipenv

-
 ## Setup

-For backtesting, set `$OPTIONS_DATA_PATH` to the appropriate directory where the data is located. All file paths parsed by the backtester will be relative to this directory.  
-
-To use the data scraper the following environment variables need to be set:
- `$SAVE_DATA_PATH`: where the data will be saved to (default is `./data/scraped`)
- `$TIINGO_API_KEY`: used to fetch data from [Tiingo](https://api.tiingo.com)
- `$S3_BUCKET`: name of the S3 bucket to backup data
- `$AWS_ACCESS_KEY_ID`: AWS acces key id
- `$AWS_SECRET_ACCESS_KEY`: AWS secret key
-
-You can configure the data scraper by editing the configuration file `data_scraper.conf` (json-formated).  
-
-Sample file:
-
-```json
-{
-  "cboe": {
-    "mute_notifications": ["BFB", "CBSA"]
-  },
-  "notifications": {
-      "slack_webhook": "https://hooks.slack.com/services/MY_WORKSPACE_WEBHOOK"
-  }
-}
-```
-
-**HINT**: store environment variables in an `.env` file and pipenv will load them automatically when using `make env`.
-
-
-## Usage
-
-### Create environment and download dependencies
+Install [pipenv](https://pipenv.pypa.io/en/latest/)

 ```shell
-$> make init
+$> pip install pipenv
 ```

-### Activate environment
+Create environment and download dependencies
+
+```shell
+$> make install
+```
+
+Activate environment

 ```shell
 $> make env
 ```

-### Run tests
+Run [Jupyter](https://jupyter.org) notebook
+
+```shell
+$> make notebook
+```
+
+Run tests

 ```shell
 $> make test
 ```

-### Scrape data (supported scrapers: CBOE, Tiingo)
+## Usage

-```shell
-$> make scrape scraper=cboe
+### Example:

-$> make scrape scraper=tiingo
-```
-
-### Run backtester with benchmark strategy
-
-```shell
-$> make bench
+We'll run a backtest of a stock portfolio holding `$AAPL` and `$GOOG`, and simultaneously buying 10% OTM calls and puts on `$SPX` ([long strangle](https://www.investopedia.com/terms/s/strangle.asp)).  
+We'll allocate 97% of our capital to stocks and the rest to options, and do a rebalance every month.
+
+```python
+from backtester import Backtest, Type, Direction, Stock
+from backtester.strategy import Strategy, StrategyLeg
+from backtester.datahandler import HistoricalOptionsData, TiingoData
+
+# Stocks data
+stocks_data = TiingoData('stocks.csv')
+stocks = [Stock(symbol='AAPL', percentage=0.5), Stock(symbol='GOOG', percentage=0.5)]
+
+# Options data
+options_data = HistoricalOptionsData('options.h5', key='/SPX')
+schema = options_data.schema
+
+# Long strangle
+leg_1 = StrategyLeg('leg_1', schema, option_type=Type.PUT, direction=Direction.BUY)
+leg_1.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last <=
+                                                                          1.1 * schema.strike)
+leg_1.exit_filter = (schema.dte <= 30)
+
+leg_2 = StrategyLeg('leg_2', schema, option_type=Type.CALL, direction=Direction.BUY)
+leg_2.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last >=
+                                                                          0.9 * schema.strike)
+leg_2.exit_filter = (schema.dte <= 30)
+
+strategy = Strategy(schema)
+strategy.add_legs([leg_1, leg_2])
+
+allocation = {'stocks': .97, 'options': .03}
+initial_capital = 1_000_000
+bt = Backtest(allocation, initial_capital)
+bt.stocks = stocks
+bt.stocks_data = stocks_data
+bt.options_data = options_data
+bt.options_strategy = strategy
+
+bt.run(rebalance_freq=1)
 ```

+You can explore more usage examples in the Jupyter [notebooks](backtester/examples/).

 ## Recommended reading

@@ -1,2 +0,0 @@
-.env
-target/
@@ -1,17 +0,0 @@
-[package]
-name = "alert"
-version = "0.1.0"
-authors = ["Amin Arria <arria.amin@gmail.com>"]
-edition = "2018"
-
-[dependencies]
-fantoccini = "0.11.6"
-futures = "0.1.25"
-tokio = "0.1.17"
-webdriver = "0.39.0"
-serde_json = "1.0.39"
-select = "0.4.2"
-reqwest = "0.9.12"
-diesel = { version = "1.4.2", features = ["sqlite", "chrono"] }
-dotenv = "0.10"
-chrono = "0.4"
@@ -1,26 +0,0 @@
-FROM debian:latest
-MAINTAINER Amin Arria <amin.arria@lambdaclass.com>
-
-WORKDIR /root
-RUN apt-get update
-RUN apt-get install -y curl make build-essential pkg-config openssl libssl-dev
-
-RUN apt-get -y install firefox-esr
-RUN curl -L -O  'https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz'
-RUN tar -vxf geckodriver-v0.24.0-linux64.tar.gz
-
-RUN apt-get install -y sqlite3 libsqlite3-dev
-
-RUN curl https://sh.rustup.rs -sSf > rustup
-RUN sh rustup -y
-ENV PATH=/root/.cargo/bin:$PATH
-
-COPY . watchdog/
-WORKDIR watchdog
-RUN make install_diesel_cli
-RUN make migration
-RUN make release
-
-RUN apt-get clean
-RUN rm -rf /var/lib/apt/lists/*
-CMD (/root/geckodriver &) && ./target/release/alert
@@ -1,13 +0,0 @@
-.PHONY: migration run install_diesel_cli release
-
-migration:
-	diesel migration run
-
-run:
-	cargo run
-
-install_diesel_cli:
-	cargo install diesel_cli --no-default-features --features sqlite chrono
-
-release:
-	cargo build --release
@@ -1,34 +0,0 @@
-# Financial Watchdog
-
-## Requirements
- [WebDriver](https://www.w3.org/TR/webdriver1/) process running on port 4444 (e.g., [geckodriver](https://github.com/mozilla/geckodriver/releases), [chromedriver](https://sites.google.com/a/chromium.org/chromedriver/))
- SQLite 3.19.3
- Rust 1.33
- [Diesel CLI 1.4](https://crates.io/crates/diesel_cli)
-
-When installing `diesel_cli` if you run into any errors try installing with our needed features only:
-
-```
-$> make install_diesel_cli
-```
-
-## Setup
-
-Create a `.env` with the following values:
-
- `DATABASE_URL`: The filepath where the DB will be stored (SQLite)
- `SLACK_WEBHOOK_URL`: URL for Slack's webhook
-
-Create the database by running the migrations:
-
-```
-$> make migration
-```
-
-## Running
-
-Run it
-
-```
-$> make run
-```
@@ -1,5 +0,0 @@
-# For documentation on how to configure this file,
-# see diesel.rs/guides/configuring-diesel-cli
-
-[print_schema]
-file = "src/schema.rs"
@@ -1 +0,0 @@
-DROP TABLE dollar;
@@ -1,16 +0,0 @@
-CREATE TABLE dollar (
-  id              INTEGER PRIMARY KEY,
-  buy             DOUBLE NOT NULL,
-  buy_amount      INTEGER NOT NULL,
-  sell            DOUBLE NOT NULL,
-  sell_amount     INTEGER NOT NULL,
-  last            DOUBLE NOT NULL,
-  var             DOUBLE NOT NULL,
-  varper          DOUBLE NOT NULL,
-  volume          INTEGER NOT NULL,
-  adjustment      DOUBLE NOT NULL,
-  min             DOUBLE NOT NULL,
-  max             DOUBLE NOT NULL,
-  oin             INTEGER NOT NULL,
-  created_at      DATETIME NOT NULL
-);
@@ -1 +0,0 @@
-DROP TABLE alerts;
@@ -1,8 +0,0 @@
-CREATE TABLE alerts (
-  id                  INTEGER PRIMARY KEY,
-  created_at          DATETIME NOT NULL,
-  asset               VARCHAR(20) NOT NULL,
-  previous_value      DOUBLE NOT NULL,
-  current_value       DOUBLE NOT NULL,
-  active              BOOLEAN NOT NULL
-);
@@ -1,99 +0,0 @@
-extern crate reqwest;
-extern crate serde_json;
-
-use diesel::sqlite::SqliteConnection;
-use serde_json::json;
-use std::env;
-use crate::models::{Dollar, Alert};
-use crate::storage;
-
-pub fn check_dollar(conn: &SqliteConnection, dollar: &Dollar) {
-    let dollar_close = storage::get_dollar_on_close(conn);
-    let closing_prc_change =
-        match &dollar_close {
-            Some(dollar_close) => {
-                (dollar.last - dollar_close.last)/dollar_close.last * 100.0
-            },
-            None => 0.0
-        };
-
-    match storage::get_dollar_alert(conn) {
-        Some(dollar_alert) => {
-            let alert_prc_change = (dollar.last - dollar_alert.current_value)/dollar_alert.current_value * 100.0;
-            // TODO: This value should be set by arguments to binary or by config files
-            if alert_prc_change.abs() > 2.0 {
-                if closing_prc_change.abs() > 3.0 {
-                    // If we couldn't unwrap closing_prc_change == 0.0
-                    let dollar_close = dollar_close.unwrap();
-                    let new_alert = Alert::new("dollar".to_string(), dollar_alert.previous_value, dollar.last);
-                    storage::replace_alert(conn, &dollar_alert, &new_alert);
-                    send_updated_alert(dollar_close.last, dollar_alert.current_value, dollar.last, alert_prc_change, closing_prc_change);
-                } else {
-                    storage::deactivate_alert(conn, &dollar_alert);
-                    send_resolved_alert();
-                }
-            }
-        }
-        None => {
-            // TODO: This value should be set by arguments to binary or by config files
-            if closing_prc_change.abs() > 3.0 {
-                // If we couldn't unwrap closing_prc_change == 0.0
-                let dollar_close = dollar_close.unwrap();
-                let alert = Alert::new(String::from("dollar"), dollar_close.last, dollar.last);
-                storage::store_alert(conn, &alert);
-                send_new_alert(dollar_close.last, dollar.last, closing_prc_change);
-            }
-        }
-    };
-}
-// payload = {
-//     "username": "Financial Watchdog",
-//     "icon_emoji": ":money_with_wings:",
-//     "attachments": [{
-//         "color": color,
-//         "title": title,
-//         "text": text,
-//         "fallback": text
-//         "footer": "Financial Watchdog",
-//     }]
-// }
-fn send_new_alert(price_close: f64, price_current: f64, prc_change: f64) {
-    let msg = format!("Closing price: {:.2}\nCurrent price: {:.2}\nChanged: {:+.2}%", price_close, price_current, prc_change);
-    send_alert_slack("#B22222", "Price jump from closing", &msg);
-    // TODO: send_alert_mail(prc_change);
-}
-
-fn send_resolved_alert() {
-    let msg = format!("Dollar price from closing price is back below threshold");
-    send_alert_slack("#3873AD", "Price back to normal", &msg);
-    // TODO: send_alert_mail(prc_change);
-}
-
-fn send_updated_alert(price_close: f64, price_previous: f64, price_current: f64, prc_change: f64, prc_change_close: f64) {
-    let msg = format!("Closing price: {:.2}\nPrevious price: {:.2}\nCurrent price: {:.2}\nChange from previous: *{:+.2}%*\nChange from close: {:+.2}%", price_close, price_previous, price_current, prc_change, prc_change_close);
-    send_alert_slack("#f1a031", "[UPDATE] Price jump", &msg);
-    // TODO: send_alert_mail(prc_change);
-}
-
-fn send_alert_slack(color: &str, title: &str, msg: &str) {
-    let payload = json!({
-            "username": "Financial Watchdog",
-            "icon_emoji": ":money_with_wings:",
-            "attachments": [{
-                "color": color,
-                "title": title,
-                "text": msg,
-                "fallback": msg,
-                "footer": "Financial Watchdog"
-            }]
-        })
-        .to_string();
-    // We can unwrap safely because env variables are checked in main.rs::init_env()
-    let slack_webhook_url = env::var("SLACK_WEBHOOK_URL").unwrap();
-    let client = reqwest::Client::new();
-    client.post(&slack_webhook_url)
-        .header(reqwest::header::CONTENT_TYPE, "application/json")
-        .body(payload)
-        .send()
-        .expect("Failed sending slack alert");
-}
@@ -1,37 +0,0 @@
-#[macro_use]
-extern crate diesel;
-extern crate dotenv;
-extern crate chrono;
-
-pub mod schema;
-pub mod models;
-
-mod scrape;
-mod storage;
-mod alert;
-
-use dotenv::dotenv;
-use std::{thread, time, env};
-
-fn main() {
-    init_env();
-
-    loop {
-        std::thread::spawn(|| {
-            let dbconn = crate::storage::establish_connection();
-            let value = crate::scrape::scrape();
-            crate::alert::check_dollar(&dbconn, &value);
-            crate::storage::store(&dbconn, &value);
-        });
-
-        thread::sleep(time::Duration::from_secs(60));
-    }
-}
-
-fn init_env() {
-    dotenv().ok();
-    env::var("DATABASE_URL")
-        .expect("DATABASE_URL must be set");
-    env::var("SLACK_WEBHOOK_URL")
-        .expect("SLACK_WEBHOOK_URL must be set");
-}
@@ -1,71 +0,0 @@
-use chrono::NaiveDateTime;
-use chrono::offset::Utc;
-use crate::schema::{dollar, alerts};
-
-#[derive(Queryable, Insertable, Debug)]
-#[table_name="dollar"]
-pub struct Dollar {
-    pub id: Option<i32>,
-    pub buy: f64,
-    pub buy_amount: i32,
-    pub sell: f64,
-    pub sell_amount: i32,
-    pub last: f64,
-    pub var: f64,
-    pub varper: f64,
-    pub volume: i32,
-    pub adjustment: f64,
-    pub min: f64,
-    pub max: f64,
-    pub oin: i32,
-    pub created_at: NaiveDateTime
-}
-
-impl Dollar {
-    pub fn new() -> Dollar {
-        let created_at = Utc::now().naive_utc();
-
-        Dollar {
-            id: None,
-            buy: 0.0,
-            buy_amount: 0,
-            sell: 0.0,
-            sell_amount: 0,
-            last: 0.0,
-            var: 0.0,
-            varper: 0.0,
-            volume: 0,
-            adjustment: 0.0,
-            min: 0.0,
-            max: 0.0,
-            oin: 0,
-            created_at: created_at
-        }
-    }
-}
-
-#[derive(Queryable, Insertable, Debug)]
-#[table_name="alerts"]
-pub struct Alert {
-    pub id: Option<i32>,
-    pub created_at: NaiveDateTime,
-    pub asset: String,
-    pub previous_value: f64,
-    pub current_value: f64,
-    pub active: bool
-}
-
-impl Alert {
-    pub fn new(asset: String, previous_value: f64, current_value: f64) -> Alert {
-        let created_at = Utc::now().naive_utc();
-
-        Alert {
-            id: None,
-            created_at: created_at,
-            asset: asset,
-            previous_value: previous_value,
-            current_value: current_value,
-            active: true
-        }
-    }
-}
@@ -1,34 +0,0 @@
-table! {
-    alerts (id) {
-        id -> Nullable<Integer>,
-        created_at -> Timestamp,
-        asset -> Text,
-        previous_value -> Double,
-        current_value -> Double,
-        active -> Bool,
-    }
-}
-
-table! {
-    dollar (id) {
-        id -> Nullable<Integer>,
-        buy -> Double,
-        buy_amount -> Integer,
-        sell -> Double,
-        sell_amount -> Integer,
-        last -> Double,
-        var -> Double,
-        varper -> Double,
-        volume -> Integer,
-        adjustment -> Double,
-        min -> Double,
-        max -> Double,
-        oin -> Integer,
-        created_at -> Timestamp,
-    }
-}
-
-allow_tables_to_appear_in_same_query!(
-    alerts,
-    dollar,
-);
@@ -1,102 +0,0 @@
-extern crate fantoccini;
-extern crate futures;
-extern crate select;
-extern crate serde_json;
-extern crate tokio;
-extern crate webdriver;
-
-use fantoccini::{Client, Locator};
-use futures::future::Future;
-use futures::sync::oneshot;
-use select::document::Document;
-use select::predicate::Class;
-use serde_json::json;
-use webdriver::capabilities::Capabilities;
-
-use crate::models::Dollar;
-
-pub fn scrape() -> Dollar {
-    let html = fetch_site();
-
-    let document = Document::from(html.as_str());
-
-    let mut value = Dollar::new();
-
-    for node in document.find(Class("PriceCell")) {
-        let full_class =
-            node.attr("class")
-            .unwrap_or_else(|| panic!("Node without class attribute"));
-        let class = full_class.split_whitespace().last();
-
-        match class {
-            Some("bsz") => value.buy_amount = parse_i32(node.text()),
-            Some("bid") => value.buy = parse_f64(node.text()),
-            Some("ask") => value.sell = parse_f64(node.text()),
-            Some("asz") => value.sell_amount = parse_i32(node.text()),
-            Some("lst") => value.last = parse_f64(node.text()),
-            Some("variation") => value.var = parse_f64(node.text()),
-            Some("change") => {
-                value.varper = parse_f64(node.text().trim_end_matches("%").to_string())
-            }
-            Some("von") => value.volume = parse_i32(node.text()),
-            Some("settlementPrice") => value.adjustment = parse_f64(node.text()),
-            Some("low") => value.min = parse_f64(node.text()),
-            Some("hgh") => value.max = parse_f64(node.text()),
-            Some("oin") => value.oin = parse_i32(node.text()),
-            Some("futureImpliedRate") => {}
-            Some(class) => {
-                panic!("Non-matching class: {}", class);
-            }
-            None => {
-                panic!("Node without empty class attribute");
-            }
-        }
-    }
-
-    value
-}
-
-fn fetch_site() -> String {
-    // TODO: Set headless capabilities for chromedriver.
-    let mut cap = Capabilities::new();
-    let arg = json!({"args": ["-headless"]});
-    cap.insert("moz:firefoxOptions".to_string(), arg);
-    let c = Client::with_capabilities("http://localhost:4444", cap);
-    let (sender, receiver) = oneshot::channel::<String>();
-
-    tokio::run(
-        c.map_err(|e| unimplemented!("failed to connect to WebDriver: {:?}", e))
-            .and_then(|c| c.goto("https://rofex.primary.ventures/rofex/futuros"))
-            .and_then(|mut c| c.current_url().map(move |url| (c, url)))
-            .and_then(|(c, _url)| {
-                c.wait_for_find(Locator::XPath("((//div[@class='PricePanelRow-row'])[1]//div[@class='PriceCell lst'])[not(contains(., '-'))]/.."))
-            })
-            .and_then(|mut e| {
-                e.html(true)
-            })
-            .and_then(|e| match sender.send(e) {
-                Err(err) => panic!("Error sending fetched site: {}", err),
-                Ok(()) => Ok(()),
-            })
-            .map_err(|e| {
-                panic!("a WebDriver command failed: {:?}", e);
-            }),
-    );
-
-    receiver.wait().unwrap()
-}
-
-fn parse_i32(text: String) -> i32 {
-    text.replace(".", "")
-        .trim()
-        .parse()
-        .unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
-}
-
-fn parse_f64(text: String) -> f64 {
-    text.replace(".", "")
-        .replace(",", ".")
-        .trim()
-        .parse()
-        .unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
-}
@@ -1,70 +0,0 @@
-extern crate diesel;
-extern crate dotenv;
-
-use diesel::prelude::*;
-use diesel::sqlite::SqliteConnection;
-use diesel::result::Error::NotFound;
-use chrono::{Utc, NaiveDateTime, NaiveTime};
-use std::env;
-use crate::models::{Dollar, Alert};
-use crate::schema::{dollar, alerts};
-
-pub fn establish_connection() -> SqliteConnection {
-    // We can unwrap safely because env variables are checked in main.rs::init_env()
-    let database_url = env::var("DATABASE_URL").unwrap();
-    SqliteConnection::establish(&database_url)
-        .expect(&format!("Error connecting to {}", database_url))
-}
-
-pub fn store(conn: &SqliteConnection, new_dollar: &Dollar) {
-    diesel::insert_into(dollar::table)
-        .values(new_dollar)
-        .execute(conn)
-        .expect("Error saving new dollar");
-}
-
-pub fn get_dollar_on_close(conn: &SqliteConnection) -> Option<Dollar> {
-    let yesterday = Utc::today().naive_utc().pred();
-    let timestamp = NaiveDateTime::new(yesterday, NaiveTime::from_hms(23, 59, 59));
-
-    let result = dollar::table.filter(dollar::created_at.lt(timestamp))
-        .order(dollar::created_at.desc())
-        .first::<Dollar>(conn);
-
-    match result {
-        Ok(previous) => Some(previous),
-        Err(NotFound) => None,
-        Err(err) => panic!("Error getting previous dollar: \n{}", err)
-    }
-}
-
-pub fn store_alert(conn: &SqliteConnection, new_alert: &Alert) {
-    diesel::insert_into(alerts::table)
-        .values(new_alert)
-        .execute(conn)
-        .expect("Error saving new alert");
-}
-
-pub fn get_dollar_alert(conn: &SqliteConnection) -> Option<Alert> {
-    let result = alerts::table.filter(alerts::asset.eq("dollar").and(alerts::active.eq(true)))
-        .first::<Alert>(conn);
-
-    match result {
-        Ok(alert) => Some(alert),
-        Err(NotFound) => None,
-        Err(err) => panic!("Error getting current alert: \n{}", err)
-    }
-}
-
-pub fn deactivate_alert(conn: &SqliteConnection, alert: &Alert) {
-    diesel::update(alerts::table.filter(alerts::id.eq(alert.id)))
-        .set(alerts::active.eq(false))
-        .execute(conn)
-        .expect("Error deactivating alert");
-}
-
-pub fn replace_alert(conn: &SqliteConnection, old_alert: &Alert, new_alert: &Alert) {
-    // TODO: Do this operations inside a transaction for atomicity
-    store_alert(conn, new_alert);
-    deactivate_alert(conn, old_alert);
-}
@@ -1,3 +0,0 @@
-from . import datahandler, charts
-from .backtester import Backtest
-from .portfolio import *
@@ -1,121 +0,0 @@
-import pandas as pd
-import pyprind
-from .portfolio import Portfolio
-
-
-class Backtest:
-    def __init__(self, schema, initial_capital=1_000_000):
-        self.schema = schema
-        self._portfolio = None
-        self._data = None
-        self.initial_capital = initial_capital
-
-    @property
-    def portfolio(self):
-        return self._portfolio
-
-    @portfolio.setter
-    def portfolio(self, portfolio):
-        assert isinstance(portfolio, Portfolio)
-        self._portfolio = portfolio
-
-    @property
-    def data(self):
-        return self._data
-
-    @data.setter
-    def data(self, data):
-        self._data = data
-
-    def run(self, periods=1, sma_days=None):
-        """Runs a backtest and returns a dataframe with the daily balance"""
-        assert self._data is not None
-        assert self._portfolio is not None
-
-        self.current_capital = 0
-        self.current_cash = self.initial_capital
-        self.inventory = pd.DataFrame(columns=['symbol', 'cost', 'qty'])
-        self.balance = pd.DataFrame()
-        if sma_days:
-            self._data.sma(sma_days)
-
-        data_iterator = self._data.iter_dates()
-
-        first_day = self._data['date'].min()
-        last_day = self._data['date'].max()
-        rebalancing_days = pd.date_range(first_day, last_day, freq=str(periods) +
-                                         'BMS').to_pydatetime() if periods is not None else []
-
-        bar = pyprind.ProgBar(data_iterator.ngroups, bar_char='█')
-
-        self.balance = pd.DataFrame({
-            'capital': self.current_cash,
-            'cash': self.current_cash
-        },
-                                    index=[self._data.start_date - pd.Timedelta(1, unit='day')])
-
-        for date, data in data_iterator:
-
-            if date == first_day:
-                self._rebalance_portfolio(data, sma_days)
-            self._update_balance(date, data)
-            if date in rebalancing_days:
-                self._rebalance_portfolio(data, sma_days)
-
-            bar.update()
-
-        self.balance['% change'] = self.balance['capital'].pct_change()
-        self.balance['accumulated return'] = (1.0 + self.balance['% change']).cumprod()
-
-        return self.balance
-
-    def _rebalance_portfolio(self, data, sma_days):
-        """Rebalances the portfolio so that the total money is allocated according to the given percentages"""
-        money_total = self.current_cash + self.current_capital
-        for asset in self._portfolio.assets:
-            query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
-            asset_current = data.query(query)
-
-            asset_price = asset_current[self.schema['adjClose']].values[0]
-
-            if sma_days is not None:
-                if asset_current['sma'].values[0] < asset_price:
-                    qty = (money_total * asset.percentage) // asset_price
-                else:
-                    qty = 0
-
-            else:
-                qty = (money_total * asset.percentage) // asset_price
-
-            inventory_entry = self.inventory.query(query)
-            self.inventory.drop(inventory_entry.index, inplace=True)
-            updated_asset = pd.Series([asset.symbol, asset_price, qty])
-            updated_asset.index = self.inventory.columns
-            self.inventory = self.inventory.append(updated_asset, ignore_index=True)
-        # Update current cash
-        invested_capital = sum(self.inventory['cost'] * self.inventory['qty'])
-        self.current_cash = money_total - invested_capital
-
-    def _update_balance(self, date, data):
-        """Updates self.balance for the given date"""
-        costs = []
-
-        for asset in self._portfolio.assets:
-            query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
-            asset_current = data.query(query)
-            inventory_asset = self.inventory.query(query)
-
-            cost = asset_current[self.schema['adjClose']].values[0]
-            qty = inventory_asset['qty'].values[0]
-            costs.append(cost * qty)
-
-        total_value = sum(costs)
-        self.current_capital = total_value
-        money_total = total_value + self.current_cash
-
-        row = pd.Series({
-            'total value': total_value,
-            'cash': self.current_cash,
-            'capital': money_total,
-        }, name=date)
-        self.balance = self.balance.append(row)
@@ -1,78 +0,0 @@
-"""Generates charts from a portfolio report"""
-
-import altair as alt
-import pandas as pd
-
-
-def returns_chart(report):
-    # Time interval selector
-    time_interval = alt.selection(type='interval', encodings=['x'])
-
-    # Area plot
-    areas = alt.Chart().mark_area(opacity=0.7).encode(x='index:T',
-                                                      y=alt.Y('accumulated return:Q', axis=alt.Axis(format='%')))
-
-    # Nearest point selector
-    nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=['index'], empty='none')
-
-    points = areas.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
-
-    # Transparent date selector
-    selectors = alt.Chart().mark_point().encode(
-        x='index:T',
-        opacity=alt.value(0),
-    ).add_selection(nearest)
-
-    text = areas.mark_text(
-        align='left', dx=5,
-        dy=-5).encode(text=alt.condition(nearest, 'accumulated return:Q', alt.value(' '), format='.2%'))
-
-    layered = alt.layer(selectors,
-                        points,
-                        text,
-                        areas.encode(
-                            alt.X('index:T', axis=alt.Axis(title='date'), scale=alt.Scale(domain=time_interval))),
-                        width=700,
-                        height=350,
-                        title='Wealth over time')
-
-    lower = areas.properties(width=700, height=70).add_selection(time_interval)
-
-    return alt.vconcat(layered, lower, data=report.reset_index())
-
-
-def returns_histogram(report):
-    bar = alt.Chart(report).mark_bar().encode(x=alt.X('% change:Q',
-                                                      bin=alt.BinParams(maxbins=100),
-                                                      axis=alt.Axis(format='%')),
-                                              y='count():Q')
-    return bar
-
-
-def monthly_returns_heatmap(report):
-    resample = report.resample('M')['capital'].last()
-    monthly_returns = resample.pct_change().reset_index()
-    monthly_returns['capital'].iat[0] = resample.iloc[0] / report.iloc[0]['capital'] - 1
-    monthly_returns.columns = ['date', 'capital']
-
-    chart = alt.Chart(monthly_returns).mark_rect().encode(
-        alt.X('year(date):O', title='Year'), alt.Y('month(date):O', title='Month'),
-        alt.Color('mean(capital)', title='Return', scale=alt.Scale(scheme='redyellowgreen')),
-        alt.Tooltip('mean(capital)', format='.2f')).properties(title='Monthly Returns')
-
-    return chart
-
-
-def sma_graph(data):
-
-    price_chart = alt.Chart(
-        data,
-        width=700,
-        height=350,
-    ).mark_line().encode(x='date:T', y=alt.Y('adjClose:Q'), color='symbol:N', opacity=alt.value(0.3))
-    sma_chart = alt.Chart(
-        data,
-        width=700,
-        height=350,
-    ).mark_line(strokeDash=[1, 1]).encode(x='date:T', y=alt.Y('sma:Q'), color='symbol:N')
-    return price_chart + sma_chart
@@ -1,2 +0,0 @@
-from .schema import *
-from .historical_asset_data import HistoricalAssetData
@@ -1,79 +0,0 @@
-import os
-from .schema import Schema
-import pandas as pd
-
-
-class HistoricalAssetData:
-    """Historical Asset Data container class."""
-    def __init__(self, file, schema=None, **params):
-        if schema:
-            assert isinstance(schema, Schema)
-        else:
-            self.schema = HistoricalAssetData.default_schema()
-
-        file_extension = os.path.splitext(file)[1]
-
-        if file_extension == '.h5':
-            self._data = pd.read_hdf(file, **params)
-        elif file_extension == '.csv':
-            params['parse_dates'] = [self.schema.date.mapping]
-            self._data = pd.read_csv(file, **params)
-
-        columns = self._data.columns
-        assert all((col in columns for _key, col in self.schema))
-
-        date_col = self.schema['date']
-
-        self.start_date = self._data[date_col].min()
-        self.end_date = self._data[date_col].max()
-
-    def apply_filter(self, f):
-        """Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
-        return self._data.query(f.query)
-
-    def iter_dates(self):
-        """Returns `pd.DataFrameGroupBy` that groups contracts by date"""
-        return self._data.groupby(self.schema['date'])
-
-    def __getattr__(self, attr):
-        """Pass method invocation to `self._data`"""
-
-        method = getattr(self._data, attr)
-        if hasattr(method, '__call__'):
-
-            def df_method(*args, **kwargs):
-                return method(*args, **kwargs)
-
-            return df_method
-        else:
-            return method
-
-    def __getitem__(self, item):
-        if isinstance(item, pd.Series):
-            return self._data[item]
-        else:
-            key = self.schema[item]
-            return self._data[key]
-
-    def __setitem__(self, key, value):
-        self._data[key] = value
-        if key not in self.schema:
-            self.schema.update({key: key})
-
-    def __len__(self):
-        return len(self._data)
-
-    def __repr__(self):
-        return self._data.__repr__()
-
-    def default_schema():
-        """Returns default schema for Historical Asset Data"""
-        schema = Schema.canonical()
-        return schema
-
-    def sma(self, months):
-        sma = self._data.groupby('symbol').rolling(months)['adjClose'].mean()
-        sma = sma.reset_index('symbol').sort_index()
-        sma = sma.fillna(0)
-        self._data['sma'] = sma['adjClose']
-        self.schema.update({'sma': 'sma'})
@@ -1,162 +0,0 @@
-class Schema:
-    """Data schema class.
-    Used to run validations and provide uniform access to fields in the data set.
-    """
-
-    columns = [
-        "symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen",
-        "adjVolume", "divCash", "splitFactor"
-    ]
-
-    def canonical():
-        """Builder method that returns a `Schema` with default mappings"""
-        mappings = {key: key for key in Schema.columns}
-        return Schema(mappings)
-
-    def __init__(self, mappings):
-        assert all((key in mappings for key in Schema.columns))
-
-        self._mappings = mappings
-
-    def update(self, mappings):
-        """Update schema according to given `mappings`"""
-        self._mappings.update(mappings)
-        return self
-
-    def __contains__(self, key):
-        """Returns True if key is in schema"""
-        return key in self._mappings.keys()
-
-    def __getattr__(self, key):
-        """Returns Field object used to build Filters"""
-        return Field(key, self._mappings[key])
-
-    def __setitem__(self, key, value):
-        self._mappings[key] = value
-
-    def __getitem__(self, key):
-        """Returns mapping of given `key`"""
-        return self._mappings[key]
-
-    def __iter__(self):
-        return iter(self._mappings.items())
-
-    def __repr__(self):
-        return "Schema({})".format([Field(k, m) for k, m in self._mappings.items()])
-
-    def __eq__(self, other):
-        return self._mappings == other._mappings
-
-
-class Field:
-    """Encapsulates data fields to build filters used by strategies"""
-
-    __slots__ = ("name", "mapping")
-
-    def __init__(self, name, mapping):
-        self.name = name
-        self.mapping = mapping
-
-    def _create_filter(self, op, other):
-        if isinstance(other, Field):
-            query = Field._format_query(self.mapping, op, other.mapping)
-        else:
-            query = Field._format_query(self.mapping, op, other)
-        return Filter(query)
-
-    def _combine_fields(self, op, other, invert=False):
-        if isinstance(other, Field):
-            name = Field._format_query(self.name, op, other.name, invert)
-            mapping = Field._format_query(self.mapping, op, other.mapping, invert)
-        elif isinstance(other, (int, float)):
-            name = Field._format_query(self.name, op, other, invert)
-            mapping = Field._format_query(self.mapping, op, other, invert)
-        else:
-            raise TypeError
-
-        return Field(name, mapping)
-
-    def _format_query(left, op, right, invert=False):
-        if invert:
-            left, right = right, left
-        query = "{left} {op} {right}".format(left=left, op=op, right=right)
-        return query
-
-    def __add__(self, value):
-        return self._combine_fields("+", value)
-
-    def __radd__(self, value):
-        return self._combine_fields("+", value, invert=True)
-
-    def __sub__(self, value):
-        return self._combine_fields("-", value)
-
-    def __rsub__(self, value):
-        return self._combine_fields("-", value, invert=True)
-
-    def __mul__(self, value):
-        return self._combine_fields("*", value)
-
-    def __rmul__(self, value):
-        return self._combine_fields("*", value, invert=True)
-
-    def __truediv__(self, value):
-        return self._combine_fields("/", value)
-
-    def __rtruediv__(self, value):
-        return self._combine_fields("/", value, invert=True)
-
-    def __lt__(self, value):
-        return self._create_filter("<", value)
-
-    def __le__(self, value):
-        return self._create_filter("<=", value)
-
-    def __gt__(self, value):
-        return self._create_filter(">", value)
-
-    def __ge__(self, value):
-        return self._create_filter(">=", value)
-
-    def __eq__(self, value):
-        if isinstance(value, str):
-            value = "'{}'".format(value)
-        return self._create_filter("==", value)
-
-    def __ne__(self, value):
-        return self._create_filter("!=", value)
-
-    def __repr__(self):
-        return "Field(name='{}', mapping='{}')".format(self.name, self.mapping)
-
-
-class Filter:
-    """This class determines entry/exit conditions for strategies"""
-
-    __slots__ = ("query")
-
-    def __init__(self, query):
-        self.query = query
-
-    def __and__(self, other):
-        """Returns logical *and* between `self` and `other`"""
-        assert isinstance(other, Filter)
-        new_query = "({}) & ({})".format(self.query, other.query)
-        return Filter(query=new_query)
-
-    def __or__(self, other):
-        """Returns logical *or* between `self` and `other`"""
-        assert isinstance(other, Filter)
-        new_query = "(({}) | ({}))".format(self.query, other.query)
-        return Filter(query=new_query)
-
-    def __invert__(self):
-        """Negates filter"""
-        return Filter("!({})".format(self.query))
-
-    def __call__(self, data):
-        """Returns dataframe of filtered data"""
-        return data.eval(self.query)
-
-    def __repr__(self):
-        return "Filter(query='{}')".format(self.query)
@@ -1,2 +0,0 @@
-from .portfolio import Portfolio
-from .asset import Asset
@@ -1,8 +0,0 @@
-class Asset:
-    """Asset data class"""
-    def __init__(self, symbol, percentage):
-        self.symbol = symbol
-        self.percentage = percentage
-
-    def __repr__(self):
-        return "Asset(symbol={}, percentage={})".format(self.symbol, self.percentage)
@@ -1,28 +0,0 @@
-from .asset import Asset
-
-
-class Portfolio:
-    def __init__(self):
-        self.assets = []
-
-    def add_asset(self, asset):
-        """Adds asset to the Portfolio"""
-        assert isinstance(asset, Asset)
-        self.assets.append(asset)
-        return self
-
-    def add_assets(self, assets):
-        """Adds assets to the Portfolio"""
-        for asset in assets:
-            self.add_asset(asset)
-        return self
-
-    def remove_asset(self, asset_number):
-        """Removes asset from the Portfolio"""
-        self.assets.pop(asset_number)
-        return self
-
-    def clear_assets(self):
-        """Removes *all* assets from the Portfolio"""
-        self.assets = []
-        return self
@@ -1,103 +0,0 @@
-import numpy as np
-
-from asset_backtester import Backtest, Portfolio, Asset
-
-# We use Portfolio Visualizer (https://www.portfoliovisualizer.com/backtest-portfolio)
-# to find the actual return for the test porfolios.
-
-
-def test_ivy_portfolio(sample_datahandler):
-    bt = run_backtest(sample_datahandler, ivy_portfolio())
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
-    assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
-
-    actual_return = 1.2041
-    return_tolerance = 0.01
-    assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
-
-
-def test_ivy_monthly_rebalance(sample_datahandler):
-    bt = run_backtest(sample_datahandler, ivy_portfolio(), periods=1)
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
-    assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
-
-    actual_return = 1.2043
-    return_tolerance = 0.01
-    assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
-
-
-def test_all_weather_portfolio(sample_datahandler):
-    bt = run_backtest(sample_datahandler, all_weather_portfolio())
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
-    assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
-
-    actual_return = 1.1874
-    return_tolerance = 0.01
-    assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
-
-
-def test_all_weather_monthly_rebalance(sample_datahandler):
-    bt = run_backtest(sample_datahandler, all_weather_portfolio(), periods=1)
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
-    assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
-
-    actual_return = 1.1828
-    return_tolerance = 0.01
-    assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
-
-
-def test_constant_price(constant_price_datahandler):
-    bt = run_backtest(constant_price_datahandler, ivy_portfolio())
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['% change'], 0.0, rtol=tolerance)
-    assert np.allclose(balance['capital'], bt.initial_capital, rtol=tolerance)
-    assert np.allclose(balance['total value'], bt.initial_capital, rtol=tolerance)
-    assert np.allclose(balance['accumulated return'], 1.0, rtol=tolerance)
-
-
-def test_zero_initial_capital(sample_datahandler):
-    bt = run_backtest(sample_datahandler, ivy_portfolio(), initial_capital=0)
-
-    balance = bt.balance[1:]
-    tolerance = 0.0001
-    assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
-    assert np.allclose(balance['cash'], 0.0, rtol=tolerance)
-    assert np.allclose(balance['total value'], 0.0, rtol=tolerance)
-
-
-# Helpers
-def run_backtest(data, portfolio, initial_capital=1_000_000, periods=None):
-    bt = Backtest(data.schema, initial_capital=initial_capital)
-    bt.portfolio = portfolio
-    bt.data = data
-    bt.run(periods=periods)
-
-    return bt
-
-
-def ivy_portfolio():
-    portfolio = Portfolio()
-    assets = [Asset('VTI', 0.2), Asset('VEU', 0.2), Asset('BND', 0.2), Asset('VNQ', 0.2), Asset('DBC', 0.2)]
-
-    return portfolio.add_assets(assets)
-
-
-def all_weather_portfolio():
-    portfolio = Portfolio()
-    assets = [Asset('VTI', 0.3), Asset('TLT', 0.4), Asset('IEF', 0.15), Asset('GLD', 0.075), Asset('DBC', 0.075)]
-
-    return portfolio.add_assets(assets)
@@ -1,23 +0,0 @@
-import os
-
-import pytest
-
-from asset_backtester.datahandler import HistoricalAssetData
-
-TEST_DIR = os.path.abspath(os.path.dirname(__file__))
-
-# 2019 data for TLT, GLD, IEF, VTI, VEU, BND, VNQ and DBC
-SAMPLE_DATA = os.path.join(TEST_DIR, 'test_data', 'sample_data.csv')
-
-
-@pytest.fixture(scope='module')
-def sample_datahandler():
-    data = HistoricalAssetData(SAMPLE_DATA)
-    return data
-
-
-@pytest.fixture(scope='module')
-def constant_price_datahandler():
-    data = HistoricalAssetData(SAMPLE_DATA)
-    data['adjClose'] = data['close'] = 10.0
-    return data
@@ -1,12 +0,0 @@
-import os
-import backtester as bt
-from backtester.utils import get_data_dir
-
-data_dir = get_data_dir()
-spx_test_data = os.path.join(data_dir, "SPX_2008-2018.csv")
-
-portfolio = bt.run(spx_test_data)
-report = portfolio.create_report()
-
-print("Running Benchmark strategy on SPX data for 2008-2018")
-print(report.tail(30))
@@ -1,8 +0,0 @@
-{
-  "cboe": {
-    "mute_notifications": []
-  },
-  "notifications": {
-      "slack_webhook": ""
-  }
-}
@@ -1,48 +0,0 @@
-import logging.config
-import os
-import argparse
-
-from data_scraper import cboe, tiingo, backup
-
-parser = argparse.ArgumentParser(prog="data_scraper.py")
-parser.add_argument("-t", "--symbols", nargs="+", help="Symbols to fetch")
-parser.add_argument(
-    "-s",
-    "--scraper",
-    choices=["cboe", "tiingo"],
-    default="cboe",
-    help="Scraper to use")
-parser.add_argument(
-    "-v", "--verbose", action="store_true", help="Enable logging")
-parser.add_argument(
-    "-a",
-    "--aggregate",
-    action="store_true",
-    help="Aggregate daily data files")
-parser.add_argument(
-    "-b", "--backup", action="store_true", help="Backup files in S3 bucket")
-
-args = parser.parse_args()
-module_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
-
-if args.verbose:
-    config_file = os.path.realpath(os.path.join(module_dir, "logconfig.ini"))
-    logging.config.fileConfig(fname=config_file)
-
-if args.aggregate:
-    if args.symbols:
-        cboe.aggregate_monthly_data(args.symbols)
-    else:
-        cboe.aggregate_monthly_data()
-elif args.backup:
-    backup.backup_data()
-else:
-    if args.scraper == "tiingo":
-        scraper = tiingo
-    else:
-        scraper = cboe
-
-    if args.symbols:
-        scraper.fetch_data(args.symbols)
-    else:
-        scraper.fetch_data()
@@ -1,114 +0,0 @@
-import logging
-import os
-
-import boto3
-from botocore.exceptions import ClientError
-
-from data_scraper import utils
-from data_scraper.notifications import slack_notification, Status
-
-logger = logging.getLogger(__name__)
-
-
-def backup_data():
-    """Uploads scraped files to S3 bucket.
-    Set bucket name in environment variable $S3_BUCKET
-    """
-    try:
-        bucket_name = utils.get_environment_var("S3_BUCKET")
-    except EnvironmentError as e:
-        logger.error(str(e))
-        slack_notification("Backup failed. Set $S3_BUCKET env variable",
-                           __name__)
-        raise e
-
-    s3 = boto3.resource("s3")
-    bucket = s3.Bucket(bucket_name)
-
-    data_path = utils.get_save_data_path()
-
-    cboe_data = os.path.join(data_path, "cboe")
-    cboe_folders = []
-    if os.path.exists(cboe_data):
-        cboe_folders = [
-            os.path.join(cboe_data, folder) for folder in os.listdir(cboe_data)
-            if not folder.endswith("daily")
-        ]
-
-    tiingo_data = os.path.join(data_path, "tiingo")
-    tiingo_folders = []
-    if os.path.exists(tiingo_data):
-        tiingo_folders = [
-            os.path.join(tiingo_data, folder)
-            for folder in os.listdir(tiingo_data)
-        ]
-
-    done_cboe, fail_cboe = _upload_folders(
-        bucket, "cboe", cboe_folders, remove_files=False)
-    done_tiingo, fail_tiingo = _upload_folders(
-        bucket, "tiingo", tiingo_folders, remove_files=True)
-
-    done = done_cboe + done_tiingo
-    failed = fail_cboe + fail_tiingo
-    if len(done) > 0:
-        msg = "Successful backup of symbols: " + ", ".join(done)
-        slack_notification(msg, __name__, status=Status.Success)
-    if len(failed) > 0:
-        msg = "Unable to backup symbols: " + ", ".join(done)
-        slack_notification(msg, __name__, status=Status.Warning)
-
-
-def _upload_folders(bucket, scraper, folders, remove_files=False):
-    """Uploads folders to S3 bucket and (optionally) removes old files"""
-    data_path = utils.get_save_data_path()
-    done, failed = [], []
-
-    for folder in folders:
-        symbol = os.path.basename(folder)
-        try:
-            if remove_files:
-                _remove_old_files(bucket, prefix=scraper + "/" + symbol)
-            _upload_folder(bucket, folder, data_path)
-        except Exception:
-            failed.append(os.path.basename(folder))
-        else:
-            done.append(os.path.basename(folder))
-
-    return (done, failed)
-
-
-def _upload_folder(bucket, folder, data_path):
-    """Uploads folder contents to S3 bucket"""
-    if not os.path.isdir(folder):
-        return
-
-    for root, dirs, files in os.walk(folder):
-        for file in files:
-            file_path = os.path.join(root, file)
-            key = os.path.relpath(file_path, data_path)
-            if _key_exists(bucket, key):
-                logger.debug("File already exists in S3")
-                continue
-            try:
-                bucket.upload_file(file_path, key)
-                logger.debug("Uploaded file %s to S3", file)
-            except Exception as e:
-                msg = "Error uploading data file {} to S3.\nReceived exception message {}".format(
-                    file_path, str(e))
-                logger.error(msg, exc_info=True)
-                slack_notification(msg, __name__)
-                raise e
-
-
-def _key_exists(bucket, key):
-    try:
-        bucket.Object(key).load()
-    except ClientError as e:
-        return int(e.response["Error"]["Code"]) != 404
-    return False
-
-
-def _remove_old_files(bucket, prefix):
-    old_files = bucket.objects.filter(Prefix=prefix)
-    for file in old_files:
-        file.delete()
@@ -1,249 +0,0 @@
-import logging
-import os
-from datetime import date
-from io import StringIO
-from itertools import groupby
-
-from bs4 import BeautifulSoup
-import requests
-import pandas as pd
-
-from . import utils, validation
-from .notifications import slack_notification, Status
-
-logger = logging.getLogger(__name__)
-
-url = "http://www.cboe.com/delayedquote/quote-table-download"
-
-
-def fetch_data(symbols=None):
-    """Fetches options data for a given list of symbols"""
-    symbols = symbols or _get_all_listed_symbols()
-    options = utils.get_module_config("cboe")
-    mute_notifications = options.get("mute_notifications", [])
-
-    try:
-        form_data = _form_data()
-    except requests.ConnectionError as ce:
-        msg = "Connection error trying to reach {}".format(url)
-        logger.error(msg)
-        slack_notification(msg, __name__)
-        raise ce
-    except Exception as e:
-        msg = "Error parsing response"
-        logger.error(msg, exc_info=True)
-        slack_notification(msg, __name__)
-        raise e
-
-    headers = {"Referer": url}
-    file_url = "http://www.cboe.com/delayedquote/quotedata.dat"
-
-    symbols = [symbol.upper() for symbol in symbols]
-    done, failed = [], []
-
-    for symbol in symbols:
-        form_data["ctl00$ContentTop$C005$txtTicker"] = symbol
-        try:
-            response = requests.post(url,
-                                     data=form_data,
-                                     headers=headers,
-                                     allow_redirects=False)
-            symbol_req = requests.get(file_url,
-                                      cookies=response.cookies,
-                                      headers=headers)
-            symbol_data = symbol_req.text
-            if symbol_data == "" or symbol_data.startswith(" <!DOCTYPE"):
-                raise Exception
-        except Exception:
-            failed.append(symbol)
-            msg = "Error fetching symbol {} data".format(symbol)
-            logger.error(msg, exc_info=True)
-            if symbol not in mute_notifications:
-                slack_notification(msg, __name__)
-        else:
-            _save_data(symbol, symbol_data)
-            done.append(symbol)
-
-    if len(done) > 0:
-        msg = "Successfully scraped symbols: " + ", ".join(done)
-        slack_notification(msg, __name__, status=Status.Success)
-    if len(failed) > 0:
-        msg = "Failed to scrape symbols: " + ", ".join(failed)
-        slack_notification(msg, __name__, status=Status.Warning)
-
-
-def aggregate_monthly_data(symbols=None):
-    """Aggregate daily snapshots into monthly files and validate data"""
-    symbols = symbols or _get_all_listed_symbols()
-
-    save_data_path = utils.get_save_data_path()
-    scraper_dir = os.path.join(save_data_path, "cboe")
-
-    symbols = [symbol.upper() for symbol in symbols]
-
-    for symbol in symbols:
-        daily_dir = os.path.join(scraper_dir, symbol + "_daily")
-        if not os.path.exists(daily_dir):
-            msg = "Error aggregating data. Dir {} not found.".format(daily_dir)
-            logger.error(msg)
-            slack_notification(msg, __name__)
-            continue
-
-        monthly_dir = os.path.join(scraper_dir, symbol)
-
-        symbol_files = [
-            file for file in os.listdir(daily_dir) if file.endswith(".csv")
-        ]
-
-        for month, files in groupby(symbol_files, _monthly_grouper):
-            file_names = list(files)
-            daily_files = [
-                os.path.join(daily_dir, name) for name in file_names
-            ]
-            try:
-                symbol_df = concatenate_files(daily_files)
-            except Exception:
-                msg = "Error concatenating daily files for period " + month
-                logger.error(msg, exc_info=True)
-                slack_notification(msg, __name__)
-                continue
-
-            date_range = pd.to_datetime(symbol_df["quotedate"].unique())
-            if not validation.validate_dates_in_month(symbol, date_range):
-                today = pd.Timestamp.today()
-                first_date = date_range[0]
-                if first_date.year != today.year or first_date.month != today.month:
-                    msg = "Some trading dates where missing for symbol {}".format(
-                        symbol)
-                    slack_notification(msg, __name__)
-                continue
-
-            if not os.path.exists(monthly_dir):
-                os.makedirs(monthly_dir)
-                logger.debug("Symbol dir %s created", monthly_dir)
-
-            file_name = _monthly_filename(file_names)
-            monthly_file = os.path.join(monthly_dir, file_name)
-            symbol_df.to_csv(monthly_file, index=False)
-
-            if not validation.validate_aggregate_file(monthly_file,
-                                                      daily_files):
-                utils.remove_file(monthly_file)
-                msg = "Data in {} differs from the daily files".format(
-                    monthly_file)
-                logger.error(msg)
-                slack_notification(msg, __name__)
-                continue
-
-            logger.debug("Saved monthly data %s", monthly_file)
-
-            for file in daily_files:
-                utils.remove_file(file, logger)
-
-
-def _get_all_listed_symbols():
-    """Returns array of all listed symbols.
-    http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv
-    """
-    current_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
-    symbols_file = os.path.realpath(
-        os.path.join(current_dir, "cboesymboldir2.csv"))
-    symbols_df = pd.read_csv(symbols_file, skiprows=1)
-    return symbols_df["Stock Symbol"].array
-
-
-def concatenate_files(files):
-    """Returns a dataframe of the concatenated data from `files`."""
-    df_generator = (pd.read_csv(file) for file in sorted(files))
-    return pd.concat(df_generator, ignore_index=True)
-
-
-def _form_data():
-    """Return validation form data"""
-    homepage = requests.get(url)
-    soup = BeautifulSoup(homepage.content, "lxml")
-    data = {
-        "__VIEWSTATE": soup.select_one("#__VIEWSTATE")["value"],
-        "__EVENTVALIDATION": soup.select_one("#__EVENTVALIDATION")["value"]
-    }
-    return data
-
-
-def _save_data(symbol, symbol_data):
-    """Saves the contents of `symbol_data` to
-    `$SAVE_DATA_PATH/cboe/{symbol}_daily/{symbol}_{%date}.csv`
-    """
-    filename = date.today().strftime(symbol + "_%Y%m%d.csv")
-
-    save_data_path = utils.get_save_data_path()
-    symbol_dir = os.path.join(save_data_path, "cboe", symbol + "_daily")
-
-    if not os.path.exists(symbol_dir):
-        os.makedirs(symbol_dir)
-        logger.debug("Symbol dir %s created", symbol_dir)
-    file_path = os.path.join(symbol_dir, filename)
-
-    if os.path.exists(file_path) and validation.file_hash_matches_data(
-            file_path, symbol_data):
-        logger.debug("File %s already downloaded", file_path)
-    else:
-        daily_df = _wrangle_data(symbol, symbol_data)
-        daily_df.to_csv(file_path, index=False)
-        logger.debug("Saved daily symbol data as %s", file_path)
-
-
-def _wrangle_data(symbol, symbol_data):
-    """Returns a properly formated (_tidy_) dataframe"""
-    string_data = StringIO(symbol_data)
-    first_line = string_data.readline()
-    spot_price = float(first_line.split(",")[-2])
-    quote_date = date.today().strftime("%m/%d/%Y")
-
-    data = pd.read_csv(string_data, skiprows=1)
-    call_columns = [
-        "Calls", "Expiration Date", "Strike", "Last Sale", "Net", "Bid", "Ask",
-        "Vol", "Open Int", "IV", "Delta", "Gamma"
-    ]
-    calls = data[call_columns]
-
-    put_columns = [
-        "Puts", "Expiration Date", "Strike", "Last Sale.1", "Net.1", "Bid.1",
-        "Ask.1", "Vol.1", "Open Int.1", "IV.1", "Delta.1", "Gamma.1"
-    ]
-    puts = data[put_columns]
-
-    renamed_columns = [
-        "optionroot", "expiration", "strike", "last", "net", "bid", "ask",
-        "volume", "openinterest", "impliedvol", "delta", "gamma"
-    ]
-    calls.columns = renamed_columns
-    calls.insert(loc=1, column="type", value="call")
-    puts.columns = renamed_columns
-    puts.insert(loc=1, column="type", value="put")
-
-    merged = pd.concat([calls, puts])
-    merged.insert(loc=0, column="underlying", value=symbol)
-    merged.insert(loc=1, column="underlying_last", value=spot_price)
-    merged.insert(loc=2, column="exchange", value="CBOE")
-    merged.insert(loc=6, column="quotedate", value=quote_date)
-
-    return merged
-
-
-def _monthly_grouper(filename):
-    """Returns `{year}{month}` string. Used to group files by month."""
-    basename = filename.split(".")[0]
-    file_date = basename.split("_")[1]
-    return file_date[:-2]
-
-
-def _monthly_filename(filenames):
-    """Returns filename of monthly aggregate file in the form
-    `{symbol}_{start_date}_to_{end_date}.csv`
-    """
-    sorted_files = list(sorted(filenames))
-    first_file = sorted_files[0]
-    last_file = sorted_files[-1]
-    last_day = last_file.split(".")[0][-8:]  # Get only the date
-    file_name = first_file.split(".")[0] + "_to_" + last_day + ".csv"
-    return file_name
@@ -1,66 +0,0 @@
-# CBOE data scraper
-# Requires Selenium and a headless Chrome driver
-
-import tempfile
-import time
-import os
-import shutil
-from datetime import date
-from selenium import webdriver
-
-
-class CBOE():
-    """CBOE data downloader."""
-    url = "http://www.cboe.com/delayedquote/quote-table-download"
-
-    def __init__(self):
-        self.data_path = self._get_data_path()
-        self.tmp_dir = tempfile.TemporaryDirectory()
-        self.driver = self._initilize_driver(self.tmp_dir.name)
-
-    def _get_data_path():
-        path = os.getenv("OPTIONS_DATA_PATH")
-        if not path:
-            raise EnvironmentError("Environment variable $OPTIONS_DATA_PATH not set")
-        return os.path.expanduser(path)
-
-    def _initilize_driver(download_dir):
-        """Initilizes the Chrome driver to silently download files
-        to a temporary directory.
-        """
-        options = webdriver.ChromeOptions()
-        options.add_argument("headless")
-        options.add_argument("disable-gpu")
-
-        driver = webdriver.Chrome(options=options)
-        driver.command_executor._commands["send_command"] = (
-            "POST",
-            "/session/$sessionId/chromium/send_command"
-        )
-        params = {
-            "cmd": "Page.setDownloadBehavior",
-            "params": {
-                "behavior": "allow",
-                "downloadPath": download_dir
-            }
-        }
-        driver.execute("send_command", params)
-        driver.implicitly_wait(10)
-        return driver
-
-    def fetch_data(self, symbols):
-        """Fetches options data for a given list of symbols"""
-        self.driver.get(CBOE.url)
-        for symbol in symbols:
-            ticker = self.driver.find_element_by_css_selector("input#txtTicker")
-            ticker.send_keys(symbol)
-            submit = self.driver.find_element_by_css_selector("input#cmdSubmit")
-            submit.click()
-            time.sleep(15)  # Horrible hack
-            download_path = os.path.join(self.tmp_dir.name, "quotedata.dat")
-            renamed_file = date.today().strftime(symbol + "_%Y%m%d.csv")
-            full_path = os.path.join(self.data_path, renamed_file)
-            shutil.move(download_path, full_path)
-
-    def __del__(self):
-        self.tmp_dir.cleanup()
@@ -1,39 +0,0 @@
-[loggers]
-keys=root,tiingo,cboe
-
-[handlers]
-keys=consoleHandler,fileHandler
-
-[formatters]
-keys=simpleFormatter
-
-[logger_root]
-level=ERROR
-handlers=consoleHandler
-
-[logger_tiingo]
-level=DEBUG
-handlers=consoleHandler,fileHandler
-qualname=data_scraper.tiingo
-propagate=0
-
-[logger_cboe]
-level=DEBUG
-handlers=consoleHandler,fileHandler
-qualname=data_scraper.cboe
-propagate=0
-
-[handler_consoleHandler]
-class=StreamHandler
-level=ERROR
-formatter=simpleFormatter
-args=(sys.stdout,)
-
-[handler_fileHandler]
-class=handlers.RotatingFileHandler
-level=DEBUG
-formatter=simpleFormatter
-args=("data_scraper.log", "a", 3000000, 10)
-
-[formatter_simpleFormatter]
-format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
@@ -1,59 +0,0 @@
-import logging
-from datetime import datetime
-from enum import Enum
-
-import requests
-
-from .utils import get_module_config
-
-logger = logging.getLogger(__name__)
-
-Status = Enum("Status", "Success Warning Error")
-
-options = get_module_config("notifications")
-try:
-    webhook = options["slack_webhook"]
-except KeyError as e:
-    logger.error("Missing slack webhook from configuration file")
-    raise e
-
-payload = {
-    "channel": "#algotrading",
-    "username": "Talebot",
-    "icon_emoji": ":taleb:",
-    "attachments": [{
-        "footer": "Talebot"
-    }]
-}
-
-
-def slack_notification(text, scraper, status=Status.Error):
-    """Post Slack notification"""
-
-    if status == Status.Error:
-        emoji = ":thumbsdown: "
-        title = "data_scraper error"
-        color = "#B22222"
-    else:
-        title = "data_scraper status report"
-        if status == Status.Success:
-            emoji = ":thumbsup: "
-            color = "#49C39E"
-        else:
-            emoji = ":warning: "
-            color = "#EDB625"
-    msg = emoji + text
-
-    payload["attachments"][0]["fallback"] = msg
-    payload["attachments"][0]["text"] = msg
-    payload["attachments"][0]["color"] = color
-    payload["attachments"][0]["title"] = title
-    payload["attachments"][0]["fields"] = [{"title": scraper}]
-    payload["attachments"][0]["ts"] = datetime.today().timestamp()
-
-    response = requests.post(webhook, json=payload)
-
-    if response.status_code != 200:
-        msg = "Error connecting to Slack {}. Response is:\n{}".format(
-            response.status_code, response.text)
-        logger.error(msg)
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-## This script downloads stock options data, 
-## veryfies zipfile integrity, saves md5 signature 
-## and uploads them to S3.
-## To run, pass a list of files to download:
-## $> ./backup.sh files.txt
-
-TMPDIR=tmp
-NOW=$(date +"%m-%d-%Y-%H%M%S")
-RETRY="${NOW}.txt"
-MD5SUMS=md5sums.txt
-
-mkdir -p $TMPDIR
-
-while read filename
-do
-	echo "Downloading file $filename to $TMPDIR"
-	wget --quiet -P $TMPDIR "ftp://l3_hdall:JKNRH7LYXV@ftp.deltaneutral.com/${filename}"
-
-	newpath="$TMPDIR/$filename"
-	echo "Verifying zipfile $newpath"
-	if unzip -t -q $newpath
-	then
-		echo "File check OK"
-	else
-		echo "ERROR: File check failed for $newfile"
-		echo $filename >> $RETRY
-		rm $newpath
-		continue
-	fi
-
-	echo "Appending md5 sum for $f"
-	md5sum $newpath >> $MD5SUMS
-    
-	echo "Copying $newpath to S3 bucket"
-	rclone copy -v $newpath longueduree:longueduree
-	
-	echo "Deleting $newpath"
-	rm $newpath
-done <$1
@@ -1,104 +0,0 @@
-import logging
-import unittest
-from unittest.mock import patch
-import os
-import shutil
-
-from requests import ConnectionError
-import pandas as pd
-
-from data_scraper import cboe
-
-logging.disable(level=logging.CRITICAL)
-
-
-class TestCBOE(unittest.TestCase):
-    """Tests CBOE data scraper"""
-
-    test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
-    test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
-    cboe_data_path = os.path.join(test_data_path, "cboe")
-    spx_data_path = os.path.join(cboe_data_path, "SPX_March_2019.csv")
-
-    @classmethod
-    def setUpClass(cls):
-        cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
-        os.environ["SAVE_DATA_PATH"] = cls.test_data_path
-
-    @classmethod
-    def tearDownClass(cls):
-        if cls.save_data_path:
-            os.environ["SAVE_DATA_PATH"] = cls.save_data_path
-
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_fetch_spy(self, mocked_notification):
-        """Fetch todays SPY quote"""
-        cboe.fetch_data(["SPY"])
-        spy_dir = os.path.join(TestCBOE.cboe_data_path, "SPY_daily")
-        self.addCleanup(TestCBOE.remove_files, spy_dir)
-
-        if self.assertTrue(os.path.exists(spy_dir)):
-            self.assertTrue(mocked_notification.called)
-            file_name = "SPY_" + pd.Timestamp.today().strftime(
-                "%Y%m%d") + ".csv"
-            file_path = os.path.join(spy_dir, file_name)
-            spy_df = pd.read_csv(file_path, parse_dates=["quotedate"])
-            self.assertTrue(all(spy_df["underlying"] == "SPX"))
-            self.assertEqual(spy_df["quotedate"].nunique(), 1)
-            counts = spy_df["type"].value_counts()
-            self.assertEqual(counts["put"] + counts["call"], len(spy_df))
-
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_fetch_invalid_symbol(self, mocked_notification):
-        """Fetching invalid symbol should send notification"""
-        cboe.fetch_data(["FOOBAR"])
-        self.assertTrue(mocked_notification.called)
-
-    @patch("data_scraper.cboe.url", new="http://www.aldkfjaskldfjsa.com")
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_no_connection(self, mocked_notification):
-        """Raise ConnectionError and send notification when host is unreachable"""
-        with self.assertRaises(ConnectionError):
-            cboe.fetch_data(["SPX"])
-            self.assertTrue(mocked_notification.called)
-
-    @patch("data_scraper.cboe.utils.remove_file", return_value=None)
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_data_aggregation(self, mocked_notification, mocked_remove):
-        """Test data aggregation happy path"""
-        cboe.aggregate_monthly_data(["SPX"])
-        aggregate_file = os.path.join(TestCBOE.cboe_data_path, "SPX",
-                                      "SPX_20190301_to_20190329.csv")
-        self.addCleanup(TestCBOE.remove_files, os.path.dirname(aggregate_file))
-        self.assertTrue(mocked_remove.called)
-        self.assertFalse(mocked_notification.called)
-
-        if self.assertTrue(os.path.exists(aggregate_file)):
-            spx_df = pd.read_csv(TestCBOE.spx_data_path)
-            aggregate_df = pd.read_csv(aggregate_file)
-            self.assertTrue(spx_df.equals(aggregate_df))
-
-    @patch("data_scraper.cboe.utils.remove_file", return_value=None)
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_aggregate_missing_days(self, mocked_notification, mocked_remove):
-        """Data aggregation should send notification when there are missing days"""
-        cboe.aggregate_monthly_data(["GOOG"])
-        self.assertTrue(mocked_notification.called)
-        self.assertFalse(mocked_remove.called)
-
-    @patch("data_scraper.cboe.utils.remove_file", return_value=None)
-    @patch("data_scraper.cboe.slack_notification", return_value=None)
-    def test_aggregate_invalid_symbol(self, mocked_notification,
-                                      mocked_remove):
-        """Data aggregation should fail and send notification on invalid symbol"""
-        cboe.aggregate_monthly_data(["FOOBAR"])
-        self.assertTrue(mocked_notification.called)
-        self.assertFalse(mocked_remove.called)
-
-    def remove_files(file_path):
-        if os.path.exists(file_path):
-            shutil.rmtree(file_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
@@ -1,75 +0,0 @@
-import logging
-import unittest
-from unittest.mock import patch
-import os
-import shutil
-
-import pandas as pd
-
-from data_scraper import tiingo
-
-logging.disable(level=logging.CRITICAL)
-
-
-class TestTiingo(unittest.TestCase):
-    """Tests Tiingo data scraper"""
-
-    test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
-    test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
-    tiingo_data_path = os.path.join(test_data_path, "tiingo")
-
-    @classmethod
-    def setUpClass(cls):
-        assert "TIINGO_API_KEY" in os.environ, "$TIINGO_API_KEY env variable must be set"
-        cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
-        os.environ["SAVE_DATA_PATH"] = cls.test_data_path
-
-    @classmethod
-    def tearDownClass(cls):
-        if cls.save_data_path:
-            os.environ["SAVE_DATA_PATH"] = cls.save_data_path
-
-    @patch("data_scraper.tiingo.slack_notification", return_value=None)
-    def test_fetch_gld(self, mocked_notification):
-        """Fetch GLD data"""
-        tiingo.fetch_data(["GLD"])
-        gld_dir = os.path.join(TestTiingo.tiingo_data_path, "GLD")
-        self.addCleanup(TestTiingo.remove_files, gld_dir)
-
-        if self.assertTrue(os.path.exists(gld_dir)):
-            self.assertTrue(mocked_notification.called)
-            file_name = "GLD_" + pd.Timestamp.today().strftime(
-                "%Y%m%d") + ".csv"
-            file_path = os.path.join(gld_dir, file_name)
-            gld_df = pd.read_csv(file_path)
-            self.assertTrue(all(gld_df["symbol"] == "GLD"))
-            expected_columns = [
-                "symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
-                "adjVolume", "close", "divCash", "high", "low", "open",
-                "splitFactor", "volume"
-            ]
-            self.assertEqual(gld_df.columns, expected_columns)
-
-    @patch("data_scraper.tiingo.slack_notification", return_value=None)
-    def test_fetch_invalid_symbol(self, mocked_notification):
-        """Fetching invalid symbol data should send notification"""
-        tiingo.fetch_data(["FOOBAR"])
-        self.assertTrue(mocked_notification.called)
-
-    @patch("data_scraper.tiingo.pdr.get_data_tiingo")  # mock pandas_datareader
-    @patch("data_scraper.tiingo.slack_notification", return_value=None)
-    def test_no_connection(self, mocked_notification, mocked_pdr):
-        """Raise ConnectionError and send notification when host is unreachable"""
-        mocked_pdr.side_effect = ConnectionError("This is a test")
-
-        with self.assertRaises(ConnectionError):
-            tiingo.fetch_data(["IBM"])
-            self.assertTrue(mocked_notification.called)
-
-    def remove_files(file_path):
-        if os.path.exists(file_path):
-            shutil.rmtree(file_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
@@ -1,122 +0,0 @@
-import logging
-import os
-from datetime import date
-
-import pandas as pd
-import pandas_datareader as pdr
-
-from . import utils, validation
-from .notifications import slack_notification, Status
-
-logger = logging.getLogger(__name__)
-
-# Default symbols to fetch
-assets = [
-    "VTSMX", "VFINX", "VIVAX", "VIGRX", "VIMSX", "VMVIX", "VMGIX", "NAESX",
-    "VISVX", "VISGX", "BRSIX", "VGTSX", "VTMGX", "VFSVX", "EFV", "VEURX",
-    "VPACX", "VEIEX", "VFISX", "VFITX", "IEF", "VUSTX", "VBMFX", "VIPSX",
-    "PIGLX", "PGBIX", "VFSTX", "LQD", "VWESX", "VWEHX", "VWSTX", "VWITX",
-    "VWLTX", "VGSIX", "GLD", "PSAU", "GSG"
-]
-
-
-def fetch_data(symbols=assets):
-    """Fetches historical data for given symbols from Tiingo"""
-    api_key = utils.get_environment_var("TIINGO_API_KEY")
-
-    symbols = [symbol.upper() for symbol in symbols]
-    done, failed = [], []
-
-    for symbol in symbols:
-        try:
-            symbol_data = pdr.get_data_tiingo(symbol, api_key=api_key)
-        except ConnectionError as ce:
-            msg = "Unable to connect to api.tiingo.com when fetching symbol {}".format(
-                symbol)
-            logger.error(msg, exc_info=True)
-            slack_notification(msg, __name__)
-            raise ce
-        except TypeError:
-            # pandas_datareader raises TypeError when fetching invalid symbol
-            failed.append(symbol)
-            msg = "Attempted to fetch invalid symbol {}".format(symbol)
-            logger.error(msg, exc_info=True)
-            slack_notification(msg, __name__)
-        except Exception:
-            msg = "Error fetching symbol {}".format(symbol)
-            logger.error(msg, exc_info=True)
-            slack_notification(msg, __name__)
-        else:
-            _save_data(symbol, symbol_data.reset_index())
-            done.append(symbol)
-
-    if len(done) > 0:
-        msg = "Successfully scraped symbols: " + ", ".join(done)
-        slack_notification(msg, __name__, status=Status.Success)
-    if len(failed) > 0:
-        msg = "Failed to scrape symbols: " + ", ".join(failed)
-        slack_notification(msg, __name__, status=Status.Warning)
-
-
-def _save_data(symbol, symbol_df):
-    """Saves the contents of `symbol_df` to
-    `$SAVE_DATA_PATH/tiingo/{symbol}/{symbol}_{%date}.csv`"""
-    filename = date.today().strftime(symbol + "_%Y%m%d.csv")
-
-    save_data_path = utils.get_save_data_path()
-    symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
-
-    if not os.path.exists(symbol_dir):
-        os.makedirs(symbol_dir)
-        logger.debug("Symbol dir %s created", symbol_dir)
-    file_path = os.path.join(symbol_dir, filename)
-
-    if os.path.exists(file_path) and validation.file_hash_matches_data(
-            file_path, symbol_df.to_csv()):
-        logger.debug("File %s already downloaded", file_path)
-    else:
-        expected_columns = [
-            "symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
-            "adjVolume", "close", "divCash", "high", "low", "open",
-            "splitFactor", "volume"
-        ]
-
-        if validation.validate_historical_dates(
-                symbol, symbol_df["date"]) and validation.validate_columns(
-                    expected_columns, symbol_df.columns):
-            merged_df = _merge(symbol, symbol_df)
-            pattern = symbol + "_*"
-            utils.remove_files(symbol_dir, pattern, logger)
-
-            merged_df.to_csv(file_path, index=False)
-            logger.debug("Saved symbol data as %s", file_path)
-
-
-def _merge(symbol, symbol_df):
-    """Merge `symbol_df` with previous data file."""
-
-    save_data_path = utils.get_save_data_path()
-    symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
-
-    files = os.listdir(symbol_dir)
-    if len(files) == 0:
-        return symbol_df
-
-    last_file = sorted(files)[-1]
-    old_df = pd.read_csv(os.path.join(symbol_dir, last_file),
-                         parse_dates=["date"],
-                         index_col="date")
-    symbol_df.index = symbol_df["date"]
-
-    diffs = old_df.index.difference(symbol_df.index)
-
-    if diffs.empty:
-        return symbol_df
-    else:
-        msg = """Old data included dates not present in scraped file for symbol {}
-            Merged new data with previous file.""".format(symbol)
-        logger.error(msg)
-        slack_notification(msg, __name__)
-        merged_df = pd.concat([symbol_df, old_df.loc[diffs]])
-        merged_df.sort_index(inplace=True)
-        return merged_df.reset_index()
@@ -1,52 +0,0 @@
-import glob
-import json
-import os
-
-
-def get_environment_var(variable):
-    """Returns the value of a given environment variable.
-    Raises `EnvironmentError` if not found.
-    """
-    if variable not in os.environ:
-        raise EnvironmentError(
-            "Environment variable {} not set".format(variable))
-
-    return os.path.expanduser(os.environ[variable])
-
-
-def get_save_data_path():
-    """Reads data path from environment variable `$SAVE_DATA_PATH`.
-    If it is not set, defaults to `./data/scraped`.
-    """
-    try:
-        data_dir = get_environment_var("SAVE_DATA_PATH")
-    except EnvironmentError:
-        data_dir = "data/scraped"
-        os.makedirs(data_dir)
-
-    return data_dir
-
-
-def get_module_config(module, config_file="data_scraper.conf"):
-    """Parses configuration file and returns the configuration options
-    for the chosen `module`.
-    """
-    options = {}
-    if os.path.exists(config_file):
-        with open(config_file) as file:
-            config = json.load(file)
-            options = config.get(module, {})
-
-    return options
-
-
-def remove_files(data_dir, pattern, logger=None):
-    """Removes files in `data_dir` that match `pattern`"""
-    for file in glob.glob(os.path.join(data_dir, pattern)):
-        remove_file(file, logger)
-
-
-def remove_file(file, logger=None):
-    os.remove(file)
-    if logger:
-        logger.debug("Removed file %s", file)
@@ -1,93 +0,0 @@
-import logging
-import hashlib
-
-import pandas as pd
-import pandas_market_calendars as mcal
-
-from . import cboe
-from .notifications import slack_notification
-
-logger = logging.getLogger(__name__)
-
-
-def file_hash_matches_data(file_path, data):
-    file_hash = file_md5(file_path)
-    data_md5 = hashlib.md5(data.encode()).hexdigest()
-    return file_hash == data_md5
-
-
-def file_md5(file, chunk_size=4096):
-    md5 = hashlib.md5()
-    with open(file, "rb") as f:
-        for chunk in iter(lambda: f.read(chunk_size), b""):
-            md5.update(chunk)
-
-    return md5.hexdigest()
-
-
-def validate_dates_in_month(symbol, date_range):
-    """Compares `date_range` (month) with NYSE trading calendar.
-    Returns `True` if there are no missing days.
-    """
-    # NYSE and CBOE have the same trading calendar
-    # https://www.nyse.com/markets/hours-calendars
-    # http://cfe.cboe.com/about-cfe/holiday-calendar
-    nyse = mcal.get_calendar("NYSE")
-    first_date = date_range[0]
-    period = pd.Period(year=first_date.year, month=first_date.month, freq="M")
-    trading_days = nyse.valid_days(start_date=period.start_time,
-                                   end_date=period.end_time)
-
-    # Remove timezone info
-    trading_days = trading_days.tz_convert(tz=None)
-    missing_days = trading_days.difference(date_range)
-
-    if not missing_days.empty:
-        logger.error("Error validating monthly dates. Missing: %s",
-                     missing_days)
-    return missing_days.empty
-
-
-def validate_historical_dates(symbol, date_range):
-    """Compares `date_range` (any time range) with trading calendar.
-    Returns `True` if there are no missing days.
-    """
-    nyse = mcal.get_calendar("NYSE")
-    start_date = date_range.min()
-    end_date = date_range.max()
-    trading_days = nyse.valid_days(start_date=start_date, end_date=end_date)
-
-    # Remove timezone info
-    trading_days = trading_days.tz_convert(tz=None)
-    date_range = date_range.dt.tz_convert(tz=None)
-    missing_days = trading_days.difference(date_range)
-
-    if not missing_days.empty:
-        logger.error("Error validating historical dates. Missing: %s",
-                     missing_days)
-
-    return missing_days.empty
-
-
-def validate_columns(expected, received):
-    """Verify that the `received` columns scraped are equal to `expected`"""
-    valid = all(expected == received)
-
-    if not valid:
-        expected_cols = ", ".join(expected)
-        received_cols = ", ".join(received)
-        msg = """Columns expected differ from those received.
-            Expected: {}
-            Received: {}""".format(expected_cols, received_cols)
-        logger.error(msg)
-        slack_notification(msg, __name__)
-
-    return valid
-
-
-def validate_aggregate_file(aggregate_file, daily_files):
-    """Compares `aggregate_file` with the data from `daily_files`."""
-    aggregate_df = pd.read_csv(aggregate_file)
-    recreated_df = cboe.concatenate_files(daily_files)
-
-    return aggregate_df.equals(recreated_df)
@@ -1,19 +0,0 @@
-FROM debian:latest
-MAINTAINER Juan Pablo Amoroso <jamoroso@lambdaclass.com>
-
-RUN apt-get update
-RUN apt-get install -y python3 python3-pip make cron build-essential pkg-config openssl libssl-dev
-RUN python3 -m pip install pipenv
-ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
-
-COPY . /finance
-WORKDIR /finance
-
-RUN make init
-
-COPY ./docker/data_scraper/crontab /etc/cron.d/scraper-cron
-COPY ./docker/data_scraper/entrypoint.sh /usr/bin/entrypoint.sh
-COPY ./docker/data_scraper/run-task.sh /usr/bin/run-task
-RUN chmod 0644 /etc/cron.d/scraper-cron && crontab /etc/cron.d/scraper-cron
-ENTRYPOINT ["entrypoint.sh"]
-CMD ["cron", "-f"]
@@ -1,4 +0,0 @@
-0 19 * * 1-5 root cd /finance && run-task make scrape scraper=cboe
-0 19 * * 1-5 root cd /finance && run-task make scrape scraper=tiingo
-0 0 1 * * root cd /finance && run-task make aggregate && run-task make backup
-
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-# cron does not read env, save it here
-env > /root/env
-exec "$@"
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-# import env vars that were written in entrypoint
-env - `cat /root/env` $@
@@ -1,8 +0,0 @@
-version: "3"
-
-services:
-  scraper:
-    image: data_scraper:latest
-    container_name: data_scraper
-    volumes:
-      - ~/finance/data:/finance/data