Merge pull request #70 from lambdaclass/organize-repo

Organized repo
This commit is contained in:
Juan Pablo Amoroso
2020-03-20 17:05:07 -03:00
committed by GitHub
56 changed files with 69 additions and 14173 deletions
+8 -5
View File
@@ -1,4 +1,4 @@
.PHONY: install env test test_notebook
.PHONY: install env test test_notebook lint notebook help
.DEFAULT_GOAL := help
install: ## Create environment and install dependencies
@@ -7,6 +7,13 @@ install: ## Create environment and install dependencies
env: ## Run pipenv shell
pipenv shell
notebook: ## Run Jupyter notebook
pipenv run jupyter notebook
lint: ## Run linter and format checker (flake8 & yapf)
pipenv run flake8 backtester
pipenv run yapf --diff --recursive backtester/
test: ## Run tests
pipenv run python -m pytest -v backtester
@@ -14,10 +21,6 @@ test_notebook: ## Run jupyter notebook test
pipenv run jupyter nbconvert nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 \
backtester/examples/backtester_example.ipynb --stdout > /dev/null
lint: ## Run linter and format checker (flake8 & yapf)
pipenv run flake8 backtester
pipenv run yapf --diff --recursive backtester/
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |\
awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+61 -45
View File
@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.com/lambdaclass/options_backtester.svg?branch=master)](https://travis-ci.com/lambdaclass/options_backtester)
Options Backtester
==============================
@@ -13,73 +15,87 @@ Simple backtester to evaluate and analyse options strategies over historical pri
## Requirements
- Python >= 3.5
- Python >= 3.6
- pipenv
## Setup
For backtesting, set `$OPTIONS_DATA_PATH` to the appropriate directory where the data is located. All file paths parsed by the backtester will be relative to this directory.
To use the data scraper the following environment variables need to be set:
- `$SAVE_DATA_PATH`: where the data will be saved to (default is `./data/scraped`)
- `$TIINGO_API_KEY`: used to fetch data from [Tiingo](https://api.tiingo.com)
- `$S3_BUCKET`: name of the S3 bucket to backup data
- `$AWS_ACCESS_KEY_ID`: AWS acces key id
- `$AWS_SECRET_ACCESS_KEY`: AWS secret key
You can configure the data scraper by editing the configuration file `data_scraper.conf` (json-formated).
Sample file:
```json
{
"cboe": {
"mute_notifications": ["BFB", "CBSA"]
},
"notifications": {
"slack_webhook": "https://hooks.slack.com/services/MY_WORKSPACE_WEBHOOK"
}
}
```
**HINT**: store environment variables in an `.env` file and pipenv will load them automatically when using `make env`.
## Usage
### Create environment and download dependencies
Install [pipenv](https://pipenv.pypa.io/en/latest/)
```shell
$> make init
$> pip install pipenv
```
### Activate environment
Create environment and download dependencies
```shell
$> make install
```
Activate environment
```shell
$> make env
```
### Run tests
Run [Jupyter](https://jupyter.org) notebook
```shell
$> make notebook
```
Run tests
```shell
$> make test
```
### Scrape data (supported scrapers: CBOE, Tiingo)
## Usage
```shell
$> make scrape scraper=cboe
### Example:
$> make scrape scraper=tiingo
```
### Run backtester with benchmark strategy
```shell
$> make bench
We'll run a backtest of a stock portfolio holding `$AAPL` and `$GOOG`, and simultaneously buying 10% OTM calls and puts on `$SPX` ([long strangle](https://www.investopedia.com/terms/s/strangle.asp)).
We'll allocate 97% of our capital to stocks and the rest to options, and do a rebalance every month.
```python
from backtester import Backtest, Type, Direction, Stock
from backtester.strategy import Strategy, StrategyLeg
from backtester.datahandler import HistoricalOptionsData, TiingoData
# Stocks data
stocks_data = TiingoData('stocks.csv')
stocks = [Stock(symbol='AAPL', percentage=0.5), Stock(symbol='GOOG', percentage=0.5)]
# Options data
options_data = HistoricalOptionsData('options.h5', key='/SPX')
schema = options_data.schema
# Long strangle
leg_1 = StrategyLeg('leg_1', schema, option_type=Type.PUT, direction=Direction.BUY)
leg_1.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last <=
1.1 * schema.strike)
leg_1.exit_filter = (schema.dte <= 30)
leg_2 = StrategyLeg('leg_2', schema, option_type=Type.CALL, direction=Direction.BUY)
leg_2.entry_filter = (schema.underlying == 'SPX') & (schema.dte >= 60) & (schema.underlying_last >=
0.9 * schema.strike)
leg_2.exit_filter = (schema.dte <= 30)
strategy = Strategy(schema)
strategy.add_legs([leg_1, leg_2])
allocation = {'stocks': .97, 'options': .03}
initial_capital = 1_000_000
bt = Backtest(allocation, initial_capital)
bt.stocks = stocks
bt.stocks_data = stocks_data
bt.options_data = options_data
bt.options_strategy = strategy
bt.run(rebalance_freq=1)
```
You can explore more usage examples in the Jupyter [notebooks](backtester/examples/).
## Recommended reading
-2
View File
@@ -1,2 +0,0 @@
.env
target/
-1865
View File
File diff suppressed because it is too large Load Diff
-17
View File
@@ -1,17 +0,0 @@
[package]
name = "alert"
version = "0.1.0"
authors = ["Amin Arria <arria.amin@gmail.com>"]
edition = "2018"
[dependencies]
fantoccini = "0.11.6"
futures = "0.1.25"
tokio = "0.1.17"
webdriver = "0.39.0"
serde_json = "1.0.39"
select = "0.4.2"
reqwest = "0.9.12"
diesel = { version = "1.4.2", features = ["sqlite", "chrono"] }
dotenv = "0.10"
chrono = "0.4"
-26
View File
@@ -1,26 +0,0 @@
FROM debian:latest
MAINTAINER Amin Arria <amin.arria@lambdaclass.com>
WORKDIR /root
RUN apt-get update
RUN apt-get install -y curl make build-essential pkg-config openssl libssl-dev
RUN apt-get -y install firefox-esr
RUN curl -L -O 'https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz'
RUN tar -vxf geckodriver-v0.24.0-linux64.tar.gz
RUN apt-get install -y sqlite3 libsqlite3-dev
RUN curl https://sh.rustup.rs -sSf > rustup
RUN sh rustup -y
ENV PATH=/root/.cargo/bin:$PATH
COPY . watchdog/
WORKDIR watchdog
RUN make install_diesel_cli
RUN make migration
RUN make release
RUN apt-get clean
RUN rm -rf /var/lib/apt/lists/*
CMD (/root/geckodriver &) && ./target/release/alert
-13
View File
@@ -1,13 +0,0 @@
.PHONY: migration run install_diesel_cli release
migration:
diesel migration run
run:
cargo run
install_diesel_cli:
cargo install diesel_cli --no-default-features --features sqlite chrono
release:
cargo build --release
-34
View File
@@ -1,34 +0,0 @@
# Financial Watchdog
## Requirements
- [WebDriver](https://www.w3.org/TR/webdriver1/) process running on port 4444 (e.g., [geckodriver](https://github.com/mozilla/geckodriver/releases), [chromedriver](https://sites.google.com/a/chromium.org/chromedriver/))
- SQLite 3.19.3
- Rust 1.33
- [Diesel CLI 1.4](https://crates.io/crates/diesel_cli)
When installing `diesel_cli` if you run into any errors try installing with our needed features only:
```
$> make install_diesel_cli
```
## Setup
Create a `.env` with the following values:
- `DATABASE_URL`: The filepath where the DB will be stored (SQLite)
- `SLACK_WEBHOOK_URL`: URL for Slack's webhook
Create the database by running the migrations:
```
$> make migration
```
## Running
Run it
```
$> make run
```
-5
View File
@@ -1,5 +0,0 @@
# For documentation on how to configure this file,
# see diesel.rs/guides/configuring-diesel-cli
[print_schema]
file = "src/schema.rs"
View File
@@ -1 +0,0 @@
DROP TABLE dollar;
@@ -1,16 +0,0 @@
CREATE TABLE dollar (
id INTEGER PRIMARY KEY,
buy DOUBLE NOT NULL,
buy_amount INTEGER NOT NULL,
sell DOUBLE NOT NULL,
sell_amount INTEGER NOT NULL,
last DOUBLE NOT NULL,
var DOUBLE NOT NULL,
varper DOUBLE NOT NULL,
volume INTEGER NOT NULL,
adjustment DOUBLE NOT NULL,
min DOUBLE NOT NULL,
max DOUBLE NOT NULL,
oin INTEGER NOT NULL,
created_at DATETIME NOT NULL
);
@@ -1 +0,0 @@
DROP TABLE alerts;
@@ -1,8 +0,0 @@
CREATE TABLE alerts (
id INTEGER PRIMARY KEY,
created_at DATETIME NOT NULL,
asset VARCHAR(20) NOT NULL,
previous_value DOUBLE NOT NULL,
current_value DOUBLE NOT NULL,
active BOOLEAN NOT NULL
);
-99
View File
@@ -1,99 +0,0 @@
extern crate reqwest;
extern crate serde_json;
use diesel::sqlite::SqliteConnection;
use serde_json::json;
use std::env;
use crate::models::{Dollar, Alert};
use crate::storage;
pub fn check_dollar(conn: &SqliteConnection, dollar: &Dollar) {
let dollar_close = storage::get_dollar_on_close(conn);
let closing_prc_change =
match &dollar_close {
Some(dollar_close) => {
(dollar.last - dollar_close.last)/dollar_close.last * 100.0
},
None => 0.0
};
match storage::get_dollar_alert(conn) {
Some(dollar_alert) => {
let alert_prc_change = (dollar.last - dollar_alert.current_value)/dollar_alert.current_value * 100.0;
// TODO: This value should be set by arguments to binary or by config files
if alert_prc_change.abs() > 2.0 {
if closing_prc_change.abs() > 3.0 {
// If we couldn't unwrap closing_prc_change == 0.0
let dollar_close = dollar_close.unwrap();
let new_alert = Alert::new("dollar".to_string(), dollar_alert.previous_value, dollar.last);
storage::replace_alert(conn, &dollar_alert, &new_alert);
send_updated_alert(dollar_close.last, dollar_alert.current_value, dollar.last, alert_prc_change, closing_prc_change);
} else {
storage::deactivate_alert(conn, &dollar_alert);
send_resolved_alert();
}
}
}
None => {
// TODO: This value should be set by arguments to binary or by config files
if closing_prc_change.abs() > 3.0 {
// If we couldn't unwrap closing_prc_change == 0.0
let dollar_close = dollar_close.unwrap();
let alert = Alert::new(String::from("dollar"), dollar_close.last, dollar.last);
storage::store_alert(conn, &alert);
send_new_alert(dollar_close.last, dollar.last, closing_prc_change);
}
}
};
}
// payload = {
// "username": "Financial Watchdog",
// "icon_emoji": ":money_with_wings:",
// "attachments": [{
// "color": color,
// "title": title,
// "text": text,
// "fallback": text
// "footer": "Financial Watchdog",
// }]
// }
fn send_new_alert(price_close: f64, price_current: f64, prc_change: f64) {
let msg = format!("Closing price: {:.2}\nCurrent price: {:.2}\nChanged: {:+.2}%", price_close, price_current, prc_change);
send_alert_slack("#B22222", "Price jump from closing", &msg);
// TODO: send_alert_mail(prc_change);
}
fn send_resolved_alert() {
let msg = format!("Dollar price from closing price is back below threshold");
send_alert_slack("#3873AD", "Price back to normal", &msg);
// TODO: send_alert_mail(prc_change);
}
fn send_updated_alert(price_close: f64, price_previous: f64, price_current: f64, prc_change: f64, prc_change_close: f64) {
let msg = format!("Closing price: {:.2}\nPrevious price: {:.2}\nCurrent price: {:.2}\nChange from previous: *{:+.2}%*\nChange from close: {:+.2}%", price_close, price_previous, price_current, prc_change, prc_change_close);
send_alert_slack("#f1a031", "[UPDATE] Price jump", &msg);
// TODO: send_alert_mail(prc_change);
}
fn send_alert_slack(color: &str, title: &str, msg: &str) {
let payload = json!({
"username": "Financial Watchdog",
"icon_emoji": ":money_with_wings:",
"attachments": [{
"color": color,
"title": title,
"text": msg,
"fallback": msg,
"footer": "Financial Watchdog"
}]
})
.to_string();
// We can unwrap safely because env variables are checked in main.rs::init_env()
let slack_webhook_url = env::var("SLACK_WEBHOOK_URL").unwrap();
let client = reqwest::Client::new();
client.post(&slack_webhook_url)
.header(reqwest::header::CONTENT_TYPE, "application/json")
.body(payload)
.send()
.expect("Failed sending slack alert");
}
-37
View File
@@ -1,37 +0,0 @@
#[macro_use]
extern crate diesel;
extern crate dotenv;
extern crate chrono;
pub mod schema;
pub mod models;
mod scrape;
mod storage;
mod alert;
use dotenv::dotenv;
use std::{thread, time, env};
fn main() {
init_env();
loop {
std::thread::spawn(|| {
let dbconn = crate::storage::establish_connection();
let value = crate::scrape::scrape();
crate::alert::check_dollar(&dbconn, &value);
crate::storage::store(&dbconn, &value);
});
thread::sleep(time::Duration::from_secs(60));
}
}
fn init_env() {
dotenv().ok();
env::var("DATABASE_URL")
.expect("DATABASE_URL must be set");
env::var("SLACK_WEBHOOK_URL")
.expect("SLACK_WEBHOOK_URL must be set");
}
-71
View File
@@ -1,71 +0,0 @@
use chrono::NaiveDateTime;
use chrono::offset::Utc;
use crate::schema::{dollar, alerts};
#[derive(Queryable, Insertable, Debug)]
#[table_name="dollar"]
pub struct Dollar {
pub id: Option<i32>,
pub buy: f64,
pub buy_amount: i32,
pub sell: f64,
pub sell_amount: i32,
pub last: f64,
pub var: f64,
pub varper: f64,
pub volume: i32,
pub adjustment: f64,
pub min: f64,
pub max: f64,
pub oin: i32,
pub created_at: NaiveDateTime
}
impl Dollar {
pub fn new() -> Dollar {
let created_at = Utc::now().naive_utc();
Dollar {
id: None,
buy: 0.0,
buy_amount: 0,
sell: 0.0,
sell_amount: 0,
last: 0.0,
var: 0.0,
varper: 0.0,
volume: 0,
adjustment: 0.0,
min: 0.0,
max: 0.0,
oin: 0,
created_at: created_at
}
}
}
#[derive(Queryable, Insertable, Debug)]
#[table_name="alerts"]
pub struct Alert {
pub id: Option<i32>,
pub created_at: NaiveDateTime,
pub asset: String,
pub previous_value: f64,
pub current_value: f64,
pub active: bool
}
impl Alert {
pub fn new(asset: String, previous_value: f64, current_value: f64) -> Alert {
let created_at = Utc::now().naive_utc();
Alert {
id: None,
created_at: created_at,
asset: asset,
previous_value: previous_value,
current_value: current_value,
active: true
}
}
}
-34
View File
@@ -1,34 +0,0 @@
table! {
alerts (id) {
id -> Nullable<Integer>,
created_at -> Timestamp,
asset -> Text,
previous_value -> Double,
current_value -> Double,
active -> Bool,
}
}
table! {
dollar (id) {
id -> Nullable<Integer>,
buy -> Double,
buy_amount -> Integer,
sell -> Double,
sell_amount -> Integer,
last -> Double,
var -> Double,
varper -> Double,
volume -> Integer,
adjustment -> Double,
min -> Double,
max -> Double,
oin -> Integer,
created_at -> Timestamp,
}
}
allow_tables_to_appear_in_same_query!(
alerts,
dollar,
);
-102
View File
@@ -1,102 +0,0 @@
extern crate fantoccini;
extern crate futures;
extern crate select;
extern crate serde_json;
extern crate tokio;
extern crate webdriver;
use fantoccini::{Client, Locator};
use futures::future::Future;
use futures::sync::oneshot;
use select::document::Document;
use select::predicate::Class;
use serde_json::json;
use webdriver::capabilities::Capabilities;
use crate::models::Dollar;
pub fn scrape() -> Dollar {
let html = fetch_site();
let document = Document::from(html.as_str());
let mut value = Dollar::new();
for node in document.find(Class("PriceCell")) {
let full_class =
node.attr("class")
.unwrap_or_else(|| panic!("Node without class attribute"));
let class = full_class.split_whitespace().last();
match class {
Some("bsz") => value.buy_amount = parse_i32(node.text()),
Some("bid") => value.buy = parse_f64(node.text()),
Some("ask") => value.sell = parse_f64(node.text()),
Some("asz") => value.sell_amount = parse_i32(node.text()),
Some("lst") => value.last = parse_f64(node.text()),
Some("variation") => value.var = parse_f64(node.text()),
Some("change") => {
value.varper = parse_f64(node.text().trim_end_matches("%").to_string())
}
Some("von") => value.volume = parse_i32(node.text()),
Some("settlementPrice") => value.adjustment = parse_f64(node.text()),
Some("low") => value.min = parse_f64(node.text()),
Some("hgh") => value.max = parse_f64(node.text()),
Some("oin") => value.oin = parse_i32(node.text()),
Some("futureImpliedRate") => {}
Some(class) => {
panic!("Non-matching class: {}", class);
}
None => {
panic!("Node without empty class attribute");
}
}
}
value
}
fn fetch_site() -> String {
// TODO: Set headless capabilities for chromedriver.
let mut cap = Capabilities::new();
let arg = json!({"args": ["-headless"]});
cap.insert("moz:firefoxOptions".to_string(), arg);
let c = Client::with_capabilities("http://localhost:4444", cap);
let (sender, receiver) = oneshot::channel::<String>();
tokio::run(
c.map_err(|e| unimplemented!("failed to connect to WebDriver: {:?}", e))
.and_then(|c| c.goto("https://rofex.primary.ventures/rofex/futuros"))
.and_then(|mut c| c.current_url().map(move |url| (c, url)))
.and_then(|(c, _url)| {
c.wait_for_find(Locator::XPath("((//div[@class='PricePanelRow-row'])[1]//div[@class='PriceCell lst'])[not(contains(., '-'))]/.."))
})
.and_then(|mut e| {
e.html(true)
})
.and_then(|e| match sender.send(e) {
Err(err) => panic!("Error sending fetched site: {}", err),
Ok(()) => Ok(()),
})
.map_err(|e| {
panic!("a WebDriver command failed: {:?}", e);
}),
);
receiver.wait().unwrap()
}
fn parse_i32(text: String) -> i32 {
text.replace(".", "")
.trim()
.parse()
.unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
}
fn parse_f64(text: String) -> f64 {
text.replace(".", "")
.replace(",", ".")
.trim()
.parse()
.unwrap_or_else(|err| panic!("Error parsing \"{}\": {}", text, err))
}
-70
View File
@@ -1,70 +0,0 @@
extern crate diesel;
extern crate dotenv;
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use diesel::result::Error::NotFound;
use chrono::{Utc, NaiveDateTime, NaiveTime};
use std::env;
use crate::models::{Dollar, Alert};
use crate::schema::{dollar, alerts};
pub fn establish_connection() -> SqliteConnection {
// We can unwrap safely because env variables are checked in main.rs::init_env()
let database_url = env::var("DATABASE_URL").unwrap();
SqliteConnection::establish(&database_url)
.expect(&format!("Error connecting to {}", database_url))
}
pub fn store(conn: &SqliteConnection, new_dollar: &Dollar) {
diesel::insert_into(dollar::table)
.values(new_dollar)
.execute(conn)
.expect("Error saving new dollar");
}
pub fn get_dollar_on_close(conn: &SqliteConnection) -> Option<Dollar> {
let yesterday = Utc::today().naive_utc().pred();
let timestamp = NaiveDateTime::new(yesterday, NaiveTime::from_hms(23, 59, 59));
let result = dollar::table.filter(dollar::created_at.lt(timestamp))
.order(dollar::created_at.desc())
.first::<Dollar>(conn);
match result {
Ok(previous) => Some(previous),
Err(NotFound) => None,
Err(err) => panic!("Error getting previous dollar: \n{}", err)
}
}
pub fn store_alert(conn: &SqliteConnection, new_alert: &Alert) {
diesel::insert_into(alerts::table)
.values(new_alert)
.execute(conn)
.expect("Error saving new alert");
}
pub fn get_dollar_alert(conn: &SqliteConnection) -> Option<Alert> {
let result = alerts::table.filter(alerts::asset.eq("dollar").and(alerts::active.eq(true)))
.first::<Alert>(conn);
match result {
Ok(alert) => Some(alert),
Err(NotFound) => None,
Err(err) => panic!("Error getting current alert: \n{}", err)
}
}
pub fn deactivate_alert(conn: &SqliteConnection, alert: &Alert) {
diesel::update(alerts::table.filter(alerts::id.eq(alert.id)))
.set(alerts::active.eq(false))
.execute(conn)
.expect("Error deactivating alert");
}
pub fn replace_alert(conn: &SqliteConnection, old_alert: &Alert, new_alert: &Alert) {
// TODO: Do this operations inside a transaction for atomicity
store_alert(conn, new_alert);
deactivate_alert(conn, old_alert);
}
-3
View File
@@ -1,3 +0,0 @@
from . import datahandler, charts
from .backtester import Backtest
from .portfolio import *
-121
View File
@@ -1,121 +0,0 @@
import pandas as pd
import pyprind
from .portfolio import Portfolio
class Backtest:
def __init__(self, schema, initial_capital=1_000_000):
self.schema = schema
self._portfolio = None
self._data = None
self.initial_capital = initial_capital
@property
def portfolio(self):
return self._portfolio
@portfolio.setter
def portfolio(self, portfolio):
assert isinstance(portfolio, Portfolio)
self._portfolio = portfolio
@property
def data(self):
return self._data
@data.setter
def data(self, data):
self._data = data
def run(self, periods=1, sma_days=None):
"""Runs a backtest and returns a dataframe with the daily balance"""
assert self._data is not None
assert self._portfolio is not None
self.current_capital = 0
self.current_cash = self.initial_capital
self.inventory = pd.DataFrame(columns=['symbol', 'cost', 'qty'])
self.balance = pd.DataFrame()
if sma_days:
self._data.sma(sma_days)
data_iterator = self._data.iter_dates()
first_day = self._data['date'].min()
last_day = self._data['date'].max()
rebalancing_days = pd.date_range(first_day, last_day, freq=str(periods) +
'BMS').to_pydatetime() if periods is not None else []
bar = pyprind.ProgBar(data_iterator.ngroups, bar_char='')
self.balance = pd.DataFrame({
'capital': self.current_cash,
'cash': self.current_cash
},
index=[self._data.start_date - pd.Timedelta(1, unit='day')])
for date, data in data_iterator:
if date == first_day:
self._rebalance_portfolio(data, sma_days)
self._update_balance(date, data)
if date in rebalancing_days:
self._rebalance_portfolio(data, sma_days)
bar.update()
self.balance['% change'] = self.balance['capital'].pct_change()
self.balance['accumulated return'] = (1.0 + self.balance['% change']).cumprod()
return self.balance
def _rebalance_portfolio(self, data, sma_days):
"""Rebalances the portfolio so that the total money is allocated according to the given percentages"""
money_total = self.current_cash + self.current_capital
for asset in self._portfolio.assets:
query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
asset_current = data.query(query)
asset_price = asset_current[self.schema['adjClose']].values[0]
if sma_days is not None:
if asset_current['sma'].values[0] < asset_price:
qty = (money_total * asset.percentage) // asset_price
else:
qty = 0
else:
qty = (money_total * asset.percentage) // asset_price
inventory_entry = self.inventory.query(query)
self.inventory.drop(inventory_entry.index, inplace=True)
updated_asset = pd.Series([asset.symbol, asset_price, qty])
updated_asset.index = self.inventory.columns
self.inventory = self.inventory.append(updated_asset, ignore_index=True)
# Update current cash
invested_capital = sum(self.inventory['cost'] * self.inventory['qty'])
self.current_cash = money_total - invested_capital
def _update_balance(self, date, data):
"""Updates self.balance for the given date"""
costs = []
for asset in self._portfolio.assets:
query = '{} == "{}"'.format(self.schema['symbol'], asset.symbol)
asset_current = data.query(query)
inventory_asset = self.inventory.query(query)
cost = asset_current[self.schema['adjClose']].values[0]
qty = inventory_asset['qty'].values[0]
costs.append(cost * qty)
total_value = sum(costs)
self.current_capital = total_value
money_total = total_value + self.current_cash
row = pd.Series({
'total value': total_value,
'cash': self.current_cash,
'capital': money_total,
}, name=date)
self.balance = self.balance.append(row)
-78
View File
@@ -1,78 +0,0 @@
"""Generates charts from a portfolio report"""
import altair as alt
import pandas as pd
def returns_chart(report):
# Time interval selector
time_interval = alt.selection(type='interval', encodings=['x'])
# Area plot
areas = alt.Chart().mark_area(opacity=0.7).encode(x='index:T',
y=alt.Y('accumulated return:Q', axis=alt.Axis(format='%')))
# Nearest point selector
nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=['index'], empty='none')
points = areas.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
# Transparent date selector
selectors = alt.Chart().mark_point().encode(
x='index:T',
opacity=alt.value(0),
).add_selection(nearest)
text = areas.mark_text(
align='left', dx=5,
dy=-5).encode(text=alt.condition(nearest, 'accumulated return:Q', alt.value(' '), format='.2%'))
layered = alt.layer(selectors,
points,
text,
areas.encode(
alt.X('index:T', axis=alt.Axis(title='date'), scale=alt.Scale(domain=time_interval))),
width=700,
height=350,
title='Wealth over time')
lower = areas.properties(width=700, height=70).add_selection(time_interval)
return alt.vconcat(layered, lower, data=report.reset_index())
def returns_histogram(report):
bar = alt.Chart(report).mark_bar().encode(x=alt.X('% change:Q',
bin=alt.BinParams(maxbins=100),
axis=alt.Axis(format='%')),
y='count():Q')
return bar
def monthly_returns_heatmap(report):
resample = report.resample('M')['capital'].last()
monthly_returns = resample.pct_change().reset_index()
monthly_returns['capital'].iat[0] = resample.iloc[0] / report.iloc[0]['capital'] - 1
monthly_returns.columns = ['date', 'capital']
chart = alt.Chart(monthly_returns).mark_rect().encode(
alt.X('year(date):O', title='Year'), alt.Y('month(date):O', title='Month'),
alt.Color('mean(capital)', title='Return', scale=alt.Scale(scheme='redyellowgreen')),
alt.Tooltip('mean(capital)', format='.2f')).properties(title='Monthly Returns')
return chart
def sma_graph(data):
price_chart = alt.Chart(
data,
width=700,
height=350,
).mark_line().encode(x='date:T', y=alt.Y('adjClose:Q'), color='symbol:N', opacity=alt.value(0.3))
sma_chart = alt.Chart(
data,
width=700,
height=350,
).mark_line(strokeDash=[1, 1]).encode(x='date:T', y=alt.Y('sma:Q'), color='symbol:N')
return price_chart + sma_chart
-2
View File
@@ -1,2 +0,0 @@
from .schema import *
from .historical_asset_data import HistoricalAssetData
@@ -1,79 +0,0 @@
import os
from .schema import Schema
import pandas as pd
class HistoricalAssetData:
"""Historical Asset Data container class."""
def __init__(self, file, schema=None, **params):
if schema:
assert isinstance(schema, Schema)
else:
self.schema = HistoricalAssetData.default_schema()
file_extension = os.path.splitext(file)[1]
if file_extension == '.h5':
self._data = pd.read_hdf(file, **params)
elif file_extension == '.csv':
params['parse_dates'] = [self.schema.date.mapping]
self._data = pd.read_csv(file, **params)
columns = self._data.columns
assert all((col in columns for _key, col in self.schema))
date_col = self.schema['date']
self.start_date = self._data[date_col].min()
self.end_date = self._data[date_col].max()
def apply_filter(self, f):
"""Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
return self._data.query(f.query)
def iter_dates(self):
"""Returns `pd.DataFrameGroupBy` that groups contracts by date"""
return self._data.groupby(self.schema['date'])
def __getattr__(self, attr):
"""Pass method invocation to `self._data`"""
method = getattr(self._data, attr)
if hasattr(method, '__call__'):
def df_method(*args, **kwargs):
return method(*args, **kwargs)
return df_method
else:
return method
def __getitem__(self, item):
if isinstance(item, pd.Series):
return self._data[item]
else:
key = self.schema[item]
return self._data[key]
def __setitem__(self, key, value):
self._data[key] = value
if key not in self.schema:
self.schema.update({key: key})
def __len__(self):
return len(self._data)
def __repr__(self):
return self._data.__repr__()
def default_schema():
"""Returns default schema for Historical Asset Data"""
schema = Schema.canonical()
return schema
def sma(self, months):
sma = self._data.groupby('symbol').rolling(months)['adjClose'].mean()
sma = sma.reset_index('symbol').sort_index()
sma = sma.fillna(0)
self._data['sma'] = sma['adjClose']
self.schema.update({'sma': 'sma'})
-162
View File
@@ -1,162 +0,0 @@
class Schema:
"""Data schema class.
Used to run validations and provide uniform access to fields in the data set.
"""
columns = [
"symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen",
"adjVolume", "divCash", "splitFactor"
]
def canonical():
"""Builder method that returns a `Schema` with default mappings"""
mappings = {key: key for key in Schema.columns}
return Schema(mappings)
def __init__(self, mappings):
assert all((key in mappings for key in Schema.columns))
self._mappings = mappings
def update(self, mappings):
"""Update schema according to given `mappings`"""
self._mappings.update(mappings)
return self
def __contains__(self, key):
"""Returns True if key is in schema"""
return key in self._mappings.keys()
def __getattr__(self, key):
"""Returns Field object used to build Filters"""
return Field(key, self._mappings[key])
def __setitem__(self, key, value):
self._mappings[key] = value
def __getitem__(self, key):
"""Returns mapping of given `key`"""
return self._mappings[key]
def __iter__(self):
return iter(self._mappings.items())
def __repr__(self):
return "Schema({})".format([Field(k, m) for k, m in self._mappings.items()])
def __eq__(self, other):
return self._mappings == other._mappings
class Field:
"""Encapsulates data fields to build filters used by strategies"""
__slots__ = ("name", "mapping")
def __init__(self, name, mapping):
self.name = name
self.mapping = mapping
def _create_filter(self, op, other):
if isinstance(other, Field):
query = Field._format_query(self.mapping, op, other.mapping)
else:
query = Field._format_query(self.mapping, op, other)
return Filter(query)
def _combine_fields(self, op, other, invert=False):
if isinstance(other, Field):
name = Field._format_query(self.name, op, other.name, invert)
mapping = Field._format_query(self.mapping, op, other.mapping, invert)
elif isinstance(other, (int, float)):
name = Field._format_query(self.name, op, other, invert)
mapping = Field._format_query(self.mapping, op, other, invert)
else:
raise TypeError
return Field(name, mapping)
def _format_query(left, op, right, invert=False):
if invert:
left, right = right, left
query = "{left} {op} {right}".format(left=left, op=op, right=right)
return query
def __add__(self, value):
return self._combine_fields("+", value)
def __radd__(self, value):
return self._combine_fields("+", value, invert=True)
def __sub__(self, value):
return self._combine_fields("-", value)
def __rsub__(self, value):
return self._combine_fields("-", value, invert=True)
def __mul__(self, value):
return self._combine_fields("*", value)
def __rmul__(self, value):
return self._combine_fields("*", value, invert=True)
def __truediv__(self, value):
return self._combine_fields("/", value)
def __rtruediv__(self, value):
return self._combine_fields("/", value, invert=True)
def __lt__(self, value):
return self._create_filter("<", value)
def __le__(self, value):
return self._create_filter("<=", value)
def __gt__(self, value):
return self._create_filter(">", value)
def __ge__(self, value):
return self._create_filter(">=", value)
def __eq__(self, value):
if isinstance(value, str):
value = "'{}'".format(value)
return self._create_filter("==", value)
def __ne__(self, value):
return self._create_filter("!=", value)
def __repr__(self):
return "Field(name='{}', mapping='{}')".format(self.name, self.mapping)
class Filter:
"""This class determines entry/exit conditions for strategies"""
__slots__ = ("query")
def __init__(self, query):
self.query = query
def __and__(self, other):
"""Returns logical *and* between `self` and `other`"""
assert isinstance(other, Filter)
new_query = "({}) & ({})".format(self.query, other.query)
return Filter(query=new_query)
def __or__(self, other):
"""Returns logical *or* between `self` and `other`"""
assert isinstance(other, Filter)
new_query = "(({}) | ({}))".format(self.query, other.query)
return Filter(query=new_query)
def __invert__(self):
"""Negates filter"""
return Filter("!({})".format(self.query))
def __call__(self, data):
"""Returns dataframe of filtered data"""
return data.eval(self.query)
def __repr__(self):
return "Filter(query='{}')".format(self.query)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-2
View File
@@ -1,2 +0,0 @@
from .portfolio import Portfolio
from .asset import Asset
-8
View File
@@ -1,8 +0,0 @@
class Asset:
"""Asset data class"""
def __init__(self, symbol, percentage):
self.symbol = symbol
self.percentage = percentage
def __repr__(self):
return "Asset(symbol={}, percentage={})".format(self.symbol, self.percentage)
-28
View File
@@ -1,28 +0,0 @@
from .asset import Asset
class Portfolio:
def __init__(self):
self.assets = []
def add_asset(self, asset):
"""Adds asset to the Portfolio"""
assert isinstance(asset, Asset)
self.assets.append(asset)
return self
def add_assets(self, assets):
"""Adds assets to the Portfolio"""
for asset in assets:
self.add_asset(asset)
return self
def remove_asset(self, asset_number):
"""Removes asset from the Portfolio"""
self.assets.pop(asset_number)
return self
def clear_assets(self):
"""Removes *all* assets from the Portfolio"""
self.assets = []
return self
@@ -1,103 +0,0 @@
import numpy as np
from asset_backtester import Backtest, Portfolio, Asset
# We use Portfolio Visualizer (https://www.portfoliovisualizer.com/backtest-portfolio)
# to find the actual return for the test porfolios.
def test_ivy_portfolio(sample_datahandler):
bt = run_backtest(sample_datahandler, ivy_portfolio())
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
actual_return = 1.2041
return_tolerance = 0.01
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
def test_ivy_monthly_rebalance(sample_datahandler):
bt = run_backtest(sample_datahandler, ivy_portfolio(), periods=1)
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
actual_return = 1.2043
return_tolerance = 0.01
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
def test_all_weather_portfolio(sample_datahandler):
bt = run_backtest(sample_datahandler, all_weather_portfolio())
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
actual_return = 1.1874
return_tolerance = 0.01
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
def test_all_weather_monthly_rebalance(sample_datahandler):
bt = run_backtest(sample_datahandler, all_weather_portfolio(), periods=1)
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
assert np.allclose(balance['capital'], bt.initial_capital * balance['accumulated return'], rtol=tolerance)
actual_return = 1.1828
return_tolerance = 0.01
assert np.isclose(balance['accumulated return'].iloc[-1], actual_return, rtol=return_tolerance)
def test_constant_price(constant_price_datahandler):
bt = run_backtest(constant_price_datahandler, ivy_portfolio())
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['% change'], 0.0, rtol=tolerance)
assert np.allclose(balance['capital'], bt.initial_capital, rtol=tolerance)
assert np.allclose(balance['total value'], bt.initial_capital, rtol=tolerance)
assert np.allclose(balance['accumulated return'], 1.0, rtol=tolerance)
def test_zero_initial_capital(sample_datahandler):
bt = run_backtest(sample_datahandler, ivy_portfolio(), initial_capital=0)
balance = bt.balance[1:]
tolerance = 0.0001
assert np.allclose(balance['capital'], balance['cash'] + balance['total value'], rtol=tolerance)
assert np.allclose(balance['cash'], 0.0, rtol=tolerance)
assert np.allclose(balance['total value'], 0.0, rtol=tolerance)
# Helpers
def run_backtest(data, portfolio, initial_capital=1_000_000, periods=None):
bt = Backtest(data.schema, initial_capital=initial_capital)
bt.portfolio = portfolio
bt.data = data
bt.run(periods=periods)
return bt
def ivy_portfolio():
portfolio = Portfolio()
assets = [Asset('VTI', 0.2), Asset('VEU', 0.2), Asset('BND', 0.2), Asset('VNQ', 0.2), Asset('DBC', 0.2)]
return portfolio.add_assets(assets)
def all_weather_portfolio():
portfolio = Portfolio()
assets = [Asset('VTI', 0.3), Asset('TLT', 0.4), Asset('IEF', 0.15), Asset('GLD', 0.075), Asset('DBC', 0.075)]
return portfolio.add_assets(assets)
-23
View File
@@ -1,23 +0,0 @@
import os
import pytest
from asset_backtester.datahandler import HistoricalAssetData
TEST_DIR = os.path.abspath(os.path.dirname(__file__))
# 2019 data for TLT, GLD, IEF, VTI, VEU, BND, VNQ and DBC
SAMPLE_DATA = os.path.join(TEST_DIR, 'test_data', 'sample_data.csv')
@pytest.fixture(scope='module')
def sample_datahandler():
data = HistoricalAssetData(SAMPLE_DATA)
return data
@pytest.fixture(scope='module')
def constant_price_datahandler():
data = HistoricalAssetData(SAMPLE_DATA)
data['adjClose'] = data['close'] = 10.0
return data
File diff suppressed because it is too large Load Diff
-12
View File
@@ -1,12 +0,0 @@
import os
import backtester as bt
from backtester.utils import get_data_dir
data_dir = get_data_dir()
spx_test_data = os.path.join(data_dir, "SPX_2008-2018.csv")
portfolio = bt.run(spx_test_data)
report = portfolio.create_report()
print("Running Benchmark strategy on SPX data for 2008-2018")
print(report.tail(30))
-8
View File
@@ -1,8 +0,0 @@
{
"cboe": {
"mute_notifications": []
},
"notifications": {
"slack_webhook": ""
}
}
View File
-48
View File
@@ -1,48 +0,0 @@
import logging.config
import os
import argparse
from data_scraper import cboe, tiingo, backup
parser = argparse.ArgumentParser(prog="data_scraper.py")
parser.add_argument("-t", "--symbols", nargs="+", help="Symbols to fetch")
parser.add_argument(
"-s",
"--scraper",
choices=["cboe", "tiingo"],
default="cboe",
help="Scraper to use")
parser.add_argument(
"-v", "--verbose", action="store_true", help="Enable logging")
parser.add_argument(
"-a",
"--aggregate",
action="store_true",
help="Aggregate daily data files")
parser.add_argument(
"-b", "--backup", action="store_true", help="Backup files in S3 bucket")
args = parser.parse_args()
module_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
if args.verbose:
config_file = os.path.realpath(os.path.join(module_dir, "logconfig.ini"))
logging.config.fileConfig(fname=config_file)
if args.aggregate:
if args.symbols:
cboe.aggregate_monthly_data(args.symbols)
else:
cboe.aggregate_monthly_data()
elif args.backup:
backup.backup_data()
else:
if args.scraper == "tiingo":
scraper = tiingo
else:
scraper = cboe
if args.symbols:
scraper.fetch_data(args.symbols)
else:
scraper.fetch_data()
-114
View File
@@ -1,114 +0,0 @@
import logging
import os
import boto3
from botocore.exceptions import ClientError
from data_scraper import utils
from data_scraper.notifications import slack_notification, Status
logger = logging.getLogger(__name__)
def backup_data():
"""Uploads scraped files to S3 bucket.
Set bucket name in environment variable $S3_BUCKET
"""
try:
bucket_name = utils.get_environment_var("S3_BUCKET")
except EnvironmentError as e:
logger.error(str(e))
slack_notification("Backup failed. Set $S3_BUCKET env variable",
__name__)
raise e
s3 = boto3.resource("s3")
bucket = s3.Bucket(bucket_name)
data_path = utils.get_save_data_path()
cboe_data = os.path.join(data_path, "cboe")
cboe_folders = []
if os.path.exists(cboe_data):
cboe_folders = [
os.path.join(cboe_data, folder) for folder in os.listdir(cboe_data)
if not folder.endswith("daily")
]
tiingo_data = os.path.join(data_path, "tiingo")
tiingo_folders = []
if os.path.exists(tiingo_data):
tiingo_folders = [
os.path.join(tiingo_data, folder)
for folder in os.listdir(tiingo_data)
]
done_cboe, fail_cboe = _upload_folders(
bucket, "cboe", cboe_folders, remove_files=False)
done_tiingo, fail_tiingo = _upload_folders(
bucket, "tiingo", tiingo_folders, remove_files=True)
done = done_cboe + done_tiingo
failed = fail_cboe + fail_tiingo
if len(done) > 0:
msg = "Successful backup of symbols: " + ", ".join(done)
slack_notification(msg, __name__, status=Status.Success)
if len(failed) > 0:
msg = "Unable to backup symbols: " + ", ".join(done)
slack_notification(msg, __name__, status=Status.Warning)
def _upload_folders(bucket, scraper, folders, remove_files=False):
"""Uploads folders to S3 bucket and (optionally) removes old files"""
data_path = utils.get_save_data_path()
done, failed = [], []
for folder in folders:
symbol = os.path.basename(folder)
try:
if remove_files:
_remove_old_files(bucket, prefix=scraper + "/" + symbol)
_upload_folder(bucket, folder, data_path)
except Exception:
failed.append(os.path.basename(folder))
else:
done.append(os.path.basename(folder))
return (done, failed)
def _upload_folder(bucket, folder, data_path):
"""Uploads folder contents to S3 bucket"""
if not os.path.isdir(folder):
return
for root, dirs, files in os.walk(folder):
for file in files:
file_path = os.path.join(root, file)
key = os.path.relpath(file_path, data_path)
if _key_exists(bucket, key):
logger.debug("File already exists in S3")
continue
try:
bucket.upload_file(file_path, key)
logger.debug("Uploaded file %s to S3", file)
except Exception as e:
msg = "Error uploading data file {} to S3.\nReceived exception message {}".format(
file_path, str(e))
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
raise e
def _key_exists(bucket, key):
try:
bucket.Object(key).load()
except ClientError as e:
return int(e.response["Error"]["Code"]) != 404
return False
def _remove_old_files(bucket, prefix):
old_files = bucket.objects.filter(Prefix=prefix)
for file in old_files:
file.delete()
-249
View File
@@ -1,249 +0,0 @@
import logging
import os
from datetime import date
from io import StringIO
from itertools import groupby
from bs4 import BeautifulSoup
import requests
import pandas as pd
from . import utils, validation
from .notifications import slack_notification, Status
logger = logging.getLogger(__name__)
url = "http://www.cboe.com/delayedquote/quote-table-download"
def fetch_data(symbols=None):
"""Fetches options data for a given list of symbols"""
symbols = symbols or _get_all_listed_symbols()
options = utils.get_module_config("cboe")
mute_notifications = options.get("mute_notifications", [])
try:
form_data = _form_data()
except requests.ConnectionError as ce:
msg = "Connection error trying to reach {}".format(url)
logger.error(msg)
slack_notification(msg, __name__)
raise ce
except Exception as e:
msg = "Error parsing response"
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
raise e
headers = {"Referer": url}
file_url = "http://www.cboe.com/delayedquote/quotedata.dat"
symbols = [symbol.upper() for symbol in symbols]
done, failed = [], []
for symbol in symbols:
form_data["ctl00$ContentTop$C005$txtTicker"] = symbol
try:
response = requests.post(url,
data=form_data,
headers=headers,
allow_redirects=False)
symbol_req = requests.get(file_url,
cookies=response.cookies,
headers=headers)
symbol_data = symbol_req.text
if symbol_data == "" or symbol_data.startswith(" <!DOCTYPE"):
raise Exception
except Exception:
failed.append(symbol)
msg = "Error fetching symbol {} data".format(symbol)
logger.error(msg, exc_info=True)
if symbol not in mute_notifications:
slack_notification(msg, __name__)
else:
_save_data(symbol, symbol_data)
done.append(symbol)
if len(done) > 0:
msg = "Successfully scraped symbols: " + ", ".join(done)
slack_notification(msg, __name__, status=Status.Success)
if len(failed) > 0:
msg = "Failed to scrape symbols: " + ", ".join(failed)
slack_notification(msg, __name__, status=Status.Warning)
def aggregate_monthly_data(symbols=None):
"""Aggregate daily snapshots into monthly files and validate data"""
symbols = symbols or _get_all_listed_symbols()
save_data_path = utils.get_save_data_path()
scraper_dir = os.path.join(save_data_path, "cboe")
symbols = [symbol.upper() for symbol in symbols]
for symbol in symbols:
daily_dir = os.path.join(scraper_dir, symbol + "_daily")
if not os.path.exists(daily_dir):
msg = "Error aggregating data. Dir {} not found.".format(daily_dir)
logger.error(msg)
slack_notification(msg, __name__)
continue
monthly_dir = os.path.join(scraper_dir, symbol)
symbol_files = [
file for file in os.listdir(daily_dir) if file.endswith(".csv")
]
for month, files in groupby(symbol_files, _monthly_grouper):
file_names = list(files)
daily_files = [
os.path.join(daily_dir, name) for name in file_names
]
try:
symbol_df = concatenate_files(daily_files)
except Exception:
msg = "Error concatenating daily files for period " + month
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
continue
date_range = pd.to_datetime(symbol_df["quotedate"].unique())
if not validation.validate_dates_in_month(symbol, date_range):
today = pd.Timestamp.today()
first_date = date_range[0]
if first_date.year != today.year or first_date.month != today.month:
msg = "Some trading dates where missing for symbol {}".format(
symbol)
slack_notification(msg, __name__)
continue
if not os.path.exists(monthly_dir):
os.makedirs(monthly_dir)
logger.debug("Symbol dir %s created", monthly_dir)
file_name = _monthly_filename(file_names)
monthly_file = os.path.join(monthly_dir, file_name)
symbol_df.to_csv(monthly_file, index=False)
if not validation.validate_aggregate_file(monthly_file,
daily_files):
utils.remove_file(monthly_file)
msg = "Data in {} differs from the daily files".format(
monthly_file)
logger.error(msg)
slack_notification(msg, __name__)
continue
logger.debug("Saved monthly data %s", monthly_file)
for file in daily_files:
utils.remove_file(file, logger)
def _get_all_listed_symbols():
"""Returns array of all listed symbols.
http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv
"""
current_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
symbols_file = os.path.realpath(
os.path.join(current_dir, "cboesymboldir2.csv"))
symbols_df = pd.read_csv(symbols_file, skiprows=1)
return symbols_df["Stock Symbol"].array
def concatenate_files(files):
"""Returns a dataframe of the concatenated data from `files`."""
df_generator = (pd.read_csv(file) for file in sorted(files))
return pd.concat(df_generator, ignore_index=True)
def _form_data():
"""Return validation form data"""
homepage = requests.get(url)
soup = BeautifulSoup(homepage.content, "lxml")
data = {
"__VIEWSTATE": soup.select_one("#__VIEWSTATE")["value"],
"__EVENTVALIDATION": soup.select_one("#__EVENTVALIDATION")["value"]
}
return data
def _save_data(symbol, symbol_data):
"""Saves the contents of `symbol_data` to
`$SAVE_DATA_PATH/cboe/{symbol}_daily/{symbol}_{%date}.csv`
"""
filename = date.today().strftime(symbol + "_%Y%m%d.csv")
save_data_path = utils.get_save_data_path()
symbol_dir = os.path.join(save_data_path, "cboe", symbol + "_daily")
if not os.path.exists(symbol_dir):
os.makedirs(symbol_dir)
logger.debug("Symbol dir %s created", symbol_dir)
file_path = os.path.join(symbol_dir, filename)
if os.path.exists(file_path) and validation.file_hash_matches_data(
file_path, symbol_data):
logger.debug("File %s already downloaded", file_path)
else:
daily_df = _wrangle_data(symbol, symbol_data)
daily_df.to_csv(file_path, index=False)
logger.debug("Saved daily symbol data as %s", file_path)
def _wrangle_data(symbol, symbol_data):
"""Returns a properly formated (_tidy_) dataframe"""
string_data = StringIO(symbol_data)
first_line = string_data.readline()
spot_price = float(first_line.split(",")[-2])
quote_date = date.today().strftime("%m/%d/%Y")
data = pd.read_csv(string_data, skiprows=1)
call_columns = [
"Calls", "Expiration Date", "Strike", "Last Sale", "Net", "Bid", "Ask",
"Vol", "Open Int", "IV", "Delta", "Gamma"
]
calls = data[call_columns]
put_columns = [
"Puts", "Expiration Date", "Strike", "Last Sale.1", "Net.1", "Bid.1",
"Ask.1", "Vol.1", "Open Int.1", "IV.1", "Delta.1", "Gamma.1"
]
puts = data[put_columns]
renamed_columns = [
"optionroot", "expiration", "strike", "last", "net", "bid", "ask",
"volume", "openinterest", "impliedvol", "delta", "gamma"
]
calls.columns = renamed_columns
calls.insert(loc=1, column="type", value="call")
puts.columns = renamed_columns
puts.insert(loc=1, column="type", value="put")
merged = pd.concat([calls, puts])
merged.insert(loc=0, column="underlying", value=symbol)
merged.insert(loc=1, column="underlying_last", value=spot_price)
merged.insert(loc=2, column="exchange", value="CBOE")
merged.insert(loc=6, column="quotedate", value=quote_date)
return merged
def _monthly_grouper(filename):
"""Returns `{year}{month}` string. Used to group files by month."""
basename = filename.split(".")[0]
file_date = basename.split("_")[1]
return file_date[:-2]
def _monthly_filename(filenames):
"""Returns filename of monthly aggregate file in the form
`{symbol}_{start_date}_to_{end_date}.csv`
"""
sorted_files = list(sorted(filenames))
first_file = sorted_files[0]
last_file = sorted_files[-1]
last_day = last_file.split(".")[0][-8:] # Get only the date
file_name = first_file.split(".")[0] + "_to_" + last_day + ".csv"
return file_name
-66
View File
@@ -1,66 +0,0 @@
# CBOE data scraper
# Requires Selenium and a headless Chrome driver
import tempfile
import time
import os
import shutil
from datetime import date
from selenium import webdriver
class CBOE():
"""CBOE data downloader."""
url = "http://www.cboe.com/delayedquote/quote-table-download"
def __init__(self):
self.data_path = self._get_data_path()
self.tmp_dir = tempfile.TemporaryDirectory()
self.driver = self._initilize_driver(self.tmp_dir.name)
def _get_data_path():
path = os.getenv("OPTIONS_DATA_PATH")
if not path:
raise EnvironmentError("Environment variable $OPTIONS_DATA_PATH not set")
return os.path.expanduser(path)
def _initilize_driver(download_dir):
"""Initilizes the Chrome driver to silently download files
to a temporary directory.
"""
options = webdriver.ChromeOptions()
options.add_argument("headless")
options.add_argument("disable-gpu")
driver = webdriver.Chrome(options=options)
driver.command_executor._commands["send_command"] = (
"POST",
"/session/$sessionId/chromium/send_command"
)
params = {
"cmd": "Page.setDownloadBehavior",
"params": {
"behavior": "allow",
"downloadPath": download_dir
}
}
driver.execute("send_command", params)
driver.implicitly_wait(10)
return driver
def fetch_data(self, symbols):
"""Fetches options data for a given list of symbols"""
self.driver.get(CBOE.url)
for symbol in symbols:
ticker = self.driver.find_element_by_css_selector("input#txtTicker")
ticker.send_keys(symbol)
submit = self.driver.find_element_by_css_selector("input#cmdSubmit")
submit.click()
time.sleep(15) # Horrible hack
download_path = os.path.join(self.tmp_dir.name, "quotedata.dat")
renamed_file = date.today().strftime(symbol + "_%Y%m%d.csv")
full_path = os.path.join(self.data_path, renamed_file)
shutil.move(download_path, full_path)
def __del__(self):
self.tmp_dir.cleanup()
File diff suppressed because it is too large Load Diff
-39
View File
@@ -1,39 +0,0 @@
[loggers]
keys=root,tiingo,cboe
[handlers]
keys=consoleHandler,fileHandler
[formatters]
keys=simpleFormatter
[logger_root]
level=ERROR
handlers=consoleHandler
[logger_tiingo]
level=DEBUG
handlers=consoleHandler,fileHandler
qualname=data_scraper.tiingo
propagate=0
[logger_cboe]
level=DEBUG
handlers=consoleHandler,fileHandler
qualname=data_scraper.cboe
propagate=0
[handler_consoleHandler]
class=StreamHandler
level=ERROR
formatter=simpleFormatter
args=(sys.stdout,)
[handler_fileHandler]
class=handlers.RotatingFileHandler
level=DEBUG
formatter=simpleFormatter
args=("data_scraper.log", "a", 3000000, 10)
[formatter_simpleFormatter]
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
-59
View File
@@ -1,59 +0,0 @@
import logging
from datetime import datetime
from enum import Enum
import requests
from .utils import get_module_config
logger = logging.getLogger(__name__)
Status = Enum("Status", "Success Warning Error")
options = get_module_config("notifications")
try:
webhook = options["slack_webhook"]
except KeyError as e:
logger.error("Missing slack webhook from configuration file")
raise e
payload = {
"channel": "#algotrading",
"username": "Talebot",
"icon_emoji": ":taleb:",
"attachments": [{
"footer": "Talebot"
}]
}
def slack_notification(text, scraper, status=Status.Error):
"""Post Slack notification"""
if status == Status.Error:
emoji = ":thumbsdown: "
title = "data_scraper error"
color = "#B22222"
else:
title = "data_scraper status report"
if status == Status.Success:
emoji = ":thumbsup: "
color = "#49C39E"
else:
emoji = ":warning: "
color = "#EDB625"
msg = emoji + text
payload["attachments"][0]["fallback"] = msg
payload["attachments"][0]["text"] = msg
payload["attachments"][0]["color"] = color
payload["attachments"][0]["title"] = title
payload["attachments"][0]["fields"] = [{"title": scraper}]
payload["attachments"][0]["ts"] = datetime.today().timestamp()
response = requests.post(webhook, json=payload)
if response.status_code != 200:
msg = "Error connecting to Slack {}. Response is:\n{}".format(
response.status_code, response.text)
logger.error(msg)
-41
View File
@@ -1,41 +0,0 @@
#!/bin/bash
## This script downloads stock options data,
## veryfies zipfile integrity, saves md5 signature
## and uploads them to S3.
## To run, pass a list of files to download:
## $> ./backup.sh files.txt
TMPDIR=tmp
NOW=$(date +"%m-%d-%Y-%H%M%S")
RETRY="${NOW}.txt"
MD5SUMS=md5sums.txt
mkdir -p $TMPDIR
while read filename
do
echo "Downloading file $filename to $TMPDIR"
wget --quiet -P $TMPDIR "ftp://l3_hdall:JKNRH7LYXV@ftp.deltaneutral.com/${filename}"
newpath="$TMPDIR/$filename"
echo "Verifying zipfile $newpath"
if unzip -t -q $newpath
then
echo "File check OK"
else
echo "ERROR: File check failed for $newfile"
echo $filename >> $RETRY
rm $newpath
continue
fi
echo "Appending md5 sum for $f"
md5sum $newpath >> $MD5SUMS
echo "Copying $newpath to S3 bucket"
rclone copy -v $newpath longueduree:longueduree
echo "Deleting $newpath"
rm $newpath
done <$1
View File
-104
View File
@@ -1,104 +0,0 @@
import logging
import unittest
from unittest.mock import patch
import os
import shutil
from requests import ConnectionError
import pandas as pd
from data_scraper import cboe
logging.disable(level=logging.CRITICAL)
class TestCBOE(unittest.TestCase):
"""Tests CBOE data scraper"""
test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
cboe_data_path = os.path.join(test_data_path, "cboe")
spx_data_path = os.path.join(cboe_data_path, "SPX_March_2019.csv")
@classmethod
def setUpClass(cls):
cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
os.environ["SAVE_DATA_PATH"] = cls.test_data_path
@classmethod
def tearDownClass(cls):
if cls.save_data_path:
os.environ["SAVE_DATA_PATH"] = cls.save_data_path
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_fetch_spy(self, mocked_notification):
"""Fetch todays SPY quote"""
cboe.fetch_data(["SPY"])
spy_dir = os.path.join(TestCBOE.cboe_data_path, "SPY_daily")
self.addCleanup(TestCBOE.remove_files, spy_dir)
if self.assertTrue(os.path.exists(spy_dir)):
self.assertTrue(mocked_notification.called)
file_name = "SPY_" + pd.Timestamp.today().strftime(
"%Y%m%d") + ".csv"
file_path = os.path.join(spy_dir, file_name)
spy_df = pd.read_csv(file_path, parse_dates=["quotedate"])
self.assertTrue(all(spy_df["underlying"] == "SPX"))
self.assertEqual(spy_df["quotedate"].nunique(), 1)
counts = spy_df["type"].value_counts()
self.assertEqual(counts["put"] + counts["call"], len(spy_df))
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_fetch_invalid_symbol(self, mocked_notification):
"""Fetching invalid symbol should send notification"""
cboe.fetch_data(["FOOBAR"])
self.assertTrue(mocked_notification.called)
@patch("data_scraper.cboe.url", new="http://www.aldkfjaskldfjsa.com")
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_no_connection(self, mocked_notification):
"""Raise ConnectionError and send notification when host is unreachable"""
with self.assertRaises(ConnectionError):
cboe.fetch_data(["SPX"])
self.assertTrue(mocked_notification.called)
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_data_aggregation(self, mocked_notification, mocked_remove):
"""Test data aggregation happy path"""
cboe.aggregate_monthly_data(["SPX"])
aggregate_file = os.path.join(TestCBOE.cboe_data_path, "SPX",
"SPX_20190301_to_20190329.csv")
self.addCleanup(TestCBOE.remove_files, os.path.dirname(aggregate_file))
self.assertTrue(mocked_remove.called)
self.assertFalse(mocked_notification.called)
if self.assertTrue(os.path.exists(aggregate_file)):
spx_df = pd.read_csv(TestCBOE.spx_data_path)
aggregate_df = pd.read_csv(aggregate_file)
self.assertTrue(spx_df.equals(aggregate_df))
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_aggregate_missing_days(self, mocked_notification, mocked_remove):
"""Data aggregation should send notification when there are missing days"""
cboe.aggregate_monthly_data(["GOOG"])
self.assertTrue(mocked_notification.called)
self.assertFalse(mocked_remove.called)
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
@patch("data_scraper.cboe.slack_notification", return_value=None)
def test_aggregate_invalid_symbol(self, mocked_notification,
mocked_remove):
"""Data aggregation should fail and send notification on invalid symbol"""
cboe.aggregate_monthly_data(["FOOBAR"])
self.assertTrue(mocked_notification.called)
self.assertFalse(mocked_remove.called)
def remove_files(file_path):
if os.path.exists(file_path):
shutil.rmtree(file_path)
if __name__ == "__main__":
unittest.main()
-75
View File
@@ -1,75 +0,0 @@
import logging
import unittest
from unittest.mock import patch
import os
import shutil
import pandas as pd
from data_scraper import tiingo
logging.disable(level=logging.CRITICAL)
class TestTiingo(unittest.TestCase):
"""Tests Tiingo data scraper"""
test_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
test_data_path = os.path.realpath(os.path.join(test_dir, "data"))
tiingo_data_path = os.path.join(test_data_path, "tiingo")
@classmethod
def setUpClass(cls):
assert "TIINGO_API_KEY" in os.environ, "$TIINGO_API_KEY env variable must be set"
cls.save_data_path = os.environ.get("SAVE_DATA_PATH", None)
os.environ["SAVE_DATA_PATH"] = cls.test_data_path
@classmethod
def tearDownClass(cls):
if cls.save_data_path:
os.environ["SAVE_DATA_PATH"] = cls.save_data_path
@patch("data_scraper.tiingo.slack_notification", return_value=None)
def test_fetch_gld(self, mocked_notification):
"""Fetch GLD data"""
tiingo.fetch_data(["GLD"])
gld_dir = os.path.join(TestTiingo.tiingo_data_path, "GLD")
self.addCleanup(TestTiingo.remove_files, gld_dir)
if self.assertTrue(os.path.exists(gld_dir)):
self.assertTrue(mocked_notification.called)
file_name = "GLD_" + pd.Timestamp.today().strftime(
"%Y%m%d") + ".csv"
file_path = os.path.join(gld_dir, file_name)
gld_df = pd.read_csv(file_path)
self.assertTrue(all(gld_df["symbol"] == "GLD"))
expected_columns = [
"symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
"adjVolume", "close", "divCash", "high", "low", "open",
"splitFactor", "volume"
]
self.assertEqual(gld_df.columns, expected_columns)
@patch("data_scraper.tiingo.slack_notification", return_value=None)
def test_fetch_invalid_symbol(self, mocked_notification):
"""Fetching invalid symbol data should send notification"""
tiingo.fetch_data(["FOOBAR"])
self.assertTrue(mocked_notification.called)
@patch("data_scraper.tiingo.pdr.get_data_tiingo") # mock pandas_datareader
@patch("data_scraper.tiingo.slack_notification", return_value=None)
def test_no_connection(self, mocked_notification, mocked_pdr):
"""Raise ConnectionError and send notification when host is unreachable"""
mocked_pdr.side_effect = ConnectionError("This is a test")
with self.assertRaises(ConnectionError):
tiingo.fetch_data(["IBM"])
self.assertTrue(mocked_notification.called)
def remove_files(file_path):
if os.path.exists(file_path):
shutil.rmtree(file_path)
if __name__ == "__main__":
unittest.main()
-122
View File
@@ -1,122 +0,0 @@
import logging
import os
from datetime import date
import pandas as pd
import pandas_datareader as pdr
from . import utils, validation
from .notifications import slack_notification, Status
logger = logging.getLogger(__name__)
# Default symbols to fetch
assets = [
"VTSMX", "VFINX", "VIVAX", "VIGRX", "VIMSX", "VMVIX", "VMGIX", "NAESX",
"VISVX", "VISGX", "BRSIX", "VGTSX", "VTMGX", "VFSVX", "EFV", "VEURX",
"VPACX", "VEIEX", "VFISX", "VFITX", "IEF", "VUSTX", "VBMFX", "VIPSX",
"PIGLX", "PGBIX", "VFSTX", "LQD", "VWESX", "VWEHX", "VWSTX", "VWITX",
"VWLTX", "VGSIX", "GLD", "PSAU", "GSG"
]
def fetch_data(symbols=assets):
"""Fetches historical data for given symbols from Tiingo"""
api_key = utils.get_environment_var("TIINGO_API_KEY")
symbols = [symbol.upper() for symbol in symbols]
done, failed = [], []
for symbol in symbols:
try:
symbol_data = pdr.get_data_tiingo(symbol, api_key=api_key)
except ConnectionError as ce:
msg = "Unable to connect to api.tiingo.com when fetching symbol {}".format(
symbol)
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
raise ce
except TypeError:
# pandas_datareader raises TypeError when fetching invalid symbol
failed.append(symbol)
msg = "Attempted to fetch invalid symbol {}".format(symbol)
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
except Exception:
msg = "Error fetching symbol {}".format(symbol)
logger.error(msg, exc_info=True)
slack_notification(msg, __name__)
else:
_save_data(symbol, symbol_data.reset_index())
done.append(symbol)
if len(done) > 0:
msg = "Successfully scraped symbols: " + ", ".join(done)
slack_notification(msg, __name__, status=Status.Success)
if len(failed) > 0:
msg = "Failed to scrape symbols: " + ", ".join(failed)
slack_notification(msg, __name__, status=Status.Warning)
def _save_data(symbol, symbol_df):
"""Saves the contents of `symbol_df` to
`$SAVE_DATA_PATH/tiingo/{symbol}/{symbol}_{%date}.csv`"""
filename = date.today().strftime(symbol + "_%Y%m%d.csv")
save_data_path = utils.get_save_data_path()
symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
if not os.path.exists(symbol_dir):
os.makedirs(symbol_dir)
logger.debug("Symbol dir %s created", symbol_dir)
file_path = os.path.join(symbol_dir, filename)
if os.path.exists(file_path) and validation.file_hash_matches_data(
file_path, symbol_df.to_csv()):
logger.debug("File %s already downloaded", file_path)
else:
expected_columns = [
"symbol", "date", "adjClose", "adjHigh", "adjLow", "adjOpen",
"adjVolume", "close", "divCash", "high", "low", "open",
"splitFactor", "volume"
]
if validation.validate_historical_dates(
symbol, symbol_df["date"]) and validation.validate_columns(
expected_columns, symbol_df.columns):
merged_df = _merge(symbol, symbol_df)
pattern = symbol + "_*"
utils.remove_files(symbol_dir, pattern, logger)
merged_df.to_csv(file_path, index=False)
logger.debug("Saved symbol data as %s", file_path)
def _merge(symbol, symbol_df):
"""Merge `symbol_df` with previous data file."""
save_data_path = utils.get_save_data_path()
symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
files = os.listdir(symbol_dir)
if len(files) == 0:
return symbol_df
last_file = sorted(files)[-1]
old_df = pd.read_csv(os.path.join(symbol_dir, last_file),
parse_dates=["date"],
index_col="date")
symbol_df.index = symbol_df["date"]
diffs = old_df.index.difference(symbol_df.index)
if diffs.empty:
return symbol_df
else:
msg = """Old data included dates not present in scraped file for symbol {}
Merged new data with previous file.""".format(symbol)
logger.error(msg)
slack_notification(msg, __name__)
merged_df = pd.concat([symbol_df, old_df.loc[diffs]])
merged_df.sort_index(inplace=True)
return merged_df.reset_index()
-52
View File
@@ -1,52 +0,0 @@
import glob
import json
import os
def get_environment_var(variable):
"""Returns the value of a given environment variable.
Raises `EnvironmentError` if not found.
"""
if variable not in os.environ:
raise EnvironmentError(
"Environment variable {} not set".format(variable))
return os.path.expanduser(os.environ[variable])
def get_save_data_path():
"""Reads data path from environment variable `$SAVE_DATA_PATH`.
If it is not set, defaults to `./data/scraped`.
"""
try:
data_dir = get_environment_var("SAVE_DATA_PATH")
except EnvironmentError:
data_dir = "data/scraped"
os.makedirs(data_dir)
return data_dir
def get_module_config(module, config_file="data_scraper.conf"):
"""Parses configuration file and returns the configuration options
for the chosen `module`.
"""
options = {}
if os.path.exists(config_file):
with open(config_file) as file:
config = json.load(file)
options = config.get(module, {})
return options
def remove_files(data_dir, pattern, logger=None):
"""Removes files in `data_dir` that match `pattern`"""
for file in glob.glob(os.path.join(data_dir, pattern)):
remove_file(file, logger)
def remove_file(file, logger=None):
os.remove(file)
if logger:
logger.debug("Removed file %s", file)
-93
View File
@@ -1,93 +0,0 @@
import logging
import hashlib
import pandas as pd
import pandas_market_calendars as mcal
from . import cboe
from .notifications import slack_notification
logger = logging.getLogger(__name__)
def file_hash_matches_data(file_path, data):
file_hash = file_md5(file_path)
data_md5 = hashlib.md5(data.encode()).hexdigest()
return file_hash == data_md5
def file_md5(file, chunk_size=4096):
md5 = hashlib.md5()
with open(file, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
md5.update(chunk)
return md5.hexdigest()
def validate_dates_in_month(symbol, date_range):
"""Compares `date_range` (month) with NYSE trading calendar.
Returns `True` if there are no missing days.
"""
# NYSE and CBOE have the same trading calendar
# https://www.nyse.com/markets/hours-calendars
# http://cfe.cboe.com/about-cfe/holiday-calendar
nyse = mcal.get_calendar("NYSE")
first_date = date_range[0]
period = pd.Period(year=first_date.year, month=first_date.month, freq="M")
trading_days = nyse.valid_days(start_date=period.start_time,
end_date=period.end_time)
# Remove timezone info
trading_days = trading_days.tz_convert(tz=None)
missing_days = trading_days.difference(date_range)
if not missing_days.empty:
logger.error("Error validating monthly dates. Missing: %s",
missing_days)
return missing_days.empty
def validate_historical_dates(symbol, date_range):
"""Compares `date_range` (any time range) with trading calendar.
Returns `True` if there are no missing days.
"""
nyse = mcal.get_calendar("NYSE")
start_date = date_range.min()
end_date = date_range.max()
trading_days = nyse.valid_days(start_date=start_date, end_date=end_date)
# Remove timezone info
trading_days = trading_days.tz_convert(tz=None)
date_range = date_range.dt.tz_convert(tz=None)
missing_days = trading_days.difference(date_range)
if not missing_days.empty:
logger.error("Error validating historical dates. Missing: %s",
missing_days)
return missing_days.empty
def validate_columns(expected, received):
"""Verify that the `received` columns scraped are equal to `expected`"""
valid = all(expected == received)
if not valid:
expected_cols = ", ".join(expected)
received_cols = ", ".join(received)
msg = """Columns expected differ from those received.
Expected: {}
Received: {}""".format(expected_cols, received_cols)
logger.error(msg)
slack_notification(msg, __name__)
return valid
def validate_aggregate_file(aggregate_file, daily_files):
"""Compares `aggregate_file` with the data from `daily_files`."""
aggregate_df = pd.read_csv(aggregate_file)
recreated_df = cboe.concatenate_files(daily_files)
return aggregate_df.equals(recreated_df)
-19
View File
@@ -1,19 +0,0 @@
FROM debian:latest
MAINTAINER Juan Pablo Amoroso <jamoroso@lambdaclass.com>
RUN apt-get update
RUN apt-get install -y python3 python3-pip make cron build-essential pkg-config openssl libssl-dev
RUN python3 -m pip install pipenv
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
COPY . /finance
WORKDIR /finance
RUN make init
COPY ./docker/data_scraper/crontab /etc/cron.d/scraper-cron
COPY ./docker/data_scraper/entrypoint.sh /usr/bin/entrypoint.sh
COPY ./docker/data_scraper/run-task.sh /usr/bin/run-task
RUN chmod 0644 /etc/cron.d/scraper-cron && crontab /etc/cron.d/scraper-cron
ENTRYPOINT ["entrypoint.sh"]
CMD ["cron", "-f"]
-4
View File
@@ -1,4 +0,0 @@
0 19 * * 1-5 root cd /finance && run-task make scrape scraper=cboe
0 19 * * 1-5 root cd /finance && run-task make scrape scraper=tiingo
0 0 1 * * root cd /finance && run-task make aggregate && run-task make backup
-5
View File
@@ -1,5 +0,0 @@
#!/bin/bash
# cron does not read env, save it here
env > /root/env
exec "$@"
-4
View File
@@ -1,4 +0,0 @@
#!/bin/bash
# import env vars that were written in entrypoint
env - `cat /root/env` $@
-8
View File
@@ -1,8 +0,0 @@
version: "3"
services:
scraper:
image: data_scraper:latest
container_name: data_scraper
volumes:
- ~/finance/data:/finance/data