Added Tiingo datahandler

This commit is contained in:
Juan Pablo Amoroso
2020-02-11 15:26:04 -03:00
parent 4191824b9e
commit 2b04e2d76a
5 changed files with 106 additions and 7 deletions
+1
View File
@@ -1 +1,2 @@
from . import datahandler
from .backtester import Backtest
+1
View File
@@ -1,2 +1,3 @@
from .schema import *
from .historical_options_data import HistoricalOptionsData
from .tiingo_data import TiingoData
@@ -80,7 +80,7 @@ class HistoricalOptionsData:
def default_schema():
"""Returns default schema for Historical Options Data"""
schema = Schema.canonical()
schema = Schema.options()
schema.update({
'contract': 'optionroot',
'date': 'quotedate',
+17 -6
View File
@@ -1,20 +1,31 @@
class Schema:
"""Data schema class.
Used to run validations and provide uniform access to fields in the data set.
Used to provide uniform access to fields in the data set.
"""
columns = [
stock_columns = [
"symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen",
"adjVolume", "divCash", "splitFactor"
]
option_columns = [
"underlying", "underlying_last", "date", "contract", "type", "expiration", "strike", "bid", "ask", "volume",
"open_interest"
]
def canonical():
"""Builder method that returns a `Schema` with default mappings"""
mappings = {key: key for key in Schema.columns}
def stocks():
"""Builder method that returns a `Schema` with default mappings for stocks"""
mappings = {key: key for key in Schema.stock_columns}
return Schema(mappings)
def options():
"""Builder method that returns a `Schema` with default mappings for options"""
mappings = {key: key for key in Schema.option_columns}
return Schema(mappings)
def __init__(self, mappings):
assert all((key in mappings for key in Schema.columns))
assert all((key in mappings for key in Schema.stock_columns)) or all(
(key in mappings for key in Schema.option_columns))
self._mappings = mappings
+86
View File
@@ -0,0 +1,86 @@
import os
from .schema import Schema
import pandas as pd
class TiingoData:
    """Tiingo (stocks & indices) data container class.

    Wraps a `pd.DataFrame` loaded from an `.h5` or `.csv` file and exposes its
    columns through a `Schema`. Attributes that are not defined on this class
    are looked up on the wrapped data frame.
    """

    def __init__(self, file, schema=None, **params):
        """Load `file` into a data frame and validate it against the schema.

        Args:
            file: Path to the data file; the extension selects the reader
                (`.h5` -> `pd.read_hdf`, `.csv` -> `pd.read_csv`).
            schema: Optional `Schema`; `default_schema()` is used when omitted.
            **params: Extra keyword arguments forwarded to the pandas reader.
                For CSV files `parse_dates` is always set to the date column.

        Raises:
            AssertionError: If `schema` is not a `Schema`, or a column mapped by
                the schema is missing from the loaded data.
            ValueError: If the file extension is neither `.h5` nor `.csv`.
        """
        if schema:
            assert isinstance(schema, Schema)
            # Keep the caller's schema; previously it was validated but dropped.
            self.schema = schema
        else:
            self.schema = TiingoData.default_schema()

        date_col = self.schema['date']
        file_extension = os.path.splitext(file)[1]

        if file_extension == '.h5':
            self._data = pd.read_hdf(file, **params)
        elif file_extension == '.csv':
            params['parse_dates'] = [date_col]
            self._data = pd.read_csv(file, **params)
        else:
            # Fail loudly instead of continuing with `_data` unset.
            raise ValueError(
                "Unsupported file extension {!r}; expected '.h5' or '.csv'".format(file_extension))

        columns = self._data.columns
        assert all((col in columns for _key, col in self.schema))

        self.start_date = self._data[date_col].min()
        self.end_date = self._data[date_col].max()

    def apply_filter(self, f):
        """Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows."""
        return self._data.query(f.query)

    def iter_dates(self):
        """Returns `pd.DataFrameGroupBy` that groups stocks by date"""
        return self._data.groupby(self.schema['date'])

    def iter_months(self):
        """Returns `pd.DataFrameGroupBy` that groups stocks by month.

        Only the rows of the earliest date available in each month are kept,
        and the result is grouped by that date.
        """
        date_col = self.schema['date']
        monthly = self._data.groupby(pd.Grouper(key=date_col, freq="MS"))
        first_days = monthly.apply(lambda g: g[g[date_col] == g[date_col].min()])
        return first_days.reset_index(drop=True).groupby(date_col)

    def __getattr__(self, attr):
        """Pass attribute access and method invocation to `self._data`."""
        # `__getattr__` only runs when normal lookup fails, so a missing `_data`
        # would otherwise re-enter this method forever.
        if attr == '_data':
            raise AttributeError(attr)
        # Return the attribute itself: wrapping callables in a plain function
        # breaks callable accessors that are also indexed (e.g. `.loc`, `.iloc`).
        return getattr(self._data, attr)

    def __getitem__(self, item):
        """Return rows selected by a `pd.Series` mask, or the column mapped to schema key `item`."""
        if isinstance(item, pd.Series):
            return self._data[item]
        key = self.schema[item]
        return self._data[key]

    def __setitem__(self, key, value):
        """Assign column `key` on the data and register it in the schema if it is new."""
        self._data[key] = value
        if key not in self.schema:
            self.schema.update({key: key})

    def __len__(self):
        """Return the number of rows in the underlying data."""
        return len(self._data)

    def __repr__(self):
        """Return the representation of the underlying data."""
        return self._data.__repr__()

    @staticmethod
    def default_schema():
        """Returns default schema for Tiingo Data"""
        return Schema.stocks()

    def sma(self, months):
        """Add an `sma` column holding the per-symbol rolling mean of the adjusted close.

        Args:
            months: Window passed to `rolling`.
                NOTE(review): an integer window counts rows, not calendar
                months -- confirm the intended unit with callers.

        Windows that are not yet full are filled with 0. The new column is
        registered in the schema under the key `sma`.
        """
        # Resolve column names through the schema so custom mappings work too.
        symbol_col = self.schema['symbol']
        price_col = self.schema['adjClose']

        sma = self._data.groupby(symbol_col).rolling(months)[price_col].mean()
        # Drop the symbol level so the result aligns with the original index.
        sma = sma.reset_index(symbol_col).sort_index()
        sma = sma.fillna(0)
        self._data['sma'] = sma[price_col]
        self.schema.update({'sma': 'sma'})