From 2b04e2d76a4c18c8bb486a7fa83e375c10b7067a Mon Sep 17 00:00:00 2001 From: Juan Pablo Amoroso Date: Tue, 11 Feb 2020 15:26:04 -0300 Subject: [PATCH] Added Tiingo datahandler --- backtester/__init__.py | 1 + backtester/datahandler/__init__.py | 1 + .../datahandler/historical_options_data.py | 2 +- backtester/datahandler/schema.py | 23 +++-- backtester/datahandler/tiingo_data.py | 86 +++++++++++++++++++ 5 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 backtester/datahandler/tiingo_data.py diff --git a/backtester/__init__.py b/backtester/__init__.py index 0bd50a9..880d540 100644 --- a/backtester/__init__.py +++ b/backtester/__init__.py @@ -1 +1,2 @@ +from . import datahandler from .backtester import Backtest diff --git a/backtester/datahandler/__init__.py b/backtester/datahandler/__init__.py index da64028..8495f92 100644 --- a/backtester/datahandler/__init__.py +++ b/backtester/datahandler/__init__.py @@ -1,2 +1,3 @@ from .schema import * from .historical_options_data import HistoricalOptionsData +from .tiingo_data import TiingoData diff --git a/backtester/datahandler/historical_options_data.py b/backtester/datahandler/historical_options_data.py index 1158b71..25aceb8 100644 --- a/backtester/datahandler/historical_options_data.py +++ b/backtester/datahandler/historical_options_data.py @@ -80,7 +80,7 @@ class HistoricalOptionsData: def default_schema(): """Returns default schema for Historical Options Data""" - schema = Schema.canonical() + schema = Schema.options() schema.update({ 'contract': 'optionroot', 'date': 'quotedate', diff --git a/backtester/datahandler/schema.py b/backtester/datahandler/schema.py index 7b46e6a..50ad9ec 100644 --- a/backtester/datahandler/schema.py +++ b/backtester/datahandler/schema.py @@ -1,20 +1,31 @@ class Schema: """Data schema class. - Used to run validations and provide uniform access to fields in the data set. + Used to provide uniform access to fields in the data set.
""" - columns = [ + stock_columns = [ + "symbol", "date", "open", "close", "high", "low", "volume", "adjClose", "adjHigh", "adjLow", "adjOpen", + "adjVolume", "divCash", "splitFactor" + ] + + option_columns = [ "underlying", "underlying_last", "date", "contract", "type", "expiration", "strike", "bid", "ask", "volume", "open_interest" ] - def canonical(): - """Builder method that returns a `Schema` with default mappings""" - mappings = {key: key for key in Schema.columns} + def stocks(): + """Builder method that returns a `Schema` with default mappings for stocks""" + mappings = {key: key for key in Schema.stock_columns} + return Schema(mappings) + + def options(): + """Builder method that returns a `Schema` with default mappings for options""" + mappings = {key: key for key in Schema.option_columns} return Schema(mappings) def __init__(self, mappings): - assert all((key in mappings for key in Schema.columns)) + assert all((key in mappings for key in Schema.stock_columns)) or all( + (key in mappings for key in Schema.option_columns)) self._mappings = mappings diff --git a/backtester/datahandler/tiingo_data.py b/backtester/datahandler/tiingo_data.py new file mode 100644 index 0000000..08b1910 --- /dev/null +++ b/backtester/datahandler/tiingo_data.py @@ -0,0 +1,86 @@ +import os +from .schema import Schema +import pandas as pd + + +class TiingoData: + """Tiingo (stocks & indices) Data container class. NOTE(review): `__init__` only type-checks a caller-supplied `schema`; `self.schema` is assigned solely in the default branch, so confirm whether `self.schema = schema` is missing.""" + def __init__(self, file, schema=None, **params): + if schema: + assert isinstance(schema, Schema) + else: + self.schema = TiingoData.default_schema() + + file_extension = os.path.splitext(file)[1] + + if file_extension == '.h5': + self._data = pd.read_hdf(file, **params) + elif file_extension == '.csv': + params['parse_dates'] = [self.schema.date.mapping] + self._data = pd.read_csv(file, **params) + + columns = self._data.columns + assert all((col in columns for _key, col in self.schema)) + + date_col = self.schema['date'] + + self.start_date = 
self._data[date_col].min() + self.end_date = self._data[date_col].max() + + def apply_filter(self, f): + """Apply Filter `f` to the data. Returns a `pd.DataFrame` with the filtered rows.""" + return self._data.query(f.query) + + def iter_dates(self): + """Returns `pd.DataFrameGroupBy` that groups stocks by date""" + return self._data.groupby(self.schema['date']) + + def iter_months(self): + """Returns `pd.DataFrameGroupBy` over the rows holding each month's earliest date, grouped by date""" + date_col = self.schema['date'] + iterator = self._data.groupby(pd.Grouper( + key=date_col, + freq="MS")).apply(lambda g: g[g[date_col] == g[date_col].min()]).reset_index(drop=True).groupby(date_col) + return iterator + + def __getattr__(self, attr): + """Delegate attribute access and method invocation to `self._data`""" + + method = getattr(self._data, attr) + if hasattr(method, '__call__'): + + def df_method(*args, **kwargs): + return method(*args, **kwargs) + + return df_method + else: + return method + + def __getitem__(self, item): + if isinstance(item, pd.Series): + return self._data[item] + else: + key = self.schema[item] + return self._data[key] + + def __setitem__(self, key, value): + self._data[key] = value + if key not in self.schema: + self.schema.update({key: key}) + + def __len__(self): + return len(self._data) + + def __repr__(self): + return self._data.__repr__() + + def default_schema(): + """Returns default schema for Tiingo Data""" + return Schema.stocks() + + def sma(self, months): + sma = self._data.groupby('symbol').rolling(months)['adjClose'].mean() + sma = sma.reset_index('symbol').sort_index() + sma = sma.fillna(0) + self._data['sma'] = sma['adjClose'] + self.schema.update({'sma': 'sma'})