mirror of
https://github.com/wassname/seq2seq-time.git
synced 2026-06-27 18:44:28 +08:00
20 lines
834 B
Python
20 lines
834 B
Python
import sklearn
|
|
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
|
|
from sklearn_pandas import DataFrameMapper
|
|
|
|
def normalize_encode_dataframe(df, encoder=OrdinalEncoder):
    """Normalise numeric data, encode categorical data.

    Each numeric column gets its own ``StandardScaler``; every other column
    gets its own ``encoder()`` instance. All transformers are combined in a
    single ``DataFrameMapper`` which is fitted on ``df``.

    Args:
        df: pandas DataFrame to transform.
        encoder: Zero-argument callable (e.g. a transformer class) that
            returns a fresh transformer for one non-numeric column.

    Returns:
        A tuple ``(df_norm, scaler)`` where ``df_norm`` is the result of
        ``scaler.fit_transform(df)`` and ``scaler`` is the fitted
        ``DataFrameMapper``, reusable on further data.
    """
    columns_input_numeric = list(df._get_numeric_data().columns)
    numeric_names = set(columns_input_numeric)

    # Walk df.columns instead of taking a set difference: iterating a set of
    # string names depends on hash randomisation, so the output column order
    # could change from one interpreter run to the next. dict.fromkeys keeps
    # the de-duplication the set used to provide while preserving order.
    columns_categorical = list(
        dict.fromkeys(c for c in df.columns if c not in numeric_names)
    )

    # Selecting with a one-element list ([n]) hands each transformer a 2-D
    # single-column input.
    transformers = [([n], StandardScaler()) for n in columns_input_numeric]
    transformers += [([n], encoder()) for n in columns_categorical]

    scaler = DataFrameMapper(transformers, df_out=True)
    df_norm = scaler.fit_transform(df)
    return df_norm, scaler
|
|
|
|
def timeseries_split(df, test_fraction=0.2):
    """Split timeseries data with test in the future.

    The rows are assumed to already be in chronological order; the last
    ``int(len(df) * test_fraction)`` rows become the test set and everything
    before them the training set.

    Args:
        df: pandas DataFrame (or Series) supporting ``.iloc`` slicing.
        test_fraction: Fraction of rows, between 0 and 1 inclusive, to put
            in the test set. The row count is truncated towards zero.

    Returns:
        A tuple ``(train, test)`` of ``.iloc`` slices of ``df``.

    Raises:
        ValueError: If ``test_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= test_fraction <= 1:
        raise ValueError(
            f"test_fraction must be between 0 and 1, got {test_fraction!r}"
        )

    n_rows = len(df)
    n_test = int(n_rows * test_fraction)

    # Slice at an explicit positive index. The negative form (iloc[:-n],
    # iloc[-n:]) breaks when n == 0 because -0 == 0: the train set comes out
    # empty and the test set becomes the whole frame.
    split_at = n_rows - n_test
    return df.iloc[:split_at], df.iloc[split_at:]
|