From 0bd1c307f56bdbec36e53cb8cc29a74c0d62bdd3 Mon Sep 17 00:00:00 2001 From: wassname Date: Sat, 15 Jul 2017 09:11:10 +0800 Subject: [PATCH] test the utils --- requirements/requirements.test.txt | 1 + src/config.py | 1 + src/data/__init__.py | 0 src/data/utils.py | 23 +++++++++++++++++++++++ src/environments/portfolio.py | 24 ++++-------------------- test/test_env.py | 8 ++++---- test/test_utils.py | 29 +++++++++++++++++++++++++++++ 7 files changed, 62 insertions(+), 24 deletions(-) create mode 100644 src/config.py create mode 100644 src/data/__init__.py create mode 100644 src/data/utils.py create mode 100644 test/test_utils.py diff --git a/requirements/requirements.test.txt b/requirements/requirements.test.txt index e69de29..bd253fe 100644 --- a/requirements/requirements.test.txt +++ b/requirements/requirements.test.txt @@ -0,0 +1 @@ +pytest==3.0.7 diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..5bf5ea7 --- /dev/null +++ b/src/config.py @@ -0,0 +1 @@ +eps = 1e-7 diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/utils.py b/src/data/utils.py new file mode 100644 index 0000000..e414c93 --- /dev/null +++ b/src/data/utils.py @@ -0,0 +1,23 @@ +import numpy as np + +from ..config import eps + + +def random_shift(x, fraction): + """Apply a random shift to a pandas series.""" + min_x, max_x = np.min(x), np.max(x) + m = np.random.uniform(-fraction, fraction, size=x.shape) + 1 + c = np.random.uniform(-fraction, fraction, size=x.shape) * x.std() + return np.clip(x * m + c, min_x, max_x) + + +def normalize(x): + """Normalize a pandas series (subtract its min, divide by its std).""" + x = (x - x.min()) / (x.std() + eps) + return x + + +def scale_to_start(x): + """Scale pandas series so that it starts at one.""" + x = (x + eps) / (x[0] + eps) + return x diff --git a/src/environments/portfolio.py b/src/environments/portfolio.py index 5d743d8..fbc5180 100644 --- a/src/environments/portfolio.py +++ 
b/src/environments/portfolio.py @@ -3,25 +3,8 @@ import numpy as np import gym import gym.spaces -eps = 1e-7 - - -def random_shift(x, fraction): - min_x, max_x = np.min(x), np.max(x) - m = np.random.uniform(-fraction, fraction, size=x.shape) + 1 - c = np.random.uniform(-fraction, fraction, size=x.shape) * x.std() - return np.clip(x * m + c, min_x, max_x) - - -def normalise(x): - x = (x - x.min()) / (x.std + eps) - return x - - -def scale_to_start(x): - """Scale episode so that it starts at one.""" - x = (x + eps) / (x[0] + eps) - return x +from ..config import eps +from ..data.utils import normalize, random_shift, scale_to_start class DataSrc(object): @@ -51,7 +34,8 @@ class DataSrc(object): # data processing if scale: - df = (df - df.mean(0) + eps) / (df.max(0) - df.min(0) + eps) + # df = (df - df.mean(0) + eps) / (df.max(0) - df.min(0) + eps) + df = df.apply(lambda x: normalize(x)) # get rid of NaN's df = df.fillna(method="pad") diff --git a/test/test_env.py b/test/test_env.py index 40b3203..c75da07 100644 --- a/test/test_env.py +++ b/test/test_env.py @@ -4,7 +4,7 @@ from src.environments.portfolio import PortfolioEnv def test_portfolio_env(): - df = pd.read_pickle('./data/poliniex_30m_train.pickle') + df = pd.read_hdf('./data/poliniex_30m.hf', key='train') asset_names = df.columns.levels[0] # action w = np.random.random((len(asset_names))) @@ -16,14 +16,14 @@ def test_portfolio_env(): def test_portfolio_env_hold(): - df = pd.read_pickle('./data/poliniex_30m_train.pickle') + df = pd.read_hdf('./data/poliniex_30m.hf', key='train') asset_names = df.columns.levels[0] - env = PortfolioEnv(df=pd.read_pickle('./data/poliniex_30m_train.pickle')) + env = PortfolioEnv(df=df) env.reset() for _ in range(5): w = np.array([1.0] + [0] * (len(asset_names) - 1)) obs, reward, done, info = env.step(w) df = pd.DataFrame(info) - assert df.portfolio_value.iloc[-1] > 0.9999 + assert df.portfolio_value.iloc[-1] > 0.9999, 'portfolio should retain value if holding bitcoin' diff --git 
a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 0000000..c778298 --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,29 @@ +from src.data.utils import random_shift, normalize, scale_to_start + +import pandas as pd +import numpy as np + + +def test_random_shift(): + s = pd.Series(np.random.random(100)) * 10 + 3 + s1 = random_shift(s.copy(), 0) + assert (s == s1).all(), 'should not do anything if given 0' + + s2 = random_shift(s.copy(), 0.05) + assert (s2 / s).max() > 1.0, 'should shift more than 0.00 given 0.05' + assert (s2 / s).max() < 1.1, 'should shift less than 0.10 given 0.05' + np.testing.assert_almost_equal((s2 / s).mean(), 1.00, 2) + + +def test_normalize(): + s = pd.Series(np.random.random(10)) * 10 + 3 + s1 = normalize(s) + np.testing.assert_almost_equal(s1.std(), 1, err_msg='should make std 1') + np.testing.assert_almost_equal(s1.min(), 0, err_msg='should make min 0') + + +def test_scale_to_start(): + s = pd.Series(np.random.random(10)) + s1 = scale_to_start(s) + assert s1[0] == 1, 'start should be 1' + assert s1[1] != 1, 'not start should not be 1'