test the utils

2026-06-27 16:46:41 +08:00 · 2017-07-15 09:11:10 +08:00
parent 87983600a4
commit 0bd1c307f5
7 changed files with 62 additions and 24 deletions
@@ -0,0 +1 @@
+pytest==3.0.7
@@ -0,0 +1 @@
+eps = 1e-7
@@ -0,0 +1,23 @@
+import numpy as np
+
+from ..config import eps
+
+
+def random_shift(x, fraction):
+    """Jitter a pandas series by a random gain and offset, clipped to its own range."""
+    lo, hi = np.min(x), np.max(x)
+    gain = 1 + np.random.uniform(-fraction, fraction, size=x.shape)  # per-element, in 1 +/- fraction
+    offset = x.std() * np.random.uniform(-fraction, fraction, size=x.shape)  # within +/- fraction * std
+    return np.clip(x * gain + offset, lo, hi)
+
+
+def normalize(x):
+    """Shift a pandas series to a zero minimum, then divide by its std (plus eps)."""
+    shifted, spread = x - x.min(), x.std() + eps  # eps keeps the divisor non-zero
+    return shifted / spread
+
+
+def scale_to_start(x):
+    """Scale pandas series so that its first element (by position) becomes one."""
+    first = x.iloc[0] if hasattr(x, "iloc") else x[0]  # positional, not label, lookup
+    return (x + eps) / (first + eps)  # eps on both sides avoids dividing by a zero start
@@ -3,25 +3,8 @@ import numpy as np
 import gym
 import gym.spaces

-eps = 1e-7
-
-
-def random_shift(x, fraction):
-    min_x, max_x = np.min(x), np.max(x)
-    m = np.random.uniform(-fraction, fraction, size=x.shape) + 1
-    c = np.random.uniform(-fraction, fraction, size=x.shape) * x.std()
-    return np.clip(x * m + c, min_x, max_x)
-
-
-def normalise(x):
-    x = (x - x.min()) / (x.std + eps)
-    return x
-
-
-def scale_to_start(x):
-    """Scale episode so that it starts at one."""
-    x = (x + eps) / (x[0] + eps)
-    return x
+from ..config import eps
+from ..data.utils import normalize, random_shift, scale_to_start


 class DataSrc(object):
@@ -51,7 +34,8 @@ class DataSrc(object):

        # data processing
        if scale:
-            df = (df - df.mean(0) + eps) / (df.max(0) - df.min(0) + eps)
+            # df = (df - df.mean(0) + eps) / (df.max(0) - df.min(0) + eps)
+            df = df.apply(lambda x: normalize(x))

        # get rid of NaN's
        df = df.fillna(method="pad")
@@ -4,7 +4,7 @@ from src.environments.portfolio import PortfolioEnv


 def test_portfolio_env():
-    df = pd.read_pickle('./data/poliniex_30m_train.pickle')
+    df = pd.read_hdf('./data/poliniex_30m.hf', key='train')
    asset_names = df.columns.levels[0]
    # action
    w = np.random.random((len(asset_names)))
@@ -16,14 +16,14 @@ def test_portfolio_env():


 def test_portfolio_env_hold():
-    df = pd.read_pickle('./data/poliniex_30m_train.pickle')
+    df = pd.read_hdf('./data/poliniex_30m.hf', key='train')
    asset_names = df.columns.levels[0]

-    env = PortfolioEnv(df=pd.read_pickle('./data/poliniex_30m_train.pickle'))
+    env = PortfolioEnv(df=df)
    env.reset()
    for _ in range(5):
        w = np.array([1.0] + [0] * (len(asset_names) - 1))
        obs, reward, done, info = env.step(w)

    df = pd.DataFrame(info)
-    assert df.portfolio_value.iloc[-1] > 0.9999
+    assert df.portfolio_value.iloc[-1] > 0.9999, 'portfolio should retain value if holding bitcoin'
@@ -0,0 +1,29 @@
+from src.data.utils import random_shift, normalize, scale_to_start
+
+import pandas as pd
+import numpy as np
+
+
+def test_random_shift():
+    """Check random_shift is a no-op for fraction 0 and a small bounded shift for 0.05."""
+    s = 3 + 10 * pd.Series(np.random.random(100))
+    assert (s == random_shift(s.copy(), 0)).all(), 'should not do anything if given 0'
+
+    ratio = random_shift(s.copy(), 0.05) / s  # element-wise relative change
+    assert ratio.max() > 1.0, 'should shift more than 0.00 given 0.05'
+    assert ratio.max() < 1.1, 'should shift less than 0.10 given 0.05'
+    np.testing.assert_almost_equal(ratio.mean(), 1.00, 2)
+
+
+def test_normalize():
+    """Check that normalize yields a series whose std is ~1 and whose minimum is 0."""
+    s1 = normalize(pd.Series(np.random.random(10)) * 10 + 3)
+    np.testing.assert_almost_equal(s1.std(), 1, err_msg='should make std 1')
+    np.testing.assert_almost_equal(s1.min(), 0, err_msg='should make min 0')
+
+
+def test_scale_to_start():
+    """Check the scaled series starts at exactly 1 and that its second value does not."""
+    scaled = scale_to_start(pd.Series(np.random.random(10)))
+    assert scaled[0] == 1, 'start should be 1'
+    assert scaled[1] != 1, 'not start should not be 1'