# rl-portfolio-management/test/test_env.py

# Silence the many messages from the gym logger by only letting ERROR and
# above through on the 'gym.envs.tests.spec_list' logger.
# NOTE(review): this is configured before the gym imports below, presumably so
# that messages emitted while those modules are imported are suppressed as
# well -- confirm before reordering these lines.
import logging
logger = logging.getLogger('gym.envs.tests.spec_list')
logger.setLevel(logging.ERROR)

import pytest
import pandas as pd
import numpy as np
import gym
from gym.envs.tests import test_envs

from rl_portfolio_management.environments import PortfolioEnv, env_specs


@pytest.mark.parametrize("spec_id", env_specs)
def test_src_outputs(spec_id):
    """Check that the data source is not handing future prices to the model.

    Reasoning behind the assertions (v(t) is the price at step t):

    * the relative price vector used for this step's returns is
      y(t) = v(t) / v(t-1), while y(t-1) = v(t-1) / v(t-2);
    * the previous step's price data is
      X(t-1) = [..., v(t-3)/v(t-1), v(t-2)/v(t-1), v(t-1)/v(t-1)].

    So the reciprocal of the second-to-last entry of X must equal the y
    returned by the same source step, and must NOT equal the y returned by
    the following step.
    """
    env = gym.envs.spec(spec_id).make()
    history, rel_prices_now, _ = env.src._step()
    _, rel_prices_next, _ = env.src._step()

    # y is compared from index 1 onwards (the first entry has no counterpart
    # in the history slice -- presumably it is the cash asset; confirm).
    inverse_prev_price = 1 / history[:, -2, 0]
    np.testing.assert_almost_equal(inverse_prev_price, rel_prices_now[1:])

    # The same history must not line up with the next step's relative prices.
    matches_future = True
    try:
        np.testing.assert_almost_equal(inverse_prev_price, rel_prices_next[1:])
    except AssertionError:
        matches_future = False
    if matches_future:
        raise AssertionError(
            "These should not be equal, or we are giving future prices to the model")

    # The last price in X is scaled by itself (eq 18), so it must be exactly 1.
    assert (history[:, -1, 0] == 1).all()


@pytest.mark.parametrize("spec_id", env_specs)
def test_gym_env(spec_id):
    """Hand the registered spec to OpenAI gym's own generic environment test.

    That upstream test is relied on for the repeatability, observation-shape
    and similar checks; nothing extra is asserted here.
    """
    test_envs.test_env(gym.envs.spec(spec_id))


@pytest.mark.parametrize("spec_id", env_specs)
def test_env_outputs(spec_id):
    """Sanity-check what a single ``step`` and a ``reset`` return.

    Verifies that both calls yield identically shaped "history" and "weights"
    observations, that the stepped observation lies inside the observation
    space, that the reward is finite, that the episode is not finished, and
    that every info value is a finite int or float.
    """
    env = gym.envs.spec(spec_id).make()
    env.seed(0)

    # Random action, normalised in place so that it sums to one.
    weights = env.action_space.sample()
    weights /= weights.sum()

    step_obs, reward, done, info = env.step(weights)
    reset_obs = env.reset()

    for key in ("history", "weights"):
        assert step_obs[key].shape == reset_obs[key].shape, 'rest and step should output same shaped observations'

    in_space = env.observation_space.contains(step_obs)
    assert in_space, 'state should be within observation space'
    assert np.isfinite(reward), 'reward should be finite'
    assert not done

    for name in info:
        value = info[name]
        assert np.isfinite(value), 'env info item %s=%s should be finite' % (name, value)
        is_plain_number = isinstance(value, (int, float))
        assert is_plain_number, 'env info item %s=%s should be int or float' % (name, value)


@pytest.mark.parametrize("spec_id", env_specs)
def test_portfolio_env_random_agent(spec_id):
    """Drive the environment with 20 random, normalised actions.

    Every step must return an observation inside the observation space, a
    finite reward and ``done == False``.  Afterwards the final portfolio value
    recorded in ``env.infos`` must be within 10% (relative) of the final
    market value.
    """
    env = gym.envs.spec(spec_id).make()
    env.seed(0)
    env.reset()

    for step_idx in range(20):
        weights = env.action_space.sample()
        weights /= weights.sum()  # in-place normalisation to sum to one

        obs, reward, done, _ = env.step(weights)

        in_space = env.observation_space.contains(obs)
        assert in_space, 'state should be within observation space'
        assert np.isfinite(reward), 'reward should be finite'
        assert not done, "shouldn't be done after %s steps" % step_idx

    history = pd.DataFrame(env.infos)
    np.testing.assert_allclose(
        history["portfolio_value"].iloc[-1],
        history["market_value"].iloc[-1],
        rtol=0.1,
        err_msg='should be similar to market values after 20 random steps')


@pytest.mark.parametrize("spec_id", env_specs)
def test_portfolio_env_hold(spec_id):
    """Check that the portfolio value stays stable when nothing is traded.

    For 5 steps all weight is placed on the first action component, and the
    last recorded portfolio value must stay strictly between 0.9999 and 1.01.
    NOTE(review): the first component is presumably the cash asset (the
    assertion messages call it bitcoin) -- confirm against the environment.
    """
    env = gym.envs.spec(spec_id).make()
    env.seed(0)
    env.reset()

    for _ in range(5):
        # A fresh sample is drawn each step and zeroed, keeping the action's
        # shape and dtype; then everything goes into slot 0.
        hold = env.action_space.sample()
        hold *= 0
        hold[0] = 1
        env.step(hold)

    final_value = pd.DataFrame(env.infos)["portfolio_value"].iloc[-1]
    assert 0.9999 < final_value < 1.01, 'portfolio should retain value if holding bitcoin'


def test_scaled_non_price_cols():
    """Test that non-price columns are normalised when ``scale=True``.

    Builds a ``PortfolioEnv`` with a very large window (all but 300 rows of
    the training frame), flattens the "history" observation to one row per
    sample and one column per feature, and checks that the columns after the
    price columns have a mean near 0 and a standard deviation near 1.
    """
    df = pd.read_hdf('./data/poloniex_30m_vol.hf', key='train')
    env1 = PortfolioEnv(df=df, scale=True, window_length=len(df) - 300)
    env1.seed(0)
    obs1 = env1.reset()

    nb_cols = len(env1.src.features)
    nb_price_cols = len(env1.src.price_columns)

    # Flatten once: every row is one sample, every column one feature.
    flat_history = obs1["history"].reshape((-1, nb_cols))

    # Price columns come first; everything after them is a non-price column.
    non_price_means = flat_history.mean(0)[nb_price_cols:]
    non_price_std = flat_history.std(0)[nb_price_cols:]

    # If normalized: for a large window, non-price columns should have
    # mean close to 0 and std close to 1.
    np.testing.assert_almost_equal(
        non_price_means, [0, 0], decimal=1,
        err_msg='non price columns should be normalized to have a mean close to zero')
    np.testing.assert_allclose(
        non_price_std, [1, 1], rtol=0.1,
        err_msg='non price columns should be normalized to have a std close to one')


def test_scaled():
    """Compare the first observation of a scaled and an unscaled environment.

    Two ``PortfolioEnv`` instances are built from the same frame with a
    40-step window, one with ``scale=False`` and one with ``scale=True``.
    Their price columns must differ everywhere, and in the scaled
    environment the price columns of the last window step must be within 10%
    of one.
    """
    df = pd.read_hdf('./data/poloniex_30m_vol.hf', key='train')

    def first_observation(scale):
        # Build, seed and reset an environment; hand back both the env and
        # the observation produced by reset().
        env = PortfolioEnv(df=df, scale=scale, window_length=40)
        env.seed(0)
        return env, env.reset()

    _, raw_obs = first_observation(False)
    scaled_env, scaled_obs = first_observation(True)

    n_price = len(scaled_env.src.price_columns)
    raw_prices = raw_obs["history"][:, :, :n_price]
    scaled_prices = scaled_obs["history"][:, :, :n_price]

    differs_everywhere = (raw_prices != scaled_prices).all()
    assert differs_everywhere, 'scaled and non-scaled data should differ'

    # If scaled by the last opening price, the last step's prices of a small
    # window should be near 1.
    np.testing.assert_allclose(
        scaled_prices[:, -1, :], 1, rtol=0.1,
        err_msg='last prices should be normalized to be close to one')


@pytest.mark.parametrize("spec_id", env_specs)
def test_invalid_actions(spec_id):
    """Test that ``env.step`` raises for every kind of invalid action.

    Starting from a valid (sampled, normalised) action, variants are derived
    that do not sum to one, have the wrong shape, or contain non-finite
    values.  Each of them must make ``env.step`` raise some exception; if a
    step succeeds the test fails with a message naming the offending action.
    """
    env = gym.envs.spec(spec_id).make()
    env.seed(0)
    env.reset()

    valid_action = env.action_space.sample()
    valid_action /= valid_action.sum()

    invalid_actions = [
        # invalid: don't sum to one
        valid_action * 0,
        valid_action * -1,
        valid_action * 10,
        # invalid shapes
        valid_action[:1],
        [valid_action],
        # invalid values
        valid_action * np.nan,
        valid_action * np.inf,
    ]
    for action in invalid_actions:
        try:
            env.step(action)
        except Exception:
            # Any exception type counts as a rejection of the action.
            pass
        else:
            # Wrap in a 1-tuple so the action is always formatted as a single
            # value, whatever sequence type it happens to be.
            raise Exception('Expected error for invalid action %s' % (action,))


@pytest.mark.parametrize("spec_id", env_specs)
def test_costs(spec_id):
    """Check the cost reported for two simple, complete reallocations.

    After a first step with all weight on slot 0, moving everything to slot 1
    must report a cost of ``env.sim.cost``; then moving everything from slot 1
    to slot 2 must report twice that.  The actions are hard-coded with four
    components.
    """
    env = gym.envs.spec(spec_id).make()
    env.seed(0)
    env.reset()

    all_in_cash = np.array([1, 0, 0, 0])
    all_in_asset1 = np.array([0, 1, 0, 0])
    all_in_asset2 = np.array([0, 0, 1, 0])

    # Start from a 100% cash position; nothing is asserted on this step.
    env.step(all_in_cash)

    _, _, _, info = env.step(all_in_asset1)
    np.testing.assert_almost_equal(
        info['cost'],
        env.sim.cost,
        err_msg='trading 100% cash for asset1 should cost 1*trading_cost')

    _, _, _, info = env.step(all_in_asset2)
    np.testing.assert_almost_equal(
        info['cost'],
        env.sim.cost * 2,
        err_msg='trading 100% asset1 for asset2 should cost 2*trading_cost')