diff --git a/python/ray/rllib/ppo/loss.py b/python/ray/rllib/ppo/loss.py index 8c15e68ff..e88b7fc7e 100644 --- a/python/ray/rllib/ppo/loss.py +++ b/python/ray/rllib/ppo/loss.py @@ -32,7 +32,7 @@ class ProximalPolicyLoss(object): # Do not split the last layer of the value function into # mean parameters and standard deviation parameters and # do not make the standard deviations free variables. - vf_config["free_logstd"] = False + vf_config["free_log_std"] = False with tf.variable_scope("value_function"): self.value_function = ModelCatalog.get_model( observations, 1, vf_config).outputs diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 4c95851d1..ed50893f4 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -68,7 +68,7 @@ docker run --shm-size=10G --memory=10G $DOCKER_SHA \ --env CartPole-v1 \ --alg PPO \ --num-iterations 2 \ - --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1}' + --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"free_log_std": true}}' docker run --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \