diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index cebfc5d77..18adda82d 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -331,7 +331,7 @@ class Agent(Trainable): self.env_creator = lambda env_config: None # Merge the supplied config with the class default - merged_config = self._default_config.copy() + merged_config = copy.deepcopy(self._default_config) merged_config = deep_update(merged_config, config, self._allow_unknown_configs, self._allow_unknown_subkeys) diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index 35ca2e7c7..8f0b8ac82 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -200,7 +200,9 @@ class ModelCatalog(object): if options.get("use_lstm"): copy = dict(input_dict) copy["obs"] = model.last_layer - model = LSTM(copy, obs_space, num_outputs, options, state_in, + feature_space = gym.spaces.Box( + -1, 1, shape=(model.last_layer.shape[1], )) + model = LSTM(copy, feature_space, num_outputs, options, state_in, seq_lens) logger.debug("Created model {}: ({} of {}, {}, {}) -> {}, {}".format( diff --git a/python/ray/rllib/test/test_nested_spaces.py b/python/ray/rllib/test/test_nested_spaces.py index 3d9a569f6..490e6af15 100644 --- a/python/ray/rllib/test/test_nested_spaces.py +++ b/python/ray/rllib/test/test_nested_spaces.py @@ -174,7 +174,7 @@ class NestedSpacesTest(unittest.TestCase): }, })) - def doTestNestedDict(self, make_env): + def doTestNestedDict(self, make_env, test_lstm=False): ModelCatalog.register_custom_model("composite", DictSpyModel) register_env("nested", make_env) pg = PGAgent( @@ -184,6 +184,7 @@ class NestedSpacesTest(unittest.TestCase): "sample_batch_size": 5, "model": { "custom_model": "composite", + "use_lstm": test_lstm, }, }) pg.train() @@ -230,6 +231,9 @@ class NestedSpacesTest(unittest.TestCase): def testNestedDictGym(self): self.doTestNestedDict(lambda _: NestedDictEnv()) + def testNestedDictGymLSTM(self): + self.doTestNestedDict(lambda _: NestedDictEnv(), test_lstm=True) + def testNestedDictVector(self): self.doTestNestedDict( lambda _: VectorEnv.wrap(lambda i: NestedDictEnv())) diff --git a/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh b/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh deleted file mode 100644 index c4505832c..000000000 --- a/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -# TODO: Test AC3 -ALGS='DQN PPO' -GYM_ENV='CartPole-v0' - -for ALG in $ALGS -do - EXPERIMENT_NAME=$GYM_ENV'_'$ALG - python /ray/python/ray/rllib/train.py --run $ALG --env $GYM_ENV \ - --stop '{"training_iteration": 2}' --experiment-name $EXPERIMENT_NAME \ - --checkpoint-freq 1 - - EXPERIMENT_PATH='/tmp/ray/'$EXPERIMENT_NAME - CHECKPOINT_FOLDER=$(ls $EXPERIMENT_PATH) - CHECKPOINT=$EXPERIMENT_PATH'/'$CHECKPOINT_FOLDER'/checkpoint-1' - - python /ray/python/ray/rllib/eval.py $CHECKPOINT --run $ALG \ - --env $GYM_ENV --no-render - - # Clean up - rm -rf $EXPERIMENT_PATH -done diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 0b7b95037..93f00a0ee 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -234,9 +234,6 @@ docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ - sh /ray/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh - docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_local.py