diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index e48a256ac..7efed5cbe 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -96,6 +96,40 @@ In an example below, we train A3C by specifying 8 workers through the config fla python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 --run=A3C --config '{"num_workers": 8}' +Evaluating Trained Agents +~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to save checkpoints from which to evaluate agents, +set ``--checkpoint-freq`` (number of training iterations between checkpoints) +when running ``train.py``. + + +You can evaluate a simple DQN agent with the following command + +:: + + python ray/python/ray/rllib/eval.py \ + /tmp/ray/default/DQN_CartPole-v0_0upjmdgr0/checkpoint-1 \ + --run DQN --env CartPole-v0 + + +By default, the script reconstructs a DQN agent from the checkpoint +located at ``/tmp/ray/default/DQN_CartPole-v0_0upjmdgr0/checkpoint-1`` +and renders its behavior in the environment specified by ``--env``. +Checkpoints can be found within the experiment directory, +specified by ``--local-dir`` and ``--experiment-name`` when running ``train.py``. + + +The ``eval.py`` script has a number of options you can show by running + +:: + + python ray/python/ray/rllib/eval.py --help + +The most important argument is the checkpoint positional argument from which +the script reconstructs the agent. The options ``--env`` and ``--run`` +must match the values chosen while running ``train.py``. 
+ Tuned Examples -------------- diff --git a/python/ray/rllib/eval.py b/python/ray/rllib/eval.py new file mode 100644 index 000000000..9b34e63ff --- /dev/null +++ b/python/ray/rllib/eval.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import gym +import json +import ray + +from ray.rllib.agent import get_agent_class + + +EXAMPLE_USAGE = """ +example usage: + ./eval.py /tmp/ray/checkpoint_dir/checkpoint-0 --run DQN --env CartPole-v0 +""" + + +parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="Evaluates a reinforcement learning agent " + "given a checkpoint.", epilog=EXAMPLE_USAGE) + +parser.add_argument( + "checkpoint", type=str, help="Checkpoint from which to evaluate.") +required_named = parser.add_argument_group("required named arguments") +required_named.add_argument( + "--run", type=str, required=True, + help="The algorithm or model to evaluate. This may refer to the name " + "of a built-in algorithm (e.g. RLLib's DQN or PPO), or a " + "user-defined trainable function or class registered in the " + "tune registry.") +required_named.add_argument( + "--env", type=str, help="The gym environment to use.") +parser.add_argument( + "--no-render", action="store_true", + help="Suppress rendering of the environment.") +parser.add_argument( + "--loop-forever", action="store_true", + help="Run evaluation of the agent forever.") +parser.add_argument( + "--config", default="{}", type=json.loads, + help="Algorithm-specific configuration (e.g. 
env, hyperparams).") + +if __name__ == "__main__": + args = parser.parse_args() + + # --env may be omitted when the config supplies an "env" entry instead. + if not args.env: + if not args.config.get("env"): + parser.error("the following arguments are required: --env") + args.env = args.config.get("env") + + ray.init() + + cls = get_agent_class(args.run) + # Forward --config so the parsed settings are not silently ignored. + agent = cls(env=args.env, config=args.config) + agent.restore(args.checkpoint) + + env = gym.make(args.env) + state = env.reset() + done = False + while args.loop_forever or not done: + # With --loop-forever, start a fresh episode once the last one ends. + if done: + state = env.reset() + action = agent.compute_action(state) + state, reward, done, _ = env.step(action) + if not args.no_render: + env.render() diff --git a/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh b/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh new file mode 100644 index 000000000..c4505832c --- /dev/null +++ b/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +# Exit on the first failing command so a failed run fails the test. +set -e + +# TODO: Test A3C +ALGS='DQN PPO' +GYM_ENV='CartPole-v0' + +for ALG in $ALGS +do + EXPERIMENT_NAME=$GYM_ENV'_'$ALG + python /ray/python/ray/rllib/train.py --run $ALG --env $GYM_ENV \ + --stop '{"training_iteration": 2}' --experiment-name $EXPERIMENT_NAME \ + --checkpoint-freq 1 + + EXPERIMENT_PATH='/tmp/ray/'$EXPERIMENT_NAME + CHECKPOINT_FOLDER=$(ls $EXPERIMENT_PATH) + CHECKPOINT=$EXPERIMENT_PATH'/'$CHECKPOINT_FOLDER'/checkpoint-1' + + python /ray/python/ray/rllib/eval.py $CHECKPOINT --run $ALG \ + --env $GYM_ENV --no-render + + # Clean up + rm -rf $EXPERIMENT_PATH +done diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index ff5269f79..ef99d8079 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -146,6 +146,9 @@ docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ # --stop '{"training_iteration": 2}' \ # --config '{"num_workers": 2, "use_lstm": false, "use_pytorch": true, "model": {"grayscale": true, "zero_mean": false, "dim": 80, "channel_major": true}}' +docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ + sh 
/ray/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh + docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_checkpoint_restore.py