mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 17:11:05 +08:00
68660453e4
This adds a simple DQN+PPO example for multi-agent. We don't do anything fancy here, just syncing weights between two separate trainers. This potentially is wasting some compute, but is very simple to set up. It might be nice to share experience collection between the top-level trainers in the future.
296 lines
11 KiB
Bash
Executable File
296 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
# Cause the script to exit if a single command fails.
|
|
set -e
|
|
|
|
# Show explicitly which commands are currently running.
|
|
set -x
|
|
|
|
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
|
|
|
|
DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache)
|
|
echo "Using Docker image" $DOCKER_SHA
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env PongDeterministic-v0 \
|
|
--run A3C \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 16}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v1 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"free_log_std": true}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v1 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v1 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v1 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "use_gae": false}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env Pendulum-v0 \
|
|
--run ES \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "timesteps_per_batch": 100}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env Pong-v0 \
|
|
--run ES \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "timesteps_per_batch": 100}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run A3C \
|
|
--stop '{"training_iteration": 2}' \
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run DQN \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run DQN \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run APEX \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2, "timesteps_per_iteration": 1000, "gpu": false}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env FrozenLake-v0 \
|
|
--run DQN \
|
|
--stop '{"training_iteration": 2}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env FrozenLake-v0 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_sgd_iter": 10, "sgd_batchsize": 64, "timesteps_per_batch": 1000, "num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env PongDeterministic-v4 \
|
|
--run DQN \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env MontezumaRevenge-v0 \
|
|
--run PPO \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env PongDeterministic-v4 \
|
|
--run A3C \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 80, "channel_major": true}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v1 \
|
|
--run A3C \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2, "use_pytorch": true}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run DQN \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run PG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"sample_batch_size": 500, "num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run PG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env CartPole-v0 \
|
|
--run PG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env Pong-v0 \
|
|
--run PG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"sample_batch_size": 500, "num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env FrozenLake-v0 \
|
|
--run PG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"sample_batch_size": 500, "num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env Pendulum-v0 \
|
|
--run DDPG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env MountainCarContinuous-v0 \
|
|
--run DDPG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
rllib train \
|
|
--env MountainCarContinuous-v0 \
|
|
--run DDPG \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 1}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/train.py \
|
|
--env Pendulum-v0 \
|
|
--run APEX_DDPG \
|
|
--ray-num-cpus 8 \
|
|
--stop '{"training_iteration": 2}' \
|
|
--config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100}'
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
sh /ray/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_checkpoint_restore.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_policy_evaluator.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_serving_env.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_lstm.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_multi_agent_env.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/test/test_supported_spaces.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/tune_mnist_ray.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/pbt_example.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/hyperband_example.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/async_hyperband_example.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/hyperopt_example.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/tune/examples/tune_mnist_keras.py \
|
|
--smoke-test
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2
|
|
|
|
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
|
|
python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2
|
|
|
|
python $ROOT_DIR/multi_node_docker_test.py \
|
|
--docker-image=$DOCKER_SHA \
|
|
--num-nodes=5 \
|
|
--num-redis-shards=10 \
|
|
--test-script=/ray/test/jenkins_tests/multi_node_tests/test_0.py
|
|
|
|
python $ROOT_DIR/multi_node_docker_test.py \
|
|
--docker-image=$DOCKER_SHA \
|
|
--num-nodes=5 \
|
|
--num-redis-shards=5 \
|
|
--num-gpus=0,1,2,3,4 \
|
|
--num-drivers=7 \
|
|
--driver-locations=0,1,0,1,2,3,4 \
|
|
--test-script=/ray/test/jenkins_tests/multi_node_tests/remove_driver_test.py
|
|
|
|
python $ROOT_DIR/multi_node_docker_test.py \
|
|
--docker-image=$DOCKER_SHA \
|
|
--num-nodes=5 \
|
|
--num-redis-shards=2 \
|
|
--num-gpus=0,0,5,6,50 \
|
|
--num-drivers=100 \
|
|
--test-script=/ray/test/jenkins_tests/multi_node_tests/many_drivers_test.py
|
|
|
|
python $ROOT_DIR/multi_node_docker_test.py \
|
|
--docker-image=$DOCKER_SHA \
|
|
--num-nodes=1 \
|
|
--mem-size=60G \
|
|
--shm-size=60G \
|
|
--test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py
|