diff --git a/release/rllib_tests/regression_tests/compact-regression-tests-tf.yaml b/release/rllib_tests/regression_tests/compact-regression-tests-tf.yaml new file mode 100644 index 000000000..4189d3886 --- /dev/null +++ b/release/rllib_tests/regression_tests/compact-regression-tests-tf.yaml @@ -0,0 +1,171 @@ +# This file runs RLlib algorithm learning tests for select algorithms on TF. +# It is suggested to run these on a single g3.16xlarge or p3.16xl node +# in a DLAMI / tensorflow_p36 env. + +# Note: RL runs are inherently high variance, so you'll have to check to +# see if the rewards reached seem reasonably in line with previous results. + +# You can find the reference results here: +# https://github.com/ray-project/ray/tree/master/release/release_logs + +a2c-tf-atari: + env: BreakoutNoFrameskip-v4 + run: A2C + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + rollout_fragment_length: 20 + clip_rewards: True + num_workers: 5 + num_envs_per_worker: 5 + num_gpus: 1 + lr_schedule: [ + [0, 0.0007], + [20000000, 0.000000000001], + ] + +apex-dqn-tf-atari: + env: BreakoutNoFrameskip-v4 + run: APEX + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + double_q: false + dueling: false + num_atoms: 1 + noisy: false + n_step: 3 + lr: .0001 + adam_epsilon: .00015 + hiddens: [512] + buffer_size: 1000000 + exploration_config: + epsilon_timesteps: 200000 + final_epsilon: 0.01 + prioritized_replay_alpha: 0.5 + final_prioritized_replay_beta: 1.0 + prioritized_replay_beta_annealing_timesteps: 2000000 + num_gpus: 1 + num_workers: 8 + num_envs_per_worker: 8 + rollout_fragment_length: 20 + train_batch_size: 512 + target_network_update_freq: 50000 + timesteps_per_iteration: 25000 + +dqn-tf-atari: + env: BreakoutNoFrameskip-v4 + run: DQN + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + double_q: false + dueling: false + num_atoms: 1 + noisy: false + prioritized_replay: false + n_step: 1 + target_network_update_freq: 
8000 + lr: .0000625 + adam_epsilon: .00015 + hiddens: [512] + learning_starts: 20000 + buffer_size: 1000000 + rollout_fragment_length: 4 + train_batch_size: 32 + exploration_config: + epsilon_timesteps: 200000 + final_epsilon: 0.01 + prioritized_replay_alpha: 0.5 + final_prioritized_replay_beta: 1.0 + prioritized_replay_beta_annealing_timesteps: 2000000 + num_gpus: 0.5 + timesteps_per_iteration: 10000 + +impala-tf-atari: + env: BreakoutNoFrameskip-v4 + run: IMPALA + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + rollout_fragment_length: 50 + train_batch_size: 500 + num_workers: 10 + num_envs_per_worker: 5 + clip_rewards: True + lr_schedule: [ + [0, 0.0005], + [20000000, 0.000000000001], + ] + num_gpus: 1 + +ppo-tf-atari: + env: BreakoutNoFrameskip-v4 + run: PPO + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + lambda: 0.95 + kl_coeff: 0.5 + clip_rewards: True + clip_param: 0.1 + vf_clip_param: 10.0 + entropy_coeff: 0.01 + train_batch_size: 5000 + rollout_fragment_length: 100 + sgd_minibatch_size: 500 + num_sgd_iter: 10 + num_workers: 10 + num_envs_per_worker: 5 + batch_mode: truncate_episodes + observation_filter: NoFilter + vf_share_layers: true + num_gpus: 1 + +# Expect roughly 1000 reward after 1h on 1GPU +sac-tf-halfcheetah-pybullet: + env: HalfCheetahBulletEnv-v0 + run: SAC + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: tf + horizon: 1000 + soft_horizon: false + Q_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + policy_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + tau: 0.005 + target_entropy: auto + no_done_at_end: true + n_step: 1 + rollout_fragment_length: 1 + prioritized_replay: true + train_batch_size: 256 + target_network_update_freq: 1 + timesteps_per_iteration: 1000 + learning_starts: 10000 + optimization: + actor_learning_rate: 0.0003 + critic_learning_rate: 0.0003 + entropy_learning_rate: 0.0003 + num_workers: 0 + num_gpus: 1 + clip_actions: false + 
normalize_actions: true + evaluation_interval: 1 + metrics_smoothing_episodes: 5 diff --git a/release/rllib_tests/regression_tests/compact-regression-test.yaml b/release/rllib_tests/regression_tests/compact-regression-tests-torch.yaml similarity index 64% rename from release/rllib_tests/regression_tests/compact-regression-test.yaml rename to release/rllib_tests/regression_tests/compact-regression-tests-torch.yaml index 7c5d633b8..2b259aeff 100644 --- a/release/rllib_tests/regression_tests/compact-regression-test.yaml +++ b/release/rllib_tests/regression_tests/compact-regression-tests-torch.yaml @@ -1,81 +1,37 @@ -# This file runs on a single g3.16xl or p3.16xl node. It is suggested -# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are -# inherently high variance, so you'll have to check to see if the -# rewards reached seem reasonably in line with previous results. -# +# This file runs RLlib algorithm learning tests for select algorithms on torch. +# It is suggested to run these on a single g3.16xlarge or p3.16xl node +# in a DLAMI / tensorflow_p36 env. +# Note: RL runs are inherently high variance, so you'll have to check to +# see if the rewards reached seem reasonably in line with previous results. 
# You can find the reference results here: # https://github.com/ray-project/ray/tree/master/release/release_logs -atari-impala: + +a2c-torch-atari: env: BreakoutNoFrameskip-v4 - run: IMPALA - num_samples: 4 - stop: - time_total_s: 3600 - config: - rollout_fragment_length: 50 - train_batch_size: 500 - num_workers: 10 - num_envs_per_worker: 5 - clip_rewards: True - lr_schedule: [ - [0, 0.0005], - [20000000, 0.000000000001], - ] - num_gpus: 1 -atari-ppo-tf: - env: BreakoutNoFrameskip-v4 - run: PPO - num_samples: 4 - stop: - time_total_s: 3600 - config: - lambda: 0.95 - kl_coeff: 0.5 - clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - train_batch_size: 5000 - rollout_fragment_length: 100 - sgd_minibatch_size: 500 - num_sgd_iter: 10 - num_workers: 10 - num_envs_per_worker: 5 - batch_mode: truncate_episodes - observation_filter: NoFilter - vf_share_layers: true - num_gpus: 1 -atari-ppo-torch: - env: BreakoutNoFrameskip-v4 - run: PPO - num_samples: 4 + run: A2C + num_samples: 2 stop: time_total_s: 3600 config: framework: torch - lambda: 0.95 - kl_coeff: 0.5 + rollout_fragment_length: 20 clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - train_batch_size: 5000 - rollout_fragment_length: 100 - sgd_minibatch_size: 500 - num_sgd_iter: 10 - num_workers: 10 + num_workers: 5 num_envs_per_worker: 5 - batch_mode: truncate_episodes - observation_filter: NoFilter - vf_share_layers: true num_gpus: 1 -apex: + lr_schedule: [ + [0, 0.0007], + [20000000, 0.000000000001], + ] + +apex-dqn-torch-atari: env: BreakoutNoFrameskip-v4 run: APEX - num_samples: 4 + num_samples: 2 stop: time_total_s: 3600 config: + framework: torch double_q: false dueling: false num_atoms: 1 @@ -98,29 +54,15 @@ apex: train_batch_size: 512 target_network_update_freq: 50000 timesteps_per_iteration: 25000 -atari-a2c: - env: BreakoutNoFrameskip-v4 - run: A2C - num_samples: 4 - stop: - time_total_s: 3600 - config: - rollout_fragment_length: 20 - 
clip_rewards: True - num_workers: 5 - num_envs_per_worker: 5 - num_gpus: 1 - lr_schedule: [ - [0, 0.0007], - [20000000, 0.000000000001], - ] -atari-basic-dqn: + +dqn-torch-atari: env: BreakoutNoFrameskip-v4 run: DQN - num_samples: 4 + num_samples: 2 stop: time_total_s: 3600 config: + framework: torch double_q: false dueling: false num_atoms: 1 @@ -141,5 +83,87 @@ atari-basic-dqn: prioritized_replay_alpha: 0.5 final_prioritized_replay_beta: 1.0 prioritized_replay_beta_annealing_timesteps: 2000000 - num_gpus: 0.2 + num_gpus: 0.5 timesteps_per_iteration: 10000 + +impala-torch-atari: + env: BreakoutNoFrameskip-v4 + run: IMPALA + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: torch + rollout_fragment_length: 50 + train_batch_size: 500 + num_workers: 10 + num_envs_per_worker: 5 + clip_rewards: True + lr_schedule: [ + [0, 0.0005], + [20000000, 0.000000000001], + ] + num_gpus: 1 + +ppo-torch-atari: + env: BreakoutNoFrameskip-v4 + run: PPO + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: torch + lambda: 0.95 + kl_coeff: 0.5 + clip_rewards: True + clip_param: 0.1 + vf_clip_param: 10.0 + entropy_coeff: 0.01 + train_batch_size: 5000 + rollout_fragment_length: 100 + sgd_minibatch_size: 500 + num_sgd_iter: 10 + num_workers: 10 + num_envs_per_worker: 5 + batch_mode: truncate_episodes + observation_filter: NoFilter + vf_share_layers: true + num_gpus: 1 + +# Expect roughly 1000 reward after 1h on 1GPU +sac-torch-halfcheetah-pybullet: + env: HalfCheetahBulletEnv-v0 + run: SAC + num_samples: 2 + stop: + time_total_s: 3600 + config: + framework: torch + horizon: 1000 + soft_horizon: false + Q_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + policy_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + tau: 0.005 + target_entropy: auto + no_done_at_end: true + n_step: 1 + rollout_fragment_length: 1 + prioritized_replay: true + train_batch_size: 256 + target_network_update_freq: 1 + timesteps_per_iteration: 1000 + 
learning_starts: 10000 + optimization: + actor_learning_rate: 0.0003 + critic_learning_rate: 0.0003 + entropy_learning_rate: 0.0003 + num_workers: 0 + num_gpus: 1 + clip_actions: false + normalize_actions: true + evaluation_interval: 1 + metrics_smoothing_episodes: 5 diff --git a/release/rllib_tests/regression_tests/requirements.txt b/release/rllib_tests/regression_tests/requirements.txt index 69bde8cf2..72192b332 100644 --- a/release/rllib_tests/regression_tests/requirements.txt +++ b/release/rllib_tests/regression_tests/requirements.txt @@ -1 +1,2 @@ -ray[rllib] \ No newline at end of file +ray[rllib] +pybullet diff --git a/release/rllib_tests/regression_tests/run.sh b/release/rllib_tests/regression_tests/run.sh index ce932a732..4a692c2e5 100755 --- a/release/rllib_tests/regression_tests/run.sh +++ b/release/rllib_tests/regression_tests/run.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -ray_version="" +ray_version="" commit="" ray_branch="" @@ -49,4 +49,7 @@ source activate tensorflow_p36 && pip install -U "$wheel" source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]" source activate tensorflow_p36 && pip install torch==1.6 torchvision source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0 -source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml +# Run tf learning tests. +source activate tensorflow_p36 && rllib train -f compact-regression-tests-tf.yaml +# Run torch learning tests. +source activate tensorflow_p36 && rllib train -f compact-regression-tests-torch.yaml diff --git a/rllib/tuned_examples/sac/halfcheetah-pybullet-sac.yaml b/rllib/tuned_examples/sac/halfcheetah-pybullet-sac.yaml new file mode 100644 index 000000000..a653fe3a1 --- /dev/null +++ b/rllib/tuned_examples/sac/halfcheetah-pybullet-sac.yaml @@ -0,0 +1,37 @@ +halfcheetah-pybullet-sac: + env: HalfCheetahBulletEnv-v0 + run: SAC + stop: + episode_reward_mean: 9000 + config: + # Works for both torch and tf. 
+ framework: tf + horizon: 1000 + soft_horizon: false + Q_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + policy_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256] + tau: 0.005 + target_entropy: auto + no_done_at_end: true + n_step: 1 + rollout_fragment_length: 1 + prioritized_replay: true + train_batch_size: 256 + target_network_update_freq: 1 + timesteps_per_iteration: 1000 + learning_starts: 10000 + optimization: + actor_learning_rate: 0.0003 + critic_learning_rate: 0.0003 + entropy_learning_rate: 0.0003 + num_workers: 0 + num_gpus: 1 + clip_actions: false + normalize_actions: true + evaluation_interval: 1 + metrics_smoothing_episodes: 5 +