mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 13:15:35 +08:00
81 lines
2.8 KiB
Python
Executable File
81 lines
2.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""Example of using PBT with RLlib.
|
|
|
|
Note that this requires a cluster with at least 8 GPUs in order for all trials
|
|
to run concurrently, otherwise PBT will round-robin train the trials which
|
|
is less efficient (or you can set {"gpu": 0} to use CPUs for SGD instead).
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import random
|
|
|
|
import ray
|
|
from ray.tune import run_experiments
|
|
from ray.tune.pbt import PopulationBasedTraining
|
|
|
|
if __name__ == "__main__":
|
|
|
|
# Postprocess the perturbed config to ensure it's still valid
|
|
def explore(config):
|
|
# ensure we collect enough timesteps to do sgd
|
|
if config["timesteps_per_batch"] < config["sgd_batchsize"] * 2:
|
|
config["timesteps_per_batch"] = config["sgd_batchsize"] * 2
|
|
# ensure we run at least one sgd iter
|
|
if config["num_sgd_iter"] < 1:
|
|
config["num_sgd_iter"] = 1
|
|
return config
|
|
|
|
pbt = PopulationBasedTraining(
|
|
time_attr="time_total_s",
|
|
reward_attr="episode_reward_mean",
|
|
perturbation_interval=120,
|
|
resample_probability=0.25,
|
|
# Specifies the mutations of these hyperparams
|
|
hyperparam_mutations={
|
|
"lambda": lambda: random.uniform(0.9, 1.0),
|
|
"clip_param": lambda: random.uniform(0.01, 0.5),
|
|
"sgd_stepsize": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
|
|
"num_sgd_iter": lambda: random.randint(1, 30),
|
|
"sgd_batchsize": lambda: random.randint(128, 16384),
|
|
"timesteps_per_batch": lambda: random.randint(2000, 160000),
|
|
},
|
|
custom_explore_fn=explore)
|
|
|
|
ray.init()
|
|
run_experiments(
|
|
{
|
|
"pbt_humanoid_test": {
|
|
"run": "PPO",
|
|
"env": "Humanoid-v1",
|
|
"repeat": 8,
|
|
"config": {
|
|
"kl_coeff":
|
|
1.0,
|
|
"num_workers":
|
|
8,
|
|
"devices": ["/gpu:0"],
|
|
"model": {
|
|
"free_log_std": True
|
|
},
|
|
# These params are tuned from a fixed starting value.
|
|
"lambda":
|
|
0.95,
|
|
"clip_param":
|
|
0.2,
|
|
"sgd_stepsize":
|
|
1e-4,
|
|
# These params start off randomly drawn from a set.
|
|
"num_sgd_iter":
|
|
lambda spec: random.choice([10, 20, 30]),
|
|
"sgd_batchsize":
|
|
lambda spec: random.choice([128, 512, 2048]),
|
|
"timesteps_per_batch":
|
|
lambda spec: random.choice([10000, 20000, 40000])
|
|
},
|
|
},
|
|
},
|
|
scheduler=pbt)
|