mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 15:27:47 +08:00
[rllib] add tuned example for pendulum (#1552)
This commit is contained in:
@@ -64,7 +64,7 @@ DEFAULT_CONFIG = {
|
||||
"timesteps_per_batch": 4000,
|
||||
# Each task performs rollouts until at least this
|
||||
# number of steps is obtained
|
||||
"min_steps_per_task": 1000,
|
||||
"min_steps_per_task": 200,
|
||||
# Number of actors used to collect the rollouts
|
||||
"num_workers": 5,
|
||||
# Resource requirements for remote actors
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
# Reward can be expected to improve to -140 within ~300-500k timesteps
|
||||
pendulum-ppo:
|
||||
env: Pendulum-v0
|
||||
run: PPO
|
||||
resources:
|
||||
cpu: 4
|
||||
config:
|
||||
timesteps_per_batch: 2048
|
||||
lambda: 0.1
|
||||
gamma: 0.95
|
||||
sgd_stepsize: 0.0003
|
||||
sgd_batchsize: 64
|
||||
num_sgd_iter: 10
|
||||
model:
|
||||
fcnet_hiddens: [64, 64]
|
||||
Reference in New Issue
Block a user