From fce006238018f0a0095c452f84f828f6fc91297d Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Sat, 30 Mar 2019 14:07:50 -0700
Subject: [PATCH] [rllib] Switch to tune.run() instead of run_experiments()
 (#4515)

---
 doc/source/rllib-training.rst                 | 80 +++++++++----------
 python/ray/rllib/examples/batch_norm_model.py | 22 +++--
 python/ray/rllib/examples/cartpole_lstm.py    | 29 +++----
 python/ray/rllib/examples/custom_env.py       | 31 ++++---
 python/ray/rllib/examples/custom_loss.py      | 28 +++----
 .../examples/custom_metrics_and_callbacks.py  | 30 ++++---
 python/ray/rllib/examples/custom_train_fn.py  | 22 +++--
 .../rllib/examples/hierarchical_training.py   | 59 +++++++-------
 .../ray/rllib/examples/multiagent_cartpole.py | 29 +++----
 .../examples/parametric_action_cartpole.py    | 28 +++----
 python/ray/rllib/examples/twostep_game.py     | 20 ++---
 python/ray/rllib/utils/debug.py               |  2 +-
 python/ray/tune/tune.py                       |  2 +-
 13 files changed, 174 insertions(+), 208 deletions(-)

diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index c192b674f..9421f1c48 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -145,21 +145,19 @@ All RLlib agents are compatible with the `Tune API <tune-usage.html>`__. This en
 .. code-block:: python
 
     import ray
-    import ray.tune as tune
+    from ray import tune
 
     ray.init()
-    tune.run_experiments({
-        "my_experiment": {
-            "run": "PPO",
+    tune.run(
+        "PPO",
+        stop={"episode_reward_mean": 200},
+        config={
             "env": "CartPole-v0",
-            "stop": {"episode_reward_mean": 200},
-            "config": {
-                "num_gpus": 0,
-                "num_workers": 1,
-                "lr": tune.grid_search([0.01, 0.001, 0.0001]),
-            },
+            "num_gpus": 0,
+            "num_workers": 1,
+            "lr": tune.grid_search([0.01, 0.001, 0.0001]),
         },
-    })
+    )
 
 Tune will schedule the trials to run in parallel on your Ray cluster:
 
@@ -258,20 +256,18 @@ You can provide callback functions to be called at points during policy evaluati
             info["agent"].__name__, info["result"]["episodes_this_iter"]))
 
     ray.init()
-    trials = tune.run_experiments({
-        "test": {
+    trials = tune.run(
+        "PG",
+        config={
             "env": "CartPole-v0",
-            "run": "PG",
-            "config": {
-                "callbacks": {
-                    "on_episode_start": tune.function(on_episode_start),
-                    "on_episode_step": tune.function(on_episode_step),
-                    "on_episode_end": tune.function(on_episode_end),
-                    "on_train_result": tune.function(on_train_result),
-                },
+            "callbacks": {
+                "on_episode_start": tune.function(on_episode_start),
+                "on_episode_step": tune.function(on_episode_step),
+                "on_episode_end": tune.function(on_episode_end),
+                "on_train_result": tune.function(on_train_result),
             },
-        }
-    })
+        },
+    )
 
 Custom metrics can be accessed and visualized like any other training result:
 
@@ -306,20 +302,18 @@ Approach 1: Use the Agent API and update the environment between calls to ``trai
                     lambda env: env.set_phase(phase)))
 
     ray.init()
-    tune.run_experiments({
-        "curriculum": {
-            "run": train,
-            "config": {
-                "num_gpus": 0,
-                "num_workers": 2,
-            },
-            "resources_per_trial": {
-                "cpu": 1,
-                "gpu": lambda spec: spec.config.num_gpus,
-                "extra_cpu": lambda spec: spec.config.num_workers,
-            },
+    tune.run(
+        train,
+        config={
+            "num_gpus": 0,
+            "num_workers": 2,
         },
-    })
+        resources_per_trial={
+            "cpu": 1,
+            "gpu": lambda spec: spec.config.num_gpus,
+            "extra_cpu": lambda spec: spec.config.num_workers,
+        },
+    )
 
 Approach 2: Use the callbacks API to update the environment on new training results:
 
@@ -342,17 +336,15 @@ Approach 2: Use the callbacks API to update the environment on new training resu
                 lambda env: env.set_phase(phase)))
 
     ray.init()
-    tune.run_experiments({
-        "curriculum": {
-            "run": "PPO",
+    tune.run(
+        "PPO",
+        config={
             "env": YourEnv,
-            "config": {
-                "callbacks": {
-                    "on_train_result": tune.function(on_train_result),
-                },
+            "callbacks": {
+                "on_train_result": tune.function(on_train_result),
             },
         },
-    })
+    )
 
 Debugging
 ---------
diff --git a/python/ray/rllib/examples/batch_norm_model.py b/python/ray/rllib/examples/batch_norm_model.py
index abd4b5366..7852a62c2 100644
--- a/python/ray/rllib/examples/batch_norm_model.py
+++ b/python/ray/rllib/examples/batch_norm_model.py
@@ -9,9 +9,9 @@ import tensorflow as tf
 import tensorflow.contrib.slim as slim
 
 import ray
+from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.models.misc import normc_initializer
-from ray.tune import run_experiments
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--num-iters", type=int, default=200)
@@ -47,18 +47,14 @@ if __name__ == "__main__":
     ray.init()
 
     ModelCatalog.register_custom_model("bn_model", BatchNormModel)
-    run_experiments({
-        "batch_norm_demo": {
-            "run": args.run,
+    tune.run(
+        args.run,
+        stop={"training_iteration": args.num_iters},
+        config={
             "env": "Pendulum-v0" if args.run == "DDPG" else "CartPole-v0",
-            "stop": {
-                "training_iteration": args.num_iters
-            },
-            "config": {
-                "model": {
-                    "custom_model": "bn_model",
-                },
-                "num_workers": 0,
+            "model": {
+                "custom_model": "bn_model",
             },
+            "num_workers": 0,
         },
-    })
+    )
diff --git a/python/ray/rllib/examples/cartpole_lstm.py b/python/ray/rllib/examples/cartpole_lstm.py
index 1a70a0d04..39996e6cb 100644
--- a/python/ray/rllib/examples/cartpole_lstm.py
+++ b/python/ray/rllib/examples/cartpole_lstm.py
@@ -179,20 +179,15 @@ if __name__ == "__main__":
         },
     }
 
-    tune.run_experiments({
-        "test": {
-            "env": "cartpole_stateless",
-            "run": args.run,
-            "stop": {
-                "episode_reward_mean": args.stop
-            },
-            "config": dict(
-                configs[args.run], **{
-                    "model": {
-                        "use_lstm": True,
-                        "lstm_use_prev_action_reward": args.
-                        use_prev_action_reward,
-                    },
-                }),
-        }
-    })
+    tune.run(
+        args.run,
+        stop={"episode_reward_mean": args.stop},
+        config=dict(
+            configs[args.run], **{
+                "env": "cartpole_stateless",
+                "model": {
+                    "use_lstm": True,
+                    "lstm_use_prev_action_reward": args.use_prev_action_reward,
+                },
+            }),
+    )
diff --git a/python/ray/rllib/examples/custom_env.py b/python/ray/rllib/examples/custom_env.py
index 8535559d3..073959e82 100644
--- a/python/ray/rllib/examples/custom_env.py
+++ b/python/ray/rllib/examples/custom_env.py
@@ -18,7 +18,8 @@ from ray.rllib.models import FullyConnectedNetwork, Model, ModelCatalog
 from gym.spaces import Discrete, Box
 
 import ray
-from ray.tune import run_experiments, grid_search
+from ray import tune
+from ray.tune import grid_search
 
 
 class SimpleCorridor(gym.Env):
@@ -66,22 +67,20 @@ if __name__ == "__main__":
     # register_env("corridor", lambda config: SimpleCorridor(config))
     ray.init()
     ModelCatalog.register_custom_model("my_model", CustomModel)
-    run_experiments({
-        "demo": {
-            "run": "PPO",
+    tune.run(
+        "PPO",
+        stop={
+            "timesteps_total": 10000,
+        },
+        config={
             "env": SimpleCorridor,  # or "corridor" if registered above
-            "stop": {
-                "timesteps_total": 10000,
+            "model": {
+                "custom_model": "my_model",
             },
-            "config": {
-                "model": {
-                    "custom_model": "my_model",
-                },
-                "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
-                "num_workers": 1,  # parallelism
-                "env_config": {
-                    "corridor_length": 5,
-                },
+            "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
+            "num_workers": 1,  # parallelism
+            "env_config": {
+                "corridor_length": 5,
             },
         },
-    })
+    )
diff --git a/python/ray/rllib/examples/custom_loss.py b/python/ray/rllib/examples/custom_loss.py
index 4f15b9c96..1f04f0fb5 100644
--- a/python/ray/rllib/examples/custom_loss.py
+++ b/python/ray/rllib/examples/custom_loss.py
@@ -18,7 +18,7 @@ import os
 import tensorflow as tf
 
 import ray
-from ray.tune import run_experiments
+from ray import tune
 from ray.rllib.models import (Categorical, FullyConnectedNetwork, Model,
                               ModelCatalog)
 from ray.rllib.models.model import restore_original_dimensions
@@ -82,21 +82,19 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     ModelCatalog.register_custom_model("custom_loss", CustomLossModel)
-    run_experiments({
-        "custom_loss": {
-            "run": "PG",
+    tune.run(
+        "PG",
+        stop={
+            "training_iteration": args.iters,
+        },
+        config={
             "env": "CartPole-v0",
-            "stop": {
-                "training_iteration": args.iters,
-            },
-            "config": {
-                "num_workers": 0,
-                "model": {
-                    "custom_model": "custom_loss",
-                    "custom_options": {
-                        "input_files": args.input_files,
-                    },
+            "num_workers": 0,
+            "model": {
+                "custom_model": "custom_loss",
+                "custom_options": {
+                    "input_files": args.input_files,
                 },
             },
         },
-    })
+    )
diff --git a/python/ray/rllib/examples/custom_metrics_and_callbacks.py b/python/ray/rllib/examples/custom_metrics_and_callbacks.py
index 0f0dcb040..a9c36ce35 100644
--- a/python/ray/rllib/examples/custom_metrics_and_callbacks.py
+++ b/python/ray/rllib/examples/custom_metrics_and_callbacks.py
@@ -50,24 +50,22 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     ray.init()
-    trials = tune.run_experiments({
-        "test": {
+    trials = tune.run(
+        "PG",
+        stop={
+            "training_iteration": args.num_iters,
+        },
+        config={
             "env": "CartPole-v0",
-            "run": "PG",
-            "stop": {
-                "training_iteration": args.num_iters,
+            "callbacks": {
+                "on_episode_start": tune.function(on_episode_start),
+                "on_episode_step": tune.function(on_episode_step),
+                "on_episode_end": tune.function(on_episode_end),
+                "on_sample_end": tune.function(on_sample_end),
+                "on_train_result": tune.function(on_train_result),
             },
-            "config": {
-                "callbacks": {
-                    "on_episode_start": tune.function(on_episode_start),
-                    "on_episode_step": tune.function(on_episode_step),
-                    "on_episode_end": tune.function(on_episode_end),
-                    "on_sample_end": tune.function(on_sample_end),
-                    "on_train_result": tune.function(on_train_result),
-                },
-            },
-        }
-    })
+        },
+    )
 
     # verify custom metrics for integration tests
     custom_metrics = trials[0].last_result["custom_metrics"]
diff --git a/python/ray/rllib/examples/custom_train_fn.py b/python/ray/rllib/examples/custom_train_fn.py
index cc25bced9..224a54d51 100644
--- a/python/ray/rllib/examples/custom_train_fn.py
+++ b/python/ray/rllib/examples/custom_train_fn.py
@@ -11,8 +11,8 @@ from __future__ import division
 from __future__ import print_function
 
 import ray
+from ray import tune
 from ray.rllib.agents.ppo import PPOAgent
-from ray.tune import run_experiments
 
 
 def my_train_fn(config, reporter):
@@ -40,15 +40,13 @@ def my_train_fn(config, reporter):
 
 if __name__ == "__main__":
     ray.init()
-    run_experiments({
-        "demo": {
-            "run": my_train_fn,
-            "resources_per_trial": {
-                "cpu": 1,
-            },
-            "config": {
-                "lr": 0.01,
-                "num_workers": 0,
-            },
+    tune.run(
+        my_train_fn,
+        resources_per_trial={
+            "cpu": 1,
         },
-    })
+        config={
+            "lr": 0.01,
+            "num_workers": 0,
+        },
+    )
diff --git a/python/ray/rllib/examples/hierarchical_training.py b/python/ray/rllib/examples/hierarchical_training.py
index 2cb25cbbf..c6d2db968 100644
--- a/python/ray/rllib/examples/hierarchical_training.py
+++ b/python/ray/rllib/examples/hierarchical_training.py
@@ -33,7 +33,8 @@ from gym.spaces import Box, Discrete, Tuple
 import logging
 
 import ray
-from ray.tune import run_experiments, function
+from ray import tune
+from ray.tune import function
 from ray.rllib.env import MultiAgentEnv
 
 parser = argparse.ArgumentParser()
@@ -184,15 +185,13 @@ if __name__ == "__main__":
     args = parser.parse_args()
     ray.init()
     if args.flat:
-        run_experiments({
-            "maze_single": {
-                "run": "PPO",
+        tune.run(
+            "PPO",
+            config={
                 "env": WindyMazeEnv,
-                "config": {
-                    "num_workers": 0,
-                },
+                "num_workers": 0,
             },
-        })
+        )
     else:
         maze = WindyMazeEnv(None)
 
@@ -202,30 +201,28 @@ if __name__ == "__main__":
             else:
                 return "high_level_policy"
 
-        run_experiments({
-            "maze_hier": {
-                "run": "PPO",
+        tune.run(
+            "PPO",
+            config={
                 "env": HierarchicalWindyMazeEnv,
-                "config": {
-                    "num_workers": 0,
-                    "log_level": "INFO",
-                    "entropy_coeff": 0.01,
-                    "multiagent": {
-                        "policy_graphs": {
-                            "high_level_policy": (None, maze.observation_space,
-                                                  Discrete(4), {
-                                                      "gamma": 0.9
-                                                  }),
-                            "low_level_policy": (None,
-                                                 Tuple([
-                                                     maze.observation_space,
-                                                     Discrete(4)
-                                                 ]), maze.action_space, {
-                                                     "gamma": 0.0
-                                                 }),
-                        },
-                        "policy_mapping_fn": function(policy_mapping_fn),
+                "num_workers": 0,
+                "log_level": "INFO",
+                "entropy_coeff": 0.01,
+                "multiagent": {
+                    "policy_graphs": {
+                        "high_level_policy": (None, maze.observation_space,
+                                              Discrete(4), {
+                                                  "gamma": 0.9
+                                              }),
+                        "low_level_policy": (None,
+                                             Tuple([
+                                                 maze.observation_space,
+                                                 Discrete(4)
+                                             ]), maze.action_space, {
+                                                 "gamma": 0.0
+                                             }),
                     },
+                    "policy_mapping_fn": function(policy_mapping_fn),
                 },
             },
-        })
+        )
diff --git a/python/ray/rllib/examples/multiagent_cartpole.py b/python/ray/rllib/examples/multiagent_cartpole.py
index bab549a41..d7485e27a 100644
--- a/python/ray/rllib/examples/multiagent_cartpole.py
+++ b/python/ray/rllib/examples/multiagent_cartpole.py
@@ -23,7 +23,6 @@ import ray
 from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.tests.test_multi_agent_env import MultiCartpole
-from ray.tune import run_experiments
 from ray.tune.registry import register_env
 
 parser = argparse.ArgumentParser()
@@ -98,21 +97,17 @@ if __name__ == "__main__":
     }
     policy_ids = list(policy_graphs.keys())
 
-    run_experiments({
-        "test": {
-            "run": "PPO",
+    tune.run(
+        "PPO",
+        stop={"training_iteration": args.num_iters},
+        config={
             "env": "multi_cartpole",
-            "stop": {
-                "training_iteration": args.num_iters
+            "log_level": "DEBUG",
+            "num_sgd_iter": 10,
+            "multiagent": {
+                "policy_graphs": policy_graphs,
+                "policy_mapping_fn": tune.function(
+                    lambda agent_id: random.choice(policy_ids)),
             },
-            "config": {
-                "log_level": "DEBUG",
-                "num_sgd_iter": 10,
-                "multiagent": {
-                    "policy_graphs": policy_graphs,
-                    "policy_mapping_fn": tune.function(
-                        lambda agent_id: random.choice(policy_ids)),
-                },
-            },
-        }
-    })
+        },
+    )
diff --git a/python/ray/rllib/examples/parametric_action_cartpole.py b/python/ray/rllib/examples/parametric_action_cartpole.py
index 8b32e5d9a..a36e58b98 100644
--- a/python/ray/rllib/examples/parametric_action_cartpole.py
+++ b/python/ray/rllib/examples/parametric_action_cartpole.py
@@ -27,9 +27,9 @@ import tensorflow as tf
 import tensorflow.contrib.slim as slim
 
 import ray
+from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.models.misc import normc_initializer
-from ray.tune import run_experiments
 from ray.tune.registry import register_env
 
 parser = argparse.ArgumentParser()
@@ -178,18 +178,16 @@ if __name__ == "__main__":
         }
     else:
         cfg = {}  # PG, IMPALA, A2C, etc.
-    run_experiments({
-        "parametric_cartpole": {
-            "run": args.run,
-            "env": "pa_cartpole",
-            "stop": {
-                "episode_reward_mean": args.stop,
-            },
-            "config": dict({
-                "model": {
-                    "custom_model": "pa_model",
-                },
-                "num_workers": 0,
-            }, **cfg),
+    tune.run(
+        args.run,
+        stop={
+            "episode_reward_mean": args.stop,
         },
-    })
+        config=dict({
+            "env": "pa_cartpole",
+            "model": {
+                "custom_model": "pa_model",
+            },
+            "num_workers": 0,
+        }, **cfg),
+    )
diff --git a/python/ray/rllib/examples/twostep_game.py b/python/ray/rllib/examples/twostep_game.py
index 172151c9c..577e834e5 100644
--- a/python/ray/rllib/examples/twostep_game.py
+++ b/python/ray/rllib/examples/twostep_game.py
@@ -8,7 +8,8 @@ import argparse
 from gym.spaces import Tuple, Discrete
 
 import ray
-from ray.tune import register_env, run_experiments, grid_search
+from ray import tune
+from ray.tune import register_env, grid_search
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
 
 parser = argparse.ArgumentParser()
@@ -108,13 +109,12 @@ if __name__ == "__main__":
         group = False
 
     ray.init()
-    run_experiments({
-        "two_step": {
-            "run": args.run,
-            "env": "grouped_twostep" if group else TwoStepGame,
-            "stop": {
-                "timesteps_total": args.stop,
-            },
-            "config": config,
+    tune.run(
+        args.run,
+        stop={
+            "timesteps_total": args.stop,
         },
-    })
+        config=dict(config, **{
+            "env": "grouped_twostep" if group else TwoStepGame,
+        }),
+    )
diff --git a/python/ray/rllib/utils/debug.py b/python/ray/rllib/utils/debug.py
index 63638d292..ce86326f2 100644
--- a/python/ray/rllib/utils/debug.py
+++ b/python/ray/rllib/utils/debug.py
@@ -10,7 +10,7 @@ from ray.rllib.evaluation.sample_batch import SampleBatch, MultiAgentBatch
 
 _logged = set()
 _disabled = False
-_periodic_log = True
+_periodic_log = False
 _last_logged = 0.0
 _printer = pprint.PrettyPrinter(indent=2, width=60)
 
diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py
index f46d23fc6..03ab3cd32 100644
--- a/python/ray/tune/tune.py
+++ b/python/ray/tune/tune.py
@@ -95,7 +95,7 @@ def run(run_or_experiment,
 
     Args:
         run_or_experiment (function|class|str|Experiment): If
-            function|class|str, this is the algorithm  or model to train.
+            function|class|str, this is the algorithm or model to train.
             This may refer to the name of a built-on algorithm
             (e.g. RLLib's DQN or PPO), a user-defined trainable
             function or class, or the string identifier of a