From e046a5c767aa5c2d13afa2366143a49d69b4f9f0 Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Thu, 20 Dec 2018 19:00:47 -0800
Subject: [PATCH] [tune] resources_per_trial from trial_resources (#3580)

Renaming variable due to user errors.
---
 doc/source/rllib-training.rst                 |  2 +-
 doc/source/tune-usage.rst                     |  6 +--
 python/ray/rllib/train.py                     |  6 +--
 python/ray/tune/config_parser.py              |  7 +--
 .../tune/examples/async_hyperband_example.py  |  2 +-
 python/ray/tune/examples/mnist_pytorch.py     |  2 +-
 .../tune/examples/mnist_pytorch_trainable.py  |  2 +-
 .../examples/pbt_tune_cifar10_with_keras.py   |  2 +-
 python/ray/tune/examples/tune_mnist_keras.py  |  2 +-
 python/ray/tune/experiment.py                 | 51 +++++++++++++------
 python/ray/tune/logger.py                     |  3 +-
 python/ray/tune/suggest/variant_generator.py  |  2 +-
 python/ray/tune/test/trial_runner_test.py     | 18 ++++++-
 python/ray/tune/test/tune_server_test.py      |  2 +-
 14 files changed, 73 insertions(+), 34 deletions(-)

diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index 4dc6c900d..291ae0462 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -308,7 +308,7 @@ Approach 1: Use the Agent API and update the environment between calls to ``trai
                 "num_gpus": 0,
                 "num_workers": 2,
             },
-            "trial_resources": {
+            "resources_per_trial": {
                 "cpu": 1,
                 "gpu": lambda spec: spec.config.num_gpus,
                 "extra_cpu": lambda spec: spec.config.num_workers,
diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst
index da91e6a4c..c4f846401 100644
--- a/doc/source/tune-usage.rst
+++ b/doc/source/tune-usage.rst
@@ -87,7 +87,7 @@ dictionary. Tune will convert the dict into an ``ray.tune.Experiment`` object.
                 "alpha": tune.grid_search([0.2, 0.4, 0.6]),
                 "beta": tune.grid_search([1, 2]),
             },
-            "trial_resources": { "cpu": 1, "gpu": 0 },
+            "resources_per_trial": { "cpu": 1, "gpu": 0 },
             "num_samples": 10,
             "local_dir": "~/ray_results",
             "upload_dir": "s3://your_bucket/path",
@@ -215,7 +215,7 @@ E.g. in the above, ``"num_samples": 10`` repeats the 3x3 grid search 10 times, f
 Using GPUs (Resource Allocation)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Tune will allocate the specified GPU and CPU ``trial_resources`` to each individual trial (defaulting to 1 CPU per trial). Under the hood, Tune runs each trial as a Ray actor, using Ray's resource handling to allocate resources and place actors. A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded.
+Tune will allocate the specified GPU and CPU ``resources_per_trial`` to each individual trial (defaulting to 1 CPU per trial). Under the hood, Tune runs each trial as a Ray actor, using Ray's resource handling to allocate resources and place actors. A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded.
 
 Fractional values are also supported, (i.e., ``"gpu": 0.2``). You can find an example of this in the `Keras MNIST example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__.
 
@@ -231,7 +231,7 @@ If your trainable function / class creates further Ray actors or tasks that also
     run_experiments({
         "my_experiment_name": {
             "run": my_trainable,
-            "trial_resources": {
+            "resources_per_trial": {
                 "cpu": 1,
                 "gpu": 1,
                 "extra_gpu": 4
diff --git a/python/ray/rllib/train.py b/python/ray/rllib/train.py
index 72d6fc0b5..d9f7cf58e 100755
--- a/python/ray/rllib/train.py
+++ b/python/ray/rllib/train.py
@@ -100,9 +100,9 @@ def run(args, parser):
                 "run": args.run,
                 "checkpoint_freq": args.checkpoint_freq,
                 "local_dir": args.local_dir,
-                "trial_resources": (
-                    args.trial_resources and
-                    resources_to_json(args.trial_resources)),
+                "resources_per_trial": (
+                    args.resources_per_trial and
+                    resources_to_json(args.resources_per_trial)),
                 "stop": args.stop,
                 "config": dict(args.config, env=args.env),
                 "restore": args.restore,
diff --git a/python/ray/tune/config_parser.py b/python/ray/tune/config_parser.py
index febfba480..22adfc397 100644
--- a/python/ray/tune/config_parser.py
+++ b/python/ray/tune/config_parser.py
@@ -83,7 +83,7 @@ def make_parser(parser_creator=None, **kwargs):
         help="Algorithm-specific configuration (e.g. env, hyperparams), "
         "specified in JSON.")
     parser.add_argument(
-        "--trial-resources",
+        "--resources-per-trial",
         default=None,
         type=json_to_resources,
         help="Override the machine resources to allocate per trial, e.g. "
@@ -197,8 +197,9 @@ def create_trial_from_spec(spec, output_path, parser, **trial_kwargs):
         args = parser.parse_args(to_argv(spec))
     except SystemExit:
         raise TuneError("Error parsing args, see above message", spec)
-    if "trial_resources" in spec:
-        trial_kwargs["resources"] = json_to_resources(spec["trial_resources"])
+    if "resources_per_trial" in spec:
+        trial_kwargs["resources"] = json_to_resources(
+            spec["resources_per_trial"])
     return Trial(
         # Submitting trial via server in py2.7 creates Unicode, which does not
         # convert to string in a straightforward manner.
diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py
index 871e8c171..a2e4b63e4 100644
--- a/python/ray/tune/examples/async_hyperband_example.py
+++ b/python/ray/tune/examples/async_hyperband_example.py
@@ -71,7 +71,7 @@ if __name__ == "__main__":
                     "training_iteration": 1 if args.smoke_test else 99999
                 },
                 "num_samples": 20,
-                "trial_resources": {
+                "resources_per_trial": {
                     "cpu": 1,
                     "gpu": 0
                 },
diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py
index d9e336a76..a5fe48e5d 100644
--- a/python/ray/tune/examples/mnist_pytorch.py
+++ b/python/ray/tune/examples/mnist_pytorch.py
@@ -175,7 +175,7 @@ if __name__ == '__main__':
                     "mean_accuracy": 0.98,
                     "training_iteration": 1 if args.smoke_test else 20
                 },
-                "trial_resources": {
+                "resources_per_trial": {
                     "cpu": 3
                 },
                 "run": "train_mnist",
diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py
index 75da205f1..24fc4951d 100644
--- a/python/ray/tune/examples/mnist_pytorch_trainable.py
+++ b/python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -187,7 +187,7 @@ if __name__ == '__main__':
                     "mean_accuracy": 0.95,
                     "training_iteration": 1 if args.smoke_test else 20,
                 },
-                "trial_resources": {
+                "resources_per_trial": {
                     "cpu": 3
                 },
                 "run": TrainMNIST,
diff --git a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
index 692c967cf..2b7520aeb 100755
--- a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
+++ b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
@@ -181,7 +181,7 @@ if __name__ == "__main__":
 
     train_spec = {
         "run": Cifar10Model,
-        "trial_resources": {
+        "resources_per_trial": {
             "cpu": 1,
             "gpu": 1
         },
diff --git a/python/ray/tune/examples/tune_mnist_keras.py b/python/ray/tune/examples/tune_mnist_keras.py
index cbe9f626d..03afe6594 100644
--- a/python/ray/tune/examples/tune_mnist_keras.py
+++ b/python/ray/tune/examples/tune_mnist_keras.py
@@ -187,7 +187,7 @@ if __name__ == '__main__':
                 },
                 "run": "train_mnist",
                 "num_samples": 1 if args.smoke_test else 10,
-                "trial_resources": {
+                "resources_per_trial": {
                     "cpu": args.threads,
                     "gpu": 0.5 if args.use_gpu else 0
                 },
diff --git a/python/ray/tune/experiment.py b/python/ray/tune/experiment.py
index 6fc34dd41..585948752 100644
--- a/python/ray/tune/experiment.py
+++ b/python/ray/tune/experiment.py
@@ -15,6 +15,23 @@ from ray.tune.result import DEFAULT_RESULTS_DIR
 logger = logging.getLogger(__name__)
 
 
+def _raise_deprecation_note(deprecated, replacement, soft=False):
+    """User notification for deprecated parameter.
+
+    Arguments:
+        deprecated (str): Deprecated parameter.
+        replacement (str): Replacement parameter to use instead.
+        soft (bool): Fatal if True.
+    """
+    error_msg = ("`{deprecated}` is deprecated. Please use `{replacement}`. "
+                 "`{deprecated}` will be removed in future versions of "
+                 "Ray.".format(deprecated=deprecated, replacement=replacement))
+    if soft:
+        logger.warning(error_msg)
+    else:
+        raise DeprecationWarning(error_msg)
+
+
 class Experiment(object):
     """Tracks experiment specifications.
 
@@ -31,12 +48,10 @@ class Experiment(object):
         config (dict): Algorithm-specific configuration for Tune variant
             generation (e.g. env, hyperparams). Defaults to empty dict.
             Custom search algorithms may ignore this.
-        trial_resources (dict): Machine resources to allocate per trial,
+        resources_per_trial (dict): Machine resources to allocate per trial,
             e.g. ``{"cpu": 64, "gpu": 8}``. Note that GPUs will not be
             assigned unless you specify them here. Defaults to 1 CPU and 0
             GPUs in ``Trainable.default_resource_request()``.
-        repeat (int): Deprecated and will be removed in future versions of
-            Ray. Use `num_samples` instead.
         num_samples (int): Number of times to sample from the
             hyperparameter space. Defaults to 1. If `grid_search` is
             provided as an argument, the grid will be repeated
@@ -62,6 +77,10 @@ class Experiment(object):
             checkpointing is enabled. Defaults to 3.
         restore (str): Path to checkpoint. Only makes sense to set if
             running 1 trial. Defaults to None.
+        repeat: Deprecated and will be removed in future versions of
+            Ray. Use `num_samples` instead.
+        trial_resources: Deprecated and will be removed in future versions of
+            Ray. Use `resources_per_trial` instead.
 
 
     Examples:
@@ -73,7 +92,7 @@ class Experiment(object):
         >>>         "alpha": tune.grid_search([0.2, 0.4, 0.6]),
         >>>         "beta": tune.grid_search([1, 2]),
         >>>     },
-        >>>     trial_resources={
+        >>>     resources_per_trial={
         >>>         "cpu": 1,
         >>>         "gpu": 0
         >>>     },
@@ -90,8 +109,7 @@ class Experiment(object):
                  run,
                  stop=None,
                  config=None,
-                 trial_resources=None,
-                 repeat=1,
+                 resources_per_trial=None,
                  num_samples=1,
                  local_dir=None,
                  upload_dir=None,
@@ -101,15 +119,25 @@ class Experiment(object):
                  checkpoint_freq=0,
                  checkpoint_at_end=False,
                  max_failures=3,
-                 restore=None):
+                 restore=None,
+                 repeat=None,
+                 trial_resources=None):
         validate_sync_function(sync_function)
         if sync_function:
             assert upload_dir, "Need `upload_dir` if sync_function given."
+
+        if repeat:
+            _raise_deprecation_note("repeat", "num_samples", soft=False)
+        if trial_resources:
+            _raise_deprecation_note(
+                "trial_resources", "resources_per_trial", soft=True)
+            resources_per_trial = trial_resources
+
         spec = {
             "run": self._register_if_needed(run),
             "stop": stop or {},
             "config": config or {},
-            "trial_resources": trial_resources,
+            "resources_per_trial": resources_per_trial,
             "num_samples": num_samples,
             "local_dir": local_dir or DEFAULT_RESULTS_DIR,
             "upload_dir": upload_dir or "",  # argparse converts None to "null"
@@ -136,13 +164,6 @@ class Experiment(object):
         if "run" not in spec:
             raise TuneError("No trainable specified!")
 
-        if "repeat" in spec:
-            raise DeprecationWarning("The parameter `repeat` is deprecated; \
-                converting to `num_samples`. `repeat` will be removed in \
-                future versions of Ray.")
-            spec["num_samples"] = spec["repeat"]
-            del spec["repeat"]
-
         # Special case the `env` param for RLlib by automatically
         # moving it into the `config` section.
         if "env" in spec:
diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index 75c888c2a..972705487 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -100,7 +100,8 @@ class UnifiedLogger(Logger):
             try:
                 self._loggers.append(cls(self.config, self.logdir, self.uri))
             except Exception:
-                logger.exception("Could not instantiate {} - skipping.")
+                logger.exception("Could not instantiate {} - skipping.".format(
+                    str(cls)))
         self._log_syncer = get_syncer(
             self.logdir, self.uri, sync_function=self._sync_function)
 
diff --git a/python/ray/tune/suggest/variant_generator.py b/python/ray/tune/suggest/variant_generator.py
index 7e9ab95f9..c3ca83e0a 100644
--- a/python/ray/tune/suggest/variant_generator.py
+++ b/python/ray/tune/suggest/variant_generator.py
@@ -96,7 +96,7 @@ _MAX_RESOLUTION_PASSES = 20
 def format_vars(resolved_vars):
     out = []
     for path, value in sorted(resolved_vars.items()):
-        if path[0] in ["run", "env", "trial_resources"]:
+        if path[0] in ["run", "env", "resources_per_trial"]:
             continue  # TrialRunner already has these in the experiment_tag
         pieces = []
         last_string = True
diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py
index 170049ceb..40b6575ce 100644
--- a/python/ray/tune/test/trial_runner_test.py
+++ b/python/ray/tune/test/trial_runner_test.py
@@ -294,7 +294,7 @@ class TrainableFunctionApiTest(unittest.TestCase):
             run_experiments({
                 "foo": {
                     "run": "PPO",
-                    "trial_resources": {
+                    "resources_per_trial": {
                         "asdf": 1
                     }
                 }
@@ -681,6 +681,22 @@ class RunExperimentTest(unittest.TestCase):
             self.assertEqual(trial.status, Trial.TERMINATED)
             self.assertTrue(trial.has_checkpoint())
 
+    def testDeprecatedResources(self):
+        class train(Trainable):
+            def _train(self):
+                return {"timesteps_this_iter": 1, "done": True}
+
+        trials = run_experiments({
+            "foo": {
+                "run": train,
+                "trial_resources": {
+                    "cpu": 1
+                }
+            }
+        })
+        for trial in trials:
+            self.assertEqual(trial.status, Trial.TERMINATED)
+
     def testCustomLogger(self):
         class CustomLogger(Logger):
             def on_result(self, result):
diff --git a/python/ray/tune/test/tune_server_test.py b/python/ray/tune/test/tune_server_test.py
index a535b421b..db99aae2e 100644
--- a/python/ray/tune/test/tune_server_test.py
+++ b/python/ray/tune/test/tune_server_test.py
@@ -65,7 +65,7 @@ class TuneServerSuite(unittest.TestCase):
             "stop": {
                 "training_iteration": 3
             },
-            "trial_resources": {
+            "resources_per_trial": {
                 'cpu': 1,
                 'gpu': 1
             },