From ea10cd212cd0e91640dab35c2c2e4408af3e7cad Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Tue, 17 Mar 2020 23:44:18 -0700
Subject: [PATCH] [tune] add accessible trial_info (#7378)

* add accessible trial_info

* trial name and info

* doc

* fix
gp

* Update doc/source/tune-package-ref.rst

* Apply suggestions from code review

* fix

* trial

* fixtest

* testfix
---
 python/ray/tune/function_runner.py    | 25 ++++++++++++++++++--
 python/ray/tune/ray_trial_executor.py |  8 +++++--
 python/ray/tune/result.py             |  4 ++++
 python/ray/tune/tests/test_api.py     | 30 ++++++++++++++++++++++++
 python/ray/tune/track/__init__.py     | 23 ++++++++++++++++++-
 python/ray/tune/track/session.py      | 33 +++++++++++++++++++--------
 python/ray/tune/trainable.py          | 30 ++++++++++++++++++++++--
 python/ray/tune/trial.py              | 21 +++++++++++++++++
 8 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/python/ray/tune/function_runner.py b/python/ray/tune/function_runner.py
index a7f46c23c..f156bd625 100644
--- a/python/ray/tune/function_runner.py
+++ b/python/ray/tune/function_runner.py
@@ -29,10 +29,17 @@ class StatusReporter:
         >>>     reporter(timesteps_this_iter=1)
     """
 
-    def __init__(self, result_queue, continue_semaphore, logdir=None):
+    def __init__(self,
+                 result_queue,
+                 continue_semaphore,
+                 trial_name=None,
+                 trial_id=None,
+                 logdir=None):
         self._queue = result_queue
         self._last_report_time = None
         self._continue_semaphore = continue_semaphore
+        self._trial_name = trial_name
+        self._trial_id = trial_id
         self._logdir = logdir
 
     def __call__(self, **kwargs):
@@ -78,6 +85,16 @@ class StatusReporter:
     def logdir(self):
         return self._logdir
 
+    @property
+    def trial_name(self):
+        """Trial name for the corresponding trial of this Trainable."""
+        return self._trial_name
+
+    @property
+    def trial_id(self):
+        """Trial id for the corresponding trial of this Trainable."""
+        return self._trial_id
+
 
 class _RunnerThread(threading.Thread):
     """Supervisor thread that runs your script."""
@@ -133,7 +150,11 @@ class FunctionRunner(Trainable):
         self._error_queue = queue.Queue(1)
 
         self._status_reporter = StatusReporter(
-            self._results_queue, self._continue_semaphore, self.logdir)
+            self._results_queue,
+            self._continue_semaphore,
+            trial_name=self.trial_name,
+            trial_id=self.trial_id,
+            logdir=self.logdir)
         self._last_result = {}
         config = config.copy()
 
diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py
index df98ec980..8a25379db 100644
--- a/python/ray/tune/ray_trial_executor.py
+++ b/python/ray/tune/ray_trial_executor.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+import copy
 import logging
 import os
 import random
@@ -13,9 +14,10 @@ from ray.resource_spec import ResourceSpec
 from ray.tune.durable_trainable import DurableTrainable
 from ray.tune.error import AbortTrialExecution, TuneError
 from ray.tune.logger import NoopLogger
+from ray.tune.result import TRIAL_INFO
 from ray.tune.resources import Resources
 from ray.tune.trainable import TrainableUtil
-from ray.tune.trial import Trial, Checkpoint, Location
+from ray.tune.trial import Trial, Checkpoint, Location, TrialInfo
 from ray.tune.trial_executor import TrialExecutor
 from ray.tune.utils import warn_if_slow
 
@@ -119,8 +121,10 @@ class RayTrialExecutor(TrialExecutor):
         logger.debug("Trial %s: Setting up new remote runner.", trial)
         # Logging for trials is handled centrally by TrialRunner, so
         # configure the remote runner to use a noop-logger.
+        trial_config = copy.deepcopy(trial.config)
+        trial_config[TRIAL_INFO] = TrialInfo(trial)
         kwargs = {
-            "config": trial.config,
+            "config": trial_config,
             "logger_creator": logger_creator,
         }
         if issubclass(trial.get_trainable_cls(), DurableTrainable):
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py
index 2861efc57..88fbb3e96 100644
--- a/python/ray/tune/result.py
+++ b/python/ray/tune/result.py
@@ -65,6 +65,10 @@ DEFAULT_RESULT_KEYS = (TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL,
 # avoid double-logging results when using the Function API.
 RESULT_DUPLICATE = "__duplicate__"
 
+# __trial_info__ is a magic keyword used internally to pass trial_info
+# to the Trainable via the constructor.
+TRIAL_INFO = "__trial_info__"
+
 # Where Tune writes result files by default
 DEFAULT_RESULTS_DIR = (os.environ.get("TEST_TMPDIR")
                        or os.environ.get("TUNE_RESULT_DIR")
diff --git a/python/ray/tune/tests/test_api.py b/python/ray/tune/tests/test_api.py
index a906022eb..0866da638 100644
--- a/python/ray/tune/tests/test_api.py
+++ b/python/ray/tune/tests/test_api.py
@@ -584,6 +584,36 @@ class TrainableFunctionApiTest(unittest.TestCase):
         self.assertEqual(trial.status, Trial.TERMINATED)
         self.assertEqual(trial.last_result["mean_accuracy"], float("inf"))
 
+    def testTrialInfoAccess(self):
+        class TestTrainable(Trainable):
+            def _train(self):
+                result = {"name": self.trial_name, "trial_id": self.trial_id}
+                print(result)
+                return result
+
+        analysis = tune.run(TestTrainable, stop={TRAINING_ITERATION: 1})
+        trial = analysis.trials[0]
+        self.assertEqual(trial.last_result.get("name"), str(trial))
+        self.assertEqual(trial.last_result.get("trial_id"), trial.trial_id)
+
+    def testTrialInfoAccessFunction(self):
+        def train(config, reporter):
+            reporter(name=reporter.trial_name, trial_id=reporter.trial_id)
+
+        analysis = tune.run(train, stop={TRAINING_ITERATION: 1})
+        trial = analysis.trials[0]
+        self.assertEqual(trial.last_result.get("name"), str(trial))
+        self.assertEqual(trial.last_result.get("trial_id"), trial.trial_id)
+
+        def track_train(config):
+            tune.track.log(
+                name=tune.track.trial_name(), trial_id=tune.track.trial_id())
+
+        analysis = tune.run(track_train, stop={TRAINING_ITERATION: 1})
+        trial = analysis.trials[0]
+        self.assertEqual(trial.last_result.get("name"), str(trial))
+        self.assertEqual(trial.last_result.get("trial_id"), trial.trial_id)
+
     def testNestedResults(self):
         def create_result(i):
             return {"test": {"1": {"2": {"3": i, "4": False}}}}
diff --git a/python/ray/tune/track/__init__.py b/python/ray/tune/track/__init__.py
index ceef69181..98273978e 100644
--- a/python/ray/tune/track/__init__.py
+++ b/python/ray/tune/track/__init__.py
@@ -64,4 +64,25 @@ def trial_dir():
     return _session.logdir
 
 
-__all__ = ["TrackSession", "session", "log", "trial_dir", "init", "shutdown"]
+def trial_name():
+    """Trial name for the corresponding trial of this Trainable.
+
+    This is not set if not using Tune.
+    """
+    _session = get_session()
+    return _session.trial_name
+
+
+def trial_id():
+    """Trial id for the corresponding trial of this Trainable.
+
+    This is not set if not using Tune.
+    """
+    _session = get_session()
+    return _session.trial_id
+
+
+__all__ = [
+    "TrackSession", "session", "log", "trial_dir", "init", "shutdown",
+    "trial_name", "trial_id"
+]
diff --git a/python/ray/tune/track/session.py b/python/ray/tune/track/session.py
index ab402559d..0ce9e58a2 100644
--- a/python/ray/tune/track/session.py
+++ b/python/ray/tune/track/session.py
@@ -17,7 +17,8 @@ class _ReporterHook(Logger):
 class TrackSession:
     """Manages results for a single session.
 
-    Represents a single Trial in an experiment.
+    Represents a single Trial in an experiment. This is automatically
+    created when using ``tune.run``.
 
     Attributes:
         trial_name (str): Custom trial name.
@@ -31,7 +32,7 @@ class TrackSession:
     """
 
     def __init__(self,
-                 trial_name="",
+                 trial_name=None,
                  experiment_dir=None,
                  upload_dir=None,
                  trial_config=None,
@@ -42,18 +43,17 @@ class TrackSession:
         self.trial_config = None
         self._iteration = -1
         self.is_tune_session = bool(_tune_reporter)
-        self.trial_id = Trial.generate_id()
-        if trial_name:
-            self.trial_id = trial_name + "_" + self.trial_id
         if self.is_tune_session:
             self._logger = _ReporterHook(_tune_reporter)
             self._logdir = _tune_reporter.logdir
+            self._trial_name = _tune_reporter.trial_name
+            self._trial_id = _tune_reporter.trial_id
         else:
-            self._initialize_logging(trial_name, experiment_dir, upload_dir,
-                                     trial_config)
+            self._trial_id = Trial.generate_id()
+            self._trial_name = trial_name or self._trial_id
+            self._initialize_logging(experiment_dir, upload_dir, trial_config)
 
     def _initialize_logging(self,
-                            trial_name="",
                             experiment_dir=None,
                             upload_dir=None,
                             trial_config=None):
@@ -67,7 +67,8 @@ class TrackSession:
         self._experiment_dir = os.path.expanduser(experiment_dir)
 
         # TODO(rliaw): Refactor `logdir` to `trial_dir`.
-        self._logdir = Trial.create_logdir(trial_name, self._experiment_dir)
+        self._logdir = Trial.create_logdir(self.trial_name,
+                                           self._experiment_dir)
         self._upload_dir = upload_dir
         self.trial_config = trial_config or {}
 
@@ -95,6 +96,10 @@ class TrackSession:
         self._logger.on_result(metrics_dict)
 
     def close(self):
+        """Closes loggers.
+
+        No need to call this when using ``tune.run``.
+        """
         self.trial_config["trial_completed"] = True
         self.trial_config["end_time"] = datetime.now().isoformat()
         # TODO(rliaw): Have Tune support updated configs
@@ -106,3 +111,13 @@ class TrackSession:
     def logdir(self):
         """Trial logdir (subdir of given experiment directory)"""
         return self._logdir
+
+    @property
+    def trial_name(self):
+        """Trial name for the corresponding trial of this Trainable"""
+        return self._trial_name
+
+    @property
+    def trial_id(self):
+        """Trial id for the corresponding trial of this Trainable"""
+        return self._trial_id
diff --git a/python/ray/tune/trainable.py b/python/ray/tune/trainable.py
index 30ffbe77f..a3fe79c2b 100644
--- a/python/ray/tune/trainable.py
+++ b/python/ray/tune/trainable.py
@@ -18,7 +18,7 @@ from ray.tune.logger import UnifiedLogger
 from ray.tune.result import (DEFAULT_RESULTS_DIR, TIME_THIS_ITER_S,
                              TIMESTEPS_THIS_ITER, DONE, TIMESTEPS_TOTAL,
                              EPISODES_THIS_ITER, EPISODES_TOTAL,
-                             TRAINING_ITERATION, RESULT_DUPLICATE)
+                             TRAINING_ITERATION, RESULT_DUPLICATE, TRIAL_INFO)
 from ray.tune.utils import UtilMonitor
 
 logger = logging.getLogger(__name__)
@@ -147,6 +147,7 @@ class Trainable:
 
         self._experiment_id = uuid.uuid4().hex
         self.config = config or {}
+        trial_info = self.config.pop(TRIAL_INFO, None)
 
         if logger_creator:
             self._result_logger = logger_creator(self.config)
@@ -167,6 +168,7 @@ class Trainable:
         self._timesteps_since_restore = 0
         self._iterations_since_restore = 0
         self._restored = False
+        self._trial_info = trial_info
 
         start_time = time.time()
         self._setup(copy.deepcopy(self.config))
@@ -207,7 +209,7 @@ class Trainable:
         return ""
 
     def current_ip(self):
-        logger.warning("Getting current IP.")
+        logger.info("Getting current IP.")
         self._local_ip = ray.services.get_node_ip_address()
         return self._local_ip
 
@@ -511,6 +513,30 @@ class Trainable:
         """
         return os.path.join(self._logdir, "")
 
+    @property
+    def trial_name(self):
+        """Trial name for the corresponding trial of this Trainable.
+
+        This is not set if not using Tune.
+
+        .. code-block:: python
+
+            name = self.trial_name
+        """
+        return self._trial_info.trial_name
+
+    @property
+    def trial_id(self):
+        """Trial ID for the corresponding trial of this Trainable.
+
+        This is not set if not using Tune.
+
+        .. code-block:: python
+
+            trial_id = self.trial_id
+        """
+        return self._trial_info.trial_id
+
     @property
     def iteration(self):
         """Current training iteration.
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 0d7f655de..6bbc7e0ed 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -102,6 +102,27 @@ def checkpoint_deleter(trial_id, runner):
     return delete
 
 
+class TrialInfo:
+    """Serializable struct for holding information for a Trial.
+
+    Attributes:
+        trial_name (str): String name of the currernt trial.
+        trial_id (str): trial_id of the trial
+    """
+
+    def __init__(self, trial):
+        self._trial_name = str(trial)
+        self._trial_id = trial.trial_id
+
+    @property
+    def trial_name(self):
+        return self._trial_name
+
+    @property
+    def trial_id(self):
+        return self._trial_id
+
+
 class Trial:
     """A trial object holds the state for one model training run.