From 7aa06fb25cd62d17a32cbd2f8c528b64bf5ea085 Mon Sep 17 00:00:00 2001
From: Hersh Godse <hersh.godse@berkeley.edu>
Date: Fri, 15 Nov 2019 12:47:50 -0800
Subject: [PATCH] [tune] ExperimentAnalysis in-memory cache (#5962)

---
 .../ray/tune/analysis/experiment_analysis.py  | 83 +++++++++++++++++++
 .../tune/tests/test_experiment_analysis.py    | 61 +++++++++++++-
 python/ray/tune/trial.py                      | 19 +++++
 3 files changed, 161 insertions(+), 2 deletions(-)

diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py
index 3093b73ef..0ebe99192 100644
--- a/python/ray/tune/analysis/experiment_analysis.py
+++ b/python/ray/tune/analysis/experiment_analysis.py
@@ -183,6 +183,89 @@ class ExperimentAnalysis(Analysis):
         super(ExperimentAnalysis, self).__init__(
             os.path.dirname(experiment_checkpoint_path))
 
+    def get_best_trial(self, metric, mode="max", scope="all"):
+        """Retrieve the best trial object.
+
+        Compares the scores on `metric` of every trial that reported it.
+        Trials whose score is NaN are skipped, since NaN cannot be ordered.
+
+        Args:
+            metric (str): Key for trial info to order on.
+            mode (str): One of [min, max].
+            scope (str): One of [all, last]. If `scope=last`, compare each
+                trial's final `metric` value. If `scope=all`, compare each
+                trial's min/max `metric` value, selected by `mode`.
+
+        Returns:
+            The best trial, or None if no trial has a usable score for
+            `metric`. Ties go to the trial that appears first.
+
+        Raises:
+            ValueError: If `mode` or `scope` is not an allowed value.
+        """
+        if mode not in ["max", "min"]:
+            raise ValueError(
+                "ExperimentAnalysis: attempting to get best trial for "
+                "metric {} for mode {} not in [\"max\", \"min\"]".format(
+                    metric, mode))
+        if scope not in ["all", "last"]:
+            raise ValueError(
+                "ExperimentAnalysis: attempting to get best trial for "
+                "metric {} for scope {} not in [\"all\", \"last\"]".format(
+                    metric, scope))
+        # Per-trial summary to compare: final value, or best under `mode`.
+        score_key = "last" if scope == "last" else mode
+        best_trial = None
+        best_metric_score = None
+        for trial in self.trials:
+            if metric not in trial.metric_analysis:
+                continue
+            metric_score = trial.metric_analysis[metric][score_key]
+            # NaN != NaN; an unordered score must never become the baseline.
+            if metric_score != metric_score:
+                continue
+            if (best_metric_score is None
+                    or (mode == "max" and metric_score > best_metric_score)
+                    or (mode == "min" and metric_score < best_metric_score)):
+                best_metric_score = metric_score
+                best_trial = trial
+
+        return best_trial
+
+    def get_best_config(self, metric, mode="max", scope="all"):
+        """Retrieve the config of the best trial.
+
+        Args:
+            metric (str): Key for trial info to order on.
+            mode (str): One of [min, max].
+            scope (str): One of [all, last]. Passed to `get_best_trial`,
+                which documents how each scope scores a trial.
+
+        Returns:
+            The best trial's config, or None if there is no best trial.
+        """
+        winner = self.get_best_trial(metric, mode, scope)
+        if not winner:
+            return None
+        return winner.config
+
+    def get_best_logdir(self, metric, mode="max", scope="all"):
+        """Retrieve the logdir of the best trial.
+
+        Args:
+            metric (str): Key for trial info to order on.
+            mode (str): One of [min, max].
+            scope (str): One of [all, last]. Passed to `get_best_trial`,
+                which documents how each scope scores a trial.
+
+        Returns:
+            The best trial's logdir, or None if there is no best trial.
+        """
+        winner = self.get_best_trial(metric, mode, scope)
+        if not winner:
+            return None
+        return winner.logdir
+
     def stats(self):
         """Returns a dictionary of the statistics of the experiment."""
         return self._experiment_state.get("stats")
diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py
index d9f7e766e..b1c830359 100644
--- a/python/ray/tune/tests/test_experiment_analysis.py
+++ b/python/ray/tune/tests/test_experiment_analysis.py
@@ -10,13 +10,70 @@ import os
 import pandas as pd
 
 import ray
-from ray.tune import run, sample_from, Analysis
+from ray.tune import run, Trainable, sample_from, Analysis, grid_search
 from ray.tune.examples.async_hyperband_example import MyTrainableClass
 
 
+class ExperimentAnalysisInMemorySuite(unittest.TestCase):
+    def setUp(self):
+        class MockTrainable(Trainable):
+            def _setup(self, config):
+                self.id = config["id"]
+                self.idx = 0  # index of the next score to report
+                self.scores_dict = {  # trial id -> score reported per step
+                    0: [5, 0],
+                    1: [4, 1],
+                    2: [2, 8],
+                    3: [9, 6],
+                    4: [7, 3]
+                }
+
+            def _train(self):
+                val = self.scores_dict[self.id][self.idx]
+                self.idx += 1
+                return {"score": val}
+
+            def _save(self, checkpoint_dir):
+                pass
+
+            def _restore(self, checkpoint_path):
+                pass
+
+        self.MockTrainable = MockTrainable
+        ray.init(local_mode=False, num_cpus=1)
+        self.test_dir = tempfile.mkdtemp()  # always set, so tearDown is safe
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir, ignore_errors=True)
+        ray.shutdown()
+
+    def testCompareTrials(self):
+        scores_all = [5, 4, 2, 9, 7, 0, 1, 8, 6, 3]  # step 1, then step 2
+        scores_last = scores_all[5:]  # final-step score of each trial
+
+        ea = run(
+            self.MockTrainable,
+            name="analysis_exp",
+            local_dir=self.test_dir,
+            stop={"training_iteration": 2},
+            num_samples=1,
+            config={"id": grid_search(list(range(5)))})
+
+        max_all = ea.get_best_trial("score",
+                                    "max").metric_analysis["score"]["max"]
+        min_all = ea.get_best_trial("score",
+                                    "min").metric_analysis["score"]["min"]
+        max_last = ea.get_best_trial("score", "max",
+                                     "last").metric_analysis["score"]["last"]
+        self.assertEqual(max_all, max(scores_all))
+        self.assertEqual(min_all, min(scores_all))
+        self.assertEqual(max_last, max(scores_last))
+        self.assertNotEqual(max_last, max(scores_all))
+
+
 class ExperimentAnalysisSuite(unittest.TestCase):
     def setUp(self):
-        ray.init(local_mode=True)
+        ray.init(local_mode=False)
         self.test_dir = tempfile.mkdtemp()
         self.test_name = "analysis_exp"
         self.num_samples = 10
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 5568a659a..41900c71b 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -10,8 +10,10 @@ import uuid
 import time
 import tempfile
 import os
+from numbers import Number
 from ray.tune import TuneError
 from ray.tune.logger import pretty_print, UnifiedLogger
+from ray.tune.util import flatten_dict
 # NOTE(rkn): We import ray.tune.registry here instead of importing the names we
 # need because there are cyclic imports that may cause specific names to not
 # have been defined yet. See https://github.com/ray-project/ray/issues/1716.
@@ -156,6 +158,9 @@ class Trial(object):
         self.checkpoint_freq = checkpoint_freq
         self.checkpoint_at_end = checkpoint_at_end
 
+        # In-memory max/min/last value of each metric this trial reported.
+        self.metric_analysis = {}
+
         self.history = []
         self.keep_checkpoints_num = keep_checkpoints_num
         self._cmp_greater = not checkpoint_score_attr.startswith("min-")
@@ -325,6 +330,20 @@ class Trial(object):
         self.last_result = result
         self.last_update_time = time.time()
         self.result_logger.on_result(self.last_result)
+        for metric, value in flatten_dict(result).items():
+            if isinstance(value, Number):
+                if metric not in self.metric_analysis:
+                    self.metric_analysis[metric] = {
+                        "max": value,
+                        "min": value,
+                        "last": value
+                    }
+                else:
+                    self.metric_analysis[metric]["max"] = max(
+                        value, self.metric_analysis[metric]["max"])
+                    self.metric_analysis[metric]["min"] = min(
+                        value, self.metric_analysis[metric]["min"])
+                    self.metric_analysis[metric]["last"] = value
 
     def compare_checkpoints(self, attr_mean):
         """Compares two checkpoints based on the attribute attr_mean param.