From 4633d81c390fd33d54aa62a5eb43fe104062bb41 Mon Sep 17 00:00:00 2001
From: krfricke <krfricke@users.noreply.github.com>
Date: Fri, 15 May 2020 00:20:43 +0200
Subject: [PATCH] [tune] added average scope to experiment analysis (#8445)

---
 .../ray/tune/analysis/experiment_analysis.py  | 20 +++++++++-----
 .../tests/test_experiment_analysis_mem.py     | 27 ++++++++++++-------
 python/ray/tune/trial.py                      |  7 ++++-
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py
index 917580676..5ed038aa7 100644
--- a/python/ray/tune/analysis/experiment_analysis.py
+++ b/python/ray/tune/analysis/experiment_analysis.py
@@ -220,8 +220,10 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last]. If `scope=last`, only look at
+            scope (str): One of [all, last, avg]. If `scope=last`, only look at
                 each trial's final step for `metric`, and compare across
+                trials based on `mode=[min,max]`. If `scope=avg`, consider the
+                simple average over all steps for `metric` and compare across
                 trials based on `mode=[min,max]`. If `scope=all`, find each
                 trial's min/max score for `metric` based on `mode`, and
                 compare trials based on `mode=[min,max]`.
@@ -231,11 +233,11 @@ class ExperimentAnalysis(Analysis):
                 "ExperimentAnalysis: attempting to get best trial for "
                 "metric {} for mode {} not in [\"max\", \"min\"]".format(
                     metric, mode))
-        if scope not in ["all", "last"]:
+        if scope not in ["all", "last", "avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\"]".format(
-                    metric, scope))
+                "metric {} for scope {} not in [\"all\", \"last\", \"avg\"]".
+                format(metric, scope))
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -244,6 +246,8 @@ class ExperimentAnalysis(Analysis):
 
             if scope == "last":
                 metric_score = trial.metric_analysis[metric]["last"]
+            elif scope == "avg":
+                metric_score = trial.metric_analysis[metric]["avg"]
             else:
                 metric_score = trial.metric_analysis[metric][mode]
 
@@ -269,8 +273,10 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last]. If `scope=last`, only look at
+            scope (str): One of [all, last, avg]. If `scope=last`, only look at
                 each trial's final step for `metric`, and compare across
+                trials based on `mode=[min,max]`. If `scope=avg`, consider the
+                simple average over all steps for `metric` and compare across
                 trials based on `mode=[min,max]`. If `scope=all`, find each
                 trial's min/max score for `metric` based on `mode`, and
                 compare trials based on `mode=[min,max]`.
@@ -286,8 +292,10 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last]. If `scope=last`, only look at
+            scope (str): One of [all, last, avg]. If `scope=last`, only look at
                 each trial's final step for `metric`, and compare across
+                trials based on `mode=[min,max]`. If `scope=avg`, consider the
+                simple average over all steps for `metric` and compare across
                 trials based on `mode=[min,max]`. If `scope=all`, find each
                 trial's min/max score for `metric` based on `mode`, and
                 compare trials based on `mode=[min,max]`.
diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py
index e0b660543..319c97bab 100644
--- a/python/ray/tune/tests/test_experiment_analysis_mem.py
+++ b/python/ray/tune/tests/test_experiment_analysis_mem.py
@@ -3,6 +3,7 @@ import shutil
 import tempfile
 import random
 import pandas as pd
+import numpy as np
 
 import ray
 from ray.tune import run, Trainable, sample_from, Analysis, grid_search
@@ -12,16 +13,17 @@ from ray.tune.examples.async_hyperband_example import MyTrainableClass
 class ExperimentAnalysisInMemorySuite(unittest.TestCase):
     def setUp(self):
         class MockTrainable(Trainable):
+            scores_dict = {
+                0: [5, 4, 0],
+                1: [4, 3, 1],
+                2: [2, 1, 8],
+                3: [9, 7, 6],
+                4: [7, 5, 3]
+            }
+
             def _setup(self, config):
                 self.id = config["id"]
                 self.idx = 0
-                self.scores_dict = {
-                    0: [5, 0],
-                    1: [4, 1],
-                    2: [2, 8],
-                    3: [9, 6],
-                    4: [7, 3]
-                }
 
             def _train(self):
                 val = self.scores_dict[self.id][self.idx]
@@ -43,14 +45,15 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase):
 
     def testCompareTrials(self):
         self.test_dir = tempfile.mkdtemp()
-        scores_all = [5, 4, 2, 9, 7, 0, 1, 8, 6, 3]
+        scores = np.asarray(list(self.MockTrainable.scores_dict.values()))
+        scores_all = scores.flatten("F")
         scores_last = scores_all[5:]
 
         ea = run(
             self.MockTrainable,
             name="analysis_exp",
             local_dir=self.test_dir,
-            stop={"training_iteration": 2},
+            stop={"training_iteration": 3},
             num_samples=1,
             config={"id": grid_search(list(range(5)))})
 
@@ -60,9 +63,15 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase):
                                     "min").metric_analysis["score"]["min"]
         max_last = ea.get_best_trial("score", "max",
                                      "last").metric_analysis["score"]["last"]
+        max_avg = ea.get_best_trial("score", "max",
+                                    "avg").metric_analysis["score"]["avg"]
+        min_avg = ea.get_best_trial("score", "min",
+                                    "avg").metric_analysis["score"]["avg"]
         self.assertEqual(max_all, max(scores_all))
         self.assertEqual(min_all, min(scores_all))
         self.assertEqual(max_last, max(scores_last))
+        self.assertAlmostEqual(max_avg, max(np.mean(scores, axis=1)))
+        self.assertAlmostEqual(min_avg, min(np.mean(scores, axis=1)))
         self.assertNotEqual(max_last, max(scores_all))
 
 
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 802f8086d..e0e352a57 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -214,7 +214,7 @@ class Trial:
         self.last_result = {}
         self.last_update_time = -float("inf")
 
-        # stores in memory max/min/last result for each metric by trial
+        # in-memory max/min/avg/last value of each metric for this trial
         self.metric_analysis = {}
 
         self.export_formats = export_formats
@@ -476,13 +476,18 @@ class Trial:
                     self.metric_analysis[metric] = {
                         "max": value,
                         "min": value,
+                        "avg": value,
                         "last": value
                     }
                 else:
+                    step = result["training_iteration"] or 1
                     self.metric_analysis[metric]["max"] = max(
                         value, self.metric_analysis[metric]["max"])
                     self.metric_analysis[metric]["min"] = min(
                         value, self.metric_analysis[metric]["min"])
+                    self.metric_analysis[metric]["avg"] = 1 / step * (
+                        value +
+                        (step - 1) * self.metric_analysis[metric]["avg"])
                     self.metric_analysis[metric]["last"] = value
 
     def get_trainable_cls(self):