From 4633d81c390fd33d54aa62a5eb43fe104062bb41 Mon Sep 17 00:00:00 2001 From: krfricke Date: Fri, 15 May 2020 00:20:43 +0200 Subject: [PATCH] [tune] added average scope to experiment analysis (#8445) --- .../ray/tune/analysis/experiment_analysis.py | 20 +++++++++----- .../tests/test_experiment_analysis_mem.py | 27 ++++++++++++------- python/ray/tune/trial.py | 7 ++++- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py index 917580676..5ed038aa7 100644 --- a/python/ray/tune/analysis/experiment_analysis.py +++ b/python/ray/tune/analysis/experiment_analysis.py @@ -220,8 +220,10 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last]. If `scope=last`, only look at + scope (str): One of [all, last, avg]. If `scope=last`, only look at each trial's final step for `metric`, and compare across + trials based on `mode=[min,max]`. If `scope=avg`, consider the + simple average over all steps for `metric` and compare across trials based on `mode=[min,max]`. If `scope=all`, find each trial's min/max score for `metric` based on `mode`, and compare trials based on `mode=[min,max]`. @@ -231,11 +233,11 @@ class ExperimentAnalysis(Analysis): "ExperimentAnalysis: attempting to get best trial for " "metric {} for mode {} not in [\"max\", \"min\"]".format( metric, mode)) - if scope not in ["all", "last"]: + if scope not in ["all", "last", "avg"]: raise ValueError( "ExperimentAnalysis: attempting to get best trial for " - "metric {} for scope {} not in [\"all\", \"last\"]".format( - metric, scope)) + "metric {} for scope {} not in [\"all\", \"last\", \"avg\"]". + format(metric, scope)) best_trial = None best_metric_score = None for trial in self.trials: @@ -244,6 +246,8 @@ class ExperimentAnalysis(Analysis): if scope == "last": metric_score = trial.metric_analysis[metric]["last"] + elif scope == "avg": + metric_score = trial.metric_analysis[metric]["avg"] else: metric_score = trial.metric_analysis[metric][mode] @@ -269,8 +273,10 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last]. If `scope=last`, only look at + scope (str): One of [all, last, avg]. If `scope=last`, only look at each trial's final step for `metric`, and compare across + trials based on `mode=[min,max]`. If `scope=avg`, consider the + simple average over all steps for `metric` and compare across trials based on `mode=[min,max]`. If `scope=all`, find each trial's min/max score for `metric` based on `mode`, and compare trials based on `mode=[min,max]`. @@ -286,8 +292,10 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last]. If `scope=last`, only look at + scope (str): One of [all, last, avg]. If `scope=last`, only look at each trial's final step for `metric`, and compare across + trials based on `mode=[min,max]`. If `scope=avg`, consider the + simple average over all steps for `metric` and compare across trials based on `mode=[min,max]`. If `scope=all`, find each trial's min/max score for `metric` based on `mode`, and compare trials based on `mode=[min,max]`. diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py index e0b660543..319c97bab 100644 --- a/python/ray/tune/tests/test_experiment_analysis_mem.py +++ b/python/ray/tune/tests/test_experiment_analysis_mem.py @@ -3,6 +3,7 @@ import shutil import tempfile import random import pandas as pd +import numpy as np import ray from ray.tune import run, Trainable, sample_from, Analysis, grid_search @@ -12,16 +13,17 @@ from ray.tune.examples.async_hyperband_example import MyTrainableClass class ExperimentAnalysisInMemorySuite(unittest.TestCase): def setUp(self): class MockTrainable(Trainable): + scores_dict = { + 0: [5, 4, 0], + 1: [4, 3, 1], + 2: [2, 1, 8], + 3: [9, 7, 6], + 4: [7, 5, 3] + } + def _setup(self, config): self.id = config["id"] self.idx = 0 - self.scores_dict = { - 0: [5, 0], - 1: [4, 1], - 2: [2, 8], - 3: [9, 6], - 4: [7, 3] - } def _train(self): val = self.scores_dict[self.id][self.idx] @@ -43,14 +45,15 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase): def testCompareTrials(self): self.test_dir = tempfile.mkdtemp() - scores_all = [5, 4, 2, 9, 7, 0, 1, 8, 6, 3] + scores = np.asarray(list(self.MockTrainable.scores_dict.values())) + scores_all = scores.flatten("F") scores_last = scores_all[5:] ea = run( self.MockTrainable, name="analysis_exp", local_dir=self.test_dir, - stop={"training_iteration": 2}, + stop={"training_iteration": 3}, num_samples=1, config={"id": grid_search(list(range(5)))}) @@ -60,9 +63,15 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase): "min").metric_analysis["score"]["min"] max_last = ea.get_best_trial("score", "max", "last").metric_analysis["score"]["last"] + max_avg = ea.get_best_trial("score", "max", + "avg").metric_analysis["score"]["avg"] + min_avg = ea.get_best_trial("score", "min", + "avg").metric_analysis["score"]["avg"] self.assertEqual(max_all, max(scores_all)) self.assertEqual(min_all, min(scores_all)) self.assertEqual(max_last, max(scores_last)) + self.assertAlmostEqual(max_avg, max(np.mean(scores, axis=1))) + self.assertAlmostEqual(min_avg, min(np.mean(scores, axis=1))) self.assertNotEqual(max_last, max(scores_all)) diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 802f8086d..e0e352a57 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -214,7 +214,7 @@ class Trial: self.last_result = {} self.last_update_time = -float("inf") - # stores in memory max/min/last result for each metric by trial + # stores in memory max/min/avg/last result for each metric by trial self.metric_analysis = {} self.export_formats = export_formats @@ -476,13 +476,18 @@ class Trial: self.metric_analysis[metric] = { "max": value, "min": value, + "avg": value, "last": value } else: + step = result["training_iteration"] or 1 self.metric_analysis[metric]["max"] = max( value, self.metric_analysis[metric]["max"]) self.metric_analysis[metric]["min"] = min( value, self.metric_analysis[metric]["min"]) + self.metric_analysis[metric]["avg"] = 1 / step * ( + value + + (step - 1) * self.metric_analysis[metric]["avg"]) self.metric_analysis[metric]["last"] = value def get_trainable_cls(self):