diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py index 5ed038aa7..12023a377 100644 --- a/python/ray/tune/analysis/experiment_analysis.py +++ b/python/ray/tune/analysis/experiment_analysis.py @@ -220,34 +220,35 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last, avg]. If `scope=last`, only look at - each trial's final step for `metric`, and compare across - trials based on `mode=[min,max]`. If `scope=avg`, consider the - simple average over all steps for `metric` and compare across - trials based on `mode=[min,max]`. If `scope=all`, find each - trial's min/max score for `metric` based on `mode`, and - compare trials based on `mode=[min,max]`. + scope (str): One of [all, last, avg, last-5-avg, last-10-avg]. + If `scope=last`, only look at each trial's final step for + `metric`, and compare across trials based on `mode=[min,max]`. + If `scope=avg`, consider the simple average over all steps + for `metric` and compare across trials based on + `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`, + consider the simple average over the last 5 or 10 steps for + `metric` and compare across trials based on `mode=[min,max]`. + If `scope=all`, find each trial's min/max score for `metric` + based on `mode`, and compare trials based on `mode=[min,max]`. """ if mode not in ["max", "min"]: raise ValueError( "ExperimentAnalysis: attempting to get best trial for " "metric {} for mode {} not in [\"max\", \"min\"]".format( metric, mode)) - if scope not in ["all", "last", "avg"]: + if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]: raise ValueError( "ExperimentAnalysis: attempting to get best trial for " - "metric {} for scope {} not in [\"all\", \"last\", \"avg\"]". - format(metric, scope)) + "metric {} for scope {} not in [\"all\", \"last\", \"avg\", " + "\"last-5-avg\", \"last-10-avg\"]".format(metric, scope)) best_trial = None best_metric_score = None for trial in self.trials: if metric not in trial.metric_analysis: continue - if scope == "last": - metric_score = trial.metric_analysis[metric]["last"] - elif scope == "avg": - metric_score = trial.metric_analysis[metric]["avg"] + if scope in ["last", "avg", "last-5-avg", "last-10-avg"]: + metric_score = trial.metric_analysis[metric][scope] else: metric_score = trial.metric_analysis[metric][mode] @@ -273,13 +274,16 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last, avg]. If `scope=last`, only look at - each trial's final step for `metric`, and compare across - trials based on `mode=[min,max]`. If `scope=avg`, consider the - simple average over all steps for `metric` and compare across - trials based on `mode=[min,max]`. If `scope=all`, find each - trial's min/max score for `metric` based on `mode`, and - compare trials based on `mode=[min,max]`. + scope (str): One of [all, last, avg, last-5-avg, last-10-avg]. + If `scope=last`, only look at each trial's final step for + `metric`, and compare across trials based on `mode=[min,max]`. + If `scope=avg`, consider the simple average over all steps + for `metric` and compare across trials based on + `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`, + consider the simple average over the last 5 or 10 steps for + `metric` and compare across trials based on `mode=[min,max]`. + If `scope=all`, find each trial's min/max score for `metric` + based on `mode`, and compare trials based on `mode=[min,max]`. """ best_trial = self.get_best_trial(metric, mode, scope) return best_trial.config if best_trial else None @@ -292,13 +296,16 @@ class ExperimentAnalysis(Analysis): Args: metric (str): Key for trial info to order on. mode (str): One of [min, max]. - scope (str): One of [all, last, avg]. If `scope=last`, only look at - each trial's final step for `metric`, and compare across - trials based on `mode=[min,max]`. If `scope=avg`, consider the - simple average over all steps for `metric` and compare across - trials based on `mode=[min,max]`. If `scope=all`, find each - trial's min/max score for `metric` based on `mode`, and - compare trials based on `mode=[min,max]`. + scope (str): One of [all, last, avg, last-5-avg, last-10-avg]. + If `scope=last`, only look at each trial's final step for + `metric`, and compare across trials based on `mode=[min,max]`. + If `scope=avg`, consider the simple average over all steps + for `metric` and compare across trials based on + `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`, + consider the simple average over the last 5 or 10 steps for + `metric` and compare across trials based on `mode=[min,max]`. + If `scope=all`, find each trial's min/max score for `metric` + based on `mode`, and compare trials based on `mode=[min,max]`. """ best_trial = self.get_best_trial(metric, mode, scope) return best_trial.logdir if best_trial else None diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py index 319c97bab..4667c87aa 100644 --- a/python/ray/tune/tests/test_experiment_analysis_mem.py +++ b/python/ray/tune/tests/test_experiment_analysis_mem.py @@ -14,11 +14,11 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase): def setUp(self): class MockTrainable(Trainable): scores_dict = { - 0: [5, 4, 0], - 1: [4, 3, 1], - 2: [2, 1, 8], - 3: [9, 7, 6], - 4: [7, 5, 3] + 0: [5, 4, 4, 4, 4, 4, 4, 4, 0], + 1: [4, 3, 3, 3, 3, 3, 3, 3, 1], + 2: [2, 1, 1, 1, 1, 1, 1, 1, 8], + 3: [9, 7, 7, 7, 7, 7, 7, 7, 6], + 4: [7, 5, 5, 5, 5, 5, 5, 5, 3] } def _setup(self, config): @@ -53,7 +53,7 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase): self.MockTrainable, name="analysis_exp", local_dir=self.test_dir, - stop={"training_iteration": 3}, + stop={"training_iteration": len(scores[0])}, num_samples=1, config={"id": grid_search(list(range(5)))}) @@ -67,12 +67,33 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase): "avg").metric_analysis["score"]["avg"] min_avg = ea.get_best_trial("score", "min", "avg").metric_analysis["score"]["avg"] + max_avg_5 = ea.get_best_trial( + "score", "max", + "last-5-avg").metric_analysis["score"]["last-5-avg"] + min_avg_5 = ea.get_best_trial( + "score", "min", + "last-5-avg").metric_analysis["score"]["last-5-avg"] + max_avg_10 = ea.get_best_trial( + "score", "max", + "last-10-avg").metric_analysis["score"]["last-10-avg"] + min_avg_10 = ea.get_best_trial( + "score", "min", + "last-10-avg").metric_analysis["score"]["last-10-avg"] self.assertEqual(max_all, max(scores_all)) self.assertEqual(min_all, min(scores_all)) self.assertEqual(max_last, max(scores_last)) + self.assertNotEqual(max_last, max(scores_all)) + self.assertAlmostEqual(max_avg, max(np.mean(scores, axis=1))) self.assertAlmostEqual(min_avg, min(np.mean(scores, axis=1))) - self.assertNotEqual(max_last, max(scores_all)) + + self.assertAlmostEqual(max_avg_5, max(np.mean(scores[:, -5:], axis=1))) + self.assertAlmostEqual(min_avg_5, min(np.mean(scores[:, -5:], axis=1))) + + self.assertAlmostEqual(max_avg_10, max( + np.mean(scores[:, -10:], axis=1))) + self.assertAlmostEqual(min_avg_10, min( + np.mean(scores[:, -10:], axis=1))) class AnalysisSuite(unittest.TestCase): diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index e0e352a57..ca273c155 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -1,4 +1,5 @@ import ray.cloudpickle as cloudpickle +from collections import deque import copy from datetime import datetime import logging @@ -214,9 +215,14 @@ class Trial: self.last_result = {} self.last_update_time = -float("inf") - # stores in memory max/min/avg/last result for each metric by trial + # stores in memory max/min/avg/last-n-avg/last result for each + # metric by trial self.metric_analysis = {} + # keep a moving average over these last n steps + self.n_steps = [5, 10] + self.metric_n_steps = {} + self.export_formats = export_formats self.status = Trial.PENDING self.start_time = None @@ -470,6 +476,7 @@ class Trial: self.last_result = result self.last_update_time = time.time() self.result_logger.on_result(self.last_result) + for metric, value in flatten_dict(result).items(): if isinstance(value, Number): if metric not in self.metric_analysis: @@ -479,6 +486,13 @@ class Trial: "avg": value, "last": value } + self.metric_n_steps[metric] = {} + for n in self.n_steps: + key = "last-{:d}-avg".format(n) + self.metric_analysis[metric][key] = value + # Store n as string for correct restore. + self.metric_n_steps[metric][str(n)] = deque( + [value], maxlen=n) else: step = result["training_iteration"] or 1 self.metric_analysis[metric]["max"] = max( @@ -490,6 +504,13 @@ class Trial: (step - 1) * self.metric_analysis[metric]["avg"]) self.metric_analysis[metric]["last"] = value + for n in self.n_steps: + key = "last-{:d}-avg".format(n) + self.metric_n_steps[metric][str(n)].append(value) + self.metric_analysis[metric][key] = sum( + self.metric_n_steps[metric][str(n)]) / len( + self.metric_n_steps[metric][str(n)]) + def get_trainable_cls(self): return get_trainable_cls(self.trainable_name)