From 153813936bdc83f15fdd5a0b9ab80e710fae956c Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Wed, 9 Sep 2020 09:53:47 -0700
Subject: [PATCH] [tune] auto infer metrics (#10663)

Co-authored-by: Kai Fricke <krfricke@users.noreply.github.com>
Co-authored-by: Kai Fricke <kai@anyscale.com>
---
 doc/source/tune/api_docs/sklearn.rst          |  6 +++
 python/ray/tune/examples/mxnet_example.py     |  3 +-
 python/ray/tune/progress_reporter.py          | 40 +++++++++++++++++-
 python/ray/tune/result.py                     | 23 +++++++++++
 .../ray/tune/tests/test_progress_reporter.py  | 41 ++++++++++++++++++-
 5 files changed, 107 insertions(+), 6 deletions(-)

diff --git a/doc/source/tune/api_docs/sklearn.rst b/doc/source/tune/api_docs/sklearn.rst
index 0067a952c..02a015727 100644
--- a/doc/source/tune/api_docs/sklearn.rst
+++ b/doc/source/tune/api_docs/sklearn.rst
@@ -5,10 +5,16 @@ Scikit-Learn API  (tune.sklearn)
 
 .. _tunegridsearchcv-docs:
 
+TuneGridSearchCV
+----------------
+
 .. autoclass:: ray.tune.sklearn.TuneGridSearchCV
 	:inherited-members:
 
 .. _tunesearchcv-docs:
 
+TuneSearchCV
+------------
+
 .. autoclass:: ray.tune.sklearn.TuneSearchCV
 	:inherited-members:
diff --git a/python/ray/tune/examples/mxnet_example.py b/python/ray/tune/examples/mxnet_example.py
index b128c121d..dd959e481 100644
--- a/python/ray/tune/examples/mxnet_example.py
+++ b/python/ray/tune/examples/mxnet_example.py
@@ -66,8 +66,7 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10):
         reduction_factor=2)
 
     reporter = CLIReporter(
-        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
-        metric_columns=["loss", "mean_accuracy", "training_iteration"])
+        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"])
 
     tune.run(
         partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
diff --git a/python/ray/tune/progress_reporter.py b/python/ray/tune/progress_reporter.py
index c1325f102..ca60adf29 100644
--- a/python/ray/tune/progress_reporter.py
+++ b/python/ray/tune/progress_reporter.py
@@ -1,10 +1,12 @@
 from __future__ import print_function
 
 import collections
+import numpy as np
 import time
 
 from ray.tune.result import (EPISODE_REWARD_MEAN, MEAN_ACCURACY, MEAN_LOSS,
-                             TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL)
+                             TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL,
+                             AUTO_RESULT_KEYS)
 from ray.tune.utils import unflattened_lookup
 
 try:
@@ -51,6 +53,10 @@ class ProgressReporter:
 class TuneReporterBase(ProgressReporter):
     """Abstract base class for the default Tune reporters.
 
+    If metric_columns is not overridden, Tune will attempt to automatically
+    infer the metrics being reported, up to 'infer_limit' number of
+    metrics.
+
     Args:
         metric_columns (dict[str, str]|list[str]): Names of metrics to
             include in progress table. If this is a dict, the keys should
@@ -80,17 +86,25 @@ class TuneReporterBase(ProgressReporter):
         TIMESTEPS_TOTAL: "ts",
         EPISODE_REWARD_MEAN: "reward",
     })
+    VALID_SUMMARY_TYPES = {
+        int, float, np.float32, np.float64, np.int32, np.int64,
+        type(None)
+    }
 
     def __init__(self,
                  metric_columns=None,
                  parameter_columns=None,
                  max_progress_rows=20,
                  max_error_rows=20,
-                 max_report_frequency=5):
+                 max_report_frequency=5,
+                 infer_limit=3):
+        self._metrics_override = metric_columns is not None
+        self._inferred_metrics = {}
         self._metric_columns = metric_columns or self.DEFAULT_COLUMNS.copy()
         self._parameter_columns = parameter_columns or []
         self._max_progress_rows = max_progress_rows
         self._max_error_rows = max_error_rows
+        self._infer_limit = infer_limit
 
         self._max_report_freqency = max_report_frequency
         self._last_report_time = 0
@@ -110,6 +124,7 @@ class TuneReporterBase(ProgressReporter):
             representation (str): Representation to use in table. Defaults to
                 `metric`.
         """
+        self._metrics_override = True
         if metric in self._metric_columns:
             raise ValueError("Column {} already exists.".format(metric))
 
@@ -161,6 +176,9 @@ class TuneReporterBase(ProgressReporter):
             fmt (str): Table format. See `tablefmt` in tabulate API.
             delim (str): Delimiter between messages.
         """
+        if not self._metrics_override:
+            user_metrics = self._infer_user_metrics(trials, self._infer_limit)
+            self._metric_columns.update(user_metrics)
         messages = ["== Status ==", memory_debug_str(), *sys_info]
         if done:
             max_progress = None
@@ -178,6 +196,24 @@ class TuneReporterBase(ProgressReporter):
         messages.append(trial_errors_str(trials, fmt=fmt, max_rows=max_error))
         return delim.join(messages) + delim
 
+    def _infer_user_metrics(self, trials, limit=4):
+        """Try to infer the metrics to print out."""
+        if len(self._inferred_metrics) >= limit:
+            return self._inferred_metrics
+        self._inferred_metrics = {}
+        for t in trials:
+            if not t.last_result:
+                continue
+            for metric, value in t.last_result.items():
+                if metric not in self.DEFAULT_COLUMNS:
+                    if metric not in AUTO_RESULT_KEYS:
+                        if type(value) in self.VALID_SUMMARY_TYPES:
+                            self._inferred_metrics[metric] = metric
+
+                if len(self._inferred_metrics) >= limit:
+                    return self._inferred_metrics
+        return self._inferred_metrics
+
 
 class JupyterNotebookReporter(TuneReporterBase):
     """Jupyter notebook-friendly Reporter that can update display in-place.
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py
index 70b311bf7..8fab799e9 100644
--- a/python/ray/tune/result.py
+++ b/python/ray/tune/result.py
@@ -29,6 +29,9 @@ EPISODE_REWARD_MEAN = "episode_reward_mean"
 # (Optional) Mean loss for training iteration
 MEAN_LOSS = "mean_loss"
 
+# (Optional) Negative mean loss for training iteration
+NEG_MEAN_LOSS = "neg_mean_loss"
+
 # (Optional) Mean accuracy for training iteration
 MEAN_ACCURACY = "mean_accuracy"
 
@@ -61,6 +64,26 @@ DEFAULT_EXPERIMENT_INFO_KEYS = ("trainable_name", EXPERIMENT_TAG, TRIAL_ID)
 DEFAULT_RESULT_KEYS = (TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL,
                        MEAN_ACCURACY, MEAN_LOSS)
 
+# Result keys excluded from metric auto-inference. Do not shrink this list.
+AUTO_RESULT_KEYS = (
+    TRAINING_ITERATION,
+    TIME_TOTAL_S,
+    EPISODES_TOTAL,
+    TIMESTEPS_TOTAL,
+    NODE_IP,
+    HOSTNAME,
+    PID,
+    TIME_TOTAL_S,
+    TIME_THIS_ITER_S,
+    "timestamp",
+    "experiment_id",
+    "date",
+    "time_since_restore",
+    "iterations_since_restore",
+    "timesteps_since_restore",
+    "config",
+)
+
 # __duplicate__ is a magic keyword used internally to
 # avoid double-logging results when using the Function API.
 RESULT_DUPLICATE = "__duplicate__"
diff --git a/python/ray/tune/tests/test_progress_reporter.py b/python/ray/tune/tests/test_progress_reporter.py
index 144f59ede..7b5db7365 100644
--- a/python/ray/tune/tests/test_progress_reporter.py
+++ b/python/ray/tune/tests/test_progress_reporter.py
@@ -3,9 +3,10 @@ import collections
 import os
 import unittest
 from unittest.mock import MagicMock, Mock
-
+from ray import tune
 from ray.test_utils import run_string_as_driver
 from ray.tune.trial import Trial
+from ray.tune.result import AUTO_RESULT_KEYS
 from ray.tune.progress_reporter import (CLIReporter, _fair_filter_trials,
                                         trial_progress_str)
 
@@ -233,6 +234,43 @@ class ProgressReporterTest(unittest.TestCase):
         reporter.add_metric_column("foo", "bar")
         self.assertIn("foo", reporter._metric_columns)
 
+    def testInfer(self):
+        reporter = CLIReporter()
+        test_result = dict(foo_result=1, baz_result=4123, bar_result="testme")
+
+        def test(config):
+            for i in range(3):
+                tune.report(**test_result)
+
+        analysis = tune.run(test, num_samples=3)
+        all_trials = analysis.trials
+        inferred_results = reporter._infer_user_metrics(all_trials)
+        for metric in inferred_results:
+            self.assertNotIn(metric, AUTO_RESULT_KEYS)
+            self.assertTrue(metric in test_result)
+
+        class TestReporter(CLIReporter):
+            _output = []
+
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                self._max_report_freqency = 0
+
+            def report(self, *args, **kwargs):
+                progress_str = self._progress_str(*args, **kwargs)
+                self._output.append(progress_str)
+
+        reporter = TestReporter()
+        analysis = tune.run(test, num_samples=3, progress_reporter=reporter)
+        found = {k: False for k in test_result}
+        for output in reporter._output:
+            for key in test_result:
+                if key in output:
+                    found[key] = True
+        assert found["foo_result"]
+        assert found["baz_result"]
+        assert not found["bar_result"]
+
     def testProgressStr(self):
         trials = []
         for i in range(5):
@@ -285,7 +323,6 @@ class ProgressReporterTest(unittest.TestCase):
             }, {"a": "A"},
             fmt="psql",
             max_rows=3)
-        print(prog3)
         assert prog3 == EXPECTED_RESULT_3
 
     def testEndToEndReporting(self):