mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 00:29:38 +08:00
[tune] verbosity refactor second attempt (#12571)
Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
@@ -1,21 +1,26 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import sys
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import collections
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
from ray.tune.callback import Callback
|
||||
from ray.tune.logger import pretty_print
|
||||
from ray.tune.result import (DEFAULT_METRIC, EPISODE_REWARD_MEAN,
|
||||
MEAN_ACCURACY, MEAN_LOSS, TRAINING_ITERATION,
|
||||
TIME_TOTAL_S, TIMESTEPS_TOTAL, AUTO_RESULT_KEYS)
|
||||
from ray.tune.trial import Trial
|
||||
from ray.tune.trial import DEBUG_PRINT_INTERVAL, Trial
|
||||
from ray.tune.utils import unflattened_lookup
|
||||
from ray.tune.utils.log import Verbosity, has_verbosity
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
from collections import Mapping, MutableMapping
|
||||
|
||||
try:
|
||||
from tabulate import tabulate
|
||||
@@ -33,7 +38,7 @@ class ProgressReporter:
|
||||
receiving training results, and so on.
|
||||
"""
|
||||
|
||||
def should_report(self, trials, done=False):
|
||||
def should_report(self, trials: List[Trial], done: bool = False):
|
||||
"""Returns whether or not progress should be reported.
|
||||
|
||||
Args:
|
||||
@@ -42,7 +47,7 @@ class ProgressReporter:
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def report(self, trials, done, *sys_info):
|
||||
def report(self, trials: List[Trial], done: bool, *sys_info: Dict):
|
||||
"""Reports progress across trials.
|
||||
|
||||
Args:
|
||||
@@ -80,6 +85,12 @@ class TuneReporterBase(ProgressReporter):
|
||||
Defaults to 5s.
|
||||
infer_limit (int): Maximum number of metrics to automatically infer
|
||||
from tune results.
|
||||
print_intermediate_tables (bool|None): Print intermediate result
|
||||
tables. If None (default), will be set to True for verbosity
|
||||
levels above 3, otherwise False. If True, intermediate tables
|
||||
will be printed with experiment progress. If False, tables
|
||||
will only be printed at the end of the tuning run for verbosity
|
||||
levels greater than 2.
|
||||
metric (str): Metric used to determine best current trial.
|
||||
mode (str): One of [min, max]. Determines whether objective is
|
||||
minimizing or maximizing the metric attribute.
|
||||
@@ -99,16 +110,18 @@ class TuneReporterBase(ProgressReporter):
|
||||
type(None)
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
metric_columns=None,
|
||||
parameter_columns=None,
|
||||
total_samples=None,
|
||||
max_progress_rows=20,
|
||||
max_error_rows=20,
|
||||
max_report_frequency=5,
|
||||
infer_limit=3,
|
||||
metric=None,
|
||||
mode=None):
|
||||
def __init__(
|
||||
self,
|
||||
metric_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
total_samples: Optional[int] = None,
|
||||
max_progress_rows: int = 20,
|
||||
max_error_rows: int = 20,
|
||||
max_report_frequency: int = 5,
|
||||
infer_limit: int = 3,
|
||||
print_intermediate_tables: Optional[bool] = None,
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None):
|
||||
self._total_samples = total_samples
|
||||
self._metrics_override = metric_columns is not None
|
||||
self._inferred_metrics = {}
|
||||
@@ -118,13 +131,20 @@ class TuneReporterBase(ProgressReporter):
|
||||
self._max_error_rows = max_error_rows
|
||||
self._infer_limit = infer_limit
|
||||
|
||||
if print_intermediate_tables is None:
|
||||
self._print_intermediate_tables = has_verbosity(
|
||||
Verbosity.V3_TRIAL_DETAILS)
|
||||
else:
|
||||
self._print_intermediate_tables = print_intermediate_tables
|
||||
|
||||
self._max_report_freqency = max_report_frequency
|
||||
self._last_report_time = 0
|
||||
|
||||
self._metric = metric
|
||||
self._mode = mode
|
||||
|
||||
def set_search_properties(self, metric, mode):
|
||||
def set_search_properties(self, metric: Optional[str],
|
||||
mode: Optional[str]):
|
||||
if self._metric and metric:
|
||||
return False
|
||||
if self._mode and mode:
|
||||
@@ -141,16 +161,18 @@ class TuneReporterBase(ProgressReporter):
|
||||
|
||||
return True
|
||||
|
||||
def set_total_samples(self, total_samples):
|
||||
def set_total_samples(self, total_samples: int):
|
||||
self._total_samples = total_samples
|
||||
|
||||
def should_report(self, trials, done=False):
|
||||
def should_report(self, trials: List[Trial], done: bool = False):
|
||||
if time.time() - self._last_report_time > self._max_report_freqency:
|
||||
self._last_report_time = time.time()
|
||||
return True
|
||||
return done
|
||||
|
||||
def add_metric_column(self, metric, representation=None):
|
||||
def add_metric_column(self,
|
||||
metric: str,
|
||||
representation: Optional[str] = None):
|
||||
"""Adds a metric to the existing columns.
|
||||
|
||||
Args:
|
||||
@@ -163,7 +185,7 @@ class TuneReporterBase(ProgressReporter):
|
||||
if metric in self._metric_columns:
|
||||
raise ValueError("Column {} already exists.".format(metric))
|
||||
|
||||
if isinstance(self._metric_columns, Mapping):
|
||||
if isinstance(self._metric_columns, MutableMapping):
|
||||
representation = representation or metric
|
||||
self._metric_columns[metric] = representation
|
||||
else:
|
||||
@@ -174,7 +196,9 @@ class TuneReporterBase(ProgressReporter):
|
||||
"of metric columns.")
|
||||
self._metric_columns.append(metric)
|
||||
|
||||
def add_parameter_column(self, parameter, representation=None):
|
||||
def add_parameter_column(self,
|
||||
parameter: str,
|
||||
representation: Optional[str] = None):
|
||||
"""Adds a parameter to the existing columns.
|
||||
|
||||
Args:
|
||||
@@ -186,7 +210,7 @@ class TuneReporterBase(ProgressReporter):
|
||||
if parameter in self._parameter_columns:
|
||||
raise ValueError("Column {} already exists.".format(parameter))
|
||||
|
||||
if isinstance(self._parameter_columns, Mapping):
|
||||
if isinstance(self._parameter_columns, MutableMapping):
|
||||
representation = representation or parameter
|
||||
self._parameter_columns[parameter] = representation
|
||||
else:
|
||||
@@ -197,7 +221,12 @@ class TuneReporterBase(ProgressReporter):
|
||||
"of metric columns.")
|
||||
self._parameter_columns.append(parameter)
|
||||
|
||||
def _progress_str(self, trials, done, *sys_info, fmt="psql", delim="\n"):
|
||||
def _progress_str(self,
|
||||
trials: List[Trial],
|
||||
done: bool,
|
||||
*sys_info: Dict,
|
||||
fmt: str = "psql",
|
||||
delim: str = "\n"):
|
||||
"""Returns full progress string.
|
||||
|
||||
This string contains a progress table and error table. The progress
|
||||
@@ -228,19 +257,24 @@ class TuneReporterBase(ProgressReporter):
|
||||
best_trial_str(current_best_trial, metric,
|
||||
self._parameter_columns))
|
||||
|
||||
messages.append(
|
||||
trial_progress_str(
|
||||
trials,
|
||||
metric_columns=self._metric_columns,
|
||||
parameter_columns=self._parameter_columns,
|
||||
total_samples=self._total_samples,
|
||||
fmt=fmt,
|
||||
max_rows=max_progress))
|
||||
messages.append(trial_errors_str(trials, fmt=fmt, max_rows=max_error))
|
||||
if has_verbosity(Verbosity.V1_EXPERIMENT):
|
||||
# Will filter the table in `trial_progress_str`
|
||||
messages.append(
|
||||
trial_progress_str(
|
||||
trials,
|
||||
metric_columns=self._metric_columns,
|
||||
parameter_columns=self._parameter_columns,
|
||||
total_samples=self._total_samples,
|
||||
force_table=self._print_intermediate_tables,
|
||||
fmt=fmt,
|
||||
max_rows=max_progress,
|
||||
done=done))
|
||||
messages.append(
|
||||
trial_errors_str(trials, fmt=fmt, max_rows=max_error))
|
||||
|
||||
return delim.join(messages) + delim
|
||||
|
||||
def _infer_user_metrics(self, trials, limit=4):
|
||||
def _infer_user_metrics(self, trials: List[Trial], limit: int = 4):
|
||||
"""Try to infer the metrics to print out."""
|
||||
if len(self._inferred_metrics) >= limit:
|
||||
return self._inferred_metrics
|
||||
@@ -258,7 +292,7 @@ class TuneReporterBase(ProgressReporter):
|
||||
return self._inferred_metrics
|
||||
return self._inferred_metrics
|
||||
|
||||
def _current_best_trial(self, trials):
|
||||
def _current_best_trial(self, trials: List[Trial]):
|
||||
if not trials:
|
||||
return None, None
|
||||
|
||||
@@ -309,26 +343,39 @@ class JupyterNotebookReporter(TuneReporterBase):
|
||||
corresponding to each trial. Defaults to 20.
|
||||
max_report_frequency (int): Maximum report frequency in seconds.
|
||||
Defaults to 5s.
|
||||
infer_limit (int): Maximum number of metrics to automatically infer
|
||||
from tune results.
|
||||
print_intermediate_tables (bool|None): Print intermediate result
|
||||
tables. If None (default), will be set to True for verbosity
|
||||
levels above 3, otherwise False. If True, intermediate tables
|
||||
will be printed with experiment progress. If False, tables
|
||||
will only be printed at the end of the tuning run for verbosity
|
||||
levels greater than 2.
|
||||
metric (str): Metric used to determine best current trial.
|
||||
mode (str): One of [min, max]. Determines whether objective is
|
||||
minimizing or maximizing the metric attribute.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
overwrite,
|
||||
metric_columns=None,
|
||||
parameter_columns=None,
|
||||
total_samples=None,
|
||||
max_progress_rows=20,
|
||||
max_error_rows=20,
|
||||
max_report_frequency=5,
|
||||
infer_limit=3,
|
||||
metric=None,
|
||||
mode=None):
|
||||
super(JupyterNotebookReporter,
|
||||
self).__init__(metric_columns, parameter_columns, total_samples,
|
||||
max_progress_rows, max_error_rows,
|
||||
max_report_frequency, infer_limit, metric, mode)
|
||||
def __init__(
|
||||
self,
|
||||
overwrite: bool,
|
||||
metric_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
total_samples: Optional[int] = None,
|
||||
max_progress_rows: int = 20,
|
||||
max_error_rows: int = 20,
|
||||
max_report_frequency: int = 5,
|
||||
infer_limit: int = 3,
|
||||
print_intermediate_tables: Optional[bool] = None,
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None):
|
||||
super(JupyterNotebookReporter, self).__init__(
|
||||
metric_columns, parameter_columns, total_samples,
|
||||
max_progress_rows, max_error_rows, max_report_frequency,
|
||||
infer_limit, print_intermediate_tables, metric, mode)
|
||||
self._overwrite = overwrite
|
||||
|
||||
def report(self, trials, done, *sys_info):
|
||||
def report(self, trials: List[Trial], done: bool, *sys_info: Dict):
|
||||
from IPython.display import clear_output
|
||||
from IPython.core.display import display, HTML
|
||||
if self._overwrite:
|
||||
@@ -359,25 +406,38 @@ class CLIReporter(TuneReporterBase):
|
||||
corresponding to each trial. Defaults to 20.
|
||||
max_report_frequency (int): Maximum report frequency in seconds.
|
||||
Defaults to 5s.
|
||||
infer_limit (int): Maximum number of metrics to automatically infer
|
||||
from tune results.
|
||||
print_intermediate_tables (bool|None): Print intermediate result
|
||||
tables. If None (default), will be set to True for verbosity
|
||||
levels above 3, otherwise False. If True, intermediate tables
|
||||
will be printed with experiment progress. If False, tables
|
||||
will only be printed at the end of the tuning run for verbosity
|
||||
levels greater than 2.
|
||||
metric (str): Metric used to determine best current trial.
|
||||
mode (str): One of [min, max]. Determines whether objective is
|
||||
minimizing or maximizing the metric attribute.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
metric_columns=None,
|
||||
parameter_columns=None,
|
||||
total_samples=None,
|
||||
max_progress_rows=20,
|
||||
max_error_rows=20,
|
||||
max_report_frequency=5,
|
||||
infer_limit=3,
|
||||
metric=None,
|
||||
mode=None):
|
||||
def __init__(
|
||||
self,
|
||||
metric_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
total_samples: Optional[int] = None,
|
||||
max_progress_rows: int = 20,
|
||||
max_error_rows: int = 20,
|
||||
max_report_frequency: int = 5,
|
||||
infer_limit: int = 3,
|
||||
print_intermediate_tables: Optional[bool] = None,
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None):
|
||||
|
||||
super(CLIReporter,
|
||||
self).__init__(metric_columns, parameter_columns, total_samples,
|
||||
max_progress_rows, max_error_rows,
|
||||
max_report_frequency, infer_limit, metric, mode)
|
||||
super(CLIReporter, self).__init__(
|
||||
metric_columns, parameter_columns, total_samples,
|
||||
max_progress_rows, max_error_rows, max_report_frequency,
|
||||
infer_limit, print_intermediate_tables, metric, mode)
|
||||
|
||||
def report(self, trials, done, *sys_info):
|
||||
def report(self, trials: List[Trial], done: bool, *sys_info: Dict):
|
||||
print(self._progress_str(trials, done, *sys_info))
|
||||
|
||||
|
||||
@@ -403,12 +463,22 @@ def memory_debug_str():
|
||||
"(or ray[debug]) to resolve)")
|
||||
|
||||
|
||||
def trial_progress_str(trials,
|
||||
metric_columns,
|
||||
parameter_columns=None,
|
||||
total_samples=0,
|
||||
fmt="psql",
|
||||
max_rows=None):
|
||||
def _get_trials_by_state(trials: List[Trial]):
|
||||
trials_by_state = collections.defaultdict(list)
|
||||
for t in trials:
|
||||
trials_by_state[t.status].append(t)
|
||||
return trials_by_state
|
||||
|
||||
|
||||
def trial_progress_str(
|
||||
trials: List[Trial],
|
||||
metric_columns: Union[List[str], Dict[str, str]],
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
total_samples: int = 0,
|
||||
force_table: bool = False,
|
||||
fmt: str = "psql",
|
||||
max_rows: Optional[int] = None,
|
||||
done: bool = False):
|
||||
"""Returns a human readable message for printing to the console.
|
||||
|
||||
This contains a table where each row represents a trial, its parameters
|
||||
@@ -426,9 +496,13 @@ def trial_progress_str(trials,
|
||||
the parameter name is used in the message directly. If this is
|
||||
empty, all parameters are used in the message.
|
||||
total_samples (int): Total number of trials that will be generated.
|
||||
force_table (bool): Force printing a table. If False, a table will
|
||||
be printed only at the end of the training for verbosity levels
|
||||
above `Verbosity.V2_TRIAL_NORM`.
|
||||
fmt (str): Output format (see tablefmt in tabulate API).
|
||||
max_rows (int): Maximum number of rows in the trial table. Defaults to
|
||||
unlimited.
|
||||
done (bool): True indicates that the tuning run finished.
|
||||
"""
|
||||
messages = []
|
||||
delim = "<br>" if fmt == "html" else "\n"
|
||||
@@ -436,9 +510,7 @@ def trial_progress_str(trials,
|
||||
return delim.join(messages)
|
||||
|
||||
num_trials = len(trials)
|
||||
trials_by_state = collections.defaultdict(list)
|
||||
for t in trials:
|
||||
trials_by_state[t.status].append(t)
|
||||
trials_by_state = _get_trials_by_state(trials)
|
||||
|
||||
for local_dir in sorted({t.local_dir for t in trials}):
|
||||
messages.append("Result logdir: {}".format(local_dir))
|
||||
@@ -448,6 +520,30 @@ def trial_progress_str(trials,
|
||||
for state in sorted(trials_by_state)
|
||||
]
|
||||
|
||||
if total_samples and total_samples >= sys.maxsize:
|
||||
total_samples = "infinite"
|
||||
|
||||
messages.append("Number of trials: {}{} ({})".format(
|
||||
num_trials, f"/{total_samples}"
|
||||
if total_samples else "", ", ".join(num_trials_strs)))
|
||||
|
||||
if force_table or (has_verbosity(Verbosity.V2_TRIAL_NORM) and done):
|
||||
messages += trial_progress_table(trials, metric_columns,
|
||||
parameter_columns, fmt, max_rows)
|
||||
|
||||
return delim.join(messages)
|
||||
|
||||
|
||||
def trial_progress_table(
|
||||
trials: List[Trial],
|
||||
metric_columns: Union[List[str], Dict[str, str]],
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None,
|
||||
fmt: str = "psql",
|
||||
max_rows: Optional[int] = None):
|
||||
messages = []
|
||||
num_trials = len(trials)
|
||||
trials_by_state = _get_trials_by_state(trials)
|
||||
|
||||
state_tbl_order = [
|
||||
Trial.RUNNING, Trial.PAUSED, Trial.PENDING, Trial.TERMINATED,
|
||||
Trial.ERROR
|
||||
@@ -472,19 +568,13 @@ def trial_progress_str(trials,
|
||||
overflow_str = ", ".join(overflow_strs)
|
||||
else:
|
||||
overflow = False
|
||||
overflow_str = ""
|
||||
trials = []
|
||||
for state in state_tbl_order:
|
||||
if state not in trials_by_state:
|
||||
continue
|
||||
trials += trials_by_state[state]
|
||||
|
||||
if total_samples and total_samples >= sys.maxsize:
|
||||
total_samples = "infinite"
|
||||
|
||||
messages.append("Number of trials: {}{} ({})".format(
|
||||
num_trials, f"/{total_samples}"
|
||||
if total_samples else "", ", ".join(num_trials_strs)))
|
||||
|
||||
# Pre-process trials to figure out what columns to show.
|
||||
if isinstance(metric_columns, Mapping):
|
||||
metric_keys = list(metric_columns.keys())
|
||||
@@ -526,10 +616,12 @@ def trial_progress_str(trials,
|
||||
if overflow:
|
||||
messages.append("... {} more trials not shown ({})".format(
|
||||
overflow, overflow_str))
|
||||
return delim.join(messages)
|
||||
return messages
|
||||
|
||||
|
||||
def trial_errors_str(trials, fmt="psql", max_rows=None):
|
||||
def trial_errors_str(trials: List[Trial],
|
||||
fmt: str = "psql",
|
||||
max_rows: Optional[int] = None):
|
||||
"""Returns a readable message regarding trial errors.
|
||||
|
||||
Args:
|
||||
@@ -558,7 +650,10 @@ def trial_errors_str(trials, fmt="psql", max_rows=None):
|
||||
return delim.join(messages)
|
||||
|
||||
|
||||
def best_trial_str(trial, metric, parameter_columns=None):
|
||||
def best_trial_str(
|
||||
trial: Trial,
|
||||
metric: str,
|
||||
parameter_columns: Union[None, List[str], Dict[str, str]] = None):
|
||||
"""Returns a readable message stating the current best trial."""
|
||||
val = trial.last_result[metric]
|
||||
config = trial.last_result.get("config", {})
|
||||
@@ -570,7 +665,8 @@ def best_trial_str(trial, metric, parameter_columns=None):
|
||||
f"parameters={params}"
|
||||
|
||||
|
||||
def _fair_filter_trials(trials_by_state, max_trials):
|
||||
def _fair_filter_trials(trials_by_state: Dict[str, List[Trial]],
|
||||
max_trials: int):
|
||||
"""Filters trials such that each state is represented fairly.
|
||||
|
||||
The oldest trials are truncated if necessary.
|
||||
@@ -605,7 +701,7 @@ def _fair_filter_trials(trials_by_state, max_trials):
|
||||
return filtered_trials
|
||||
|
||||
|
||||
def _get_trial_info(trial, parameters, metrics):
|
||||
def _get_trial_info(trial: Trial, parameters: List[str], metrics: List[str]):
|
||||
"""Returns the following information about a trial:
|
||||
|
||||
name | status | loc | params... | metrics...
|
||||
@@ -625,3 +721,109 @@ def _get_trial_info(trial, parameters, metrics):
|
||||
unflattened_lookup(metric, result, default=None) for metric in metrics
|
||||
]
|
||||
return trial_info
|
||||
|
||||
|
||||
class TrialProgressCallback(Callback):
    """Reports (prints) intermediate trial progress.

    This callback is automatically added to the callback stack. When a
    result is obtained, this callback will print the results according to
    the specified verbosity level.

    For ``Verbosity.V3_TRIAL_DETAILS``, a full result list is printed.

    For ``Verbosity.V2_TRIAL_NORM``, only one line is printed per received
    result.

    All other verbosity levels do not print intermediate trial progress.

    Result printing is throttled on a per-trial basis: a trial's results are
    printed at most once every ``DEBUG_PRINT_INTERVAL`` seconds. Results are
    always printed when a trial finished or errored.

    Args:
        metric (str|None): Name of the metric to show in the one-line
            ``V2_TRIAL_NORM`` messages. If not set, the whole filtered
            result is printed instead.
    """

    def __init__(self, metric: Optional[str] = None):
        # Timestamp of the last print per trial; missing trials read as 0.0,
        # so the first result of every trial is always due.
        self._last_print = collections.defaultdict(float)
        # Trials for which completion has already been recorded.
        self._completed_trials = set()
        # Last one-line result string printed per trial (V2 branch only).
        self._last_result_str = {}
        self._metric = metric

    def on_trial_result(self, iteration: int, trials: List["Trial"],
                        trial: "Trial", result: Dict, **info):
        """Log a regular (non-error) result of ``trial``."""
        self.log_result(trial, result, error=False)

    def on_trial_error(self, iteration: int, trials: List["Trial"],
                       trial: "Trial", **info):
        """Log the last known result of an errored ``trial``."""
        self.log_result(trial, trial.last_result, error=True)

    def on_trial_complete(self, iteration: int, trials: List["Trial"],
                          trial: "Trial", **info):
        """Report completion of ``trial`` unless it was already recorded."""
        # Only log when we never logged that a trial was completed
        if trial in self._completed_trials:
            return
        self._completed_trials.add(trial)

        print_result_str = self._print_result(trial.last_result)
        last_result_str = self._last_result_str.get(trial, "")
        if print_result_str != last_result_str:
            # This is a new result: let ``log_result`` apply the usual
            # verbosity and throttling rules to it.
            self.log_result(trial, trial.last_result, error=False)
        elif has_verbosity(Verbosity.V2_TRIAL_NORM):
            # Gate the completion notice like every other trial message, so
            # lower verbosity levels stay silent as documented on the class.
            # NOTE(review): assumes has_verbosity() is a threshold check
            # (also true for V3_TRIAL_DETAILS) - confirm in utils.log.
            print(f"Trial {trial} completed. "
                  f"Last result: {print_result_str}")

    def log_result(self, trial: "Trial", result: Dict, error: bool = False):
        """Print ``result`` for ``trial`` according to the verbosity level.

        Args:
            trial (Trial): Trial the result belongs to.
            result (Dict): Result to print. ``result["done"] is True`` marks
                the trial as completed.
            error (bool): True if the trial errored; an error message is
                printed instead of the result line at ``V2_TRIAL_NORM``.
        """
        done = result.get("done", False) is True
        last_print = self._last_print[trial]
        if done and trial not in self._completed_trials:
            self._completed_trials.add(trial)

        # Finished and errored trials bypass the per-trial throttle.
        due = done or error or \
            time.time() - last_print > DEBUG_PRINT_INTERVAL
        if not due:
            return

        if has_verbosity(Verbosity.V3_TRIAL_DETAILS):
            print("Result for {}:".format(trial))
            print(" {}".format(pretty_print(result).replace("\n", "\n ")))
            self._last_print[trial] = time.time()
        elif has_verbosity(Verbosity.V2_TRIAL_NORM):
            info = " This trial completed." if done else ""

            print_result_str = self._print_result(result)
            # Remembered so ``on_trial_complete`` can tell whether the final
            # result was already shown.
            self._last_result_str[trial] = print_result_str

            if error:
                # Only the error message needs the path, so ``trial.logdir``
                # is not touched for regular results.
                error_file = os.path.join(trial.logdir, "error.txt")
                message = (f"The trial {trial} errored with "
                           f"parameters={trial.config}. "
                           f"Error file: {error_file}")
            elif self._metric:
                # -99. is the placeholder shown when the result does not
                # contain the configured metric.
                metric_value = result.get(self._metric, -99.)
                message = (f"Trial {trial} reported "
                           f"{self._metric}={metric_value:.2f} "
                           f"with parameters={trial.config}.{info}")
            else:
                message = (f"Trial {trial} reported "
                           f"{print_result_str} "
                           f"with parameters={trial.config}.{info}")

            print(message)
            self._last_print[trial] = time.time()

    def _print_result(self, result: Dict):
        """Return ``result`` as a ``k=v,k=v`` string without bookkeeping keys.

        The input dict is not modified. Dropped keys are ``config``,
        ``hist_stats``, ``trial_id``, ``experiment_tag``, ``done`` and
        everything listed in ``AUTO_RESULT_KEYS``.
        """
        print_result = result.copy()
        for key in ("config", "hist_stats", "trial_id", "experiment_tag",
                    "done"):
            print_result.pop(key, None)
        for auto_result in AUTO_RESULT_KEYS:
            print_result.pop(auto_result, None)

        return ",".join(f"{k}={v}" for k, v in print_result.items())
|
||||
|
||||
@@ -73,7 +73,7 @@ tune.run_experiments({
|
||||
"c": tune.grid_search(list(range(10))),
|
||||
},
|
||||
},
|
||||
}, verbose=1, progress_reporter=reporter)"""
|
||||
}, verbose=3, progress_reporter=reporter)"""
|
||||
|
||||
EXPECTED_END_TO_END_START = """Number of trials: 1/30 (1 RUNNING)
|
||||
+---------------+----------+-------+-----+
|
||||
@@ -160,6 +160,48 @@ EXPECTED_BEST_1 = "Current best trial: 00001 with metric_1=0.5 and " \
|
||||
EXPECTED_BEST_2 = "Current best trial: 00004 with metric_1=2.0 and " \
|
||||
"parameters={'a': 4}"
|
||||
|
||||
VERBOSE_EXP_OUT_1 = "Number of trials: 1/3 (1 RUNNING)"
|
||||
VERBOSE_EXP_OUT_2 = "Number of trials: 3/3 (3 TERMINATED)"
|
||||
|
||||
VERBOSE_TRIAL_NORM = "Trial train_xxxxx_00000 reported acc=5 with " + \
|
||||
"""parameters={'do': 'complete'}. This trial completed.
|
||||
Trial train_xxxxx_00001 reported _metric=6 with parameters={'do': 'once'}.
|
||||
Trial train_xxxxx_00001 completed. Last result: _metric=6
|
||||
Trial train_xxxxx_00002 reported acc=7 with parameters={'do': 'twice'}.
|
||||
Trial train_xxxxx_00002 reported acc=8 with parameters={'do': 'twice'}. """ + \
|
||||
"This trial completed."
|
||||
|
||||
VERBOSE_TRIAL_DETAIL = """+-------------------+----------+-------+----------+
|
||||
| Trial name | status | loc | do |
|
||||
|-------------------+----------+-------+----------|
|
||||
| train_xxxxx_00000 | RUNNING | | complete |
|
||||
+-------------------+----------+-------+----------+"""
|
||||
|
||||
VERBOSE_CMD = """from ray import tune
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
|
||||
def train(config):
|
||||
if config["do"] == "complete":
|
||||
tune.report(acc=5, done=True)
|
||||
elif config["do"] == "once":
|
||||
tune.report(6)
|
||||
else:
|
||||
tune.report(acc=7)
|
||||
tune.report(acc=8)
|
||||
|
||||
random.seed(1234)
|
||||
np.random.seed(1234)
|
||||
|
||||
tune.run(
|
||||
train,
|
||||
config={
|
||||
"do": tune.grid_search(["complete", "once", "twice"])
|
||||
},"""
|
||||
|
||||
# Add "verbose=3)" etc
|
||||
|
||||
|
||||
class ProgressReporterTest(unittest.TestCase):
|
||||
def mock_trial(self, status, i):
|
||||
@@ -294,12 +336,16 @@ class ProgressReporterTest(unittest.TestCase):
|
||||
trials.append(t)
|
||||
# One metric, two parameters
|
||||
prog1 = trial_progress_str(
|
||||
trials, ["metric_1"], ["a", "b"], fmt="psql", max_rows=3)
|
||||
trials, ["metric_1"], ["a", "b"],
|
||||
fmt="psql",
|
||||
max_rows=3,
|
||||
force_table=True)
|
||||
print(prog1)
|
||||
assert prog1 == EXPECTED_RESULT_1
|
||||
|
||||
# No metric, all parameters
|
||||
prog2 = trial_progress_str(trials, [], None, fmt="psql", max_rows=None)
|
||||
prog2 = trial_progress_str(
|
||||
trials, [], None, fmt="psql", max_rows=None, force_table=True)
|
||||
print(prog2)
|
||||
assert prog2 == EXPECTED_RESULT_2
|
||||
|
||||
@@ -310,7 +356,8 @@ class ProgressReporterTest(unittest.TestCase):
|
||||
"metric_2": "Metric 2"
|
||||
}, {"a": "A"},
|
||||
fmt="psql",
|
||||
max_rows=3)
|
||||
max_rows=3,
|
||||
force_table=True)
|
||||
print(prog3)
|
||||
assert prog3 == EXPECTED_RESULT_3
|
||||
|
||||
@@ -363,6 +410,64 @@ class ProgressReporterTest(unittest.TestCase):
|
||||
finally:
|
||||
del os.environ["_TEST_TUNE_TRIAL_UUID"]
|
||||
|
||||
def testVerboseReporting(self):
    """Check which progress messages show up at verbosity levels 0 to 3.

    For each level, ``VERBOSE_CMD`` is completed with ``verbose=<level>)``
    and run as a driver script. The captured output is then checked for the
    presence or absence of the four expected snippets. If a check fails,
    the full driver output is printed before the failure is re-raised.
    """
    # Snippets in the order they are asserted for every level.
    snippets = (VERBOSE_EXP_OUT_1, VERBOSE_EXP_OUT_2, VERBOSE_TRIAL_NORM,
                VERBOSE_TRIAL_DETAIL)
    # (verbosity level, whether each snippet above must be in the output)
    expectations = (
        (0, (False, False, False, False)),
        (1, (True, True, False, False)),
        (2, (True, True, True, False)),
        (3, (True, True, False, True)),
    )

    try:
        os.environ["_TEST_TUNE_TRIAL_UUID"] = "xxxxx"

        for level, expected in expectations:
            command = VERBOSE_CMD + "verbose={})".format(level)
            output = run_string_as_driver(command)
            try:
                for snippet, present in zip(snippets, expected):
                    if present:
                        self.assertIn(snippet, output)
                    else:
                        self.assertNotIn(snippet, output)
            except Exception:
                # Dump the driver output to make the failure debuggable.
                print("*** BEGIN OUTPUT ***")
                print(output)
                print("*** END OUTPUT ***")
                raise
    finally:
        del os.environ["_TEST_TUNE_TRIAL_UUID"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
@@ -16,7 +16,6 @@ from ray.tune.checkpoint_manager import Checkpoint, CheckpointManager
|
||||
# NOTE(rkn): We import ray.tune.registry here instead of importing the names we
|
||||
# need because there are cyclic imports that may cause specific names to not
|
||||
# have been defined yet. See https://github.com/ray-project/ray/issues/1716.
|
||||
from ray.tune.logger import pretty_print
|
||||
from ray.tune.registry import get_trainable_cls, validate_trainable
|
||||
from ray.tune.result import DEFAULT_RESULTS_DIR, DONE, TRAINING_ITERATION
|
||||
from ray.tune.resources import Resources, json_to_resources, resources_to_json
|
||||
@@ -230,7 +229,6 @@ class Trial:
|
||||
or not len(self.log_to_file) == 2:
|
||||
self.log_to_file = (None, None)
|
||||
|
||||
self.verbose = True
|
||||
self.max_failures = max_failures
|
||||
|
||||
# Local trial state that is updated during the run
|
||||
@@ -480,11 +478,7 @@ class Trial:
|
||||
def update_last_result(self, result, terminate=False):
|
||||
if self.experiment_tag:
|
||||
result.update(experiment_tag=self.experiment_tag)
|
||||
if self.verbose and (terminate or time.time() - self.last_debug >
|
||||
DEBUG_PRINT_INTERVAL):
|
||||
print("Result for {}:".format(self))
|
||||
print(" {}".format(pretty_print(result).replace("\n", "\n ")))
|
||||
self.last_debug = time.time()
|
||||
|
||||
self.set_location(Location(result.get("node_ip"), result.get("pid")))
|
||||
self.last_result = result
|
||||
self.last_update_time = time.time()
|
||||
@@ -527,9 +521,6 @@ class Trial:
|
||||
def get_trainable_cls(self):
|
||||
return get_trainable_cls(self.trainable_name)
|
||||
|
||||
def set_verbose(self, verbose):
|
||||
self.verbose = verbose
|
||||
|
||||
def is_finished(self):
|
||||
return self.status in [Trial.ERROR, Trial.TERMINATED]
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from ray.tune.trial import Checkpoint, Trial
|
||||
from ray.tune.schedulers import FIFOScheduler, TrialScheduler
|
||||
from ray.tune.suggest import BasicVariantGenerator
|
||||
from ray.tune.utils import warn_if_slow, flatten_dict, env_integer
|
||||
from ray.tune.utils.log import Verbosity, has_verbosity
|
||||
from ray.tune.utils.serialization import TuneFunctionDecoder, \
|
||||
TuneFunctionEncoder
|
||||
from ray.tune.web_server import TuneServer
|
||||
@@ -78,8 +79,6 @@ class TrialRunner:
|
||||
If fail_fast='raise' provided, Tune will automatically
|
||||
raise the exception received by the Trainable. fail_fast='raise'
|
||||
can easily leak resources and should be used with caution.
|
||||
verbose (bool): Flag for verbosity. If False, trial results
|
||||
will not be output.
|
||||
checkpoint_period (int): Trial runner checkpoint periodicity in
|
||||
seconds. Defaults to 10.
|
||||
trial_executor (TrialExecutor): Defaults to RayTrialExecutor.
|
||||
@@ -102,7 +101,6 @@ class TrialRunner:
|
||||
resume=False,
|
||||
server_port=None,
|
||||
fail_fast=False,
|
||||
verbose=True,
|
||||
checkpoint_period=None,
|
||||
trial_executor=None,
|
||||
callbacks=None,
|
||||
@@ -135,7 +133,6 @@ class TrialRunner:
|
||||
else:
|
||||
raise ValueError("fail_fast must be one of {bool, RAISE}. "
|
||||
f"Got {self._fail_fast}.")
|
||||
self._verbose = verbose
|
||||
|
||||
self._server = None
|
||||
self._server_port = server_port
|
||||
@@ -165,7 +162,7 @@ class TrialRunner:
|
||||
self.resume(run_errored_only=errored_only)
|
||||
self._resumed = True
|
||||
except Exception as e:
|
||||
if self._verbose:
|
||||
if has_verbosity(Verbosity.V3_TRIAL_DETAILS):
|
||||
logger.error(str(e))
|
||||
logger.exception("Runner restore failed.")
|
||||
if self._fail_fast:
|
||||
@@ -405,7 +402,6 @@ class TrialRunner:
|
||||
Args:
|
||||
trial (Trial): Trial to queue.
|
||||
"""
|
||||
trial.set_verbose(self._verbose)
|
||||
self._trials.append(trial)
|
||||
with warn_if_slow("scheduler.on_trial_add"):
|
||||
self._scheduler_alg.on_trial_add(self, trial)
|
||||
@@ -565,6 +561,8 @@ class TrialRunner:
|
||||
with warn_if_slow("scheduler.on_trial_result"):
|
||||
decision = self._scheduler_alg.on_trial_result(
|
||||
self, trial, flat_result)
|
||||
if decision == TrialScheduler.STOP:
|
||||
result.update(done=True)
|
||||
with warn_if_slow("search_alg.on_trial_result"):
|
||||
self._search_alg.on_trial_result(trial.trial_id,
|
||||
flat_result)
|
||||
@@ -583,7 +581,6 @@ class TrialRunner:
|
||||
iteration=self._iteration,
|
||||
trials=self._trials,
|
||||
trial=trial)
|
||||
result.update(done=True)
|
||||
|
||||
if not is_duplicate:
|
||||
trial.update_last_result(
|
||||
|
||||
+17
-12
@@ -18,6 +18,7 @@ from ray.tune.syncer import wait_for_sync, set_sync_periods, \
|
||||
from ray.tune.trial_runner import TrialRunner
|
||||
from ray.tune.progress_reporter import CLIReporter, JupyterNotebookReporter
|
||||
from ray.tune.schedulers import FIFOScheduler
|
||||
from ray.tune.utils.log import Verbosity, has_verbosity, set_verbosity
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -70,7 +71,7 @@ def run(
|
||||
checkpoint_score_attr=None,
|
||||
checkpoint_freq=0,
|
||||
checkpoint_at_end=False,
|
||||
verbose=2,
|
||||
verbose=Verbosity.V3_TRIAL_DETAILS,
|
||||
progress_reporter=None,
|
||||
log_to_file=False,
|
||||
trial_name_creator=None,
|
||||
@@ -188,8 +189,9 @@ def run(
|
||||
checkpoint_at_end (bool): Whether to checkpoint at the end of the
|
||||
experiment regardless of the checkpoint_freq. Default is False.
|
||||
This has no effect when using the Functional Training API.
|
||||
verbose (int): 0, 1, or 2. Verbosity mode. 0 = silent,
|
||||
1 = only status updates, 2 = status and trial results.
|
||||
verbose (Union[int, Verbosity]): 0, 1, 2, or 3. Verbosity mode.
|
||||
0 = silent, 1 = only status updates, 2 = status and brief trial
|
||||
results, 3 = status and detailed trial results. Defaults to 3.
|
||||
progress_reporter (ProgressReporter): Progress reporter for reporting
|
||||
intermediate experiment progress. Defaults to CLIReporter if
|
||||
running in command-line, or JupyterNotebookReporter if running in
|
||||
@@ -281,6 +283,8 @@ def run(
|
||||
"The `mode` parameter passed to `tune.run()` has to be one of "
|
||||
"['min', 'max']")
|
||||
|
||||
set_verbosity(verbose)
|
||||
|
||||
config = config or {}
|
||||
sync_config = sync_config or SyncConfig()
|
||||
set_sync_periods(sync_config)
|
||||
@@ -353,9 +357,9 @@ def run(
|
||||
"own `metric` and `mode` parameters. Either remove the arguments "
|
||||
"from your scheduler or from your call to `tune.run()`")
|
||||
|
||||
# Create logger and syncer callbacks
|
||||
# Create syncer callbacks
|
||||
callbacks = create_default_callbacks(
|
||||
callbacks, sync_config, loggers=loggers)
|
||||
callbacks, sync_config, metric=metric, loggers=loggers)
|
||||
|
||||
runner = TrialRunner(
|
||||
search_alg=search_alg,
|
||||
@@ -366,7 +370,6 @@ def run(
|
||||
stopper=experiments[0].stopper,
|
||||
resume=resume,
|
||||
server_port=server_port,
|
||||
verbose=bool(verbose > 1),
|
||||
fail_fast=fail_fast,
|
||||
trial_executor=trial_executor,
|
||||
callbacks=callbacks,
|
||||
@@ -380,7 +383,8 @@ def run(
|
||||
|
||||
if progress_reporter is None:
|
||||
if IS_NOTEBOOK:
|
||||
progress_reporter = JupyterNotebookReporter(overwrite=verbose < 2)
|
||||
progress_reporter = JupyterNotebookReporter(
|
||||
overwrite=not has_verbosity(Verbosity.V2_TRIAL_NORM))
|
||||
else:
|
||||
progress_reporter = CLIReporter()
|
||||
|
||||
@@ -413,7 +417,7 @@ def run(
|
||||
tune_start = time.time()
|
||||
while not runner.is_finished():
|
||||
runner.step()
|
||||
if verbose:
|
||||
if has_verbosity(Verbosity.V1_EXPERIMENT):
|
||||
_report_progress(runner, progress_reporter)
|
||||
tune_taken = time.time() - tune_start
|
||||
|
||||
@@ -422,7 +426,7 @@ def run(
|
||||
except Exception as e:
|
||||
logger.warning(f"Trial Runner checkpointing failed: {str(e)}")
|
||||
|
||||
if verbose:
|
||||
if has_verbosity(Verbosity.V1_EXPERIMENT):
|
||||
_report_progress(runner, progress_reporter, done=True)
|
||||
|
||||
wait_for_sync()
|
||||
@@ -440,8 +444,9 @@ def run(
|
||||
logger.error("Trials did not complete: %s", incomplete_trials)
|
||||
|
||||
all_taken = time.time() - all_start
|
||||
logger.info(f"Total run time: {all_taken:.2f} seconds "
|
||||
f"({tune_taken:.2f} seconds for the tuning loop).")
|
||||
if has_verbosity(Verbosity.V1_EXPERIMENT):
|
||||
logger.info(f"Total run time: {all_taken:.2f} seconds "
|
||||
f"({tune_taken:.2f} seconds for the tuning loop).")
|
||||
|
||||
trials = runner.get_trials()
|
||||
return ExperimentAnalysis(
|
||||
@@ -454,7 +459,7 @@ def run(
|
||||
def run_experiments(experiments,
|
||||
scheduler=None,
|
||||
server_port=None,
|
||||
verbose=2,
|
||||
verbose=Verbosity.V3_TRIAL_DETAILS,
|
||||
progress_reporter=None,
|
||||
resume=False,
|
||||
queue_trials=False,
|
||||
|
||||
@@ -4,6 +4,7 @@ import logging
|
||||
import os
|
||||
|
||||
from ray.tune.callback import Callback
|
||||
from ray.tune.progress_reporter import TrialProgressCallback
|
||||
from ray.tune.syncer import SyncConfig, detect_sync_to_driver
|
||||
from ray.tune.logger import CSVLoggerCallback, CSVLogger, LoggerCallback, \
|
||||
JsonLoggerCallback, JsonLogger, LegacyLoggerCallback, Logger, \
|
||||
@@ -15,14 +16,44 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def create_default_callbacks(callbacks: Optional[List[Callback]],
|
||||
sync_config: SyncConfig,
|
||||
loggers: Optional[List[Logger]]):
|
||||
loggers: Optional[List[Logger]],
|
||||
metric: Optional[str] = None):
|
||||
"""Create default callbacks for `tune.run()`.
|
||||
|
||||
This function takes a list of existing callbacks and adds default
|
||||
callbacks to it.
|
||||
|
||||
Specifically, three kinds of callbacks will be added:
|
||||
|
||||
1. Loggers. Ray Tune's experiment analysis relies on CSV and JSON logging.
|
||||
2. Syncer. Ray Tune synchronizes logs and checkpoint between workers and
|
||||
the head node.
|
||||
3. Trial progress reporter. For reporting intermediate progress, like trial
|
||||
results, Ray Tune uses a callback.
|
||||
|
||||
These callbacks will only be added if they don't already exist, i.e. if
|
||||
they haven't been passed (and configured) by the user. A notable case
|
||||
is when a Logger is passed, which is not a CSV or JSON logger - then
|
||||
a CSV and JSON logger will still be created.
|
||||
|
||||
Lastly, this function will ensure that the Syncer callback comes after all
|
||||
Logger callbacks, to ensure that the most up-to-date logs and checkpoints
|
||||
are synced across nodes.
|
||||
|
||||
"""
|
||||
callbacks = callbacks or []
|
||||
has_syncer_callback = False
|
||||
has_csv_logger = False
|
||||
has_json_logger = False
|
||||
has_tbx_logger = False
|
||||
|
||||
has_trial_progress_callback = any(
|
||||
isinstance(c, TrialProgressCallback) for c in callbacks)
|
||||
|
||||
if not has_trial_progress_callback:
|
||||
trial_progress_callback = TrialProgressCallback(metric=metric)
|
||||
callbacks.append(trial_progress_callback)
|
||||
|
||||
# Track syncer obj/index to move callback after loggers
|
||||
last_logger_index = None
|
||||
syncer_index = None
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
from enum import Enum
|
||||
from typing import Union
|
||||
|
||||
|
||||
class Verbosity(Enum):
    """Verbosity levels for Tune's console output.

    Levels are ordered; a higher value means more output:

    * ``V0_MINIMAL`` (0): silent.
    * ``V1_EXPERIMENT`` (1): only status updates.
    * ``V2_TRIAL_NORM`` (2): status and brief trial results.
    * ``V3_TRIAL_DETAILS`` (3): status and detailed trial results.
    """

    V0_MINIMAL = 0
    V1_EXPERIMENT = 1
    V2_TRIAL_NORM = 2
    V3_TRIAL_DETAILS = 3

    def __int__(self):
        # Allows ``int(Verbosity.X)`` so that plain ints and enum members can
        # be compared uniformly in ``has_verbosity``.
        return self.value


# Process-wide verbosity level; changed through ``set_verbosity``.
verbosity: Union[int, Verbosity] = Verbosity.V3_TRIAL_DETAILS


def set_verbosity(level: Union[int, Verbosity]):
    """Set the global verbosity level.

    Args:
        level (Union[int, Verbosity]): New verbosity level. Plain integers
            are converted to the matching ``Verbosity`` member.

    Raises:
        ValueError: If ``level`` is an int that does not correspond to any
            ``Verbosity`` member.
    """
    global verbosity

    if isinstance(level, int):
        verbosity = Verbosity(level)
    else:
        # Store the level that was passed in. The previous code assigned the
        # global to itself here, so passing a ``Verbosity`` member silently
        # left the old level in place.
        verbosity = level


def has_verbosity(level: Union[int, Verbosity]) -> bool:
    """Return True if the global verbosity is at least ``level``.

    Args:
        level (Union[int, Verbosity]): Level required by the caller, e.g.
            ``Verbosity.V1_EXPERIMENT`` for experiment-level status output.

    Returns:
        bool: True if output at ``level`` should be shown, i.e. if the
            globally configured verbosity is greater than or equal to it.
    """
    log_level = int(level)
    verbosity_level = int(verbosity)

    return verbosity_level >= log_level
|
||||
Reference in New Issue
Block a user