diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py index 463d34190..22dba1892 100644 --- a/python/ray/tune/analysis/experiment_analysis.py +++ b/python/ray/tune/analysis/experiment_analysis.py @@ -16,8 +16,8 @@ except ImportError: DataFrame = None from ray.tune.error import TuneError -from ray.tune.result import EXPR_PROGRESS_FILE, EXPR_PARAM_FILE,\ - CONFIG_PREFIX, TRAINING_ITERATION +from ray.tune.result import DEFAULT_METRIC, EXPR_PROGRESS_FILE, \ + EXPR_PARAM_FILE, CONFIG_PREFIX, TRAINING_ITERATION from ray.tune.trial import Trial from ray.tune.utils.trainable import TrainableUtil @@ -33,7 +33,8 @@ class Analysis: experiment_dir (str): Directory of the experiment to load. default_metric (str): Default metric for comparing results. Can be overwritten with the ``metric`` parameter in the respective - functions. + functions. If None but a mode was passed, the anonymous metric + `ray.tune.result.DEFAULT_METRIC` will be used per default. default_mode (str): Default mode for comparing results. Has to be one of [min, max]. Can be overwritten with the ``mode`` parameter in the respective functions. @@ -57,6 +58,10 @@ class Analysis: "`default_mode` has to be None or one of [min, max]") self.default_mode = default_mode + if self.default_metric is None and self.default_mode: + # If only a mode was passed, use anonymous metric + self.default_metric = DEFAULT_METRIC + if not pd: logger.warning( "pandas not installed. 
Run `pip install pandas` for " diff --git a/python/ray/tune/function_runner.py b/python/ray/tune/function_runner.py index 9fa19e6d2..f9938bc2d 100644 --- a/python/ray/tune/function_runner.py +++ b/python/ray/tune/function_runner.py @@ -16,8 +16,8 @@ from six.moves import queue from ray.util.debug import log_once from ray.tune import TuneError, session from ray.tune.trainable import Trainable, TrainableUtil -from ray.tune.result import (TIME_THIS_ITER_S, RESULT_DUPLICATE, - SHOULD_CHECKPOINT) +from ray.tune.result import (DEFAULT_METRIC, TIME_THIS_ITER_S, + RESULT_DUPLICATE, SHOULD_CHECKPOINT) from ray.tune.utils import (detect_checkpoint_function, detect_config_single, detect_reporter) @@ -164,7 +164,7 @@ class StatusReporter: "report __call__ is made to ensure correct runtime metrics.") if _metric: - kwargs["_metric"] = _metric + kwargs[DEFAULT_METRIC] = _metric # time per iteration is recorded directly in the reporter to ensure # any delays in logging results aren't counted diff --git a/python/ray/tune/progress_reporter.py b/python/ray/tune/progress_reporter.py index 8ff397237..59c540dd8 100644 --- a/python/ray/tune/progress_reporter.py +++ b/python/ray/tune/progress_reporter.py @@ -6,9 +6,9 @@ import sys import numpy as np import time -from ray.tune.result import (EPISODE_REWARD_MEAN, MEAN_ACCURACY, MEAN_LOSS, - TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL, - AUTO_RESULT_KEYS) +from ray.tune.result import (DEFAULT_METRIC, EPISODE_REWARD_MEAN, + MEAN_ACCURACY, MEAN_LOSS, TRAINING_ITERATION, + TIME_TOTAL_S, TIMESTEPS_TOTAL, AUTO_RESULT_KEYS) from ray.tune.trial import Trial from ray.tune.utils import unflattened_lookup @@ -135,6 +135,10 @@ class TuneReporterBase(ProgressReporter): if mode: self._mode = mode + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + return True def set_total_samples(self, total_samples): diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py index 
7aedc2837..e9eb7f402 100644 --- a/python/ray/tune/result.py +++ b/python/ray/tune/result.py @@ -23,6 +23,9 @@ NODE_IP = "node_ip" # (Auto-filled) The pid of the training process. PID = "pid" +# (Optional) Default (anonymous) metric when using tune.report(x) +DEFAULT_METRIC = "_metric" + # (Optional) Mean reward for current training iteration EPISODE_REWARD_MEAN = "episode_reward_mean" diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py index 375245fb9..bfd683963 100644 --- a/python/ray/tune/schedulers/async_hyperband.py +++ b/python/ray/tune/schedulers/async_hyperband.py @@ -4,6 +4,7 @@ from typing import Dict, Optional, Union import numpy as np from ray.tune import trial_runner +from ray.tune.result import DEFAULT_METRIC from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler from ray.tune.trial import Trial @@ -26,7 +27,8 @@ class AsyncHyperBandScheduler(FIFOScheduler): `training_iteration` as a measure of progress, the only requirement is that the attribute should increase monotonically. metric (str): The training result objective value attribute. Stopping - procedures will use this attribute. + procedures will use this attribute. If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. max_t (float): max time units per trial. Trials will be stopped after @@ -103,6 +105,10 @@ class AsyncHyperBandScheduler(FIFOScheduler): elif self._mode == "min": self._metric_op = -1. 
+ if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + return True def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py index 46cb7a81b..a7e31fc6a 100644 --- a/python/ray/tune/schedulers/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -5,6 +5,7 @@ import numpy as np import logging from ray.tune import trial_runner +from ray.tune.result import DEFAULT_METRIC from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler from ray.tune.trial import Trial from ray.tune.error import TuneError @@ -64,7 +65,8 @@ class HyperBandScheduler(FIFOScheduler): `training_iteration` as a measure of progress, the only requirement is that the attribute should increase monotonically. metric (str): The training result objective value attribute. Stopping - procedures will use this attribute. + procedures will use this attribute. If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. max_t (int): max time units per trial. Trials will be stopped after @@ -138,6 +140,10 @@ class HyperBandScheduler(FIFOScheduler): elif self._mode == "min": self._metric_op = -1. 
+ if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + return True def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py index b446547ec..12eb956cd 100644 --- a/python/ray/tune/schedulers/median_stopping_rule.py +++ b/python/ray/tune/schedulers/median_stopping_rule.py @@ -5,6 +5,7 @@ from typing import Dict, List, Optional import numpy as np from ray.tune import trial_runner +from ray.tune.result import DEFAULT_METRIC from ray.tune.trial import Trial from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler @@ -22,7 +23,8 @@ class MedianStoppingRule(FIFOScheduler): `training_iteration` as a measure of progress, the only requirement is that the attribute should increase monotonically. metric (str): The training result objective value attribute. Stopping - procedures will use this attribute. + procedures will use this attribute. If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. grace_period (float): Only stop trials at least this old in time. 
@@ -93,6 +95,10 @@ class MedianStoppingRule(FIFOScheduler): self._worst = float("-inf") if self._mode == "max" else float("inf") self._compare_op = max if self._mode == "max" else min + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + return True def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", diff --git a/python/ray/tune/schedulers/pbt.py b/python/ray/tune/schedulers/pbt.py index 20d12819a..96dbd7cbc 100644 --- a/python/ray/tune/schedulers/pbt.py +++ b/python/ray/tune/schedulers/pbt.py @@ -10,7 +10,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union from ray.tune import trial_runner from ray.tune import trial_executor from ray.tune.error import TuneError -from ray.tune.result import TRAINING_ITERATION +from ray.tune.result import DEFAULT_METRIC, TRAINING_ITERATION from ray.tune.utils.util import SafeFallbackEncoder from ray.tune.sample import Domain, Function from ray.tune.schedulers import FIFOScheduler, TrialScheduler @@ -141,7 +141,8 @@ class PopulationBasedTraining(FIFOScheduler): `training_iteration` as a measure of progress, the only requirement is that the attribute should increase monotonically. metric (str): The training result objective value attribute. Stopping - procedures will use this attribute. + procedures will use this attribute. If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. perturbation_interval (float): Models will be considered for @@ -310,6 +311,10 @@ class PopulationBasedTraining(FIFOScheduler): elif self._mode == "min": self._metric_op = -1. 
+ if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + return True def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", diff --git a/python/ray/tune/schedulers/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py index 56df73943..2dc4f918b 100644 --- a/python/ray/tune/schedulers/trial_scheduler.py +++ b/python/ray/tune/schedulers/trial_scheduler.py @@ -1,6 +1,7 @@ from typing import Dict, Optional from ray.tune import trial_runner +from ray.tune.result import DEFAULT_METRIC from ray.tune.trial import Trial @@ -32,6 +33,11 @@ class TrialScheduler: return False if metric: self._metric = metric + + if self._metric is None: + # Per default, use anonymous metric + self._metric = DEFAULT_METRIC + return True def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", diff --git a/python/ray/tune/suggest/ax.py b/python/ray/tune/suggest/ax.py index 8cc6949ff..e2540fe0a 100644 --- a/python/ray/tune/suggest/ax.py +++ b/python/ray/tune/suggest/ax.py @@ -1,6 +1,7 @@ from typing import Dict, List, Optional, Union from ax.service.ax_client import AxClient +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Float, Integer, LogUniform, \ Quantized, Uniform from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ @@ -45,7 +46,8 @@ class AxSearch(Searcher): metric (str): Name of the metric used as objective in this experiment. This metric must be present in `raw_data` argument to `log_data`. This metric must also be present in the dict - reported/returned by the Trainable. + reported/returned by the Trainable. If None but a mode was passed, + the `ray.tune.result.DEFAULT_METRIC` will be used per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. Defaults to "max". 
parameter_constraints (list[str]): Parameter constraints, such as @@ -146,9 +148,13 @@ class AxSearch(Searcher): self._live_trial_mapping = {} if self._ax or self._space: - self.setup_experiment() + self._setup_experiment() + + def _setup_experiment(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC - def setup_experiment(self): if not self._ax: self._ax = AxClient() @@ -200,7 +206,8 @@ class AxSearch(Searcher): self._metric = metric if mode: self._mode = mode - self.setup_experiment() + + self._setup_experiment() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py index af3538058..a3620bb17 100644 --- a/python/ray/tune/suggest/bayesopt.py +++ b/python/ray/tune/suggest/bayesopt.py @@ -5,6 +5,7 @@ import json from typing import Dict, Optional, Tuple from ray.tune import ExperimentAnalysis +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Domain, Float, Quantized from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ UNDEFINED_METRIC_MODE, UNDEFINED_SEARCH_SPACE @@ -53,7 +54,9 @@ class BayesOptSearch(Searcher): Args: space (dict): Continuous search space. Parameters will be sampled from this space which will be used to run trials. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. utility_kwargs (dict): Parameters to define the utility function. 
@@ -205,9 +208,13 @@ class BayesOptSearch(Searcher): self.optimizer = None if space: - self.setup_optimizer() + self._setup_optimizer() + + def _setup_optimizer(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC - def setup_optimizer(self): self.optimizer = byo.BayesianOptimization( f=None, pbounds=self._space, @@ -230,7 +237,7 @@ class BayesOptSearch(Searcher): elif self._mode == "min": self._metric_op = -1. - self.setup_optimizer() + self._setup_optimizer() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/bohb.py b/python/ray/tune/suggest/bohb.py index e5f371674..e6e803d30 100644 --- a/python/ray/tune/suggest/bohb.py +++ b/python/ray/tune/suggest/bohb.py @@ -6,6 +6,7 @@ import math from typing import Dict, Optional, Union import ConfigSpace +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Domain, Float, Integer, LogUniform, \ Normal, \ Quantized, \ @@ -45,7 +46,9 @@ class TuneBOHB(Searcher): bohb_config (dict): configuration for HpBandSter BOHB algorithm max_concurrent (int): Number of maximum concurrent trials. Defaults to 10. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. 
seed (int): Optional random seed to initialize the random number @@ -133,11 +136,15 @@ class TuneBOHB(Searcher): super(TuneBOHB, self).__init__(metric=self._metric, mode=mode) if self._space: - self.setup_bohb() + self._setup_bohb() - def setup_bohb(self): + def _setup_bohb(self): from hpbandster.optimizers.config_generators.bohb import BOHB + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + if self._mode == "max": self._metric_op = -1. elif self._mode == "min": @@ -161,7 +168,7 @@ class TuneBOHB(Searcher): if mode: self._mode = mode - self.setup_bohb() + self._setup_bohb() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/dragonfly.py b/python/ray/tune/suggest/dragonfly.py index c19f217bb..4be1f47dd 100644 --- a/python/ray/tune/suggest/dragonfly.py +++ b/python/ray/tune/suggest/dragonfly.py @@ -7,6 +7,7 @@ import logging import pickle from typing import Dict, List, Optional, Union +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Domain, Float, Quantized from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ UNDEFINED_METRIC_MODE, UNDEFINED_SEARCH_SPACE @@ -62,7 +63,9 @@ class DragonflySearch(Searcher): an optimizer as the `optimizer` argument. Defines the search space and requires a `domain` to be set. Can be automatically converted from the `config` dict passed to `tune.run()`. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. 
points_to_evaluate (list of lists): A list of points you'd like to run @@ -177,9 +180,9 @@ class DragonflySearch(Searcher): self._opt = optimizer self.init_dragonfly() elif self._space: - self.setup_dragonfly() + self._setup_dragonfly() - def setup_dragonfly(self): + def _setup_dragonfly(self): """Setup dragonfly when no optimizer has been passed.""" assert not self._opt, "Optimizer already set." @@ -259,6 +262,10 @@ class DragonflySearch(Searcher): elif self._mode == "max": self._metric_op = 1. + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool: if self._opt: @@ -270,7 +277,7 @@ class DragonflySearch(Searcher): if mode: self._mode = mode - self.setup_dragonfly() + self._setup_dragonfly() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index 133dc1da3..3f0b1a939 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -6,6 +6,7 @@ import logging from functools import partial import pickle +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Domain, Float, Integer, LogUniform, \ Normal, \ Quantized, \ @@ -50,7 +51,9 @@ class HyperOptSearch(Searcher): space (dict): HyperOpt configuration. Parameters will be sampled from this configuration and will be used to override parameters generated in the variant generation process. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. 
points_to_evaluate (list): Initial parameter suggestions to be run @@ -177,14 +180,22 @@ class HyperOptSearch(Searcher): UNRESOLVED_SEARCH_SPACE.format( par="space", cls=type(self))) space = self.convert_search_space(space) - self.domain = hpo.Domain(lambda spc: spc, space) + self._space = space + self._setup_hyperopt() + + def _setup_hyperopt(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + + self.domain = hpo.Domain(lambda spc: spc, self._space) def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool: if self.domain: return False space = self.convert_search_space(config) - self.domain = hpo.Domain(lambda spc: spc, space) + self._space = space if metric: self._metric = metric @@ -196,6 +207,7 @@ class HyperOptSearch(Searcher): elif self._mode == "min": self.metric_op = 1. + self._setup_hyperopt() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/nevergrad.py b/python/ray/tune/suggest/nevergrad.py index 67cb3db55..d2592dfe9 100644 --- a/python/ray/tune/suggest/nevergrad.py +++ b/python/ray/tune/suggest/nevergrad.py @@ -2,6 +2,7 @@ import logging import pickle from typing import Dict, Optional, Union, List, Sequence +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Domain, Float, Integer, LogUniform, \ Quantized from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ @@ -45,7 +46,9 @@ class NevergradSearch(Searcher): space (list|nevergrad.parameter.Parameter): Nevergrad parametrization to be passed to optimizer on instantiation, or list of parameter names if you passed an optimizer object. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. 
Determines whether objective is minimizing or maximizing the metric attribute. points_to_evaluate (list): Initial parameter suggestions to be run @@ -165,9 +168,9 @@ class NevergradSearch(Searcher): self.max_concurrent = max_concurrent if self._nevergrad_opt or self._space: - self.setup_nevergrad() + self._setup_nevergrad() - def setup_nevergrad(self): + def _setup_nevergrad(self): if self._opt_factory: self._nevergrad_opt = self._opt_factory(self._space) @@ -177,6 +180,10 @@ class NevergradSearch(Searcher): elif self._mode == "min": self._metric_op = 1. + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + if hasattr(self._nevergrad_opt, "instrumentation"): # added in v0.2.0 if self._nevergrad_opt.instrumentation.kwargs: if self._nevergrad_opt.instrumentation.args: @@ -209,7 +216,7 @@ class NevergradSearch(Searcher): if mode: self._mode = mode - self.setup_nevergrad() + self._setup_nevergrad() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/optuna.py b/python/ray/tune/suggest/optuna.py index 9c5470135..7076b1f59 100644 --- a/python/ray/tune/suggest/optuna.py +++ b/python/ray/tune/suggest/optuna.py @@ -2,7 +2,7 @@ import logging import pickle from typing import Dict, List, Optional, Tuple, Union -from ray.tune.result import TRAINING_ITERATION +from ray.tune.result import DEFAULT_METRIC, TRAINING_ITERATION from ray.tune.sample import Categorical, Domain, Float, Integer, LogUniform, \ Quantized, Uniform from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ @@ -57,8 +57,9 @@ class OptunaSearch(Searcher): space (list): Hyperparameter search space definition for Optuna's sampler. This is a list, and samples for the parameters will be obtained in order. - metric (str): Metric that is reported back to Optuna on trial - completion. + metric (str): The training result objective value attribute. 
If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. sampler (optuna.samplers.BaseSampler): Optuna sampler used to @@ -139,9 +140,13 @@ class OptunaSearch(Searcher): self._ot_trials = {} self._ot_study = None if self._space: - self.setup_study(mode) + self._setup_study(mode) + + def _setup_study(self, mode: str): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC - def setup_study(self, mode: str): self._ot_study = ot.study.create_study( storage=self._storage, sampler=self._sampler, @@ -160,7 +165,8 @@ class OptunaSearch(Searcher): self._metric = metric if mode: self._mode = mode - self.setup_study(mode) + + self._setup_study(mode) return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 3dee909c6..f8b8d9196 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -2,6 +2,7 @@ import logging import pickle from typing import Dict, List, Optional, Tuple, Union +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Domain, Float, Integer, Quantized from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ UNDEFINED_METRIC_MODE, UNDEFINED_SEARCH_SPACE @@ -75,7 +76,9 @@ class SkOptSearch(Searcher): parameters. If you passed an optimizer instance as the `optimizer` argument, this should be a list of parameter names instead. - metric (str): The training result objective value attribute. + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. 
points_to_evaluate (list of lists): A list of points you'd like to run @@ -184,11 +187,11 @@ class SkOptSearch(Searcher): self._skopt_opt = optimizer if self._skopt_opt or self._space: - self.setup_skopt() + self._setup_skopt() self._live_trial_mapping = {} - def setup_skopt(self): + def _setup_skopt(self): _validate_warmstart(self._parameter_names, self._points_to_evaluate, self._evaluated_rewards) @@ -213,6 +216,10 @@ class SkOptSearch(Searcher): elif self._mode == "min": self._metric_op = 1. + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC + def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool: if self._skopt_opt: @@ -228,7 +235,7 @@ class SkOptSearch(Searcher): if mode: self._mode = mode - self.setup_skopt() + self._setup_skopt() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/suggest/zoopt.py b/python/ray/tune/suggest/zoopt.py index 23177ddaf..966a73f7d 100644 --- a/python/ray/tune/suggest/zoopt.py +++ b/python/ray/tune/suggest/zoopt.py @@ -4,6 +4,7 @@ from typing import Dict, Optional, Tuple import ray import ray.cloudpickle as pickle +from ray.tune.result import DEFAULT_METRIC from ray.tune.sample import Categorical, Domain, Float, Integer, Quantized, \ Uniform from ray.tune.suggest.suggestion import UNRESOLVED_SEARCH_SPACE, \ @@ -113,11 +114,11 @@ class ZOOptSearch(Searcher): For discrete dimensions: (discrete, search_range, has_order); For grid dimensions: (grid, grid_list). More details can be found in zoopt package. - metric (str): The training result objective value attribute. - Defaults to "episode_reward_mean". + metric (str): The training result objective value attribute. If None + but a mode was passed, the anonymous metric `_metric` will be used + per default. mode (str): One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. 
- Defaults to "min". parallel_num (int): How many workers to parallel. Note that initial phase may start less workers than this number. More details can be found in zoopt package. @@ -171,9 +172,13 @@ class ZOOptSearch(Searcher): super(ZOOptSearch, self).__init__(metric=self._metric, mode=mode) if self._dim_dict: - self.setup_zoopt() + self._setup_zoopt() + + def _setup_zoopt(self): + if self._metric is None and self._mode: + # If only a mode was passed, use anonymous metric + self._metric = DEFAULT_METRIC - def setup_zoopt(self): _dim_list = [] for k in self._dim_dict: self._dim_keys.append(k) @@ -203,7 +208,7 @@ class ZOOptSearch(Searcher): elif self._mode == "min": self._metric_op = 1. - self.setup_zoopt() + self._setup_zoopt() return True def suggest(self, trial_id: str) -> Optional[Dict]: diff --git a/python/ray/tune/tests/test_function_api.py b/python/ray/tune/tests/test_function_api.py index 4bb27ebd9..305e8abb0 100644 --- a/python/ray/tune/tests/test_function_api.py +++ b/python/ray/tune/tests/test_function_api.py @@ -12,7 +12,7 @@ from ray.tune.logger import NoopLogger from ray.tune.utils.trainable import TrainableUtil from ray.tune.function_runner import with_parameters, wrap_function, \ FuncCheckpointUtil -from ray.tune.result import TRAINING_ITERATION +from ray.tune.result import DEFAULT_METRIC, TRAINING_ITERATION def creator_generator(logdir): @@ -468,7 +468,7 @@ class FunctionApiTest(unittest.TestCase): self.assertEquals(trial_2.last_result["metric"], 500_000) self.assertEquals(trial_2.last_result["cp"], "DIR") - def test_return_anonymous(self): + def testReturnAnonymous(self): def train(config): return config["a"] @@ -477,10 +477,10 @@ class FunctionApiTest(unittest.TestCase): "a": tune.grid_search([4, 8]) }).trials - self.assertEquals(trial_1.last_result["_metric"], 4) - self.assertEquals(trial_2.last_result["_metric"], 8) + self.assertEquals(trial_1.last_result[DEFAULT_METRIC], 4) + self.assertEquals(trial_2.last_result[DEFAULT_METRIC], 8) - def 
test_return_specific(self): + def testReturnSpecific(self): def train(config): return {"m": config["a"]} @@ -492,7 +492,7 @@ class FunctionApiTest(unittest.TestCase): self.assertEquals(trial_1.last_result["m"], 4) self.assertEquals(trial_2.last_result["m"], 8) - def test_yield_anonymous(self): + def testYieldAnonymous(self): def train(config): for i in range(10): yield config["a"] + i @@ -502,10 +502,10 @@ class FunctionApiTest(unittest.TestCase): "a": tune.grid_search([4, 8]) }).trials - self.assertEquals(trial_1.last_result["_metric"], 4 + 9) - self.assertEquals(trial_2.last_result["_metric"], 8 + 9) + self.assertEquals(trial_1.last_result[DEFAULT_METRIC], 4 + 9) + self.assertEquals(trial_2.last_result[DEFAULT_METRIC], 8 + 9) - def test_yield_specific(self): + def testYieldSpecific(self): def train(config): for i in range(10): yield {"m": config["a"] + i} diff --git a/python/ray/tune/tests/test_searchers.py b/python/ray/tune/tests/test_searchers.py index 102111573..0b50be49d 100644 --- a/python/ray/tune/tests/test_searchers.py +++ b/python/ray/tune/tests/test_searchers.py @@ -21,6 +21,12 @@ def _invalid_objective(config): class InvalidValuesTest(unittest.TestCase): + """ + Test searcher handling of invalid values (NaN, -inf, inf). + Implicitly tests automatic config conversion and default (anonymous) + metric handling. 
+ """ + def setUp(self): self.config = {"report": tune.uniform(0.0, 5.0)} @@ -65,7 +71,6 @@ class InvalidValuesTest(unittest.TestCase): # At least one nan, inf, -inf and float search_alg=BayesOptSearch(random_state=1234), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -80,7 +85,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=TuneBOHB(seed=1000), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -97,7 +101,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=DragonflySearch(domain="euclidean", optimizer="random"), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -113,7 +116,6 @@ class InvalidValuesTest(unittest.TestCase): # At least one nan, inf, -inf and float search_alg=HyperOptSearch(random_state_seed=1234), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -131,7 +133,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=NevergradSearch(optimizer=ng.optimizers.RandomSearch), config=self.config, - metric="_metric", mode="max", num_samples=16, reuse_actors=False) @@ -149,7 +150,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=OptunaSearch(sampler=RandomSampler(seed=1234)), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -166,7 +166,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=SkOptSearch(), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) @@ -183,7 +182,6 @@ class InvalidValuesTest(unittest.TestCase): _invalid_objective, search_alg=ZOOptSearch(budget=100, parallel_num=4), config=self.config, - metric="_metric", mode="max", num_samples=8, reuse_actors=False) diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py index 
a2cd02fb2..30724ed46 100644 --- a/python/ray/tune/tests/test_trial_scheduler.py +++ b/python/ray/tune/tests/test_trial_scheduler.py @@ -2065,6 +2065,36 @@ class AsyncHyperBandSuite(unittest.TestCase): self._test_metrics(result2, "mean_loss", "min") + def _testAnonymousMetricEndToEnd(self, scheduler_cls, searcher=None): + def train(config): + return config["value"] + + out = tune.run( + train, + mode="max", + num_samples=1, + config={"value": tune.uniform(-2., 2.)}, + scheduler=scheduler_cls(), + search_alg=searcher) + + self.assertTrue(bool(out.best_trial)) + + def testAnonymousMetricEndToEndFIFO(self): + self._testAnonymousMetricEndToEnd(FIFOScheduler) + + def testAnonymousMetricEndToEndASHA(self): + self._testAnonymousMetricEndToEnd(AsyncHyperBandScheduler) + + def testAnonymousMetricEndToEndBOHB(self): + from ray.tune.suggest.bohb import TuneBOHB + self._testAnonymousMetricEndToEnd(HyperBandForBOHB, TuneBOHB()) + + def testAnonymousMetricEndToEndMedian(self): + self._testAnonymousMetricEndToEnd(MedianStoppingRule) + + def testAnonymousMetricEndToEndPBT(self): + self._testAnonymousMetricEndToEnd(PopulationBasedTraining) + if __name__ == "__main__": import pytest diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index 8fab9450c..800d30304 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -12,8 +12,8 @@ from ray.tune import TuneError from ray.tune.callback import CallbackList from ray.tune.stopper import NoopStopper from ray.tune.ray_trial_executor import RayTrialExecutor -from ray.tune.result import (TIME_THIS_ITER_S, RESULT_DUPLICATE, - SHOULD_CHECKPOINT) +from ray.tune.result import (DEFAULT_METRIC, TIME_THIS_ITER_S, + RESULT_DUPLICATE, SHOULD_CHECKPOINT) from ray.tune.syncer import get_cloud_syncer from ray.tune.trial import Checkpoint, Trial from ray.tune.schedulers import FIFOScheduler, TrialScheduler @@ -614,13 +614,19 @@ class TrialRunner: in the last result. 
If the only item is `done=True`, this means that no result was ever received and the trial just returned. This is also okay and will not raise an error. + + The anonymous DEFAULT_METRIC is excluded from this check. """ if int(os.environ.get("TUNE_DISABLE_STRICT_METRIC_CHECKING", 0)) != 1 and (len(result) > 1 or "done" not in result): - base_metric = self._metric - scheduler_metric = self._scheduler_alg.metric - search_metrics = self._search_alg.metric + base_metric = self._metric \ + if self._metric != DEFAULT_METRIC else None + scheduler_metric = self._scheduler_alg.metric \ + if self._scheduler_alg.metric != DEFAULT_METRIC else None + search_metrics = self._search_alg.metric \ + if self._search_alg.metric != DEFAULT_METRIC else None + if isinstance(search_metrics, str): search_metrics = [search_metrics]