diff --git a/python/ray/tune/automl/search_policy.py b/python/ray/tune/automl/search_policy.py index 19745c74e..9cf0f7615 100644 --- a/python/ray/tune/automl/search_policy.py +++ b/python/ray/tune/automl/search_policy.py @@ -56,6 +56,8 @@ class AutoMLSearcher(SearchAlgorithm): self._unfinished_count = 0 self._running_trials = {} self._completed_trials = {} + self._next_trials = [] + self._next_trial_iter = None self._iteration = 0 self._total_trial_num = 0 @@ -68,10 +70,27 @@ class AutoMLSearcher(SearchAlgorithm): """Returns the Trial object with the best reward_attr""" return self.best_trial - def next_trials(self): + def next_trial(self): + if not self._next_trial_iter: + self._generate_next_trials() + if not self._next_trials: + self.set_finished() + return None + self._next_trial_iter = iter(self._next_trials) + + try: + return next(self._next_trial_iter) + except StopIteration: + self._next_trials = [] + self._next_trial_iter = None + return None + + def _generate_next_trials(self): + self._next_trials = [] + if self._unfinished_count > 0: # Last round not finished - return [] + return trials = [] raw_param_list, extra_arg_list = self._select() @@ -110,7 +129,7 @@ class AutoMLSearcher(SearchAlgorithm): "new": ntrial, "total": self._total_trial_num }) - return trials + self._next_trials = trials def on_trial_result(self, trial_id, result): if not result: diff --git a/python/ray/tune/progress_reporter.py b/python/ray/tune/progress_reporter.py index ca60adf29..ccb576faf 100644 --- a/python/ray/tune/progress_reporter.py +++ b/python/ray/tune/progress_reporter.py @@ -1,6 +1,8 @@ from __future__ import print_function import collections +import sys + import numpy as np import time @@ -94,10 +96,12 @@ class TuneReporterBase(ProgressReporter): def __init__(self, metric_columns=None, parameter_columns=None, + total_samples=None, max_progress_rows=20, max_error_rows=20, max_report_frequency=5, infer_limit=3): + self._total_samples = total_samples self._metrics_override = metric_columns is not None self._inferred_metrics = {} self._metric_columns = metric_columns or self.DEFAULT_COLUMNS.copy() @@ -109,6 +113,9 @@ class TuneReporterBase(ProgressReporter): self._max_report_freqency = max_report_frequency self._last_report_time = 0 + def set_total_samples(self, total_samples): + self._total_samples = total_samples + def should_report(self, trials, done=False): if time.time() - self._last_report_time > self._max_report_freqency: self._last_report_time = time.time() @@ -191,6 +198,7 @@ class TuneReporterBase(ProgressReporter): trials, metric_columns=self._metric_columns, parameter_columns=self._parameter_columns, + total_samples=self._total_samples, fmt=fmt, max_rows=max_progress)) messages.append(trial_errors_str(trials, fmt=fmt, max_rows=max_error)) @@ -243,12 +251,13 @@ class JupyterNotebookReporter(TuneReporterBase): overwrite, metric_columns=None, parameter_columns=None, + total_samples=None, max_progress_rows=20, max_error_rows=20, max_report_frequency=5): super(JupyterNotebookReporter, self).__init__( - metric_columns, parameter_columns, max_progress_rows, - max_error_rows, max_report_frequency) + metric_columns, parameter_columns, total_samples, + max_progress_rows, max_error_rows, max_report_frequency) self._overwrite = overwrite def report(self, trials, done, *sys_info): @@ -287,13 +296,14 @@ class CLIReporter(TuneReporterBase): def __init__(self, metric_columns=None, parameter_columns=None, + total_samples=None, max_progress_rows=20, max_error_rows=20, max_report_frequency=5): super(CLIReporter, self).__init__(metric_columns, parameter_columns, - max_progress_rows, max_error_rows, - max_report_frequency) + total_samples, max_progress_rows, + max_error_rows, max_report_frequency) def report(self, trials, done, *sys_info): print(self._progress_str(trials, done, *sys_info)) @@ -324,6 +334,7 @@ def memory_debug_str(): def trial_progress_str(trials, metric_columns, parameter_columns=None, + total_samples=0, fmt="psql", max_rows=None): """Returns a human readable message for printing to the console. @@ -342,6 +353,7 @@ def trial_progress_str(trials, values are the names to use in the message. If this is a list, the parameter name is used in the message directly. If this is empty, all parameters are used in the message. + total_samples (int): Total number of trials that will be generated. fmt (str): Output format (see tablefmt in tabulate API). max_rows (int): Maximum number of rows in the trial table. Defaults to unlimited. @@ -381,8 +393,13 @@ def trial_progress_str(trials, overflow_str = ", ".join(overflow_strs) else: overflow = False - messages.append("Number of trials: {} ({})".format( - num_trials, ", ".join(num_trials_strs))) + + if total_samples >= sys.maxsize: + total_samples = "infinite" + + messages.append("Number of trials: {}{} ({})".format( + num_trials, f"/{total_samples}" + if total_samples else "", ", ".join(num_trials_strs))) # Pre-process trials to figure out what columns to show. if isinstance(metric_columns, Mapping): diff --git a/python/ray/tune/suggest/basic_variant.py b/python/ray/tune/suggest/basic_variant.py index 68cf7dcfe..0a847d194 100644 --- a/python/ray/tune/suggest/basic_variant.py +++ b/python/ray/tune/suggest/basic_variant.py @@ -1,14 +1,13 @@ import itertools import os -import random import uuid from typing import Dict, List, Union from ray.tune.error import TuneError from ray.tune.experiment import Experiment, convert_to_experiment_list from ray.tune.config_parser import make_parser, create_trial_from_spec -from ray.tune.suggest.variant_generator import (generate_variants, format_vars, - flatten_resolved_vars) +from ray.tune.suggest.variant_generator import ( + count_variants, generate_variants, format_vars, flatten_resolved_vars) from ray.tune.suggest.search import SearchAlgorithm @@ -17,10 +16,6 @@ class BasicVariantGenerator(SearchAlgorithm): See also: `ray.tune.suggest.variant_generator`. - - Parameters: - shuffle (bool): Shuffles the generated list of configurations. - User API: .. code-block:: python @@ -39,19 +34,19 @@ class BasicVariantGenerator(SearchAlgorithm): searcher = BasicVariantGenerator() searcher.add_configurations({"experiment": { ... }}) - list_of_trials = searcher.next_trials() + trial = searcher.next_trial() searcher.is_finished == True """ - def __init__(self, shuffle: bool = False): + def __init__(self): """Initializes the Variant Generator. """ self._parser = make_parser() self._trial_generator = [] + self._trial_iter = None self._counter = 0 self._finished = False - self._shuffle = shuffle # Unique prefix for all trials generated, e.g., trial ids start as # 2f1e_00001, 2f1ef_00002, 2f1ef_0003, etc. Overridable for testing. @@ -61,6 +56,12 @@ class BasicVariantGenerator(SearchAlgorithm): else: self._uuid_prefix = str(uuid.uuid1().hex)[:5] + "_" + self._total_samples = 0 + + @property + def total_samples(self): + return self._total_samples + def add_configurations( self, experiments: Union[Experiment, List[Experiment], Dict[str, Dict]]): @@ -71,23 +72,28 @@ class BasicVariantGenerator(SearchAlgorithm): """ experiment_list = convert_to_experiment_list(experiments) for experiment in experiment_list: + self._total_samples += count_variants(experiment.spec) self._trial_generator = itertools.chain( self._trial_generator, self._generate_trials( experiment.spec.get("num_samples", 1), experiment.spec, experiment.name)) - def next_trials(self): - """Provides Trial objects to be queued into the TrialRunner. + def next_trial(self): + """Provides one Trial object to be queued into the TrialRunner. Returns: - trials (list): Returns a list of trials. + Trial: Returns a single trial. """ - trials = list(self._trial_generator) - if self._shuffle: - random.shuffle(trials) - self.set_finished() - return trials + if not self._trial_iter: + self._trial_iter = iter(self._trial_generator) + try: + return next(self._trial_iter) + except StopIteration: + self._trial_generator = [] + self._trial_iter = None + self.set_finished() + return None def _generate_trials(self, num_samples, unresolved_spec, output_path=""): """Generates Trial objects with the variant generation process. diff --git a/python/ray/tune/suggest/search.py b/python/ray/tune/suggest/search.py index 87d7da3e7..7654924e5 100644 --- a/python/ray/tune/suggest/search.py +++ b/python/ray/tune/suggest/search.py @@ -1,7 +1,6 @@ from typing import Dict, List, Optional, Union from ray.tune.experiment import Experiment -from ray.tune.trial import Trial class SearchAlgorithm: @@ -36,6 +35,11 @@ class SearchAlgorithm: """ return True + @property + def total_samples(self): + """Get number of total trials to be generated""" + return 0 + def add_configurations( self, experiments: Union[Experiment, List[Experiment], Dict[str, Dict]]): @@ -46,11 +50,11 @@ class SearchAlgorithm: """ raise NotImplementedError - def next_trials(self) -> List[Trial]: - """Provides Trial objects to be queued into the TrialRunner. + def next_trial(self): + """Returns single Trial object to be queued into the TrialRunner. Returns: - trials (list): Returns a list of trials. + trial (Trial): Returns a Trial object. """ raise NotImplementedError diff --git a/python/ray/tune/suggest/search_generator.py b/python/ray/tune/suggest/search_generator.py index 7edd26452..19dfa8be3 100644 --- a/python/ray/tune/suggest/search_generator.py +++ b/python/ray/tune/suggest/search_generator.py @@ -67,13 +67,17 @@ class SearchGenerator(SearchAlgorithm): self._parser = make_parser() self._experiment = None self._counter = 0 # Keeps track of number of trials created. - self._total_samples = None # int: total samples to evaluate. + self._total_samples = 0 # int: total samples to evaluate. self._finished = False def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool: return self.searcher.set_search_properties(metric, mode, config) + @property + def total_samples(self): + return self._total_samples + def add_configurations( self, experiments: Union[Experiment, List[Experiment], Dict[str, Dict]]): @@ -95,20 +99,16 @@ class SearchGenerator(SearchAlgorithm): if "run" not in experiment_spec: raise TuneError("Must specify `run` in {}".format(experiment_spec)) - def next_trials(self) -> List[Trial]: - """Provides a batch of Trial objects to be queued into the TrialRunner. + def next_trial(self): + """Provides one Trial object to be queued into the TrialRunner. Returns: - List[Trial]: A list of trials for the Runner to consume. + Trial: Returns a single trial. """ - trials = [] - while not self.is_finished(): - trial = self.create_trial_if_possible(self._experiment.spec, - self._experiment.name) - if trial is None: - break - trials.append(trial) - return trials + if not self.is_finished(): + return self.create_trial_if_possible(self._experiment.spec, + self._experiment.name) + return None def create_trial_if_possible(self, experiment_spec: Dict, output_path: str) -> Optional[Trial]: diff --git a/python/ray/tune/suggest/variant_generator.py b/python/ray/tune/suggest/variant_generator.py index 294dd3c74..7048cd804 100644 --- a/python/ray/tune/suggest/variant_generator.py +++ b/python/ray/tune/suggest/variant_generator.py @@ -138,6 +138,15 @@ def parse_spec_vars(spec: Dict) -> Tuple[List[Tuple[Tuple, Any]], List[Tuple[ return resolved_vars, domain_vars, grid_vars +def count_variants(spec: Dict) -> int: + spec = copy.deepcopy(spec) + _, domain_vars, grid_vars = parse_spec_vars(spec) + grid_count = 1 + for path, domain in grid_vars: + grid_count *= len(domain.categories) + return spec.get("num_samples", 1) * grid_count + + def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]: spec = copy.deepcopy(spec) _, domain_vars, grid_vars = parse_spec_vars(spec) diff --git a/python/ray/tune/tests/test_automl_searcher.py b/python/ray/tune/tests/test_automl_searcher.py index 22e98896e..e333e9069 100644 --- a/python/ray/tune/tests/test_automl_searcher.py +++ b/python/ray/tune/tests/test_automl_searcher.py @@ -5,6 +5,16 @@ from ray.tune import register_trainable from ray.tune.automl import SearchSpace, DiscreteSpace, GridSearch +def next_trials(searcher): + trials = [] + while not searcher.is_finished(): + trial = searcher.next_trial() + if not trial: + break + trials.append(trial) + return trials + + class AutoMLSearcherTest(unittest.TestCase): def setUp(self): def dummy_train(config, reporter): @@ -20,7 +30,7 @@ class AutoMLSearcherTest(unittest.TestCase): ]) searcher = GridSearch(space, "reward") searcher.add_configurations(exp) - trials = searcher.next_trials() + trials = next_trials(searcher) self.assertEqual(len(trials), 4) self.assertTrue(trials[0].config["a"]["b"]["c"] in [1, 2]) @@ -34,9 +44,9 @@ class AutoMLSearcherTest(unittest.TestCase): ]) searcher = GridSearch(space, "reward") searcher.add_configurations(exp) - trials = searcher.next_trials() + trials = next_trials(searcher) - self.assertEqual(len(searcher.next_trials()), 0) + self.assertEqual(searcher.next_trial(), None) for trial in trials[1:]: searcher.on_trial_complete(trial.trial_id) searcher.on_trial_complete(trials[0].trial_id, error=True) @@ -51,10 +61,11 @@ class AutoMLSearcherTest(unittest.TestCase): ]) searcher = GridSearch(space, "reward") searcher.add_configurations(exp) - trials = searcher.next_trials() + trials = next_trials(searcher) - self.assertEqual(len(searcher.next_trials()), 0) + self.assertEqual(searcher.next_trial(), None) for i, trial in enumerate(trials): + print("TRIAL {}".format(trial)) rewards = list(range(i, i + 10)) random.shuffle(rewards) for reward in rewards: diff --git a/python/ray/tune/tests/test_progress_reporter.py b/python/ray/tune/tests/test_progress_reporter.py index 7b5db7365..70ffc07ad 100644 --- a/python/ray/tune/tests/test_progress_reporter.py +++ b/python/ray/tune/tests/test_progress_reporter.py @@ -73,34 +73,14 @@ tune.run_experiments({ }, }, verbose=1)""" -EXPECTED_END_TO_END_START = """Number of trials: 30 (29 PENDING, 1 RUNNING) -+---------------+----------+-------+-----+-----+ -| Trial name | status | loc | a | b | -|---------------+----------+-------+-----+-----| -| f_xxxxx_00001 | PENDING | | 1 | | -| f_xxxxx_00002 | PENDING | | 2 | | -| f_xxxxx_00003 | PENDING | | 3 | | -| f_xxxxx_00004 | PENDING | | 4 | | -| f_xxxxx_00005 | PENDING | | 5 | | -| f_xxxxx_00006 | PENDING | | 6 | | -| f_xxxxx_00007 | PENDING | | 7 | | -| f_xxxxx_00008 | PENDING | | 8 | | -| f_xxxxx_00009 | PENDING | | 9 | | -| f_xxxxx_00010 | PENDING | | | 0 | -| f_xxxxx_00011 | PENDING | | | 1 | -| f_xxxxx_00012 | PENDING | | | 2 | -| f_xxxxx_00013 | PENDING | | | 3 | -| f_xxxxx_00014 | PENDING | | | 4 | -| f_xxxxx_00015 | PENDING | | | 5 | -| f_xxxxx_00016 | PENDING | | | 6 | -| f_xxxxx_00017 | PENDING | | | 7 | -| f_xxxxx_00018 | PENDING | | | 8 | -| f_xxxxx_00019 | PENDING | | | 9 | -| f_xxxxx_00000 | RUNNING | | 0 | | -+---------------+----------+-------+-----+-----+ -... 10 more trials not shown (10 PENDING)""" +EXPECTED_END_TO_END_START = """Number of trials: 1/30 (1 RUNNING) ++---------------+----------+-------+-----+ +| Trial name | status | loc | a | +|---------------+----------+-------+-----| +| f_xxxxx_00000 | RUNNING | | 0 | ++---------------+----------+-------+-----+""" -EXPECTED_END_TO_END_END = """Number of trials: 30 (30 TERMINATED) +EXPECTED_END_TO_END_END = """Number of trials: 30/30 (30 TERMINATED) +---------------+------------+-------+-----+-----+-----+ | Trial name | status | loc | a | b | c | |---------------+------------+-------+-----+-----+-----| @@ -136,7 +116,7 @@ EXPECTED_END_TO_END_END = """Number of trials: 30 (30 TERMINATED) | f_xxxxx_00029 | TERMINATED | | | | 9 | +---------------+------------+-------+-----+-----+-----+""" -EXPECTED_END_TO_END_AC = """Number of trials: 30 (30 TERMINATED) +EXPECTED_END_TO_END_AC = """Number of trials: 30/30 (30 TERMINATED) +---------------+------------+-------+-----+-----+-----+ | Trial name | status | loc | a | b | c | |---------------+------------+-------+-----+-----+-----| diff --git a/python/ray/tune/tests/test_ray_trial_executor.py b/python/ray/tune/tests/test_ray_trial_executor.py index e2ee69013..c2afe6a59 100644 --- a/python/ray/tune/tests/test_ray_trial_executor.py +++ b/python/ray/tune/tests/test_ray_trial_executor.py @@ -170,7 +170,14 @@ class RayTrialExecutorTest(unittest.TestCase): def generate_trials(spec, name): suggester = BasicVariantGenerator() suggester.add_configurations({name: spec}) - return suggester.next_trials() + trials = [] + while not suggester.is_finished(): + trial = suggester.next_trial() + if trial: + trials.append(trial) + else: + break + return trials def process_trial_save(self, trial): """Simulates trial runner save.""" diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py index c947a1dae..1dbac72ae 100644 --- a/python/ray/tune/tests/test_trial_runner.py +++ b/python/ray/tune/tests/test_trial_runner.py @@ -57,7 +57,10 @@ class TrialRunnerTest(unittest.TestCase): for name, spec in experiments.items(): trial_generator = BasicVariantGenerator() trial_generator.add_configurations({name: spec}) - for trial in trial_generator.next_trials(): + while not trial_generator.is_finished(): + trial = trial_generator.next_trial() + if not trial: + break trial_executor.start_trial(trial) self.assertLessEqual(len(os.path.basename(trial.logdir)), 200) trial_executor.stop_trial(trial) diff --git a/python/ray/tune/tests/test_trial_runner_3.py b/python/ray/tune/tests/test_trial_runner_3.py index a3fc80479..5b14ad953 100644 --- a/python/ray/tune/tests/test_trial_runner_3.py +++ b/python/ray/tune/tests/test_trial_runner_3.py @@ -228,19 +228,18 @@ class TrialRunnerTest3(unittest.TestCase): class FinishFastAlg(_MockSuggestionAlgorithm): _index = 0 - def next_trials(self): + def next_trial(self): spec = self._experiment.spec - trials = [] + trial = None if self._index < spec["num_samples"]: trial = Trial( spec.get("run"), stopping_criterion=spec.get("stop")) - trials.append(trial) self._index += 1 if self._index > 4: self.set_finished() - return trials + return trial def suggest(self, trial_id): return {} @@ -625,7 +624,7 @@ class SearchAlgorithmTest(unittest.TestCase): searcher = TestSuggestion() alg = SearchGenerator(searcher) alg.add_configurations({"test": {"run": "__fake"}}) - trial = alg.next_trials()[0] + trial = alg.next_trial() self.assertTrue("e=5" in trial.experiment_tag) self.assertTrue("d=4" in trial.experiment_tag) diff --git a/python/ray/tune/tests/test_var.py b/python/ray/tune/tests/test_var.py index bf9e0c4f9..2b9182529 100644 --- a/python/ray/tune/tests/test_var.py +++ b/python/ray/tune/tests/test_var.py @@ -24,7 +24,14 @@ class VariantGeneratorTest(unittest.TestCase): def generate_trials(self, spec, name): suggester = BasicVariantGenerator() suggester.add_configurations({name: spec}) - return suggester.next_trials() + trials = [] + while not suggester.is_finished(): + trial = suggester.next_trial() + if trial: + trials.append(trial) + else: + break + return trials def testParseToTrials(self): trials = self.generate_trials({ diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index ba08bc3fa..0bd134718 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -734,19 +734,19 @@ class TrialRunner: or is_finished (timeout or search algorithm finishes). timeout (int): Seconds before blocking times out. """ - trials = self._search_alg.next_trials() - if blocking and not trials: + trial = self._search_alg.next_trial() + if blocking and not trial: start = time.time() # Checking `is_finished` instead of _search_alg.is_finished # is fine because blocking only occurs if all trials are # finished and search_algorithm is not yet finished - while (not trials and not self.is_finished() + while (not trial and not self.is_finished() and time.time() - start < timeout): logger.info("Blocking for next trial...") - trials = self._search_alg.next_trials() + trial = self._search_alg.next_trial() time.sleep(1) - for trial in trials: + if trial: self.add_trial(trial) def request_stop_trial(self, trial): diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py index 2e0a2a1e8..3dff1cc25 100644 --- a/python/ray/tune/tune.py +++ b/python/ray/tune/tune.py @@ -1,4 +1,5 @@ import logging +import sys from ray.tune.error import TuneError from ray.tune.experiment import convert_to_experiment_list, Experiment @@ -177,7 +178,8 @@ def run( num_samples (int): Number of times to sample from the hyperparameter space. Defaults to 1. If `grid_search` is provided as an argument, the grid will be repeated - `num_samples` of times. + `num_samples` of times. If this is -1, (virtually) infinite + samples are generated until a stopping condition is met. local_dir (str): Local dir to save training results to. Defaults to ``~/ray_results``. search_alg (Searcher): Search algorithm for optimization. @@ -292,6 +294,9 @@ def run( sync_config = sync_config or SyncConfig() set_sync_periods(sync_config) + if num_samples == -1: + num_samples = sys.maxsize + trial_executor = trial_executor or RayTrialExecutor( reuse_actors=reuse_actors, queue_trials=queue_trials) if isinstance(run_or_experiment, list): @@ -383,6 +388,8 @@ def run( else: progress_reporter = CLIReporter() + progress_reporter.set_total_samples(search_alg.total_samples) + # User Warning for GPUs if trial_executor.has_gpus(): if isinstance(resources_per_trial, diff --git a/python/ray/tune/web_server.py b/python/ray/tune/web_server.py index c515a6390..02219bea5 100644 --- a/python/ray/tune/web_server.py +++ b/python/ray/tune/web_server.py @@ -211,7 +211,10 @@ def RunnerHandler(runner): resource["trials"] = [] trial_generator = BasicVariantGenerator() trial_generator.add_configurations({name: spec}) - for trial in trial_generator.next_trials(): + while not trial_generator.is_finished(): + trial = trial_generator.next_trial() + if not trial: + break runner.add_trial(trial) resource["trials"].append(self._trial_info(trial)) return resource