mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 01:46:10 +08:00
[tune] Add command line support for choosing early stopping schedulers (#1209)
* command line support * add checkpoint freq * fix other flags * fix * docs * doc
This commit is contained in:
@@ -142,7 +142,10 @@ class Agent(object):
|
||||
start = time.time()
|
||||
result = self._train()
|
||||
self._iteration += 1
|
||||
time_this_iter = time.time() - start
|
||||
if result.time_this_iter_s is not None:
|
||||
time_this_iter = result.time_this_iter_s
|
||||
else:
|
||||
time_this_iter = time.time() - start
|
||||
|
||||
assert result.timesteps_this_iter is not None
|
||||
|
||||
@@ -340,6 +343,30 @@ class _MockAgent(Agent):
|
||||
return self.info
|
||||
|
||||
|
||||
class _SigmoidFakeData(_MockAgent):
|
||||
"""Agent that returns sigmoid learning curves.
|
||||
|
||||
This can be helpful for evaluating early stopping algorithms."""
|
||||
|
||||
_agent_name = "SigmoidFakeData"
|
||||
_default_config = {
|
||||
"width": 100,
|
||||
"height": 100,
|
||||
"offset": 0,
|
||||
"iter_time": 10,
|
||||
"iter_timesteps": 1,
|
||||
}
|
||||
|
||||
def _train(self):
|
||||
i = max(0, self.iteration - self.config["offset"])
|
||||
v = np.tanh(float(i) / self.config["width"])
|
||||
v *= self.config["height"]
|
||||
return TrainingResult(
|
||||
episode_reward_mean=v, episode_len_mean=v,
|
||||
timesteps_this_iter=self.config["iter_timesteps"],
|
||||
time_this_iter_s=self.config["iter_time"], info={})
|
||||
|
||||
|
||||
def get_agent_class(alg):
|
||||
"""Returns the class of an known agent given its name."""
|
||||
|
||||
@@ -360,6 +387,8 @@ def get_agent_class(alg):
|
||||
return script_runner.ScriptRunner
|
||||
elif alg == "__fake":
|
||||
return _MockAgent
|
||||
elif alg == "__sigmoid_fake_data":
|
||||
return _SigmoidFakeData
|
||||
else:
|
||||
raise Exception(
|
||||
("Unknown algorithm {}, check --alg argument. Valid choices " +
|
||||
|
||||
@@ -9,7 +9,7 @@ import sys
|
||||
import yaml
|
||||
|
||||
from ray.tune.config_parser import make_parser, resources_to_json
|
||||
from ray.tune.tune import run_experiments
|
||||
from ray.tune.tune import make_scheduler, run_experiments
|
||||
|
||||
|
||||
EXAMPLE_USAGE = """
|
||||
@@ -18,6 +18,8 @@ Training example:
|
||||
|
||||
Grid search example:
|
||||
./train.py -f tuned_examples/cartpole-grid-search-example.yaml
|
||||
|
||||
Note that -f overrides all other trial-specific command-line options.
|
||||
"""
|
||||
|
||||
|
||||
@@ -33,6 +35,8 @@ parser.add_argument("--num-cpus", default=None, type=int,
|
||||
help="Number of CPUs to allocate to Ray.")
|
||||
parser.add_argument("--num-gpus", default=None, type=int,
|
||||
help="Number of GPUs to allocate to Ray.")
|
||||
parser.add_argument("--experiment-name", default="default", type=str,
|
||||
help="Name of experiment dir.")
|
||||
parser.add_argument("-f", "--config-file", default=None, type=str,
|
||||
help="If specified, use config options from this file.")
|
||||
|
||||
@@ -43,15 +47,19 @@ if __name__ == "__main__":
|
||||
with open(args.config_file) as f:
|
||||
experiments = yaml.load(f)
|
||||
else:
|
||||
# Note: keep this in sync with tune/config_parser.py
|
||||
experiments = {
|
||||
"default": { # i.e. log to /tmp/ray/default
|
||||
args.experiment_name: { # i.e. log to /tmp/ray/default
|
||||
"alg": args.alg,
|
||||
"checkpoint_freq": args.checkpoint_freq,
|
||||
"local_dir": args.local_dir,
|
||||
"env": args.env,
|
||||
"resources": resources_to_json(args.resources),
|
||||
"stop": args.stop,
|
||||
"config": args.config,
|
||||
"restore": args.restore,
|
||||
"repeat": args.repeat,
|
||||
"upload_dir": args.upload_dir,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,5 +70,6 @@ if __name__ == "__main__":
|
||||
parser.error("the following arguments are required: --env")
|
||||
|
||||
run_experiments(
|
||||
experiments, redis_address=args.redis_address,
|
||||
experiments, scheduler=make_scheduler(args),
|
||||
redis_address=args.redis_address,
|
||||
num_cpus=args.num_cpus, num_gpus=args.num_gpus)
|
||||
|
||||
@@ -31,6 +31,7 @@ def make_parser(**kwargs):
|
||||
|
||||
parser = argparse.ArgumentParser(**kwargs)
|
||||
|
||||
# Note: keep this in sync with rllib/train.py
|
||||
parser.add_argument("--alg", default=None, type=str,
|
||||
help="The learning algorithm to train.")
|
||||
parser.add_argument("--stop", default="{}", type=json.loads,
|
||||
@@ -44,10 +45,14 @@ def make_parser(**kwargs):
|
||||
help="Number of times to repeat each trial.")
|
||||
parser.add_argument("--local-dir", default="/tmp/ray", type=str,
|
||||
help="Local dir to save training results to.")
|
||||
parser.add_argument("--upload-dir", default=None, type=str,
|
||||
parser.add_argument("--upload-dir", default="", type=str,
|
||||
help="URI to upload training results to.")
|
||||
parser.add_argument("--checkpoint-freq", default=None, type=int,
|
||||
parser.add_argument("--checkpoint-freq", default=0, type=int,
|
||||
help="How many iterations between checkpoints.")
|
||||
parser.add_argument("--scheduler", default="FIFO", type=str,
|
||||
help="FIFO, MedianStopping, or HyperBand")
|
||||
parser.add_argument("--scheduler-config", default="{}", type=json.loads,
|
||||
help="Config options to pass to the scheduler.")
|
||||
|
||||
# Note: this currently only makes sense when running a single trial
|
||||
parser.add_argument("--restore", default=None, type=str,
|
||||
|
||||
@@ -32,7 +32,7 @@ class HyperBandScheduler(FIFOScheduler):
|
||||
and band and will spill over to new brackets/bands accordingly.
|
||||
"""
|
||||
|
||||
def __init__(self, max_iter, eta=3):
|
||||
def __init__(self, max_iter=200, eta=3):
|
||||
"""
|
||||
args:
|
||||
max_iter (int): maximum iterations per configuration
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import numpy as np
|
||||
|
||||
from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler
|
||||
|
||||
|
||||
class MedianStoppingRule(FIFOScheduler):
|
||||
"""Implements the median stopping rule as described in the Vizier paper:
|
||||
|
||||
https://research.google.com/pubs/pub46180.html
|
||||
|
||||
Args:
|
||||
time_attr (str): The TrainingResult attr to use for comparing time.
|
||||
Note that you can pass in something non-temporal such as
|
||||
`training_iteration` as a measure of progress, the only requirement
|
||||
is that the attribute should increase monotonically.
|
||||
reward_attr (str): The TrainingResult objective value attribute. As
|
||||
with `time_attr`, this may refer to any objective value that
|
||||
is supposed to increase with time.
|
||||
grace_period (float): Only stop trials at least this old in time.
|
||||
The units are the same as the attribute named by `time_attr`.
|
||||
min_samples_required (int): Min samples to compute median over.
|
||||
hard_stop (bool): If false, pauses trials instead of stopping
|
||||
them. When all other trials are complete, paused trials will be
|
||||
resumed and allowed to run FIFO.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, time_attr='time_total_s', reward_attr='episode_reward_mean',
|
||||
grace_period=60.0, min_samples_required=3, hard_stop=True):
|
||||
FIFOScheduler.__init__(self)
|
||||
self._stopped_trials = set()
|
||||
self._completed_trials = set()
|
||||
self._results = collections.defaultdict(list)
|
||||
self._grace_period = grace_period
|
||||
self._min_samples_required = min_samples_required
|
||||
self._reward_attr = reward_attr
|
||||
self._time_attr = time_attr
|
||||
self._hard_stop = hard_stop
|
||||
|
||||
def on_trial_result(self, trial_runner, trial, result):
|
||||
"""Callback for early stopping.
|
||||
|
||||
This stopping rule stops a running trial if the trial's best objective
|
||||
value by step `t` is strictly worse than the median of the running
|
||||
averages of all completed trials' objectives reported up to step `t`.
|
||||
"""
|
||||
|
||||
if trial in self._stopped_trials:
|
||||
assert not self._hard_stop
|
||||
return TrialScheduler.CONTINUE # fall back to FIFO
|
||||
|
||||
time = getattr(result, self._time_attr)
|
||||
self._results[trial].append(result)
|
||||
median_result = self._get_median_result(time)
|
||||
best_result = self._best_result(trial)
|
||||
print("Trial {} best res={} vs median res={} at t={}".format(
|
||||
trial, best_result, median_result, time))
|
||||
if best_result < median_result and time > self._grace_period:
|
||||
print("MedianStoppingRule: early stopping {}".format(trial))
|
||||
self._stopped_trials.add(trial)
|
||||
if self._hard_stop:
|
||||
return TrialScheduler.STOP
|
||||
else:
|
||||
return TrialScheduler.PAUSE
|
||||
else:
|
||||
return TrialScheduler.CONTINUE
|
||||
|
||||
def on_trial_complete(self, trial_runner, trial, result):
|
||||
self._results[trial].append(result)
|
||||
self._completed_trials.add(trial)
|
||||
|
||||
def debug_string(self):
|
||||
return "Using MedianStoppingRule: num_stopped={}.".format(
|
||||
len(self._stopped_trials))
|
||||
|
||||
def _get_median_result(self, time):
|
||||
scores = []
|
||||
for trial in self._completed_trials:
|
||||
scores.append(self._running_result(trial, time))
|
||||
if len(scores) >= self._min_samples_required:
|
||||
return np.median(scores)
|
||||
else:
|
||||
return float('-inf')
|
||||
|
||||
def _running_result(self, trial, t_max=float('inf')):
|
||||
results = self._results[trial]
|
||||
# TODO(ekl) we could do interpolation to be more precise, but for now
|
||||
# assume len(results) is large and the time diffs are roughly equal
|
||||
return np.mean(
|
||||
[getattr(r, self._reward_attr)
|
||||
for r in results if getattr(r, self._time_attr) <= t_max])
|
||||
|
||||
def _best_result(self, trial):
|
||||
results = self._results[trial]
|
||||
return max([getattr(r, self._reward_attr) for r in results])
|
||||
@@ -46,7 +46,8 @@ TrainingResult = namedtuple("TrainingResult", [
|
||||
# (Auto-filled) Number of timesteps in the simulator in this iteration.
|
||||
"timesteps_this_iter",
|
||||
|
||||
# (Auto-filled) Time in seconds this iteration took to run.
|
||||
# (Auto-filled) Time in seconds this iteration took to run. This may be
|
||||
# overriden in order to override the system-computed time difference.
|
||||
"time_this_iter_s",
|
||||
|
||||
# (Auto-filled) Accumulated time in seconds for this entire experiment.
|
||||
|
||||
@@ -60,7 +60,7 @@ class Trial(object):
|
||||
def __init__(
|
||||
self, env_creator, alg, config={}, local_dir='/tmp/ray',
|
||||
experiment_tag=None, resources=Resources(cpu=1, gpu=0),
|
||||
stopping_criterion={}, checkpoint_freq=None,
|
||||
stopping_criterion={}, checkpoint_freq=0,
|
||||
restore_path=None, upload_dir=None):
|
||||
"""Initialize a new trial.
|
||||
|
||||
@@ -179,7 +179,7 @@ class Trial(object):
|
||||
def should_checkpoint(self):
|
||||
"""Whether this trial is due for checkpointing."""
|
||||
|
||||
if self.checkpoint_freq is None:
|
||||
if not self.checkpoint_freq:
|
||||
return False
|
||||
|
||||
return self.last_result.training_iteration % self.checkpoint_freq == 0
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import ray
|
||||
import time
|
||||
import traceback
|
||||
@@ -42,9 +43,21 @@ class TrialRunner(object):
|
||||
self._committed_resources = Resources(cpu=0, gpu=0)
|
||||
self._resources_initialized = False
|
||||
|
||||
# For debugging, it may be useful to halt trials after some time has
|
||||
# elapsed. TODO(ekl) consider exposing this in the API.
|
||||
self._global_time_limit = float(
|
||||
os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
|
||||
self._total_time = 0
|
||||
|
||||
def is_finished(self):
|
||||
"""Returns whether all trials have finished running."""
|
||||
|
||||
if self._total_time > self._global_time_limit:
|
||||
print(
|
||||
"Exceeded global time limit {} / {}".format(
|
||||
self._total_time, self._global_time_limit))
|
||||
return True
|
||||
|
||||
for t in self._trials:
|
||||
if t.status in [Trial.PENDING, Trial.RUNNING, Trial.PAUSED]:
|
||||
return False
|
||||
@@ -148,6 +161,7 @@ class TrialRunner(object):
|
||||
result = ray.get(result_id)
|
||||
print("result", result)
|
||||
trial.last_result = result
|
||||
self._total_time += result.time_this_iter_s
|
||||
|
||||
if trial.should_stop(result):
|
||||
self._scheduler_alg.on_trial_complete(self, trial, result)
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
|
||||
import collections
|
||||
import numpy as np
|
||||
|
||||
from ray.tune.trial import Trial
|
||||
|
||||
|
||||
@@ -73,89 +70,11 @@ class FIFOScheduler(TrialScheduler):
|
||||
if (trial.status == Trial.PENDING and
|
||||
trial_runner.has_resources(trial.resources)):
|
||||
return trial
|
||||
for trial in trial_runner.get_trials():
|
||||
if (trial.status == Trial.PAUSED and
|
||||
trial_runner.has_resources(trial.resources)):
|
||||
return trial
|
||||
return None
|
||||
|
||||
def debug_string(self):
|
||||
return "Using FIFO scheduling algorithm."
|
||||
|
||||
|
||||
# TODO(ekl) expose this in the command line API
|
||||
class MedianStoppingRule(FIFOScheduler):
|
||||
"""Implements the median stopping rule as described in the Vizier paper:
|
||||
|
||||
https://research.google.com/pubs/pub46180.html
|
||||
|
||||
Args:
|
||||
time_attr (str): The TrainingResult attr to use for comparing time.
|
||||
Note that you can pass in something non-temporal such as
|
||||
`training_iteration` as a measure of progress, the only requirement
|
||||
is that the attribute should increase monotonically.
|
||||
reward_attr (str): The TrainingResult objective value attribute. As
|
||||
with `time_attr`, this may refer to any objective value that
|
||||
is supposed to increase with time.
|
||||
grace_period (float): Only stop trials at least this old in time.
|
||||
The units are the same as the attribute named by `time_attr`.
|
||||
min_samples_required (int): Min samples to compute median over.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, time_attr='time_total_s', reward_attr='episode_reward_mean',
|
||||
grace_period=60.0, min_samples_required=3):
|
||||
FIFOScheduler.__init__(self)
|
||||
self._completed_trials = set()
|
||||
self._results = collections.defaultdict(list)
|
||||
self._grace_period = grace_period
|
||||
self._min_samples_required = min_samples_required
|
||||
self._reward_attr = reward_attr
|
||||
self._time_attr = time_attr
|
||||
self._num_stopped = 0
|
||||
|
||||
def on_trial_result(self, trial_runner, trial, result):
|
||||
"""Callback for early stopping.
|
||||
|
||||
This stopping rule stops a running trial if the trial's best objective
|
||||
value by step `t` is strictly worse than the median of the running
|
||||
averages of all completed trials' objectives reported up to step `t`.
|
||||
"""
|
||||
|
||||
time = getattr(result, self._time_attr)
|
||||
self._results[trial].append(result)
|
||||
median_result = self._get_median_result(time)
|
||||
best_result = self._best_result(trial)
|
||||
print("Trial {} best res={} vs median res={} at t={}".format(
|
||||
trial, best_result, median_result, time))
|
||||
if best_result < median_result and time > self._grace_period:
|
||||
print("MedianStoppingRule: early stopping {}".format(trial))
|
||||
self._num_stopped += 1
|
||||
return TrialScheduler.STOP
|
||||
else:
|
||||
return TrialScheduler.CONTINUE
|
||||
|
||||
def on_trial_complete(self, trial_runner, trial, result):
|
||||
self._results[trial].append(result)
|
||||
self._completed_trials.add(trial)
|
||||
|
||||
def debug_string(self):
|
||||
return "Using MedianStoppingRule: num_stopped={}.".format(
|
||||
self._num_stopped)
|
||||
|
||||
def _get_median_result(self, time):
|
||||
scores = []
|
||||
for trial in self._completed_trials:
|
||||
scores.append(self._running_result(trial, time))
|
||||
if len(scores) >= self._min_samples_required:
|
||||
return np.median(scores)
|
||||
else:
|
||||
return float('-inf')
|
||||
|
||||
def _running_result(self, trial, t_max=float('inf')):
|
||||
results = self._results[trial]
|
||||
# TODO(ekl) we could do interpolation to be more precise, but for now
|
||||
# assume len(results) is large and the time diffs are roughly equal
|
||||
return np.mean(
|
||||
[getattr(r, self._reward_attr)
|
||||
for r in results if getattr(r, self._time_attr) <= t_max])
|
||||
|
||||
def _best_result(self, trial):
|
||||
results = self._results[trial]
|
||||
return max([getattr(r, self._reward_attr) for r in results])
|
||||
|
||||
+29
-4
@@ -5,12 +5,16 @@ from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
import ray
|
||||
from ray.tune.trial_runner import TrialRunner
|
||||
from ray.tune.hyperband import HyperBandScheduler
|
||||
from ray.tune.median_stopping_rule import MedianStoppingRule
|
||||
from ray.tune.trial import Trial
|
||||
from ray.tune.trial_runner import TrialRunner
|
||||
from ray.tune.trial_scheduler import FIFOScheduler
|
||||
from ray.tune.variant_generator import generate_trials
|
||||
|
||||
|
||||
@@ -32,12 +36,33 @@ parser.add_argument("--num-cpus", default=None, type=int,
|
||||
help="Number of CPUs to allocate to Ray.")
|
||||
parser.add_argument("--num-gpus", default=None, type=int,
|
||||
help="Number of GPUs to allocate to Ray.")
|
||||
parser.add_argument("--scheduler", default="FIFO", type=str,
|
||||
help="FIFO, MedianStopping, or HyperBand")
|
||||
parser.add_argument("--scheduler-config", default="{}", type=json.loads,
|
||||
help="Config options to pass to the scheduler.")
|
||||
parser.add_argument("-f", "--config-file", required=True, type=str,
|
||||
help="Read experiment options from this JSON/YAML file.")
|
||||
|
||||
|
||||
def run_experiments(experiments, **ray_args):
|
||||
runner = TrialRunner()
|
||||
SCHEDULERS = {
|
||||
"FIFO": FIFOScheduler,
|
||||
"MedianStopping": MedianStoppingRule,
|
||||
"HyperBand": HyperBandScheduler,
|
||||
}
|
||||
|
||||
|
||||
def make_scheduler(args):
|
||||
if args.scheduler in SCHEDULERS:
|
||||
return SCHEDULERS[args.scheduler](**args.scheduler_config)
|
||||
else:
|
||||
assert False, "Unknown scheduler: {}, should be one of {}".format(
|
||||
args.scheduler, SCHEDULERS.keys())
|
||||
|
||||
|
||||
def run_experiments(experiments, scheduler=None, **ray_args):
|
||||
if scheduler is None:
|
||||
scheduler = make_scheduler(args)
|
||||
runner = TrialRunner(scheduler)
|
||||
|
||||
for name, spec in experiments.items():
|
||||
for trial in generate_trials(spec, name):
|
||||
@@ -63,5 +88,5 @@ if __name__ == "__main__":
|
||||
with open(args.config_file) as f:
|
||||
experiments = yaml.load(f)
|
||||
run_experiments(
|
||||
experiments, redis_address=args.redis_address,
|
||||
experiments, make_scheduler(args), redis_address=args.redis_address,
|
||||
num_cpus=args.num_cpus, num_gpus=args.num_gpus)
|
||||
|
||||
Reference in New Issue
Block a user