From 5518a738b33874c9fd3edaecd41f008accebb36b Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Thu, 13 Feb 2020 08:46:49 +0100 Subject: [PATCH] [RLlib] Fix erroneous use of LinearSchedule (in DDPG's exploration annealing). (#7125) * Fix erroneous use of LinearSchedule (in DDPG's exploration annealing). Erase schedules_obsoleted.py. * Trigger re-test. * Re-test. --- ci/jenkins_tests/run_rllib_tests.sh | 2 +- rllib/agents/ddpg/ddpg.py | 12 ++-- rllib/utils/schedules_obsoleted.py | 105 ---------------------------- 3 files changed, 7 insertions(+), 112 deletions(-) delete mode 100644 rllib/utils/schedules_obsoleted.py diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh index 54aaf777b..868ad37a0 100755 --- a/ci/jenkins_tests/run_rllib_tests.sh +++ b/ci/jenkins_tests/run_rllib_tests.sh @@ -380,7 +380,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py - docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2 @@ -493,3 +492,4 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/examples/random_env.py + diff --git a/rllib/agents/ddpg/ddpg.py b/rllib/agents/ddpg/ddpg.py index c30a88c54..6170affb5 100644 --- a/rllib/agents/ddpg/ddpg.py +++ b/rllib/agents/ddpg/ddpg.py @@ -1,7 +1,7 @@ from ray.rllib.agents.trainer import with_common_config from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer from ray.rllib.agents.ddpg.ddpg_policy import DDPGTFPolicy -from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule +from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule # yapf: disable # __sphinx_doc_begin__ @@ -176,11 +176,11 @@ def make_exploration_schedule(config, worker_index): # run properly return ConstantSchedule(0.0) elif config["exploration_should_anneal"]: - return LinearSchedule( - schedule_timesteps=int(config["exploration_fraction"] * - config["schedule_max_timesteps"]), - initial_p=1.0, - final_p=config["exploration_final_scale"]) + return PiecewiseSchedule( + endpoints=[(0, 1.0), (int(config["exploration_fraction"] * + config["schedule_max_timesteps"]), + config["exploration_final_scale"])], + outside_value=config["exploration_final_scale"]) else: # *always* add exploration noise return ConstantSchedule(1.0) diff --git a/rllib/utils/schedules_obsoleted.py b/rllib/utils/schedules_obsoleted.py deleted file mode 100644 index 36953c2c6..000000000 --- a/rllib/utils/schedules_obsoleted.py +++ /dev/null @@ -1,105 +0,0 @@ -"""This file is used for specifying various schedules that evolve over -time throughout the execution of the algorithm, such as: - - learning rate for the optimizer - - exploration epsilon for the epsilon greedy exploration strategy - - beta parameter for beta parameter in prioritized replay - -Each schedule has a function `value(t)` which returns the current value -of the parameter given the timestep t of the optimization procedure. -""" - - -class Schedule: - def value(self, t): - """Value of the schedule at time t""" - raise NotImplementedError() - - -class ConstantSchedule: - def __init__(self, value): - """Value remains constant over time. - - Parameters - ---------- - value: float - Constant value of the schedule - """ - self._v = value - - def value(self, t): - """See Schedule.value""" - return self._v - - -def linear_interpolation(l, r, alpha): - return l + alpha * (r - l) - - -class PiecewiseSchedule: - def __init__(self, - endpoints, - interpolation=linear_interpolation, - outside_value=None): - """Piecewise schedule. - - endpoints: [(int, int)] - list of pairs `(time, value)` meanining that schedule should output - `value` when `t==time`. All the values for time must be sorted in - an increasing order. When t is between two times, e.g. - `(time_a, value_a)` - and `(time_b, value_b)`, such that `time_a <= t < time_b` then value - outputs `interpolation(value_a, value_b, alpha)` where alpha is a - fraction of time passed between `time_a` and `time_b` for time `t`. - interpolation: lambda float, float, float: float - a function that takes value to the left and to the right of t - according to the `endpoints`. Alpha is the fraction of distance from - left endpoint to right endpoint that t has covered. See - linear_interpolation for example. - outside_value: float - if the value is requested outside of all the intervals sepecified in - `endpoints` this value is returned. If None then AssertionError is - raised when outside value is requested. - """ - idxes = [e[0] for e in endpoints] - assert idxes == sorted(idxes) - self._interpolation = interpolation - self._outside_value = outside_value - self._endpoints = endpoints - - def value(self, t): - """See Schedule.value""" - for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], - self._endpoints[1:]): - if l_t <= t and t < r_t: - alpha = float(t - l_t) / (r_t - l_t) - return self._interpolation(l, r, alpha) - - # t does not belong to any of the pieces, so doom. - assert self._outside_value is not None - return self._outside_value - - -class LinearSchedule: - def __init__(self, schedule_timesteps, final_p, initial_p=1.0): - """Linear interpolation between initial_p and final_p over - schedule_timesteps. After this many timesteps pass final_p is - returned. - - Parameters - ---------- - schedule_timesteps: int - Number of timesteps for which to linearly anneal initial_p - to final_p - initial_p: float - initial output value - final_p: float - final output value - """ - self.schedule_timesteps = schedule_timesteps - self.final_p = final_p - self.initial_p = initial_p - - def value(self, t): - """See Schedule.value""" - fraction = min(float(t) / max(1, self.schedule_timesteps), 1.0) - return self.initial_p + fraction * (self.final_p - self.initial_p)