[RLlib] Fix erroneous use of LinearSchedule (in DDPG's exploration annealing). (#7125)

* Fix erroneous use of LinearSchedule (in DDPG's exploration annealing). Erase schedules_obsoleted.py.
* Trigger re-test.
* Re-test.
2026-07-05 22:54:17 +08:00 · 2020-02-13 08:46:49 +01:00
parent 9fc3e2e50f
commit 5518a738b3
3 changed files with 7 additions and 112 deletions
@@ -380,7 +380,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
        /ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py

-
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    /ray/ci/suppress_output python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2

@@ -493,3 +492,4 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \

 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    /ray/ci/suppress_output python /ray/rllib/examples/random_env.py
+
@@ -1,7 +1,7 @@
 from ray.rllib.agents.trainer import with_common_config
 from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer
 from ray.rllib.agents.ddpg.ddpg_policy import DDPGTFPolicy
-from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule
+from ray.rllib.utils.schedules import ConstantSchedule, PiecewiseSchedule

 # yapf: disable
 # __sphinx_doc_begin__
@@ -176,11 +176,11 @@ def make_exploration_schedule(config, worker_index):
            # run properly
            return ConstantSchedule(0.0)
    elif config["exploration_should_anneal"]:
-        return LinearSchedule(
-            schedule_timesteps=int(config["exploration_fraction"] *
-                                   config["schedule_max_timesteps"]),
-            initial_p=1.0,
-            final_p=config["exploration_final_scale"])
+        return PiecewiseSchedule(
+            endpoints=[(0, 1.0), (int(config["exploration_fraction"] *
+                                      config["schedule_max_timesteps"]),
+                                  config["exploration_final_scale"])],
+            outside_value=config["exploration_final_scale"])
    else:
        # *always* add exploration noise
        return ConstantSchedule(1.0)
@@ -1,105 +0,0 @@
-"""This file is used for specifying various schedules that evolve over
-time throughout the execution of the algorithm, such as:
- - learning rate for the optimizer
- - exploration epsilon for the epsilon greedy exploration strategy
- - beta parameter for beta parameter in prioritized replay
-
-Each schedule has a function `value(t)` which returns the current value
-of the parameter given the timestep t of the optimization procedure.
-"""
-
-
-class Schedule:
-    def value(self, t):
-        """Value of the schedule at time t"""
-        raise NotImplementedError()
-
-
-class ConstantSchedule:
-    def __init__(self, value):
-        """Value remains constant over time.
-
-        Parameters
-        ----------
-        value: float
-          Constant value of the schedule
-        """
-        self._v = value
-
-    def value(self, t):
-        """See Schedule.value"""
-        return self._v
-
-
-def linear_interpolation(l, r, alpha):
-    return l + alpha * (r - l)
-
-
-class PiecewiseSchedule:
-    def __init__(self,
-                 endpoints,
-                 interpolation=linear_interpolation,
-                 outside_value=None):
-        """Piecewise schedule.
-
-        endpoints: [(int, int)]
-          list of pairs `(time, value)` meanining that schedule should output
-          `value` when `t==time`. All the values for time must be sorted in
-          an increasing order. When t is between two times, e.g.
-          `(time_a, value_a)`
-          and `(time_b, value_b)`, such that `time_a <= t < time_b` then value
-          outputs `interpolation(value_a, value_b, alpha)` where alpha is a
-          fraction of time passed between `time_a` and `time_b` for time `t`.
-        interpolation: lambda float, float, float: float
-          a function that takes value to the left and to the right of t
-          according to the `endpoints`. Alpha is the fraction of distance from
-          left endpoint to right endpoint that t has covered. See
-          linear_interpolation for example.
-        outside_value: float
-          if the value is requested outside of all the intervals sepecified in
-          `endpoints` this value is returned. If None then AssertionError is
-          raised when outside value is requested.
-        """
-        idxes = [e[0] for e in endpoints]
-        assert idxes == sorted(idxes)
-        self._interpolation = interpolation
-        self._outside_value = outside_value
-        self._endpoints = endpoints
-
-    def value(self, t):
-        """See Schedule.value"""
-        for (l_t, l), (r_t, r) in zip(self._endpoints[:-1],
-                                      self._endpoints[1:]):
-            if l_t <= t and t < r_t:
-                alpha = float(t - l_t) / (r_t - l_t)
-                return self._interpolation(l, r, alpha)
-
-        # t does not belong to any of the pieces, so doom.
-        assert self._outside_value is not None
-        return self._outside_value
-
-
-class LinearSchedule:
-    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
-        """Linear interpolation between initial_p and final_p over
-        schedule_timesteps. After this many timesteps pass final_p is
-        returned.
-
-        Parameters
-        ----------
-        schedule_timesteps: int
-          Number of timesteps for which to linearly anneal initial_p
-          to final_p
-        initial_p: float
-          initial output value
-        final_p: float
-          final output value
-        """
-        self.schedule_timesteps = schedule_timesteps
-        self.final_p = final_p
-        self.initial_p = initial_p
-
-    def value(self, t):
-        """See Schedule.value"""
-        fraction = min(float(t) / max(1, self.schedule_timesteps), 1.0)
-        return self.initial_p + fraction * (self.final_p - self.initial_p)