[rllib] Set num_cpu=None for workers in the default settings (#1793)

2026-06-28 05:43:03 +08:00 · 2018-03-29 16:33:40 -07:00
parent 4116c64698
commit faaa123046
3 changed files with 10 additions and 4 deletions
@@ -106,6 +106,8 @@ DEFAULT_CONFIG = dict(
    num_workers=0,
    # Number of GPUs to allocate for each worker (none allocated if 0).
    num_gpus_per_worker=0,
+    # Number of CPUs to reserve for each worker (no reservation if None).
+    num_cpus_per_worker=None,
    # Optimizer class to use.
    optimizer_class="LocalSyncReplayOptimizer",
    # Config to pass to the optimizer.
@@ -126,7 +128,8 @@ class DQNAgent(Agent):
        self.local_evaluator = DQNEvaluator(
            self.registry, self.env_creator, self.config, self.logdir, 0)
        remote_cls = ray.remote(
-            num_cpus=1, num_gpus=self.config["num_gpus_per_worker"])(
+            num_cpus=self.config["num_cpus_per_worker"],
+            num_gpus=self.config["num_gpus_per_worker"])(
            DQNEvaluator)
        self.remote_evaluators = [
            remote_cls.remote(
@@ -26,11 +26,14 @@ class PolicyOptimizer(object):
        remote_evaluators (list): List of remote evaluator replicas, or [].
        num_steps_trained (int): Number of timesteps trained on so far.
        num_steps_sampled (int): Number of timesteps sampled so far.
+        evaluator_resources (dict): Optional resource requests to set for
+            evaluators created by this optimizer.
    """

    @classmethod
    def make(
-            cls, evaluator_cls, evaluator_args, num_workers, optimizer_config):
+            cls, evaluator_cls, evaluator_args, num_workers, optimizer_config,
+            evaluator_resources={"num_cpus": None}):
        """Create evaluators and an optimizer instance using those evaluators.

        Args:
@@ -43,7 +46,7 @@ class PolicyOptimizer(object):
        """

        local_evaluator = evaluator_cls(*evaluator_args)
-        remote_cls = ray.remote(num_cpus=1)(evaluator_cls)
+        remote_cls = ray.remote(**evaluator_resources)(evaluator_cls)
        remote_evaluators = [
            remote_cls.remote(*evaluator_args)
            for _ in range(num_workers)]
@@ -70,7 +70,7 @@ DEFAULT_CONFIG = {
    # Number of actors used to collect the rollouts
    "num_workers": 5,
    # Resource requirements for remote actors
-    "worker_resources": {"num_cpus": 1},
+    "worker_resources": {"num_cpus": None},
    # Dump TensorFlow timeline after this many SGD minibatches
    "full_trace_nth_sgd_batch": -1,
    # Whether to profile data loading