diff --git a/python/ray/rllib/dqn/dqn.py b/python/ray/rllib/dqn/dqn.py
index fdea65415..21382cd5b 100644
--- a/python/ray/rllib/dqn/dqn.py
+++ b/python/ray/rllib/dqn/dqn.py
@@ -106,6 +106,8 @@ DEFAULT_CONFIG = dict(
     num_workers=0,
     # Whether to allocate GPUs for workers (if > 0).
     num_gpus_per_worker=0,
+    # Whether to reserve CPUs for workers (if not None).
+    num_cpus_per_worker=None,
     # Optimizer class to use.
     optimizer_class="LocalSyncReplayOptimizer",
     # Config to pass to the optimizer.
@@ -126,7 +128,8 @@ class DQNAgent(Agent):
         self.local_evaluator = DQNEvaluator(
             self.registry, self.env_creator, self.config, self.logdir, 0)
         remote_cls = ray.remote(
-            num_cpus=1, num_gpus=self.config["num_gpus_per_worker"])(
+            num_cpus=self.config["num_cpus_per_worker"],
+            num_gpus=self.config["num_gpus_per_worker"])(
             DQNEvaluator)
         self.remote_evaluators = [
             remote_cls.remote(
diff --git a/python/ray/rllib/optimizers/policy_optimizer.py b/python/ray/rllib/optimizers/policy_optimizer.py
index cf3f5d755..1e31edc66 100644
--- a/python/ray/rllib/optimizers/policy_optimizer.py
+++ b/python/ray/rllib/optimizers/policy_optimizer.py
@@ -26,11 +26,14 @@ class PolicyOptimizer(object):
         remote_evaluators (list): List of remote evaluator replicas, or [].
         num_steps_trained (int): Number of timesteps trained on so far.
         num_steps_sampled (int): Number of timesteps sampled so far.
+        evaluator_resources (dict): Optional resource requests to set for
+            evaluators created by this optimizer.
     """
 
     @classmethod
     def make(
-            cls, evaluator_cls, evaluator_args, num_workers, optimizer_config):
+            cls, evaluator_cls, evaluator_args, num_workers, optimizer_config,
+            evaluator_resources=None):
         """Create evaluators and an optimizer instance using those evaluators.
 
         Args:
@@ -43,7 +46,11 @@ class PolicyOptimizer(object):
         """
 
         local_evaluator = evaluator_cls(*evaluator_args)
-        remote_cls = ray.remote(num_cpus=1)(evaluator_cls)
+        # Resolve the default here rather than in the signature so that no
+        # single dict instance is shared between calls to make().
+        if evaluator_resources is None:
+            evaluator_resources = {"num_cpus": None}
+        remote_cls = ray.remote(**evaluator_resources)(evaluator_cls)
         remote_evaluators = [
             remote_cls.remote(*evaluator_args)
             for _ in range(num_workers)]
diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py
index 5811b2e60..7cbb4569a 100644
--- a/python/ray/rllib/ppo/ppo.py
+++ b/python/ray/rllib/ppo/ppo.py
@@ -70,7 +70,7 @@ DEFAULT_CONFIG = {
     # Number of actors used to collect the rollouts
     "num_workers": 5,
     # Resource requirements for remote actors
-    "worker_resources": {"num_cpus": 1},
+    "worker_resources": {"num_cpus": None},
     # Dump TensorFlow timeline after this many SGD minibatches
     "full_trace_nth_sgd_batch": -1,
     # Whether to profile data loading