diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py index ab1cca706..cfdd0d4cb 100644 --- a/python/ray/rllib/agents/ppo/ppo.py +++ b/python/ray/rllib/agents/ppo/ppo.py @@ -91,8 +91,9 @@ class PPOAgent(Agent): "sgd_batch_size": self.config["sgd_batchsize"], "sgd_stepsize": self.config["sgd_stepsize"], "num_sgd_iter": self.config["num_sgd_iter"], + "num_gpus": self.config["num_gpus"], "timesteps_per_batch": self.config["timesteps_per_batch"], - "standardize_fields": ["advantages"] + "standardize_fields": ["advantages"], }) def _train(self): diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/python/ray/rllib/optimizers/multi_gpu_optimizer.py index c0720339c..c79bf2562 100644 --- a/python/ray/rllib/optimizers/multi_gpu_optimizer.py +++ b/python/ray/rllib/optimizers/multi_gpu_optimizer.py @@ -35,16 +35,16 @@ class LocalMultiGPUOptimizer(PolicyOptimizer): sgd_stepsize=5e-5, num_sgd_iter=10, timesteps_per_batch=1024, + num_gpus=0, standardize_fields=[]): self.batch_size = sgd_batch_size self.sgd_stepsize = sgd_stepsize self.num_sgd_iter = num_sgd_iter self.timesteps_per_batch = timesteps_per_batch - gpu_ids = ray.get_gpu_ids() - if not gpu_ids: + if not num_gpus: self.devices = ["/cpu:0"] else: - self.devices = ["/gpu:{}".format(i) for i in range(len(gpu_ids))] + self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)] self.batch_size = int(sgd_batch_size / len(self.devices)) * len( self.devices) assert self.batch_size % len(self.devices) == 0