diff --git a/python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml b/python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml index 2d3be6fd7..56d33d3c0 100644 --- a/python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml +++ b/python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml @@ -7,6 +7,7 @@ cartpole-ppo: time_total_s: 180 resources: cpu: 2 + driver_cpu_limit: 1 config: num_workers: 2 num_sgd_iter: diff --git a/python/ray/rllib/tuned_examples/hopper-ppo.yaml b/python/ray/rllib/tuned_examples/hopper-ppo.yaml index b20a434a4..6c2ecae9f 100644 --- a/python/ray/rllib/tuned_examples/hopper-ppo.yaml +++ b/python/ray/rllib/tuned_examples/hopper-ppo.yaml @@ -5,4 +5,6 @@ hopper-ppo: resources: cpu: 64 gpu: 4 + driver_cpu_limit: 4 + driver_gpu_limit: 4 config: {"gamma": 0.995, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 160000, "num_workers": 64} diff --git a/python/ray/rllib/tuned_examples/humanoid-es.yaml b/python/ray/rllib/tuned_examples/humanoid-es.yaml index fd3a67596..a4fe32e61 100644 --- a/python/ray/rllib/tuned_examples/humanoid-es.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-es.yaml @@ -3,6 +3,7 @@ humanoid-es: alg: ES resources: cpu: 100 + driver_cpu_limit: 4 stop: episode_reward_mean: 6000 config: diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml index 61e43e738..83e4d1813 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml @@ -7,5 +7,6 @@ humanoid-ppo-gae: resources: cpu: 64 gpu: 4 + driver_cpu_limit: 4 config: {"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "write_logs": false} diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml index 99f9dfa91..098798469 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml @@ -7,4 +7,5 @@ humanoid-ppo: resources: cpu: 64 gpu: 4 + driver_cpu_limit: 4 config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "use_gae": false} diff --git a/python/ray/rllib/tuned_examples/pong-a3c.yaml b/python/ray/rllib/tuned_examples/pong-a3c.yaml index 0bdcfe315..f288d76fc 100644 --- a/python/ray/rllib/tuned_examples/pong-a3c.yaml +++ b/python/ray/rllib/tuned_examples/pong-a3c.yaml @@ -3,6 +3,7 @@ pong-a3c: alg: A3C resources: cpu: 16 + driver_cpu_limit: 1 config: num_workers: 16 num_batches_per_iteration: 1000 diff --git a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml index f94d3370b..ea757ab7b 100644 --- a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml +++ b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml @@ -5,4 +5,5 @@ walker2d-v1-ppo: resources: cpu: 64 gpu: 4 + driver_cpu_limit: 4 config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64} diff --git a/python/ray/tune/README.rst b/python/ray/tune/README.rst index 9716c9b16..d05022ab8 100644 --- a/python/ray/tune/README.rst +++ b/python/ray/tune/README.rst @@ -130,7 +130,8 @@ expression. episode_reward_mean: 200 time_total_s: 180 resources: - cpu: 4 + cpu: 5 + driver_cpu_limit: 1 # of the 5 CPUs, only 1 is used by the driver config: num_workers: 4 num_sgd_iter: diff --git a/python/ray/tune/config_parser.py b/python/ray/tune/config_parser.py index 1cba83f73..0f941b409 100644 --- a/python/ray/tune/config_parser.py +++ b/python/ray/tune/config_parser.py @@ -14,7 +14,9 @@ from ray.tune.trial import Trial, Resources def _resource_json(data): values = json.loads(data) - return Resources(values.get('cpu', 0), values.get('gpu', 0)) + return Resources( + values.get('cpu', 0), values.get('gpu', 0), + values.get('driver_cpu_limit'), values.get('driver_gpu_limit')) def make_parser(description): diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 01d365270..b4140ff04 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -10,8 +10,32 @@ from collections import namedtuple from ray.rllib.agent import get_agent_class -# Ray resources required to schedule a Trial -Resources = namedtuple("Resources", ["cpu", "gpu"]) +class Resources( + namedtuple("Resources", [ + "cpu", "gpu", "driver_cpu_limit", "driver_gpu_limit"])): + """Ray resources required to schedule a trial. + + Attributes: + cpu (int): Number of CPUs required for the trial total. + gpu (int): Number of GPUs required for the trial total. + driver_cpu_limit (int): Max CPUs allocated to the driver. + Defaults to all of the required CPUs. + driver_gpu_limit (int): Max GPUs allocated to the driver. + Defaults to all of the required GPUs. + """ + __slots__ = () + + def __new__(cls, cpu, gpu, driver_cpu_limit=None, driver_gpu_limit=None): + if driver_cpu_limit is not None: + assert driver_cpu_limit <= cpu + else: + driver_cpu_limit = cpu + if driver_gpu_limit is not None: + assert driver_gpu_limit <= gpu + else: + driver_gpu_limit = gpu + return super(Resources, cls).__new__( + cls, cpu, gpu, driver_cpu_limit, driver_gpu_limit) class Trial(object): @@ -22,6 +46,9 @@ class Trial(object): Trials start in the PENDING state, and transition to RUNNING once started. On error it transitions to ERROR, otherwise TERMINATED on success. + + The driver for the trial will be allocated at most `driver_cpu_limit` and + `driver_gpu_limit` CPUs and GPUs. """ PENDING = "PENDING" @@ -206,8 +233,8 @@ class Trial(object): self.status = Trial.RUNNING agent_cls = get_agent_class(self.alg) cls = ray.remote( - num_cpus=self.resources.cpu, num_gpus=self.resources.gpu)( - agent_cls) + num_cpus=self.resources.driver_cpu_limit, + num_gpus=self.resources.driver_gpu_limit)(agent_cls) self.agent = cls.remote( self.env_creator, self.config, self.local_dir, self.upload_dir, experiment_tag=self.experiment_tag)