From df4788e5011d62b2ab85f39d8df6899bcbb4ec17 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 3 Sep 2018 11:12:23 -0700 Subject: [PATCH] [rllib/tune] Add test for fractional gpu support in xray mode; add rllib support for fractional gpu (#2768) * frac gpu * doc * Update rllib-training.rst * yapf * remove xray --- doc/source/rllib-training.rst | 5 +++++ python/ray/rllib/agents/a3c/a2c.py | 2 +- python/ray/rllib/agents/agent.py | 2 ++ python/ray/rllib/agents/bc/bc.py | 4 ++-- python/ray/rllib/agents/ddpg/apex.py | 2 +- python/ray/rllib/agents/dqn/apex.py | 2 +- python/ray/rllib/agents/dqn/dqn.py | 2 +- python/ray/rllib/agents/impala/impala.py | 2 +- python/ray/tune/test/trial_runner_test.py | 23 ++++++++++++++++++++++ test/jenkins_tests/run_multi_node_tests.sh | 6 ++++-- 10 files changed, 41 insertions(+), 9 deletions(-) diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst index 104909abf..937c36852 100644 --- a/doc/source/rllib-training.rst +++ b/doc/source/rllib-training.rst @@ -50,6 +50,11 @@ In an example below, we train A2C by specifying 8 workers through the config fla python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \ --run=A2C --config '{"num_workers": 8, "monitor": true}' +Specifying Resources +~~~~~~~~~~~~~~~~~~~~ + +You can control the degree of parallelism used by setting the ``num_workers`` hyperparameter for most agents. Many agents also provide a ``num_gpus`` or ``gpu`` option. In addition, you can allocate a fraction of a GPU by setting ``gpu_fraction: f``. For example, with DQN you can pack five agents onto one GPU by setting ``gpu_fraction: 0.2``. Note that fractional GPU support requires enabling the experimental Xray backend by setting the environment variable ``RAY_USE_XRAY=1``. + Evaluating Trained Agents ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/ray/rllib/agents/a3c/a2c.py b/python/ray/rllib/agents/a3c/a2c.py index 9ea8a54d7..a792d1d16 100644 --- a/python/ray/rllib/agents/a3c/a2c.py +++ b/python/ray/rllib/agents/a3c/a2c.py @@ -34,6 +34,6 @@ class A2CAgent(A3CAgent): cf = merge_dicts(cls._default_config, config) return Resources( cpu=1, - gpu=1 if cf["gpu"] else 0, + gpu=cf["gpu_fraction"] if cf["gpu"] else 0, extra_cpu=cf["num_workers"], extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0) diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 9a74a3ff4..abd3379a3 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -64,6 +64,8 @@ COMMON_CONFIG = { "compress_observations": False, # Whether to write episode stats and videos to the agent log dir "monitor": False, + # Allocate a fraction of a GPU instead of one (e.g., 0.3 GPUs) + "gpu_fraction": 1, # === Multiagent === "multiagent": { diff --git a/python/ray/rllib/agents/bc/bc.py b/python/ray/rllib/agents/bc/bc.py index c23b31a09..b2552bf99 100644 --- a/python/ray/rllib/agents/bc/bc.py +++ b/python/ray/rllib/agents/bc/bc.py @@ -53,12 +53,12 @@ class BCAgent(Agent): def default_resource_request(cls, config): cf = merge_dicts(cls._default_config, config) if cf["use_gpu_for_workers"]: - num_gpus_per_worker = 1 + num_gpus_per_worker = cf["gpu_fraction"] else: num_gpus_per_worker = 0 return Resources( cpu=1, - gpu=cf["gpu"] and 1 or 0, + gpu=cf["gpu"] and cf["gpu_fraction"] or 0, extra_cpu=cf["num_workers"], extra_gpu=num_gpus_per_worker * cf["num_workers"]) diff --git a/python/ray/rllib/agents/ddpg/apex.py b/python/ray/rllib/agents/ddpg/apex.py index d0508463c..c2276d0a9 100644 --- a/python/ray/rllib/agents/ddpg/apex.py +++ b/python/ray/rllib/agents/ddpg/apex.py @@ -48,7 +48,7 @@ class ApexDDPGAgent(DDPGAgent): cf = merge_dicts(cls._default_config, config) return Resources( cpu=1 + cf["optimizer"]["num_replay_buffer_shards"], - gpu=cf["gpu"] and 1 or 0, + gpu=cf["gpu"] and cf["gpu_fraction"] or 0, extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"], extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"]) diff --git a/python/ray/rllib/agents/dqn/apex.py b/python/ray/rllib/agents/dqn/apex.py index b120a0fbb..e6058b41f 100644 --- a/python/ray/rllib/agents/dqn/apex.py +++ b/python/ray/rllib/agents/dqn/apex.py @@ -47,7 +47,7 @@ class ApexAgent(DQNAgent): cf = merge_dicts(cls._default_config, config) return Resources( cpu=1 + cf["optimizer"]["num_replay_buffer_shards"], - gpu=cf["gpu"] and 1 or 0, + gpu=cf["gpu"] and cf["gpu_fraction"] or 0, extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"], extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"]) diff --git a/python/ray/rllib/agents/dqn/dqn.py b/python/ray/rllib/agents/dqn/dqn.py index d2873e602..7ee8385c2 100644 --- a/python/ray/rllib/agents/dqn/dqn.py +++ b/python/ray/rllib/agents/dqn/dqn.py @@ -132,7 +132,7 @@ class DQNAgent(Agent): cf = merge_dicts(cls._default_config, config) return Resources( cpu=1, - gpu=cf["gpu"] and 1 or 0, + gpu=cf["gpu"] and cf["gpu_fraction"] or 0, extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"], extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"]) diff --git a/python/ray/rllib/agents/impala/impala.py b/python/ray/rllib/agents/impala/impala.py index 69ed02bf2..8ad6d67a3 100644 --- a/python/ray/rllib/agents/impala/impala.py +++ b/python/ray/rllib/agents/impala/impala.py @@ -69,7 +69,7 @@ class ImpalaAgent(Agent): cf = dict(cls._default_config, **config) return Resources( cpu=1, - gpu=cf["gpu"] and 1 or 0, + gpu=cf["gpu"] and cf["gpu_fraction"] or 0, extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"], extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"]) diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py index d8be2c02a..e34137a1d 100644 --- a/python/ray/tune/test/trial_runner_test.py +++ b/python/ray/tune/test/trial_runner_test.py @@ -800,6 +800,29 @@ class TrialRunnerTest(unittest.TestCase): self.assertEqual(trials[0].status, Trial.TERMINATED) self.assertEqual(trials[1].status, Trial.PENDING) + def testFractionalGpus(self): + ray.init(num_cpus=4, num_gpus=1, use_raylet=True) + runner = TrialRunner(BasicVariantGenerator()) + kwargs = { + "resources": Resources(cpu=1, gpu=0.5), + } + trials = [ + Trial("__fake", **kwargs), + Trial("__fake", **kwargs), + Trial("__fake", **kwargs), + Trial("__fake", **kwargs) + ] + for t in trials: + runner.add_trial(t) + + for _ in range(10): + runner.step() + + self.assertEqual(trials[0].status, Trial.RUNNING) + self.assertEqual(trials[1].status, Trial.RUNNING) + self.assertEqual(trials[2].status, Trial.PENDING) + self.assertEqual(trials[3].status, Trial.PENDING) + def testResourceScheduler(self): ray.init(num_cpus=4, num_gpus=1) runner = TrialRunner(BasicVariantGenerator()) diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index b6e6c5105..15dbb540e 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -289,14 +289,16 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2 -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +# No Xray for PyTorch +docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env PongDeterministic-v4 \ --run A3C \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84, "channel_major": true}, "preprocessor_pref": "rllib"}' -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +# No Xray for PyTorch +docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run A3C \