From df4788e5011d62b2ab85f39d8df6899bcbb4ec17 Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Mon, 3 Sep 2018 11:12:23 -0700
Subject: [PATCH] [rllib/tune] Add test for fractional gpu support in xray
 mode; add rllib support for fractional gpu (#2768)

* frac gpu

* doc

* Update rllib-training.rst

* yapf

* remove xray
---
 doc/source/rllib-training.rst              |  5 +++++
 python/ray/rllib/agents/a3c/a2c.py         |  2 +-
 python/ray/rllib/agents/agent.py           |  2 ++
 python/ray/rllib/agents/bc/bc.py           |  4 ++--
 python/ray/rllib/agents/ddpg/apex.py       |  2 +-
 python/ray/rllib/agents/dqn/apex.py        |  2 +-
 python/ray/rllib/agents/dqn/dqn.py         |  2 +-
 python/ray/rllib/agents/impala/impala.py   |  2 +-
 python/ray/tune/test/trial_runner_test.py  | 23 ++++++++++++++++++++++
 test/jenkins_tests/run_multi_node_tests.sh |  6 ++++--
 10 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index 104909abf..937c36852 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -50,6 +50,11 @@ In an example below, we train A2C by specifying 8 workers through the config fla
     python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
         --run=A2C --config '{"num_workers": 8, "monitor": true}'
 
+Specifying Resources
+~~~~~~~~~~~~~~~~~~~~
+
+You can control the degree of parallelism used by setting the ``num_workers`` hyperparameter for most agents. Many agents also provide a ``num_gpus`` or ``gpu`` option. In addition, you can allocate a fraction of a GPU by setting ``gpu_fraction: f``, which defaults to ``1`` (a whole GPU) and only takes effect when the agent's GPU option (e.g., ``gpu``) is enabled. For example, with DQN you can pack five agents onto one GPU by setting ``gpu_fraction: 0.2``. Note that fractional GPU support requires enabling the experimental Xray backend by setting the environment variable ``RAY_USE_XRAY=1``.
+
 Evaluating Trained Agents
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/python/ray/rllib/agents/a3c/a2c.py b/python/ray/rllib/agents/a3c/a2c.py
index 9ea8a54d7..a792d1d16 100644
--- a/python/ray/rllib/agents/a3c/a2c.py
+++ b/python/ray/rllib/agents/a3c/a2c.py
@@ -34,6 +34,6 @@ class A2CAgent(A3CAgent):
         cf = merge_dicts(cls._default_config, config)
         return Resources(
             cpu=1,
-            gpu=1 if cf["gpu"] else 0,
+            gpu=cf["gpu_fraction"] if cf["gpu"] else 0,
             extra_cpu=cf["num_workers"],
             extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0)
diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py
index 9a74a3ff4..abd3379a3 100644
--- a/python/ray/rllib/agents/agent.py
+++ b/python/ray/rllib/agents/agent.py
@@ -64,6 +64,8 @@ COMMON_CONFIG = {
     "compress_observations": False,
     # Whether to write episode stats and videos to the agent log dir
     "monitor": False,
+    # Fraction of a GPU to allocate when GPU use is enabled (e.g., 0.3)
+    "gpu_fraction": 1,
 
     # === Multiagent ===
     "multiagent": {
diff --git a/python/ray/rllib/agents/bc/bc.py b/python/ray/rllib/agents/bc/bc.py
index c23b31a09..b2552bf99 100644
--- a/python/ray/rllib/agents/bc/bc.py
+++ b/python/ray/rllib/agents/bc/bc.py
@@ -53,12 +53,12 @@ class BCAgent(Agent):
     def default_resource_request(cls, config):
         cf = merge_dicts(cls._default_config, config)
         if cf["use_gpu_for_workers"]:
-            num_gpus_per_worker = 1
+            num_gpus_per_worker = cf["gpu_fraction"]
         else:
             num_gpus_per_worker = 0
         return Resources(
             cpu=1,
-            gpu=cf["gpu"] and 1 or 0,
+            gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
             extra_cpu=cf["num_workers"],
             extra_gpu=num_gpus_per_worker * cf["num_workers"])
 
diff --git a/python/ray/rllib/agents/ddpg/apex.py b/python/ray/rllib/agents/ddpg/apex.py
index d0508463c..c2276d0a9 100644
--- a/python/ray/rllib/agents/ddpg/apex.py
+++ b/python/ray/rllib/agents/ddpg/apex.py
@@ -48,7 +48,7 @@ class ApexDDPGAgent(DDPGAgent):
         cf = merge_dicts(cls._default_config, config)
         return Resources(
             cpu=1 + cf["optimizer"]["num_replay_buffer_shards"],
-            gpu=cf["gpu"] and 1 or 0,
+            gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
             extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
             extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
 
diff --git a/python/ray/rllib/agents/dqn/apex.py b/python/ray/rllib/agents/dqn/apex.py
index b120a0fbb..e6058b41f 100644
--- a/python/ray/rllib/agents/dqn/apex.py
+++ b/python/ray/rllib/agents/dqn/apex.py
@@ -47,7 +47,7 @@ class ApexAgent(DQNAgent):
         cf = merge_dicts(cls._default_config, config)
         return Resources(
             cpu=1 + cf["optimizer"]["num_replay_buffer_shards"],
-            gpu=cf["gpu"] and 1 or 0,
+            gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
             extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
             extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
 
diff --git a/python/ray/rllib/agents/dqn/dqn.py b/python/ray/rllib/agents/dqn/dqn.py
index d2873e602..7ee8385c2 100644
--- a/python/ray/rllib/agents/dqn/dqn.py
+++ b/python/ray/rllib/agents/dqn/dqn.py
@@ -132,7 +132,7 @@ class DQNAgent(Agent):
         cf = merge_dicts(cls._default_config, config)
         return Resources(
             cpu=1,
-            gpu=cf["gpu"] and 1 or 0,
+            gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
             extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
             extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
 
diff --git a/python/ray/rllib/agents/impala/impala.py b/python/ray/rllib/agents/impala/impala.py
index 69ed02bf2..8ad6d67a3 100644
--- a/python/ray/rllib/agents/impala/impala.py
+++ b/python/ray/rllib/agents/impala/impala.py
@@ -69,7 +69,7 @@ class ImpalaAgent(Agent):
         cf = dict(cls._default_config, **config)
         return Resources(
             cpu=1,
-            gpu=cf["gpu"] and 1 or 0,
+            gpu=cf["gpu"] and cf["gpu_fraction"] or 0,
             extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"],
             extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
 
diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py
index d8be2c02a..e34137a1d 100644
--- a/python/ray/tune/test/trial_runner_test.py
+++ b/python/ray/tune/test/trial_runner_test.py
@@ -800,6 +800,29 @@ class TrialRunnerTest(unittest.TestCase):
         self.assertEqual(trials[0].status, Trial.TERMINATED)
         self.assertEqual(trials[1].status, Trial.PENDING)
 
+    def testFractionalGpus(self):  # 4 trials x 0.5 GPU with num_gpus=1
+        ray.init(num_cpus=4, num_gpus=1, use_raylet=True)
+        runner = TrialRunner(BasicVariantGenerator())
+        kwargs = {  # every trial requests 1 CPU and half a GPU
+            "resources": Resources(cpu=1, gpu=0.5),
+        }
+        trials = [
+            Trial("__fake", **kwargs),
+            Trial("__fake", **kwargs),
+            Trial("__fake", **kwargs),
+            Trial("__fake", **kwargs)
+        ]
+        for t in trials:
+            runner.add_trial(t)
+
+        for _ in range(10):  # step the runner several times
+            runner.step()
+
+        self.assertEqual(trials[0].status, Trial.RUNNING)
+        self.assertEqual(trials[1].status, Trial.RUNNING)  # 2 x 0.5 = 1 GPU
+        self.assertEqual(trials[2].status, Trial.PENDING)  # GPU is full
+        self.assertEqual(trials[3].status, Trial.PENDING)
+
     def testResourceScheduler(self):
         ray.init(num_cpus=4, num_gpus=1)
         runner = TrialRunner(BasicVariantGenerator())
diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh
index b6e6c5105..15dbb540e 100755
--- a/test/jenkins_tests/run_multi_node_tests.sh
+++ b/test/jenkins_tests/run_multi_node_tests.sh
@@ -289,14 +289,16 @@ docker run  -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
 docker run  -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
     python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2
 
-docker run  -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+# No Xray for PyTorch
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
     python /ray/python/ray/rllib/train.py \
     --env PongDeterministic-v4 \
     --run A3C \
     --stop '{"training_iteration": 2}' \
     --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84, "channel_major": true}, "preprocessor_pref": "rllib"}'
 
-docker run  -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+# No Xray for PyTorch
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
     python /ray/python/ray/rllib/train.py \
     --env CartPole-v1 \
     --run A3C \