[Placement Groups] Support CUDA_VISIBLE_DEVICES (#10053)

This commit is contained in:
Eric Liang
2020-08-13 18:00:04 -07:00
committed by GitHub
parent 01f38bc5d1
commit c9f13b0833
2 changed files with 27 additions and 8 deletions
+21 -1
View File
@@ -1,9 +1,11 @@
import pytest
import os
import sys
try:
import pytest_timeout
except ImportError:
pytest_timeout = None
import sys
import ray
import ray.test_utils
@@ -172,5 +174,23 @@ def test_placement_group_hang(ray_start_cluster):
assert "CPU_group_" in list(resources.keys())[0], resources
def test_cuda_visible_devices(ray_start_cluster):
    """Check CUDA_VISIBLE_DEVICES for a GPU task run in a placement group.

    A single node with one GPU is added to the cluster, a placement group
    reserving one CPU and one GPU is created, and a one-GPU remote task is
    submitted into that group. The task reports the value of its
    CUDA_VISIBLE_DEVICES environment variable, which must be "0".
    """

    @ray.remote(num_gpus=1)
    def read_visible_devices():
        # Raises KeyError inside the worker if the variable was never set.
        return os.environ["CUDA_VISIBLE_DEVICES"]

    # One node with exactly one GPU is enough for this check.
    ray_start_cluster.add_node(num_gpus=1)
    ray.init(address=ray_start_cluster.address)

    bundles = [{"CPU": 1, "GPU": 1}]
    group = ray.experimental.placement_group(bundles)

    pending = read_visible_devices.options(placement_group_id=group).remote()
    devices = ray.get(pending)
    assert devices == "0", devices
if __name__ == "__main__":
    # Run this file's tests verbosely and propagate pytest's exit status.
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
+6 -7
View File
@@ -381,19 +381,18 @@ def get_gpu_ids(as_str=False):
IDs in CUDA_VISIBLE_DEVICES. If not, the IDs will fall in the range
[0, NUM_GPUS - 1], where NUM_GPUS is the number of GPUs that the node has.
Args:
as_str (Boolean): If true, return gpu ids in string format. By default,
it is False. This will change to default to True in the future.
Returns:
A list of GPU IDs.
"""
# TODO(ilr) Handle inserting resources in local mode
all_resource_ids = global_worker.core_worker.resource_ids()
assigned_ids = [
resource_id for resource_id, _ in all_resource_ids.get("GPU", [])
]
assigned_ids = []
for resource, assignment in all_resource_ids.items():
# Handle both normal and placement group GPU resources.
if resource == "GPU" or resource.startswith("GPU_group_"):
for resource_id, _ in assignment:
assigned_ids.append(resource_id)
# If the user had already set CUDA_VISIBLE_DEVICES, then respect that (in
# the sense that only GPU IDs that appear in CUDA_VISIBLE_DEVICES should be
# returned).