Collect GPU IDs from placement-group resources in get_gpu_ids().

worker.py: get_gpu_ids() now gathers resource IDs from the plain "GPU"
resource and from every resource whose name starts with "GPU_group_",
instead of reading only the "GPU" entry.

test_placement_group.py: adds test_cuda_visible_devices, which runs a
num_gpus=1 task inside a {"CPU": 1, "GPU": 1} placement group on a node
added with num_gpus=1 and asserts the task sees CUDA_VISIBLE_DEVICES == "0".
"import sys" moves to the top of the file next to the new "import os".

Review note: the "Args: as_str" docstring section is retained as context
rather than deleted, because the hunk context shows get_gpu_ids still takes
as_str=False. The worker.py hunk header was adjusted accordingly (+381,22),
and the post-image hash on the worker.py index line is the one from the
original patch, so it no longer matches the resulting blob.

diff --git a/python/ray/tests/test_placement_group.py b/python/ray/tests/test_placement_group.py
index 755da5678..5a4651cbc 100644
--- a/python/ray/tests/test_placement_group.py
+++ b/python/ray/tests/test_placement_group.py
@@ -1,9 +1,11 @@
 import pytest
+import os
+import sys
+
 try:
     import pytest_timeout
 except ImportError:
     pytest_timeout = None
-import sys
 
 import ray
 import ray.test_utils
@@ -172,5 +174,23 @@ def test_placement_group_hang(ray_start_cluster):
     assert "CPU_group_" in list(resources.keys())[0], resources
 
 
+def test_cuda_visible_devices(ray_start_cluster):
+    @ray.remote(num_gpus=1)
+    def f():
+        return os.environ["CUDA_VISIBLE_DEVICES"]
+
+    cluster = ray_start_cluster
+    num_nodes = 1
+    for _ in range(num_nodes):
+        cluster.add_node(num_gpus=1)
+    ray.init(address=cluster.address)
+
+    g1 = ray.experimental.placement_group([{"CPU": 1, "GPU": 1}])
+    o1 = f.options(placement_group_id=g1).remote()
+
+    devices = ray.get(o1)
+    assert devices == "0", devices
+
+
 if __name__ == "__main__":
     sys.exit(pytest.main(["-v", __file__]))
diff --git a/python/ray/worker.py b/python/ray/worker.py
index 749917c54..ee0e073f2 100644
--- a/python/ray/worker.py
+++ b/python/ray/worker.py
@@ -381,19 +381,22 @@ def get_gpu_ids(as_str=False):
     IDs in CUDA_VISIBLE_DEVICES. If not, the IDs will fall in the range
     [0, NUM_GPUS - 1], where NUM_GPUS is the number of GPUs that the node has.
 
     Args:
         as_str (Boolean): If true, return gpu ids in string format. By default,
             it is False. This will change to default to True in the future.
 
     Returns:
         A list of GPU IDs.
     """
 
     # TODO(ilr) Handle inserting resources in local mode
     all_resource_ids = global_worker.core_worker.resource_ids()
-    assigned_ids = [
-        resource_id for resource_id, _ in all_resource_ids.get("GPU", [])
-    ]
+    assigned_ids = []
+    for resource, assignment in all_resource_ids.items():
+        # Handle both normal and placement group GPU resources.
+        if resource == "GPU" or resource.startswith("GPU_group_"):
+            for resource_id, _ in assignment:
+                assigned_ids.append(resource_id)
     # If the user had already set CUDA_VISIBLE_DEVICES, then respect that (in
     # the sense that only GPU IDs that appear in CUDA_VISIBLE_DEVICES should be
     # returned).