mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 12:49:05 +08:00
[Placement Groups] Support CUDA_VISIBLE_DEVICES (#10053)
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
import pytest
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
import pytest_timeout
|
||||
except ImportError:
|
||||
pytest_timeout = None
|
||||
import sys
|
||||
|
||||
import ray
|
||||
import ray.test_utils
|
||||
@@ -172,5 +174,23 @@ def test_placement_group_hang(ray_start_cluster):
|
||||
assert "CPU_group_" in list(resources.keys())[0], resources
|
||||
|
||||
|
||||
def test_cuda_visible_devices(ray_start_cluster):
    """Check CUDA_VISIBLE_DEVICES for a GPU task run in a placement group.

    A node with one GPU is added to the cluster, a placement group with a
    single ``{"CPU": 1, "GPU": 1}`` bundle is created, and a ``num_gpus=1``
    task is submitted into that group. The task is expected to report
    ``"0"`` as its ``CUDA_VISIBLE_DEVICES`` value.

    Args:
        ray_start_cluster: Fixture providing the cluster object that nodes
            are added to and whose address the driver connects to.
    """

    @ray.remote(num_gpus=1)
    def read_visible_devices():
        # Evaluated inside the worker, so this is the value the task sees.
        return os.environ["CUDA_VISIBLE_DEVICES"]

    cluster = ray_start_cluster
    # The test uses exactly one node, and that node has exactly one GPU.
    cluster.add_node(num_gpus=1)
    ray.init(address=cluster.address)

    group = ray.experimental.placement_group([{"CPU": 1, "GPU": 1}])
    pending = read_visible_devices.options(placement_group_id=group).remote()

    visible = ray.get(pending)
    assert visible == "0", visible
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running this file directly hands it to pytest in verbose mode and
    # uses pytest's return value as the process exit status.
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
|
||||
|
||||
@@ -381,19 +381,18 @@ def get_gpu_ids(as_str=False):
|
||||
IDs in CUDA_VISIBLE_DEVICES. If not, the IDs will fall in the range
|
||||
[0, NUM_GPUS - 1], where NUM_GPUS is the number of GPUs that the node has.
|
||||
|
||||
Args:
|
||||
as_str (Boolean): If true, return gpu ids in string format. By default,
|
||||
it is False. This will change to default to True in the future.
|
||||
|
||||
Returns:
|
||||
A list of GPU IDs.
|
||||
"""
|
||||
|
||||
# TODO(ilr) Handle inserting resources in local mode
|
||||
all_resource_ids = global_worker.core_worker.resource_ids()
|
||||
assigned_ids = [
|
||||
resource_id for resource_id, _ in all_resource_ids.get("GPU", [])
|
||||
]
|
||||
assigned_ids = []
|
||||
for resource, assignment in all_resource_ids.items():
|
||||
# Handle both normal and placement group GPU resources.
|
||||
if resource == "GPU" or resource.startswith("GPU_group_"):
|
||||
for resource_id, _ in assignment:
|
||||
assigned_ids.append(resource_id)
|
||||
# If the user had already set CUDA_VISIBLE_DEVICES, then respect that (in
|
||||
# the sense that only GPU IDs that appear in CUDA_VISIBLE_DEVICES should be
|
||||
# returned).
|
||||
|
||||
Reference in New Issue
Block a user