mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 22:34:24 +08:00
[Core] Do not convert gpu id to int (#9744)
Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
@@ -353,7 +353,7 @@ cdef execute_task(
|
||||
CFiberEvent task_done_event
|
||||
|
||||
# Automatically restrict the GPUs available to this task.
|
||||
ray.utils.set_cuda_visible_devices(ray.get_gpu_ids())
|
||||
ray.utils.set_cuda_visible_devices(ray.get_gpu_ids(as_str=True))
|
||||
|
||||
function_descriptor = CFunctionDescriptorToPython(
|
||||
ray_function.GetFunctionDescriptor())
|
||||
|
||||
@@ -95,10 +95,10 @@ def test_actor_gpus(ray_start_cluster):
|
||||
@ray.remote(num_gpus=1)
|
||||
class Actor1:
|
||||
def __init__(self):
|
||||
self.gpu_ids = ray.get_gpu_ids()
|
||||
self.gpu_ids = ray.get_gpu_ids(as_str=True)
|
||||
|
||||
def get_location_and_ids(self):
|
||||
assert ray.get_gpu_ids() == self.gpu_ids
|
||||
assert ray.get_gpu_ids(as_str=True) == self.gpu_ids
|
||||
return (ray.worker.global_worker.node.unique_id,
|
||||
tuple(self.gpu_ids))
|
||||
|
||||
|
||||
@@ -633,6 +633,25 @@ def save_gpu_ids_shutdown_only():
|
||||
del os.environ["CUDA_VISIBLE_DEVICES"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("as_str", [False, True])
|
||||
def test_gpu_ids_as_str(save_gpu_ids_shutdown_only, as_str):
|
||||
allowed_gpu_ids = [4, 5, 6]
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
|
||||
str(i) for i in allowed_gpu_ids)
|
||||
ray.init()
|
||||
|
||||
@ray.remote
|
||||
def get_gpu_ids(as_str):
|
||||
gpu_ids = ray.get_gpu_ids(as_str)
|
||||
for gpu_id in gpu_ids:
|
||||
if as_str:
|
||||
assert isinstance(gpu_id, str)
|
||||
else:
|
||||
assert isinstance(gpu_id, int)
|
||||
|
||||
ray.get([get_gpu_ids.remote(as_str) for _ in range(10)])
|
||||
|
||||
|
||||
def test_specific_gpus(save_gpu_ids_shutdown_only):
|
||||
allowed_gpu_ids = [4, 5, 6]
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
|
||||
@@ -643,14 +662,14 @@ def test_specific_gpus(save_gpu_ids_shutdown_only):
|
||||
def f():
|
||||
gpu_ids = ray.get_gpu_ids()
|
||||
assert len(gpu_ids) == 1
|
||||
assert gpu_ids[0] in allowed_gpu_ids
|
||||
assert int(gpu_ids[0]) in allowed_gpu_ids
|
||||
|
||||
@ray.remote(num_gpus=2)
|
||||
def g():
|
||||
gpu_ids = ray.get_gpu_ids()
|
||||
assert len(gpu_ids) == 2
|
||||
assert gpu_ids[0] in allowed_gpu_ids
|
||||
assert gpu_ids[1] in allowed_gpu_ids
|
||||
assert int(gpu_ids[0]) in allowed_gpu_ids
|
||||
assert int(gpu_ids[1]) in allowed_gpu_ids
|
||||
|
||||
ray.get([f.remote() for _ in range(100)])
|
||||
ray.get([g.remote() for _ in range(100)])
|
||||
@@ -671,7 +690,7 @@ def test_local_mode_gpus(save_gpu_ids_shutdown_only):
|
||||
gpu_ids = ray.get_gpu_ids()
|
||||
assert len(gpu_ids) == 3
|
||||
for gpu in gpu_ids:
|
||||
assert gpu in allowed_gpu_ids
|
||||
assert int(gpu) in allowed_gpu_ids
|
||||
|
||||
ray.get([f.remote() for _ in range(100)])
|
||||
|
||||
|
||||
+6
-5
@@ -271,9 +271,9 @@ def get_cuda_visible_devices():
|
||||
"""Get the device IDs in the CUDA_VISIBLE_DEVICES environment variable.
|
||||
|
||||
Returns:
|
||||
if CUDA_VISIBLE_DEVICES is set, this returns a list of integers with
|
||||
the IDs of the GPUs. If it is not set or is set to NoDevFiles,
|
||||
this returns None.
|
||||
devices (List[str]): If CUDA_VISIBLE_DEVICES is set, returns a
|
||||
list of strings representing the IDs of the visible GPUs.
|
||||
If it is not set or is set to NoDevFiles, returns an empty list.
|
||||
"""
|
||||
gpu_ids_str = os.environ.get("CUDA_VISIBLE_DEVICES", None)
|
||||
|
||||
@@ -286,7 +286,8 @@ def get_cuda_visible_devices():
|
||||
if gpu_ids_str == "NoDevFiles":
|
||||
return []
|
||||
|
||||
return [int(i) for i in gpu_ids_str.split(",")]
|
||||
# GPU identifiers are given as strings representing integers or UUIDs.
|
||||
return list(gpu_ids_str.split(","))
|
||||
|
||||
|
||||
last_set_gpu_ids = None
|
||||
@@ -296,7 +297,7 @@ def set_cuda_visible_devices(gpu_ids):
|
||||
"""Set the CUDA_VISIBLE_DEVICES environment variable.
|
||||
|
||||
Args:
|
||||
gpu_ids: This is a list of integers representing GPU IDs.
|
||||
gpu_ids (List[str]): List of strings representing GPU IDs.
|
||||
"""
|
||||
|
||||
global last_set_gpu_ids
|
||||
|
||||
+16
-2
@@ -373,7 +373,7 @@ class Worker:
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
def get_gpu_ids():
|
||||
def get_gpu_ids(as_str=False):
|
||||
"""Get the IDs of the GPUs that are available to the worker.
|
||||
|
||||
If the CUDA_VISIBLE_DEVICES environment variable was set when the worker
|
||||
@@ -381,6 +381,10 @@ def get_gpu_ids():
|
||||
IDs in CUDA_VISIBLE_DEVICES. If not, the IDs will fall in the range
|
||||
[0, NUM_GPUS - 1], where NUM_GPUS is the number of GPUs that the node has.
|
||||
|
||||
Args:
|
||||
as_str (bool): If True, return the GPU IDs as strings. By default,
|
||||
it is False. The default will change to True in a future version.
|
||||
|
||||
Returns:
|
||||
A list of GPU IDs.
|
||||
"""
|
||||
@@ -400,7 +404,17 @@ def get_gpu_ids():
|
||||
# Give all GPUs in local_mode.
|
||||
if global_worker.mode == LOCAL_MODE:
|
||||
max_gpus = global_worker.node.get_resource_spec().num_gpus
|
||||
return global_worker.original_gpu_ids[:max_gpus]
|
||||
assigned_ids = global_worker.original_gpu_ids[:max_gpus]
|
||||
|
||||
if not as_str:
|
||||
from ray.util.debug import log_once
|
||||
if log_once("ray.get_gpu_ids.as_str"):
|
||||
logger.warning(
|
||||
"ray.get_gpu_ids() will return a list of strings by default"
|
||||
" in a future version of Ray for compatibility with CUDA. "
|
||||
"To enable the forward-compatible behavior, use "
|
||||
"`ray.get_gpu_ids(as_str=True)`.")
|
||||
assigned_ids = [int(assigned_id) for assigned_id in assigned_ids]
|
||||
|
||||
return assigned_ids
|
||||
|
||||
|
||||
Reference in New Issue
Block a user