[Core] Do not convert gpu id to int (#9744)

Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
yncxcw
2020-08-11 13:09:46 -06:00
committed by GitHub
parent d6226b80bb
commit 32cd94b750
9 changed files with 54 additions and 20 deletions
+1 -1
View File
@@ -353,7 +353,7 @@ cdef execute_task(
CFiberEvent task_done_event
# Automatically restrict the GPUs available to this task.
ray.utils.set_cuda_visible_devices(ray.get_gpu_ids())
ray.utils.set_cuda_visible_devices(ray.get_gpu_ids(as_str=True))
function_descriptor = CFunctionDescriptorToPython(
ray_function.GetFunctionDescriptor())
+2 -2
View File
@@ -95,10 +95,10 @@ def test_actor_gpus(ray_start_cluster):
@ray.remote(num_gpus=1)
class Actor1:
def __init__(self):
self.gpu_ids = ray.get_gpu_ids()
self.gpu_ids = ray.get_gpu_ids(as_str=True)
def get_location_and_ids(self):
assert ray.get_gpu_ids() == self.gpu_ids
assert ray.get_gpu_ids(as_str=True) == self.gpu_ids
return (ray.worker.global_worker.node.unique_id,
tuple(self.gpu_ids))
+23 -4
View File
@@ -633,6 +633,25 @@ def save_gpu_ids_shutdown_only():
del os.environ["CUDA_VISIBLE_DEVICES"]
@pytest.mark.parametrize("as_str", [False, True])
def test_gpu_ids_as_str(save_gpu_ids_shutdown_only, as_str):
    """Check the element type returned by ``ray.get_gpu_ids`` in a task.

    With ``as_str=True`` every GPU ID must be a ``str``; with
    ``as_str=False`` every GPU ID must be an ``int``.

    Args:
        save_gpu_ids_shutdown_only: Fixture used by the GPU tests in this
            file; its visible tail removes ``CUDA_VISIBLE_DEVICES`` from
            the environment.
        as_str: Value forwarded to ``ray.get_gpu_ids`` inside the task.
    """
    allowed_gpu_ids = [4, 5, 6]
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
        str(i) for i in allowed_gpu_ids)
    # Declare the GPU count explicitly so that the GPU-requesting tasks below
    # can be scheduled regardless of what hardware the test machine has.
    ray.init(num_gpus=len(allowed_gpu_ids))

    # The task must actually request a GPU: a task without a GPU assignment
    # gets an empty ID list, and the type checks below would never execute.
    @ray.remote(num_gpus=1)
    def get_gpu_ids(as_str):
        gpu_ids = ray.get_gpu_ids(as_str)
        # Guard against the loop below passing vacuously on an empty list.
        assert len(gpu_ids) == 1
        for gpu_id in gpu_ids:
            if as_str:
                assert isinstance(gpu_id, str)
            else:
                assert isinstance(gpu_id, int)
            # Whatever the representation, the ID must be one we exposed.
            assert int(gpu_id) in allowed_gpu_ids

    ray.get([get_gpu_ids.remote(as_str) for _ in range(10)])
def test_specific_gpus(save_gpu_ids_shutdown_only):
allowed_gpu_ids = [4, 5, 6]
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
@@ -643,14 +662,14 @@ def test_specific_gpus(save_gpu_ids_shutdown_only):
def f():
gpu_ids = ray.get_gpu_ids()
assert len(gpu_ids) == 1
assert gpu_ids[0] in allowed_gpu_ids
assert int(gpu_ids[0]) in allowed_gpu_ids
@ray.remote(num_gpus=2)
def g():
gpu_ids = ray.get_gpu_ids()
assert len(gpu_ids) == 2
assert gpu_ids[0] in allowed_gpu_ids
assert gpu_ids[1] in allowed_gpu_ids
assert int(gpu_ids[0]) in allowed_gpu_ids
assert int(gpu_ids[1]) in allowed_gpu_ids
ray.get([f.remote() for _ in range(100)])
ray.get([g.remote() for _ in range(100)])
@@ -671,7 +690,7 @@ def test_local_mode_gpus(save_gpu_ids_shutdown_only):
gpu_ids = ray.get_gpu_ids()
assert len(gpu_ids) == 3
for gpu in gpu_ids:
assert gpu in allowed_gpu_ids
assert int(gpu) in allowed_gpu_ids
ray.get([f.remote() for _ in range(100)])
+6 -5
View File
@@ -271,9 +271,9 @@ def get_cuda_visible_devices():
"""Get the device IDs in the CUDA_VISIBLE_DEVICES environment variable.
Returns:
if CUDA_VISIBLE_DEVICES is set, this returns a list of integers with
the IDs of the GPUs. If it is not set or is set to NoDevFiles,
this returns None.
devices (List[str]): If CUDA_VISIBLE_DEVICES is set, returns a
list of strings representing the IDs of the visible GPUs.
If it is not set or is set to NoDevFiles, returns empty list.
"""
gpu_ids_str = os.environ.get("CUDA_VISIBLE_DEVICES", None)
@@ -286,7 +286,8 @@ def get_cuda_visible_devices():
if gpu_ids_str == "NoDevFiles":
return []
return [int(i) for i in gpu_ids_str.split(",")]
# GPU identifiers are given as strings representing integers or UUIDs.
return list(gpu_ids_str.split(","))
last_set_gpu_ids = None
@@ -296,7 +297,7 @@ def set_cuda_visible_devices(gpu_ids):
"""Set the CUDA_VISIBLE_DEVICES environment variable.
Args:
gpu_ids: This is a list of integers representing GPU IDs.
gpu_ids (List[str]): List of strings representing GPU IDs.
"""
global last_set_gpu_ids
+16 -2
View File
@@ -373,7 +373,7 @@ class Worker:
sys.exit(0)
def get_gpu_ids():
def get_gpu_ids(as_str=False):
"""Get the IDs of the GPUs that are available to the worker.
If the CUDA_VISIBLE_DEVICES environment variable was set when the worker
@@ -381,6 +381,10 @@ def get_gpu_ids():
IDs in CUDA_VISIBLE_DEVICES. If not, the IDs will fall in the range
[0, NUM_GPUS - 1], where NUM_GPUS is the number of GPUs that the node has.
Args:
as_str (Boolean): If true, return gpu ids in string format. By default,
it is False. This will change to default to True in the future.
Returns:
A list of GPU IDs.
"""
@@ -400,7 +404,17 @@ def get_gpu_ids():
# Give all GPUs in local_mode.
if global_worker.mode == LOCAL_MODE:
max_gpus = global_worker.node.get_resource_spec().num_gpus
return global_worker.original_gpu_ids[:max_gpus]
assigned_ids = global_worker.original_gpu_ids[:max_gpus]
if not as_str:
from ray.util.debug import log_once
if log_once("ray.get_gpu_ids.as_str"):
logger.warning(
"ray.get_gpu_ids() will return a list of strings by default"
" in a future version of Ray for compatibility with CUDA. "
"To enable the forward-compatible behavior, use "
"`ray.get_gpu_ids(as_str=True)`.")
assigned_ids = [int(assigned_id) for assigned_id in assigned_ids]
return assigned_ids