mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 18:27:03 +08:00
Ray, Tune, and RLlib support for memory, object_store_memory options (#5226)
This commit is contained in:
committed by
Robert Nishihara
parent
c852213b83
commit
e2e30ca507
+35
-3
@@ -273,9 +273,11 @@ def set_cuda_visible_devices(gpu_ids):
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in gpu_ids])
|
||||
|
||||
|
||||
def resources_from_resource_arguments(default_num_cpus, default_num_gpus,
|
||||
default_resources, runtime_num_cpus,
|
||||
runtime_num_gpus, runtime_resources):
|
||||
def resources_from_resource_arguments(
|
||||
default_num_cpus, default_num_gpus, default_memory,
|
||||
default_object_store_memory, default_resources, runtime_num_cpus,
|
||||
runtime_num_gpus, runtime_memory, runtime_object_store_memory,
|
||||
runtime_resources):
|
||||
"""Determine a task's resource requirements.
|
||||
|
||||
Args:
|
||||
@@ -283,12 +285,19 @@ def resources_from_resource_arguments(default_num_cpus, default_num_gpus,
|
||||
or actor method.
|
||||
default_num_gpus: The default number of GPUs required by this function
|
||||
or actor method.
|
||||
default_memory: The default heap memory required by this function
|
||||
or actor method.
|
||||
default_object_store_memory: The default object store memory required
|
||||
by this function or actor method.
|
||||
default_resources: The default custom resources required by this
|
||||
function or actor method.
|
||||
runtime_num_cpus: The number of CPUs requested when the task was
|
||||
invoked.
|
||||
runtime_num_gpus: The number of GPUs requested when the task was
|
||||
invoked.
|
||||
runtime_memory: The heap memory requested when the task was invoked.
|
||||
runtime_object_store_memory: The object store memory requested when
|
||||
the task was invoked.
|
||||
runtime_resources: The custom resources requested when the task was
|
||||
invoked.
|
||||
|
||||
@@ -305,6 +314,9 @@ def resources_from_resource_arguments(default_num_cpus, default_num_gpus,
|
||||
if "CPU" in resources or "GPU" in resources:
|
||||
raise ValueError("The resources dictionary must not "
|
||||
"contain the key 'CPU' or 'GPU'")
|
||||
elif "memory" in resources or "object_store_memory" in resources:
|
||||
raise ValueError("The resources dictionary must not "
|
||||
"contain the key 'memory' or 'object_store_memory'")
|
||||
|
||||
assert default_num_cpus is not None
|
||||
resources["CPU"] = (default_num_cpus
|
||||
@@ -315,6 +327,16 @@ def resources_from_resource_arguments(default_num_cpus, default_num_gpus,
|
||||
elif default_num_gpus is not None:
|
||||
resources["GPU"] = default_num_gpus
|
||||
|
||||
memory = default_memory or runtime_memory
|
||||
object_store_memory = (default_object_store_memory
|
||||
or runtime_object_store_memory)
|
||||
if memory is not None:
|
||||
resources["memory"] = ray_constants.to_memory_units(
|
||||
memory, round_up=True)
|
||||
if object_store_memory is not None:
|
||||
resources["object_store_memory"] = ray_constants.to_memory_units(
|
||||
object_store_memory, round_up=True)
|
||||
|
||||
return resources
|
||||
|
||||
|
||||
@@ -422,6 +444,16 @@ def estimate_available_memory():
|
||||
overestimate if psutil is not installed.
|
||||
"""
|
||||
|
||||
# check cgroup memory first
|
||||
try:
|
||||
with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "rb") as f:
|
||||
cgroup_memory_usage = int(f.read())
|
||||
except IOError:
|
||||
cgroup_memory_usage = None
|
||||
|
||||
if cgroup_memory_usage is not None:
|
||||
return get_system_memory() - cgroup_memory_usage
|
||||
|
||||
# Use psutil if it is available.
|
||||
try:
|
||||
import psutil
|
||||
|
||||
Reference in New Issue
Block a user