mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 16:24:23 +08:00
[docker] Detect CPUs in container correctly (#10507)
Co-authored-by: simon-mo <simon.mo@hey.com> Co-authored-by: Richard Liaw <rliaw@berkeley.edu> Co-authored-by: Alex Wu <itswu.alex@gmail.com>
This commit is contained in:
@@ -3,6 +3,7 @@ import errno
|
||||
import hashlib
|
||||
import inspect
|
||||
import logging
|
||||
import multiprocessing
|
||||
import numpy as np
|
||||
import os
|
||||
import signal
|
||||
@@ -498,6 +499,56 @@ def get_system_memory():
|
||||
return psutil_memory_in_bytes
|
||||
|
||||
|
||||
def _get_docker_cpus():
|
||||
# 1. Try using CFS Quota (https://bugs.openjdk.java.net/browse/JDK-8146115)
|
||||
# 2. Try Nproc (CPU sets)
|
||||
cpu_quota_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
|
||||
cpu_share_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
|
||||
num_cpus = 0
|
||||
if os.path.exists(cpu_quota_file_name) and os.path.exists(
|
||||
cpu_quota_file_name):
|
||||
with open(cpu_quota_file_name, "r") as f:
|
||||
num_cpus = int(f.read())
|
||||
if num_cpus != -1:
|
||||
with open(cpu_share_file_name, "r") as f:
|
||||
num_cpus /= int(f.read())
|
||||
return num_cpus
|
||||
|
||||
return int(subprocess.check_output("nproc"))
|
||||
|
||||
|
||||
def get_num_cpus():
|
||||
cpu_count = multiprocessing.cpu_count()
|
||||
if "RAY_USE_MULTIPROCESSING_CPU_COUNT" in os.environ:
|
||||
logger.info(
|
||||
"Using multiprocessing.cpu_count() to detect the number of CPUs. "
|
||||
"This method of CPU detection is buggy when used inside docker. "
|
||||
"To correctly detect CPUs remove the enivronment variable: "
|
||||
"`RAY_USE_MULTIPROCESSING_CPU_COUNT`.")
|
||||
return cpu_count
|
||||
try:
|
||||
# Not easy to get cpu count in docker, see:
|
||||
# https://bugs.python.org/issue36054
|
||||
docker_count = _get_docker_cpus()
|
||||
if docker_count != cpu_count:
|
||||
cpu_count = docker_count
|
||||
if "RAY_DISABLE_DOCKER_CPU_WARNING" not in os.environ:
|
||||
logger.warning(
|
||||
"Detecting limited number of CPUs due to docker. In "
|
||||
"previous versions of Ray, CPU detection in containers "
|
||||
"was buggy. Please ensure that Ray has enough CPUs "
|
||||
"allocated. This message will be removed in future "
|
||||
"version of Ray. You can set the environment variable: "
|
||||
"`RAY_DISABLE_DOCKER_CPU_WARNING` to remove it now.")
|
||||
|
||||
except Exception:
|
||||
# `nproc` and cgroup are linux-only. If docker only works on linux
|
||||
# (will run in a linux VM on other platforms), so this is fine.
|
||||
pass
|
||||
|
||||
return cpu_count
|
||||
|
||||
|
||||
def get_used_memory():
|
||||
"""Return the currently used system memory in bytes
|
||||
|
||||
|
||||
Reference in New Issue
Block a user