[docker] Detect CPUs in container correctly (#10507)

Co-authored-by: simon-mo <simon.mo@hey.com>
Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
Co-authored-by: Alex Wu <itswu.alex@gmail.com>
This commit is contained in:
Ian Rodney
2020-09-13 23:40:48 -07:00
committed by GitHub
parent 660aee6311
commit 5bc2ba38fd
35 changed files with 173 additions and 45 deletions
+51
View File
@@ -3,6 +3,7 @@ import errno
import hashlib
import inspect
import logging
import multiprocessing
import numpy as np
import os
import signal
@@ -498,6 +499,56 @@ def get_system_memory():
return psutil_memory_in_bytes
def _get_docker_cpus():
# 1. Try using CFS Quota (https://bugs.openjdk.java.net/browse/JDK-8146115)
# 2. Try Nproc (CPU sets)
cpu_quota_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
cpu_share_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
num_cpus = 0
if os.path.exists(cpu_quota_file_name) and os.path.exists(
cpu_quota_file_name):
with open(cpu_quota_file_name, "r") as f:
num_cpus = int(f.read())
if num_cpus != -1:
with open(cpu_share_file_name, "r") as f:
num_cpus /= int(f.read())
return num_cpus
return int(subprocess.check_output("nproc"))
def get_num_cpus():
cpu_count = multiprocessing.cpu_count()
if "RAY_USE_MULTIPROCESSING_CPU_COUNT" in os.environ:
logger.info(
"Using multiprocessing.cpu_count() to detect the number of CPUs. "
"This method of CPU detection is buggy when used inside docker. "
"To correctly detect CPUs remove the enivronment variable: "
"`RAY_USE_MULTIPROCESSING_CPU_COUNT`.")
return cpu_count
try:
# Not easy to get cpu count in docker, see:
# https://bugs.python.org/issue36054
docker_count = _get_docker_cpus()
if docker_count != cpu_count:
cpu_count = docker_count
if "RAY_DISABLE_DOCKER_CPU_WARNING" not in os.environ:
logger.warning(
"Detecting limited number of CPUs due to docker. In "
"previous versions of Ray, CPU detection in containers "
"was buggy. Please ensure that Ray has enough CPUs "
"allocated. This message will be removed in future "
"version of Ray. You can set the environment variable: "
"`RAY_DISABLE_DOCKER_CPU_WARNING` to remove it now.")
except Exception:
# `nproc` and cgroup are linux-only. If docker only works on linux
# (will run in a linux VM on other platforms), so this is fine.
pass
return cpu_count
def get_used_memory():
"""Return the currently used system memory in bytes