Display GPU Utilization in the Dashboard (#8564)

This commit is contained in:
Max Fitton
2020-06-15 13:27:44 -07:00
committed by GitHub
parent 6c49c01837
commit ddb9368f2c
13 changed files with 313 additions and 48 deletions
+29 -1
View File
@@ -10,7 +10,6 @@ import platform
import subprocess
import sys
from concurrent import futures
import ray
import psutil
import ray.ray_constants as ray_constants
@@ -24,6 +23,13 @@ from ray.core.generated import reporter_pb2_grpc
# entry/init points.
logger = logging.getLogger(__name__)
# gpustat is an optional dependency. When it cannot be imported, the name is
# bound to None so the rest of the module can test for it, and a warning
# tells the user how to turn GPU monitoring on.
try:
    import gpustat.core as gpustat
except ImportError:
    gpustat = None
    logger.warning(
        "Install gpustat with 'pip install gpustat' to enable GPU monitoring."
    )
class ReporterServer(reporter_pb2_grpc.ReporterServiceServicer):
def __init__(self):
@@ -107,6 +113,27 @@ class Reporter:
def get_cpu_percent():
    """Return the value reported by ``psutil.cpu_percent()``."""
    cpu_percent = psutil.cpu_percent()
    return cpu_percent
@staticmethod
def get_gpu_usage():
    """Collect per-GPU statistics through gpustat.

    Returns:
        A list holding one dict per GPU returned by
        ``gpustat.new_query().gpus``. Each dict is a copy of that GPU's
        ``entry`` mapping with every "." in a key replaced by "_". The
        list is empty when the module-level ``gpustat`` is None or when
        the query raises.
    """
    if gpustat is None:
        return []

    try:
        queried_gpus = gpustat.new_query().gpus
    except Exception as e:
        # Best effort: a failed query is logged at debug level and
        # reported as "no GPUs" rather than propagated to the caller.
        logger.debug(
            "gpustat failed to retrieve GPU information: {}".format(e))
        return []

    # The entry keys contain periods, which throw off the javascript
    # consumer, so each "." is rewritten to "_".
    return [
        {
            field.replace(".", "_"): value
            for field, value in gpu.entry.items()
        }
        for gpu in queried_gpus
    ]
@staticmethod
def get_boot_time():
    """Return the value reported by ``psutil.boot_time()``."""
    booted_at = psutil.boot_time()
    return booted_at
@@ -179,6 +206,7 @@ class Reporter:
"boot_time": self.get_boot_time(),
"load_avg": self.get_load_avg(),
"disk": self.get_disk_usage(),
"gpus": self.get_gpu_usage(),
"net": netstats,
}