mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:50:55 +08:00
Add a web dashboard for monitoring node resource usage (#4066)
This commit is contained in:
committed by
Robert Nishihara
parent
3ac8fd7ee8
commit
2e30f7ba38
+57
-2
@@ -10,6 +10,7 @@ import json
|
||||
import os
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
@@ -24,6 +25,8 @@ from ray.utils import try_to_create_directory
|
||||
# using logging.basicConfig in its entry/init points.
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PY3 = sys.version_info.major >= 3
|
||||
|
||||
|
||||
class Node(object):
|
||||
"""An encapsulation of the Ray processes on a single node.
|
||||
@@ -81,6 +84,7 @@ class Node(object):
|
||||
self._plasma_store_socket_name = None
|
||||
self._raylet_socket_name = None
|
||||
self._webui_url = None
|
||||
self._dashboard_url = None
|
||||
else:
|
||||
self._plasma_store_socket_name = (
|
||||
ray_params.plasma_store_socket_name)
|
||||
@@ -284,6 +288,35 @@ class Node(object):
|
||||
process_info
|
||||
]
|
||||
|
||||
def start_reporter(self):
    """Start the reporter process for this node.

    The process is launched through ``ray.services.start_reporter`` using
    this node's Redis address and Redis password, with its output
    directed to newly created "reporter" log files. If the launcher
    returns a process handle, it is recorded in ``self.all_processes``
    under ``ray_constants.PROCESS_TYPE_REPORTER``.

    Raises:
        AssertionError: If a reporter is already registered on this node.
    """
    # Check for a duplicate *before* spawning anything: failing after the
    # launch would leave a running process that was never recorded in
    # all_processes.
    assert ray_constants.PROCESS_TYPE_REPORTER not in self.all_processes

    stdout_file, stderr_file = self.new_log_files("reporter", True)
    process_info = ray.services.start_reporter(
        self.redis_address,
        stdout_file=stdout_file,
        stderr_file=stderr_file,
        redis_password=self._ray_params.redis_password)

    # A None result is tolerated; nothing is registered in that case.
    if process_info is not None:
        self.all_processes[ray_constants.PROCESS_TYPE_REPORTER] = [
            process_info
        ]
|
||||
|
||||
def start_dashboard(self):
    """Start the dashboard process for this node.

    The process is launched through ``ray.services.start_dashboard``
    using this node's Redis address, temp directory and Redis password,
    with its output directed to newly created "dashboard" log files. The
    URL returned by the launcher is stored in ``self._dashboard_url``. If
    a process handle is returned, it is recorded in
    ``self.all_processes`` under ``ray_constants.PROCESS_TYPE_DASHBOARD``.

    Raises:
        AssertionError: If a dashboard is already registered on this
            node.
    """
    # Check for a duplicate *before* spawning anything: failing after the
    # launch would leave a running process that was never recorded in
    # all_processes, and would already have overwritten _dashboard_url.
    assert ray_constants.PROCESS_TYPE_DASHBOARD not in self.all_processes

    stdout_file, stderr_file = self.new_log_files("dashboard", True)
    self._dashboard_url, process_info = ray.services.start_dashboard(
        self.redis_address,
        self._temp_dir,
        stdout_file=stdout_file,
        stderr_file=stderr_file,
        redis_password=self._ray_params.redis_password)

    # A None handle is tolerated; nothing is registered in that case.
    if process_info is not None:
        self.all_processes[ray_constants.PROCESS_TYPE_DASHBOARD] = [
            process_info
        ]
|
||||
|
||||
def start_ui(self):
|
||||
"""Start the web UI."""
|
||||
stdout_file, stderr_file = self.new_log_files("webui")
|
||||
@@ -408,14 +441,16 @@ class Node(object):
|
||||
self.start_redis()
|
||||
self.start_monitor()
|
||||
self.start_raylet_monitor()
|
||||
if PY3 and self._ray_params.include_webui:
|
||||
self.start_dashboard()
|
||||
|
||||
self.start_plasma_store()
|
||||
self.start_raylet()
|
||||
if PY3 and self._ray_params.include_webui:
|
||||
self.start_reporter()
|
||||
|
||||
if self._ray_params.include_log_monitor:
|
||||
self.start_log_monitor()
|
||||
if self._ray_params.include_webui:
|
||||
self.start_ui()
|
||||
|
||||
def _kill_process_type(self,
|
||||
process_type,
|
||||
@@ -545,6 +580,26 @@ class Node(object):
|
||||
self._kill_process_type(
|
||||
ray_constants.PROCESS_TYPE_LOG_MONITOR, check_alive=check_alive)
|
||||
|
||||
def kill_reporter(self, check_alive=True):
    """Terminate this node's reporter process.

    Args:
        check_alive (bool): If True, an exception is raised when the
            process had already died.
    """
    reporter_type = ray_constants.PROCESS_TYPE_REPORTER
    self._kill_process_type(reporter_type, check_alive=check_alive)
|
||||
|
||||
def kill_dashboard(self, check_alive=True):
    """Terminate this node's dashboard process.

    Args:
        check_alive (bool): If True, an exception is raised when the
            process had already died.
    """
    dashboard_type = ray_constants.PROCESS_TYPE_DASHBOARD
    self._kill_process_type(dashboard_type, check_alive=check_alive)
|
||||
|
||||
def kill_monitor(self, check_alive=True):
|
||||
"""Kill the monitor.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user