mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:23:10 +08:00
[Stats] metrics agent exporter (#9361)
This commit is contained in:
@@ -108,6 +108,7 @@ class Node:
|
||||
include_log_monitor=True,
|
||||
resources={},
|
||||
temp_dir=ray.utils.get_ray_temp_dir(),
|
||||
metrics_agent_port=self._get_unused_port()[0],
|
||||
worker_path=os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"workers/default_worker.py"))
|
||||
@@ -554,6 +555,7 @@ class Node:
|
||||
open_log(reporter_err_name))
|
||||
process_info = ray.services.start_reporter(
|
||||
self.redis_address,
|
||||
self._ray_params.metrics_agent_port,
|
||||
stdout_file=stdout_file,
|
||||
stderr_file=stderr_file,
|
||||
redis_password=self._ray_params.redis_password,
|
||||
@@ -661,6 +663,7 @@ class Node:
|
||||
self._ray_params.max_worker_port,
|
||||
self._ray_params.object_manager_port,
|
||||
self._ray_params.redis_password,
|
||||
self._ray_params.metrics_agent_port,
|
||||
use_valgrind=use_valgrind,
|
||||
use_profiler=use_profiler,
|
||||
stdout_file=stdout_file,
|
||||
|
||||
@@ -87,6 +87,7 @@ class RayParams:
|
||||
Java worker.
|
||||
java_worker_options (list): The command options for Java worker.
|
||||
load_code_from_local: Whether load code from local file or from GCS.
|
||||
metrics_agent_port(int): The port to bind metrics agent.
|
||||
_internal_config (str): JSON configuration for overriding
|
||||
RayConfig defaults. For testing purposes ONLY.
|
||||
lru_evict (bool): Enable LRU eviction if space is needed.
|
||||
@@ -132,6 +133,7 @@ class RayParams:
|
||||
java_worker_options=None,
|
||||
load_code_from_local=False,
|
||||
_internal_config=None,
|
||||
metrics_agent_port=None,
|
||||
lru_evict=False):
|
||||
self.object_ref_seed = object_ref_seed
|
||||
self.redis_address = redis_address
|
||||
@@ -169,6 +171,7 @@ class RayParams:
|
||||
self.include_java = include_java
|
||||
self.java_worker_options = java_worker_options
|
||||
self.load_code_from_local = load_code_from_local
|
||||
self.metrics_agent_port = metrics_agent_port
|
||||
self._internal_config = _internal_config
|
||||
self._lru_evict = lru_evict
|
||||
self._check_usage()
|
||||
|
||||
+14
-3
@@ -55,6 +55,10 @@ class ReporterServer(reporter_pb2_grpc.ReporterServiceServicer):
|
||||
return reporter_pb2.GetProfilingStatsReply(
|
||||
profiling_stats=profiling_stats, stdout=stdout, stderr=stderr)
|
||||
|
||||
def ReportMetrics(self, request, context):
|
||||
# TODO(sang): Process metrics here.
|
||||
return reporter_pb2.ReportMetricsReply()
|
||||
|
||||
|
||||
def recursive_asdict(o):
|
||||
if isinstance(o, tuple) and hasattr(o, "_asdict"):
|
||||
@@ -94,11 +98,12 @@ class Reporter:
|
||||
redis_client: A client used to communicate with the Redis server.
|
||||
"""
|
||||
|
||||
def __init__(self, redis_address, redis_password=None):
|
||||
def __init__(self, redis_address, port, redis_password=None):
|
||||
"""Initialize the reporter object."""
|
||||
self.cpu_counts = (psutil.cpu_count(), psutil.cpu_count(logical=False))
|
||||
self.ip = ray.services.get_node_ip_address()
|
||||
self.hostname = platform.node()
|
||||
self.port = port
|
||||
|
||||
_ = psutil.cpu_percent() # For initialization
|
||||
|
||||
@@ -225,7 +230,7 @@ class Reporter:
|
||||
server = grpc.server(thread_pool, options=(("grpc.so_reuseport", 0), ))
|
||||
reporter_pb2_grpc.add_ReporterServiceServicer_to_server(
|
||||
ReporterServer(), server)
|
||||
port = server.add_insecure_port("[::]:0")
|
||||
port = server.add_insecure_port("[::]:{}".format(self.port))
|
||||
server.start()
|
||||
self.redis_client.set("REPORTER_PORT:{}".format(self.ip), port)
|
||||
"""Run the reporter."""
|
||||
@@ -248,6 +253,11 @@ if __name__ == "__main__":
|
||||
required=True,
|
||||
type=str,
|
||||
help="The address to use for Redis.")
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
required=True,
|
||||
type=int,
|
||||
help="The port to bind the reporter process.")
|
||||
parser.add_argument(
|
||||
"--redis-password",
|
||||
required=False,
|
||||
@@ -270,7 +280,8 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
ray.utils.setup_logger(args.logging_level, args.logging_format)
|
||||
|
||||
reporter = Reporter(args.redis_address, redis_password=args.redis_password)
|
||||
reporter = Reporter(
|
||||
args.redis_address, args.port, redis_password=args.redis_password)
|
||||
|
||||
try:
|
||||
reporter.run()
|
||||
|
||||
@@ -1065,6 +1065,7 @@ def start_log_monitor(redis_address,
|
||||
|
||||
|
||||
def start_reporter(redis_address,
|
||||
port,
|
||||
stdout_file=None,
|
||||
stderr_file=None,
|
||||
redis_password=None,
|
||||
@@ -1073,6 +1074,7 @@ def start_reporter(redis_address,
|
||||
|
||||
Args:
|
||||
redis_address (str): The address of the Redis instance.
|
||||
port(int): The port to bind the reporter process.
|
||||
stdout_file: A file handle opened for writing to redirect stdout to. If
|
||||
no redirection should happen, then this should be None.
|
||||
stderr_file: A file handle opened for writing to redirect stderr to. If
|
||||
@@ -1085,10 +1087,8 @@ def start_reporter(redis_address,
|
||||
reporter_filepath = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "reporter.py")
|
||||
command = [
|
||||
sys.executable,
|
||||
"-u",
|
||||
reporter_filepath,
|
||||
"--redis-address={}".format(redis_address),
|
||||
sys.executable, "-u", reporter_filepath,
|
||||
"--redis-address={}".format(redis_address), "--port={}".format(port)
|
||||
]
|
||||
if redis_password:
|
||||
command += ["--redis-password", redis_password]
|
||||
@@ -1249,6 +1249,7 @@ def start_raylet(redis_address,
|
||||
max_worker_port=None,
|
||||
object_manager_port=None,
|
||||
redis_password=None,
|
||||
metrics_agent_port=None,
|
||||
use_valgrind=False,
|
||||
use_profiler=False,
|
||||
stdout_file=None,
|
||||
@@ -1284,6 +1285,7 @@ def start_raylet(redis_address,
|
||||
max_worker_port (int): The highest port number that workers will bind
|
||||
on. If set, min_worker_port must also be set.
|
||||
redis_password: The password to use when connecting to Redis.
|
||||
metrics_agent_port(int): The port where metrics agent is bound to.
|
||||
use_valgrind (bool): True if the raylet should be started inside
|
||||
of valgrind. If this is True, use_profiler must be False.
|
||||
use_profiler (bool): True if the raylet should be started inside
|
||||
@@ -1390,6 +1392,7 @@ def start_raylet(redis_address,
|
||||
"--redis_password={}".format(redis_password or ""),
|
||||
"--temp_dir={}".format(temp_dir),
|
||||
"--session_dir={}".format(session_dir),
|
||||
"--metrics-agent-port={}".format(metrics_agent_port),
|
||||
]
|
||||
if config.get("plasma_store_as_thread"):
|
||||
# command related to the plasma store
|
||||
|
||||
Reference in New Issue
Block a user