diff --git a/python/ray/node.py b/python/ray/node.py index 3eb1fcd0d..425965021 100644 --- a/python/ray/node.py +++ b/python/ray/node.py @@ -779,7 +779,8 @@ class Node: self.start_gcs_server() - self.start_monitor() + if not self._ray_params.no_monitor: + self.start_monitor() if self._ray_params.ray_client_server_port: self.start_ray_client_server() diff --git a/python/ray/parameter.py b/python/ray/parameter.py index c31e09df1..c0d3dc725 100644 --- a/python/ray/parameter.py +++ b/python/ray/parameter.py @@ -95,6 +95,8 @@ class RayParams: metrics_agent_port(int): The port to bind metrics agent. metrics_export_port(int): The port at which metrics are exposed through a Prometheus endpoint. + no_monitor(bool): If True, the ray autoscaler monitor for this cluster + will not be started. _system_config (dict): Configuration for overriding RayConfig defaults. Used to set system configuration and for experimental Ray core feature flags. @@ -150,6 +152,7 @@ class RayParams: enable_object_reconstruction=False, metrics_agent_port=None, metrics_export_port=None, + no_monitor=False, lru_evict=False): self.object_ref_seed = object_ref_seed self.redis_address = redis_address @@ -190,6 +193,7 @@ class RayParams: self.java_worker_options = java_worker_options self.metrics_agent_port = metrics_agent_port self.metrics_export_port = metrics_export_port + self.no_monitor = no_monitor self.start_initial_python_workers_for_first_job = ( start_initial_python_workers_for_first_job) self._system_config = _system_config or {} diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index d73e4335d..6fecd2dc2 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -422,6 +422,13 @@ def debug(address): default=None, help="the port to use to expose Ray metrics through a " "Prometheus endpoint.") +@click.option( + "--no-monitor", + is_flag=True, + hidden=True, + default=False, + help="If True, the ray autoscaler monitor for this cluster will not be " + "started.") @add_click_options(logging_options) def start(node_ip_address, address, port, redis_password, redis_shard_ports, object_manager_port, node_manager_port, gcs_server_port, @@ -432,8 +439,8 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports, plasma_directory, autoscaling_config, no_redirect_worker_output, no_redirect_output, plasma_store_socket_name, raylet_socket_name, temp_dir, java_worker_options, system_config, lru_evict, - enable_object_reconstruction, metrics_export_port, log_style, - log_color, verbose): + enable_object_reconstruction, metrics_export_port, no_monitor, + log_style, log_color, verbose): """Start Ray processes manually on the local machine.""" cli_logger.configure(log_style, log_color, verbose) if gcs_server_port and not head: @@ -494,7 +501,8 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports, _system_config=system_config, lru_evict=lru_evict, enable_object_reconstruction=enable_object_reconstruction, - metrics_export_port=metrics_export_port) + metrics_export_port=metrics_export_port, + no_monitor=no_monitor) if head: # Use default if port is none, allocate an available port if port is 0 if port is None: