Allow the node manager port and object manager port to be set through ray start. (#3130)

* Allow the node manager port and object manager port to be set through ray start.

* Linting

* Fix Java test

* Address comments.
This commit is contained in:
Robert Nishihara
2018-10-28 17:28:41 -07:00
committed by Philipp Moritz
parent a404401dc6
commit fd854ff090
8 changed files with 104 additions and 32 deletions
+13 -13
View File
@@ -104,6 +104,11 @@ def cli(logging_level, logging_format):
required=False,
type=int,
help="the port to use for starting the object manager")
@click.option(
"--node-manager-port",
required=False,
type=int,
help="the port to use for starting the node manager")
@click.option(
"--object-store-memory",
required=False,
@@ -190,11 +195,11 @@ def cli(logging_level, logging_format):
help="manually specify the root temporary dir of the Ray process")
def start(node_ip_address, redis_address, redis_port, num_redis_shards,
redis_max_clients, redis_password, redis_shard_ports,
object_manager_port, object_store_memory, num_workers, num_cpus,
num_gpus, resources, head, no_ui, block, plasma_directory,
huge_pages, autoscaling_config, no_redirect_worker_output,
no_redirect_output, plasma_store_socket_name, raylet_socket_name,
temp_dir):
object_manager_port, node_manager_port, object_store_memory,
num_workers, num_cpus, num_gpus, resources, head, no_ui, block,
plasma_directory, huge_pages, autoscaling_config,
no_redirect_worker_output, no_redirect_output,
plasma_store_socket_name, raylet_socket_name, temp_dir):
# Convert hostnames to numerical IP address.
if node_ip_address is not None:
node_ip_address = services.address_to_ip(node_ip_address)
@@ -243,15 +248,9 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
logger.info("Using IP address {} for this node."
.format(node_ip_address))
address_info = {}
# Use the provided object manager port if there is one.
if object_manager_port is not None:
address_info["object_manager_ports"] = [object_manager_port]
if address_info == {}:
address_info = None
address_info = services.start_ray_head(
address_info=address_info,
object_manager_ports=[object_manager_port],
node_manager_ports=[node_manager_port],
node_ip_address=node_ip_address,
redis_port=redis_port,
redis_shard_ports=redis_shard_ports,
@@ -337,6 +336,7 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
node_ip_address=node_ip_address,
redis_address=redis_address,
object_manager_ports=[object_manager_port],
node_manager_ports=[node_manager_port],
num_workers=num_workers,
object_store_memory=object_store_memory,
redis_password=redis_password,
+51 -3
View File
@@ -849,6 +849,8 @@ def start_raylet(redis_address,
plasma_store_name,
worker_path,
resources=None,
object_manager_port=None,
node_manager_port=None,
num_workers=0,
use_valgrind=False,
use_profiler=False,
@@ -867,6 +869,13 @@ def start_raylet(redis_address,
raylet_name (str): The name of the raylet socket to create.
worker_path (str): The path of the script to use when the local
scheduler starts up new workers.
resources: The resources that this raylet has.
object_manager_port (int): The port to use for the object manager. If
this is not provided, we will use 0 and the object manager will
choose its own port.
node_manager_port (int): The port to use for the node manager. If
this is not provided, we will use 0 and the node manager will
choose its own port.
use_valgrind (bool): True if the raylet should be started inside
of valgrind. If this is True, use_profiler must be False.
use_profiler (bool): True if the raylet should be started inside
@@ -915,10 +924,21 @@ def start_raylet(redis_address,
if redis_password:
start_worker_command += " --redis-password {}".format(redis_password)
# If the object manager port is None, then use 0 to cause the object
# manager to choose its own port.
if object_manager_port is None:
object_manager_port = 0
# If the node manager port is None, then use 0 to cause the node manager
# to choose its own port.
if node_manager_port is None:
node_manager_port = 0
command = [
RAYLET_EXECUTABLE,
raylet_name,
plasma_store_name,
str(object_manager_port),
str(node_manager_port),
node_ip_address,
gcs_ip_address,
gcs_port,
@@ -1159,6 +1179,8 @@ def start_raylet_monitor(redis_address,
def start_ray_processes(address_info=None,
object_manager_ports=None,
node_manager_ports=None,
node_ip_address="127.0.0.1",
redis_port=None,
redis_shard_ports=None,
@@ -1188,6 +1210,12 @@ def start_ray_processes(address_info=None,
address_info (dict): A dictionary with address information for
processes that have already been started. If provided, address_info
will be modified to include processes that are newly started.
object_manager_ports (list): A list of the ports to use for the object
managers. There should be one per object manager being started on
this node (typically just one).
node_manager_ports (list): A list of the ports to use for the node
managers. There should be one per node manager being started on
this node (typically just one).
node_ip_address (str): The IP address of this node.
redis_port (int): The port that the primary Redis shard should listen
to. If None, then a random port will be chosen. If the key
@@ -1341,11 +1369,14 @@ def start_ray_processes(address_info=None,
raylet_socket_names = address_info["raylet_socket_names"]
# Get the ports to use for the object managers if any are provided.
object_manager_ports = (address_info["object_manager_ports"] if
"object_manager_ports" in address_info else None)
if not isinstance(object_manager_ports, list):
assert object_manager_ports is None or num_local_schedulers == 1
object_manager_ports = num_local_schedulers * [object_manager_ports]
assert len(object_manager_ports) == num_local_schedulers
if not isinstance(node_manager_ports, list):
assert node_manager_ports is None or num_local_schedulers == 1
node_manager_ports = num_local_schedulers * [node_manager_ports]
assert len(node_manager_ports) == num_local_schedulers
# Start any object stores that do not yet exist.
for i in range(num_local_schedulers - len(object_store_addresses)):
@@ -1378,6 +1409,8 @@ def start_ray_processes(address_info=None,
raylet_socket_name or get_raylet_socket_name(),
object_store_addresses[i],
worker_path,
object_manager_port=object_manager_ports[i],
node_manager_port=node_manager_ports[i],
resources=resources[i],
num_workers=workers_per_local_scheduler[i],
stdout_file=raylet_stdout_file,
@@ -1402,6 +1435,7 @@ def start_ray_processes(address_info=None,
def start_ray_node(node_ip_address,
redis_address,
object_manager_ports=None,
node_manager_ports=None,
num_workers=0,
num_local_schedulers=1,
object_store_memory=None,
@@ -1427,6 +1461,9 @@ def start_ray_node(node_ip_address,
object_manager_ports (list): A list of the ports to use for the object
managers. There should be one per object manager being started on
this node (typically just one).
node_manager_ports (list): A list of the ports to use for the node
managers. There should be one per node manager being started on
this node (typically just one).
num_workers (int): The number of workers to start.
num_local_schedulers (int): The number of local schedulers to start.
This is also the number of plasma stores and plasma managers to
@@ -1463,10 +1500,11 @@ def start_ray_node(node_ip_address,
"""
address_info = {
"redis_address": redis_address,
"object_manager_ports": object_manager_ports
}
return start_ray_processes(
address_info=address_info,
object_manager_ports=object_manager_ports,
node_manager_ports=node_manager_ports,
node_ip_address=node_ip_address,
num_workers=num_workers,
num_local_schedulers=num_local_schedulers,
@@ -1486,6 +1524,8 @@ def start_ray_node(node_ip_address,
def start_ray_head(address_info=None,
object_manager_ports=None,
node_manager_ports=None,
node_ip_address="127.0.0.1",
redis_port=None,
redis_shard_ports=None,
@@ -1514,6 +1554,12 @@ def start_ray_head(address_info=None,
address_info (dict): A dictionary with address information for
processes that have already been started. If provided, address_info
will be modified to include processes that are newly started.
object_manager_ports (list): A list of the ports to use for the object
managers. There should be one per object manager being started on
this node (typically just one).
node_manager_ports (list): A list of the ports to use for the node
managers. There should be one per node manager being started on
this node (typically just one).
node_ip_address (str): The IP address of this node.
redis_port (int): The port that the primary Redis shard should listen
to. If None, then a random port will be chosen. If the key
@@ -1570,6 +1616,8 @@ def start_ray_head(address_info=None,
num_redis_shards = 1 if num_redis_shards is None else num_redis_shards
return start_ray_processes(
address_info=address_info,
object_manager_ports=object_manager_ports,
node_manager_ports=node_manager_ports,
node_ip_address=node_ip_address,
redis_port=redis_port,
redis_shard_ports=redis_shard_ports,