[Multi-tenancy] Delete flag enable_multi_tenancy and remove old code path (#10573)

This commit is contained in:
Kai Yang
2020-12-10 19:01:40 +08:00
committed by GitHub
parent d681991773
commit e3b5deb741
47 changed files with 279 additions and 530 deletions
+12 -26
View File
@@ -136,7 +136,7 @@ def find_redis_address(address=None):
# --redis_address=123.456.78.910 --node_ip_address=123.456.78.910
# --raylet_socket_name=... --store_socket_name=... --object_manager_port=0
# --min_worker_port=10000 --max_worker_port=10999
# --node_manager_port=58578 --redis_port=6379 --num_initial_workers=8
# --node_manager_port=58578 --redis_port=6379
# --maximum_startup_concurrency=8
# --static_resource_list=node:123.456.78.910,1.0,object_store_memory,66
# --config_list=plasma_store_as_thread,True
@@ -1253,13 +1253,11 @@ def start_raylet(redis_address,
stderr_file=None,
config=None,
java_worker_options=None,
load_code_from_local=False,
huge_pages=False,
fate_share=None,
socket_to_use=None,
head_node=False,
start_initial_python_workers_for_first_job=False,
code_search_path=None):
start_initial_python_workers_for_first_job=False):
"""Start a raylet, which is a combined local scheduler and object manager.
Args:
@@ -1296,9 +1294,6 @@ def start_raylet(redis_address,
config (dict|None): Optional Raylet configuration that will
override defaults in RayConfig.
java_worker_options (list): The command options for Java worker.
code_search_path (list): Code search path for worker. code_search_path
is added to worker command in non-multi-tenancy mode and job_config
in multi-tenancy mode.
Returns:
ProcessInfo for the process that was started.
"""
@@ -1311,7 +1306,6 @@ def start_raylet(redis_address,
raise ValueError("Cannot use valgrind and profiler at the same time.")
assert resource_spec.resolved()
num_initial_workers = resource_spec.num_cpus
static_resources = resource_spec.to_resource_dict()
# Limit the number of workers that can be started in parallel by the
@@ -1348,7 +1342,6 @@ def start_raylet(redis_address,
raylet_name,
redis_password,
session_dir,
code_search_path,
)
else:
java_worker_command = []
@@ -1368,15 +1361,18 @@ def start_raylet(redis_address,
# Create the command that the Raylet will use to start workers.
start_worker_command = [
sys.executable, worker_path, f"--node-ip-address={node_ip_address}",
sys.executable,
worker_path,
f"--node-ip-address={node_ip_address}",
f"--node-manager-port={node_manager_port}",
f"--object-store-name={plasma_store_name}",
f"--raylet-name={raylet_name}", f"--redis-address={redis_address}",
f"--config-list={config_str}", f"--temp-dir={temp_dir}",
f"--metrics-agent-port={metrics_agent_port}"
f"--raylet-name={raylet_name}",
f"--redis-address={redis_address}",
f"--config-list={config_str}",
f"--temp-dir={temp_dir}",
f"--metrics-agent-port={metrics_agent_port}",
"RAY_WORKER_DYNAMIC_OPTION_PLACEHOLDER",
]
if code_search_path:
start_worker_command.append(f"--code-search-path={code_search_path}")
if redis_password:
start_worker_command += [f"--redis-password={redis_password}"]
@@ -1391,12 +1387,6 @@ def start_raylet(redis_address,
if max_worker_port is None:
max_worker_port = 0
if code_search_path is not None and len(code_search_path) > 0:
load_code_from_local = True
if load_code_from_local:
start_worker_command += ["--load-code-from-local"]
# Create agent command
agent_command = [
sys.executable,
@@ -1427,7 +1417,6 @@ def start_raylet(redis_address,
f"--node_ip_address={node_ip_address}",
f"--redis_address={gcs_ip_address}",
f"--redis_port={gcs_port}",
f"--num_initial_workers={num_initial_workers}",
f"--maximum_startup_concurrency={maximum_startup_concurrency}",
f"--static_resource_list={resource_argument}",
f"--config_list={config_str}",
@@ -1487,8 +1476,7 @@ def get_ray_jars_dir():
def build_java_worker_command(java_worker_options, redis_address,
node_manager_port, plasma_store_name,
raylet_name, redis_password, session_dir,
code_search_path):
raylet_name, redis_password, session_dir):
"""This method assembles the command used to start a Java worker.
Args:
@@ -1499,7 +1487,6 @@ def build_java_worker_command(java_worker_options, redis_address,
raylet_name (str): The name of the raylet socket to create.
redis_password (str): The password used to connect to redis.
session_dir (str): The path of this session.
code_search_path (list): The job code search path.
Returns:
The command string for starting Java worker.
"""
@@ -1520,7 +1507,6 @@ def build_java_worker_command(java_worker_options, redis_address,
pairs.append(("ray.home", RAY_HOME))
pairs.append(("ray.logging.dir", os.path.join(session_dir, "logs")))
pairs.append(("ray.session-dir", session_dir))
pairs.append(("ray.job.code-search-path", code_search_path))
command = ["java"] + ["-D{}={}".format(*pair) for pair in pairs]
command += ["RAY_WORKER_RAYLET_CONFIG_PLACEHOLDER"]
+4 -4
View File
@@ -2,7 +2,7 @@
cluster_name: java
# The minimum number of worker nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: 1
min_workers: 1
# The maximum number of worker nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: 1
@@ -72,10 +72,10 @@ worker_setup_commands: []
# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
- ray stop
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --code-search-path=~/ray-word-count/target
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
- ray stop
- ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 --code-search-path=ray-word-count/target
- ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
# To run the program, run `ray exec java.yaml "java -jar ray-word-count/target/ray-word-count-1.0-SNAPSHOT-jar-with-dependencies.jar"`
# To run the program, run `ray exec java.yaml "java -Dray.job.code-search-path=ray-word-count/target -jar ray-word-count/target/ray-word-count-1.0-SNAPSHOT-jar-with-dependencies.jar"`
-4
View File
@@ -51,10 +51,6 @@ cdef extern from "ray/common/ray_config.h" nogil:
uint64_t object_manager_default_chunk_size() const
int num_workers_per_process_python() const
int num_workers_per_process_java() const
uint32_t maximum_gcs_deletion_batch_size() const
int64_t max_direct_call_object_size() const
-8
View File
@@ -88,14 +88,6 @@ cdef class Config:
def object_manager_default_chunk_size():
return RayConfig.instance().object_manager_default_chunk_size()
@staticmethod
def num_workers_per_process_python():
return RayConfig.instance().num_workers_per_process_python()
@staticmethod
def num_workers_per_process_java():
return RayConfig.instance().num_workers_per_process_java()
@staticmethod
def maximum_gcs_deletion_batch_size():
return RayConfig.instance().maximum_gcs_deletion_batch_size()
+1 -7
View File
@@ -339,10 +339,6 @@ class Node:
"""Get the cluster Redis password"""
return self._ray_params.redis_password
@property
def load_code_from_local(self):
return self._ray_params.load_code_from_local
@property
def object_ref_seed(self):
"""Get the seed for deterministic generation of object refs"""
@@ -723,14 +719,12 @@ class Node:
stderr_file=stderr_file,
config=self._config,
java_worker_options=self._ray_params.java_worker_options,
load_code_from_local=self._ray_params.load_code_from_local,
huge_pages=self._ray_params.huge_pages,
fate_share=self.kernel_fate_share,
socket_to_use=self.socket,
head_node=self.head,
start_initial_python_workers_for_first_job=self._ray_params.
start_initial_python_workers_for_first_job,
code_search_path=self._ray_params.code_search_path)
start_initial_python_workers_for_first_job)
assert ray_constants.PROCESS_TYPE_RAYLET not in self.all_processes
self.all_processes[ray_constants.PROCESS_TYPE_RAYLET] = [process_info]
+1 -8
View File
@@ -89,7 +89,6 @@ class RayParams:
contents to Redis.
autoscaling_config: path to autoscaling config file.
java_worker_options (list): The command options for Java worker.
load_code_from_local: Whether load code from local file or from GCS.
metrics_agent_port (int): The port to bind the metrics agent to.
metrics_export_port (int): The port at which metrics are exposed
through a Prometheus endpoint.
@@ -142,14 +141,12 @@ class RayParams:
include_log_monitor=None,
autoscaling_config=None,
java_worker_options=None,
load_code_from_local=False,
start_initial_python_workers_for_first_job=False,
_system_config=None,
enable_object_reconstruction=False,
metrics_agent_port=None,
metrics_export_port=None,
lru_evict=False,
code_search_path=None):
lru_evict=False):
self.object_ref_seed = object_ref_seed
self.redis_address = redis_address
self.num_cpus = num_cpus
@@ -186,7 +183,6 @@ class RayParams:
self.include_log_monitor = include_log_monitor
self.autoscaling_config = autoscaling_config
self.java_worker_options = java_worker_options
self.load_code_from_local = load_code_from_local
self.metrics_agent_port = metrics_agent_port
self.metrics_export_port = metrics_export_port
self.start_initial_python_workers_for_first_job = (
@@ -195,9 +191,6 @@ class RayParams:
self._lru_evict = lru_evict
self._enable_object_reconstruction = enable_object_reconstruction
self._check_usage()
self.code_search_path = code_search_path
if code_search_path is None:
self.code_search_path = []
# Set the internal config options for LRU eviction.
if lru_evict:
+1 -17
View File
@@ -389,25 +389,12 @@ def debug(address):
default=None,
type=str,
help="Overwrite the options to start Java workers.")
@click.option(
"--code-search-path",
default=None,
hidden=True,
type=str,
help="A list of directories or jar files separated by colon that specify "
"the search path for user code. This will be used as `CLASSPATH` in "
"Java and `PYTHONPATH` in Python.")
@click.option(
"--system-config",
default=None,
hidden=True,
type=json.loads,
help="Override system configuration defaults.")
@click.option(
"--load-code-from-local",
is_flag=True,
default=False,
help="Specify whether load code from local file or GCS serialization.")
@click.option(
"--lru-evict",
is_flag=True,
@@ -436,8 +423,7 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports,
head, include_dashboard, dashboard_host, dashboard_port, block,
plasma_directory, autoscaling_config, no_redirect_worker_output,
no_redirect_output, plasma_store_socket_name, raylet_socket_name,
temp_dir, java_worker_options, load_code_from_local,
code_search_path, system_config, lru_evict,
temp_dir, java_worker_options, system_config, lru_evict,
enable_object_reconstruction, metrics_export_port, log_style,
log_color, verbose):
"""Start Ray processes manually on the local machine."""
@@ -496,8 +482,6 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports,
dashboard_host=dashboard_host,
dashboard_port=dashboard_port,
java_worker_options=java_worker_options,
load_code_from_local=load_code_from_local,
code_search_path=code_search_path,
_system_config=system_config,
lru_evict=lru_evict,
enable_object_reconstruction=enable_object_reconstruction,
+3 -2
View File
@@ -1,4 +1,5 @@
import pytest
import sys
import ray
import ray.cluster_utils
@@ -6,7 +7,7 @@ import ray.test_utils
def test_cross_language_raise_kwargs(shutdown_only):
ray.init(_load_code_from_local=True)
ray.init(job_config=ray.job_config.JobConfig(code_search_path=sys.path))
with pytest.raises(Exception, match="kwargs"):
ray.java_function("a", "b").remote(x="arg1")
@@ -16,7 +17,7 @@ def test_cross_language_raise_kwargs(shutdown_only):
def test_cross_language_raise_exception(shutdown_only):
ray.init(_load_code_from_local=True)
ray.init(job_config=ray.job_config.JobConfig(code_search_path=sys.path))
class PythonObject(object):
pass
+5 -9
View File
@@ -109,6 +109,7 @@ class Worker:
# by the worker should drop into the debugger at the specified
# breakpoint ID.
self.debugger_get_breakpoint = b""
self._load_code_from_local = False
@property
def connected(self):
@@ -122,7 +123,7 @@ class Worker:
@property
def load_code_from_local(self):
self.check_connected()
return self.node.load_code_from_local
return self._load_code_from_local
@property
def current_job_id(self):
@@ -222,6 +223,9 @@ class Worker:
"""
self.mode = mode
def set_load_code_from_local(self, load_code_from_local):
self._load_code_from_local = load_code_from_local
def put_object(self, value, object_ref=None, pin_object=True):
"""Put value in the local object store with object reference `object_ref`.
@@ -489,9 +493,7 @@ def init(
_memory=None,
_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
_java_worker_options=None,
_code_search_path=None,
_temp_dir=None,
_load_code_from_local=False,
_lru_evict=False,
_metrics_export_port=None,
_system_config=None):
@@ -579,10 +581,7 @@ def init(
_temp_dir (str): If provided, specifies the root temporary
directory for the Ray process. Defaults to an OS-specific
conventional location, e.g., "/tmp/ray".
_load_code_from_local: Whether code should be loaded from a local
module or from the GCS.
_java_worker_options: Overwrite the options to start Java workers.
_code_search_path (list): Java classpath or python import path.
_lru_evict (bool): If True, when an object store is full, it will evict
objects in LRU order to make more space and when under memory
pressure, ray.ObjectLostError may be thrown. If False, then
@@ -701,9 +700,7 @@ def init(
redis_max_memory=_redis_max_memory,
plasma_store_socket_name=None,
temp_dir=_temp_dir,
load_code_from_local=_load_code_from_local,
java_worker_options=_java_worker_options,
code_search_path=_code_search_path,
start_initial_python_workers_for_first_job=True,
_system_config=_system_config,
lru_evict=_lru_evict,
@@ -749,7 +746,6 @@ def init(
redis_password=_redis_password,
object_ref_seed=None,
temp_dir=_temp_dir,
load_code_from_local=_load_code_from_local,
_system_config=_system_config,
lru_evict=_lru_evict,
enable_object_reconstruction=_enable_object_reconstruction,
+3 -1
View File
@@ -145,11 +145,14 @@ if __name__ == "__main__":
raylet_ip_address = args.node_ip_address
code_search_path = args.code_search_path
load_code_from_local = False
if code_search_path is not None:
load_code_from_local = True
for p in code_search_path.split(":"):
if os.path.isfile(p):
p = os.path.dirname(p)
sys.path.append(p)
ray.worker.global_worker.set_load_code_from_local(load_code_from_local)
ray_params = RayParams(
node_ip_address=args.node_ip_address,
@@ -160,7 +163,6 @@ if __name__ == "__main__":
plasma_store_socket_name=args.object_store_name,
raylet_socket_name=args.raylet_name,
temp_dir=args.temp_dir,
load_code_from_local=args.load_code_from_local,
metrics_agent_port=args.metrics_agent_port,
)