mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:38:18 +08:00
[Multi-tenancy] Delete flag enable_multi_tenancy and remove old code path (#10573)
This commit is contained in:
@@ -136,7 +136,7 @@ def find_redis_address(address=None):
|
||||
# --redis_address=123.456.78.910 --node_ip_address=123.456.78.910
|
||||
# --raylet_socket_name=... --store_socket_name=... --object_manager_port=0
|
||||
# --min_worker_port=10000 --max_worker_port=10999
|
||||
# --node_manager_port=58578 --redis_port=6379 --num_initial_workers=8
|
||||
# --node_manager_port=58578 --redis_port=6379
|
||||
# --maximum_startup_concurrency=8
|
||||
# --static_resource_list=node:123.456.78.910,1.0,object_store_memory,66
|
||||
# --config_list=plasma_store_as_thread,True
|
||||
@@ -1253,13 +1253,11 @@ def start_raylet(redis_address,
|
||||
stderr_file=None,
|
||||
config=None,
|
||||
java_worker_options=None,
|
||||
load_code_from_local=False,
|
||||
huge_pages=False,
|
||||
fate_share=None,
|
||||
socket_to_use=None,
|
||||
head_node=False,
|
||||
start_initial_python_workers_for_first_job=False,
|
||||
code_search_path=None):
|
||||
start_initial_python_workers_for_first_job=False):
|
||||
"""Start a raylet, which is a combined local scheduler and object manager.
|
||||
|
||||
Args:
|
||||
@@ -1296,9 +1294,6 @@ def start_raylet(redis_address,
|
||||
config (dict|None): Optional Raylet configuration that will
|
||||
override defaults in RayConfig.
|
||||
java_worker_options (list): The command options for Java worker.
|
||||
code_search_path (list): Code search path for worker. code_search_path
|
||||
is added to worker command in non-multi-tenancy mode and job_config
|
||||
in multi-tenancy mode.
|
||||
Returns:
|
||||
ProcessInfo for the process that was started.
|
||||
"""
|
||||
@@ -1311,7 +1306,6 @@ def start_raylet(redis_address,
|
||||
raise ValueError("Cannot use valgrind and profiler at the same time.")
|
||||
|
||||
assert resource_spec.resolved()
|
||||
num_initial_workers = resource_spec.num_cpus
|
||||
static_resources = resource_spec.to_resource_dict()
|
||||
|
||||
# Limit the number of workers that can be started in parallel by the
|
||||
@@ -1348,7 +1342,6 @@ def start_raylet(redis_address,
|
||||
raylet_name,
|
||||
redis_password,
|
||||
session_dir,
|
||||
code_search_path,
|
||||
)
|
||||
else:
|
||||
java_worker_command = []
|
||||
@@ -1368,15 +1361,18 @@ def start_raylet(redis_address,
|
||||
|
||||
# Create the command that the Raylet will use to start workers.
|
||||
start_worker_command = [
|
||||
sys.executable, worker_path, f"--node-ip-address={node_ip_address}",
|
||||
sys.executable,
|
||||
worker_path,
|
||||
f"--node-ip-address={node_ip_address}",
|
||||
f"--node-manager-port={node_manager_port}",
|
||||
f"--object-store-name={plasma_store_name}",
|
||||
f"--raylet-name={raylet_name}", f"--redis-address={redis_address}",
|
||||
f"--config-list={config_str}", f"--temp-dir={temp_dir}",
|
||||
f"--metrics-agent-port={metrics_agent_port}"
|
||||
f"--raylet-name={raylet_name}",
|
||||
f"--redis-address={redis_address}",
|
||||
f"--config-list={config_str}",
|
||||
f"--temp-dir={temp_dir}",
|
||||
f"--metrics-agent-port={metrics_agent_port}",
|
||||
"RAY_WORKER_DYNAMIC_OPTION_PLACEHOLDER",
|
||||
]
|
||||
if code_search_path:
|
||||
start_worker_command.append(f"--code-search-path={code_search_path}")
|
||||
if redis_password:
|
||||
start_worker_command += [f"--redis-password={redis_password}"]
|
||||
|
||||
@@ -1391,12 +1387,6 @@ def start_raylet(redis_address,
|
||||
if max_worker_port is None:
|
||||
max_worker_port = 0
|
||||
|
||||
if code_search_path is not None and len(code_search_path) > 0:
|
||||
load_code_from_local = True
|
||||
|
||||
if load_code_from_local:
|
||||
start_worker_command += ["--load-code-from-local"]
|
||||
|
||||
# Create agent command
|
||||
agent_command = [
|
||||
sys.executable,
|
||||
@@ -1427,7 +1417,6 @@ def start_raylet(redis_address,
|
||||
f"--node_ip_address={node_ip_address}",
|
||||
f"--redis_address={gcs_ip_address}",
|
||||
f"--redis_port={gcs_port}",
|
||||
f"--num_initial_workers={num_initial_workers}",
|
||||
f"--maximum_startup_concurrency={maximum_startup_concurrency}",
|
||||
f"--static_resource_list={resource_argument}",
|
||||
f"--config_list={config_str}",
|
||||
@@ -1487,8 +1476,7 @@ def get_ray_jars_dir():
|
||||
|
||||
def build_java_worker_command(java_worker_options, redis_address,
|
||||
node_manager_port, plasma_store_name,
|
||||
raylet_name, redis_password, session_dir,
|
||||
code_search_path):
|
||||
raylet_name, redis_password, session_dir):
|
||||
"""This method assembles the command used to start a Java worker.
|
||||
|
||||
Args:
|
||||
@@ -1499,7 +1487,6 @@ def build_java_worker_command(java_worker_options, redis_address,
|
||||
raylet_name (str): The name of the raylet socket to create.
|
||||
redis_password (str): The password of connect to redis.
|
||||
session_dir (str): The path of this session.
|
||||
code_search_path (list): Teh job code search path.
|
||||
Returns:
|
||||
The command string for starting Java worker.
|
||||
"""
|
||||
@@ -1520,7 +1507,6 @@ def build_java_worker_command(java_worker_options, redis_address,
|
||||
pairs.append(("ray.home", RAY_HOME))
|
||||
pairs.append(("ray.logging.dir", os.path.join(session_dir, "logs")))
|
||||
pairs.append(("ray.session-dir", session_dir))
|
||||
pairs.append(("ray.job.code-search-path", code_search_path))
|
||||
command = ["java"] + ["-D{}={}".format(*pair) for pair in pairs]
|
||||
|
||||
command += ["RAY_WORKER_RAYLET_CONFIG_PLACEHOLDER"]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
cluster_name: java
|
||||
# The minimum number of workers nodes to launch in addition to the head
|
||||
# node. This number should be >= 0.
|
||||
min_workers: 1
|
||||
min_workers: 1
|
||||
# The maximum number of workers nodes to launch in addition to the head
|
||||
# node. This takes precedence over min_workers.
|
||||
max_workers: 1
|
||||
@@ -72,10 +72,10 @@ worker_setup_commands: []
|
||||
# Command to start ray on the head node. You don't need to change this.
|
||||
head_start_ray_commands:
|
||||
- ray stop
|
||||
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --code-search-path=~/ray-word-count/target
|
||||
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
|
||||
# Command to start ray on worker nodes. You don't need to change this.
|
||||
worker_start_ray_commands:
|
||||
- ray stop
|
||||
- ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 --code-search-path=ray-word-count/target
|
||||
- ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
|
||||
|
||||
# To run the program, run `ray exec java.yaml "java -jar ray-word-count/target/ray-word-count-1.0-SNAPSHOT-jar-with-dependencies.jar"`
|
||||
# To run the program, run `ray exec java.yaml "java -jar ray-word-count/target/ray-word-count-1.0-SNAPSHOT-jar-with-dependencies.jar -Dray.job.code-search-path=ray-word-count/target"`
|
||||
|
||||
@@ -51,10 +51,6 @@ cdef extern from "ray/common/ray_config.h" nogil:
|
||||
|
||||
uint64_t object_manager_default_chunk_size() const
|
||||
|
||||
int num_workers_per_process_python() const
|
||||
|
||||
int num_workers_per_process_java() const
|
||||
|
||||
uint32_t maximum_gcs_deletion_batch_size() const
|
||||
|
||||
int64_t max_direct_call_object_size() const
|
||||
|
||||
@@ -88,14 +88,6 @@ cdef class Config:
|
||||
def object_manager_default_chunk_size():
|
||||
return RayConfig.instance().object_manager_default_chunk_size()
|
||||
|
||||
@staticmethod
|
||||
def num_workers_per_process_python():
|
||||
return RayConfig.instance().num_workers_per_process_python()
|
||||
|
||||
@staticmethod
|
||||
def num_workers_per_process_java():
|
||||
return RayConfig.instance().num_workers_per_process_java()
|
||||
|
||||
@staticmethod
|
||||
def maximum_gcs_deletion_batch_size():
|
||||
return RayConfig.instance().maximum_gcs_deletion_batch_size()
|
||||
|
||||
+1
-7
@@ -339,10 +339,6 @@ class Node:
|
||||
"""Get the cluster Redis password"""
|
||||
return self._ray_params.redis_password
|
||||
|
||||
@property
|
||||
def load_code_from_local(self):
|
||||
return self._ray_params.load_code_from_local
|
||||
|
||||
@property
|
||||
def object_ref_seed(self):
|
||||
"""Get the seed for deterministic generation of object refs"""
|
||||
@@ -723,14 +719,12 @@ class Node:
|
||||
stderr_file=stderr_file,
|
||||
config=self._config,
|
||||
java_worker_options=self._ray_params.java_worker_options,
|
||||
load_code_from_local=self._ray_params.load_code_from_local,
|
||||
huge_pages=self._ray_params.huge_pages,
|
||||
fate_share=self.kernel_fate_share,
|
||||
socket_to_use=self.socket,
|
||||
head_node=self.head,
|
||||
start_initial_python_workers_for_first_job=self._ray_params.
|
||||
start_initial_python_workers_for_first_job,
|
||||
code_search_path=self._ray_params.code_search_path)
|
||||
start_initial_python_workers_for_first_job)
|
||||
assert ray_constants.PROCESS_TYPE_RAYLET not in self.all_processes
|
||||
self.all_processes[ray_constants.PROCESS_TYPE_RAYLET] = [process_info]
|
||||
|
||||
|
||||
@@ -89,7 +89,6 @@ class RayParams:
|
||||
contents to Redis.
|
||||
autoscaling_config: path to autoscaling config file.
|
||||
java_worker_options (list): The command options for Java worker.
|
||||
load_code_from_local: Whether load code from local file or from GCS.
|
||||
metrics_agent_port(int): The port to bind metrics agent.
|
||||
metrics_export_port(int): The port at which metrics are exposed
|
||||
through a Prometheus endpoint.
|
||||
@@ -142,14 +141,12 @@ class RayParams:
|
||||
include_log_monitor=None,
|
||||
autoscaling_config=None,
|
||||
java_worker_options=None,
|
||||
load_code_from_local=False,
|
||||
start_initial_python_workers_for_first_job=False,
|
||||
_system_config=None,
|
||||
enable_object_reconstruction=False,
|
||||
metrics_agent_port=None,
|
||||
metrics_export_port=None,
|
||||
lru_evict=False,
|
||||
code_search_path=None):
|
||||
lru_evict=False):
|
||||
self.object_ref_seed = object_ref_seed
|
||||
self.redis_address = redis_address
|
||||
self.num_cpus = num_cpus
|
||||
@@ -186,7 +183,6 @@ class RayParams:
|
||||
self.include_log_monitor = include_log_monitor
|
||||
self.autoscaling_config = autoscaling_config
|
||||
self.java_worker_options = java_worker_options
|
||||
self.load_code_from_local = load_code_from_local
|
||||
self.metrics_agent_port = metrics_agent_port
|
||||
self.metrics_export_port = metrics_export_port
|
||||
self.start_initial_python_workers_for_first_job = (
|
||||
@@ -195,9 +191,6 @@ class RayParams:
|
||||
self._lru_evict = lru_evict
|
||||
self._enable_object_reconstruction = enable_object_reconstruction
|
||||
self._check_usage()
|
||||
self.code_search_path = code_search_path
|
||||
if code_search_path is None:
|
||||
self.code_search_path = []
|
||||
|
||||
# Set the internal config options for LRU eviction.
|
||||
if lru_evict:
|
||||
|
||||
@@ -389,25 +389,12 @@ def debug(address):
|
||||
default=None,
|
||||
type=str,
|
||||
help="Overwrite the options to start Java workers.")
|
||||
@click.option(
|
||||
"--code-search-path",
|
||||
default=None,
|
||||
hidden=True,
|
||||
type=str,
|
||||
help="A list of directories or jar files separated by colon that specify "
|
||||
"the search path for user code. This will be used as `CLASSPATH` in "
|
||||
"Java and `PYTHONPATH` in Python.")
|
||||
@click.option(
|
||||
"--system-config",
|
||||
default=None,
|
||||
hidden=True,
|
||||
type=json.loads,
|
||||
help="Override system configuration defaults.")
|
||||
@click.option(
|
||||
"--load-code-from-local",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Specify whether load code from local file or GCS serialization.")
|
||||
@click.option(
|
||||
"--lru-evict",
|
||||
is_flag=True,
|
||||
@@ -436,8 +423,7 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports,
|
||||
head, include_dashboard, dashboard_host, dashboard_port, block,
|
||||
plasma_directory, autoscaling_config, no_redirect_worker_output,
|
||||
no_redirect_output, plasma_store_socket_name, raylet_socket_name,
|
||||
temp_dir, java_worker_options, load_code_from_local,
|
||||
code_search_path, system_config, lru_evict,
|
||||
temp_dir, java_worker_options, system_config, lru_evict,
|
||||
enable_object_reconstruction, metrics_export_port, log_style,
|
||||
log_color, verbose):
|
||||
"""Start Ray processes manually on the local machine."""
|
||||
@@ -496,8 +482,6 @@ def start(node_ip_address, address, port, redis_password, redis_shard_ports,
|
||||
dashboard_host=dashboard_host,
|
||||
dashboard_port=dashboard_port,
|
||||
java_worker_options=java_worker_options,
|
||||
load_code_from_local=load_code_from_local,
|
||||
code_search_path=code_search_path,
|
||||
_system_config=system_config,
|
||||
lru_evict=lru_evict,
|
||||
enable_object_reconstruction=enable_object_reconstruction,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
import ray
|
||||
import ray.cluster_utils
|
||||
@@ -6,7 +7,7 @@ import ray.test_utils
|
||||
|
||||
|
||||
def test_cross_language_raise_kwargs(shutdown_only):
|
||||
ray.init(_load_code_from_local=True)
|
||||
ray.init(job_config=ray.job_config.JobConfig(code_search_path=sys.path))
|
||||
|
||||
with pytest.raises(Exception, match="kwargs"):
|
||||
ray.java_function("a", "b").remote(x="arg1")
|
||||
@@ -16,7 +17,7 @@ def test_cross_language_raise_kwargs(shutdown_only):
|
||||
|
||||
|
||||
def test_cross_language_raise_exception(shutdown_only):
|
||||
ray.init(_load_code_from_local=True)
|
||||
ray.init(job_config=ray.job_config.JobConfig(code_search_path=sys.path))
|
||||
|
||||
class PythonObject(object):
|
||||
pass
|
||||
|
||||
@@ -109,6 +109,7 @@ class Worker:
|
||||
# by the worker should drop into the debugger at the specified
|
||||
# breakpoint ID.
|
||||
self.debugger_get_breakpoint = b""
|
||||
self._load_code_from_local = False
|
||||
|
||||
@property
|
||||
def connected(self):
|
||||
@@ -122,7 +123,7 @@ class Worker:
|
||||
@property
|
||||
def load_code_from_local(self):
|
||||
self.check_connected()
|
||||
return self.node.load_code_from_local
|
||||
return self._load_code_from_local
|
||||
|
||||
@property
|
||||
def current_job_id(self):
|
||||
@@ -222,6 +223,9 @@ class Worker:
|
||||
"""
|
||||
self.mode = mode
|
||||
|
||||
def set_load_code_from_local(self, load_code_from_local):
|
||||
self._load_code_from_local = load_code_from_local
|
||||
|
||||
def put_object(self, value, object_ref=None, pin_object=True):
|
||||
"""Put value in the local object store with object reference `object_ref`.
|
||||
|
||||
@@ -489,9 +493,7 @@ def init(
|
||||
_memory=None,
|
||||
_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
|
||||
_java_worker_options=None,
|
||||
_code_search_path=None,
|
||||
_temp_dir=None,
|
||||
_load_code_from_local=False,
|
||||
_lru_evict=False,
|
||||
_metrics_export_port=None,
|
||||
_system_config=None):
|
||||
@@ -579,10 +581,7 @@ def init(
|
||||
_temp_dir (str): If provided, specifies the root temporary
|
||||
directory for the Ray process. Defaults to an OS-specific
|
||||
conventional location, e.g., "/tmp/ray".
|
||||
_load_code_from_local: Whether code should be loaded from a local
|
||||
module or from the GCS.
|
||||
_java_worker_options: Overwrite the options to start Java workers.
|
||||
_code_search_path (list): Java classpath or python import path.
|
||||
_lru_evict (bool): If True, when an object store is full, it will evict
|
||||
objects in LRU order to make more space and when under memory
|
||||
pressure, ray.ObjectLostError may be thrown. If False, then
|
||||
@@ -701,9 +700,7 @@ def init(
|
||||
redis_max_memory=_redis_max_memory,
|
||||
plasma_store_socket_name=None,
|
||||
temp_dir=_temp_dir,
|
||||
load_code_from_local=_load_code_from_local,
|
||||
java_worker_options=_java_worker_options,
|
||||
code_search_path=_code_search_path,
|
||||
start_initial_python_workers_for_first_job=True,
|
||||
_system_config=_system_config,
|
||||
lru_evict=_lru_evict,
|
||||
@@ -749,7 +746,6 @@ def init(
|
||||
redis_password=_redis_password,
|
||||
object_ref_seed=None,
|
||||
temp_dir=_temp_dir,
|
||||
load_code_from_local=_load_code_from_local,
|
||||
_system_config=_system_config,
|
||||
lru_evict=_lru_evict,
|
||||
enable_object_reconstruction=_enable_object_reconstruction,
|
||||
|
||||
@@ -145,11 +145,14 @@ if __name__ == "__main__":
|
||||
raylet_ip_address = args.node_ip_address
|
||||
|
||||
code_search_path = args.code_search_path
|
||||
load_code_from_local = False
|
||||
if code_search_path is not None:
|
||||
load_code_from_local = True
|
||||
for p in code_search_path.split(":"):
|
||||
if os.path.isfile(p):
|
||||
p = os.path.dirname(p)
|
||||
sys.path.append(p)
|
||||
ray.worker.global_worker.set_load_code_from_local(load_code_from_local)
|
||||
|
||||
ray_params = RayParams(
|
||||
node_ip_address=args.node_ip_address,
|
||||
@@ -160,7 +163,6 @@ if __name__ == "__main__":
|
||||
plasma_store_socket_name=args.object_store_name,
|
||||
raylet_socket_name=args.raylet_name,
|
||||
temp_dir=args.temp_dir,
|
||||
load_code_from_local=args.load_code_from_local,
|
||||
metrics_agent_port=args.metrics_agent_port,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user