mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 03:34:48 +08:00
+4
-84
@@ -39,18 +39,6 @@ REDIS_MODULE = os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
"core/src/ray/gcs/redis_module/libray_redis_module.so")
|
||||
|
||||
# Location of the credis server and modules.
|
||||
# credis will be enabled if the environment variable RAY_USE_NEW_GCS is set.
|
||||
CREDIS_EXECUTABLE = os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
"core/src/credis/redis/src/redis-server" + EXE_SUFFIX)
|
||||
CREDIS_MASTER_MODULE = os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
"core/src/credis/build/src/libmaster.so")
|
||||
CREDIS_MEMBER_MODULE = os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
"core/src/credis/build/src/libmember.so")
|
||||
|
||||
# Location of the plasma object store executable.
|
||||
PLASMA_STORE_EXECUTABLE = os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
@@ -663,7 +651,6 @@ def start_redis(node_ip_address,
|
||||
redis_max_clients=None,
|
||||
redirect_worker_output=False,
|
||||
password=None,
|
||||
use_credis=None,
|
||||
fate_share=None):
|
||||
"""Start the Redis global state store.
|
||||
|
||||
@@ -686,9 +673,6 @@ def start_redis(node_ip_address,
|
||||
to this value when they start up.
|
||||
password (str): Prevents external clients without the password
|
||||
from connecting to Redis if provided.
|
||||
use_credis: If True, additionally load the chain-replicated libraries
|
||||
into the redis servers. Defaults to None, which means its value is
|
||||
set by the presence of "RAY_USE_NEW_GCS" in os.environ.
|
||||
|
||||
Returns:
|
||||
A tuple of the address for the primary Redis shard, a list of
|
||||
@@ -708,31 +692,8 @@ def start_redis(node_ip_address,
|
||||
|
||||
processes = []
|
||||
|
||||
if use_credis is None:
|
||||
use_credis = ("RAY_USE_NEW_GCS" in os.environ)
|
||||
if use_credis:
|
||||
if password is not None:
|
||||
# TODO(pschafhalter) remove this once credis supports
|
||||
# authenticating Redis ports
|
||||
raise ValueError("Setting the `redis_password` argument is not "
|
||||
"supported in credis. To run Ray with "
|
||||
"password-protected Redis ports, ensure that "
|
||||
"the environment variable `RAY_USE_NEW_GCS=off`.")
|
||||
assert num_redis_shards == 1, (
|
||||
"For now, RAY_USE_NEW_GCS supports 1 shard, and credis "
|
||||
"supports 1-node chain for that shard only.")
|
||||
|
||||
if use_credis:
|
||||
redis_executable = CREDIS_EXECUTABLE
|
||||
# TODO(suquark): We need credis here because some symbols need to be
|
||||
# imported from credis dynamically through dlopen when Ray is built
|
||||
# with RAY_USE_NEW_GCS=on. We should remove them later for the primary
|
||||
# shard.
|
||||
# See src/ray/gcs/redis_module/ray_redis_module.cc
|
||||
redis_modules = [CREDIS_MASTER_MODULE, REDIS_MODULE]
|
||||
else:
|
||||
redis_executable = REDIS_EXECUTABLE
|
||||
redis_modules = [REDIS_MODULE]
|
||||
redis_executable = REDIS_EXECUTABLE
|
||||
redis_modules = [REDIS_MODULE]
|
||||
|
||||
redis_stdout_file, redis_stderr_file = redirect_files[0]
|
||||
# Start the primary Redis shard.
|
||||
@@ -777,15 +738,8 @@ def start_redis(node_ip_address,
|
||||
redis_shards = []
|
||||
for i in range(num_redis_shards):
|
||||
redis_stdout_file, redis_stderr_file = redirect_files[i + 1]
|
||||
if use_credis:
|
||||
redis_executable = CREDIS_EXECUTABLE
|
||||
# It is important to load the credis module BEFORE the ray module,
|
||||
# as the latter contains an extern declaration that the former
|
||||
# supplies.
|
||||
redis_modules = [CREDIS_MEMBER_MODULE, REDIS_MODULE]
|
||||
else:
|
||||
redis_executable = REDIS_EXECUTABLE
|
||||
redis_modules = [REDIS_MODULE]
|
||||
redis_executable = REDIS_EXECUTABLE
|
||||
redis_modules = [REDIS_MODULE]
|
||||
|
||||
redis_shard_port, p = _start_redis_instance(
|
||||
redis_executable,
|
||||
@@ -804,40 +758,6 @@ def start_redis(node_ip_address,
|
||||
# Store redis shard information in the primary redis shard.
|
||||
primary_redis_client.rpush("RedisShards", shard_address)
|
||||
|
||||
if use_credis:
|
||||
# Configure the chain state. The way it is intended to work is
|
||||
# the following:
|
||||
#
|
||||
# PRIMARY_SHARD
|
||||
#
|
||||
# SHARD_1 (master replica) -> SHARD_1 (member replica)
|
||||
# -> SHARD_1 (member replica)
|
||||
#
|
||||
# SHARD_2 (master replica) -> SHARD_2 (member replica)
|
||||
# -> SHARD_2 (member replica)
|
||||
# ...
|
||||
#
|
||||
#
|
||||
# If we have credis members in future, their modules should be:
|
||||
# [CREDIS_MEMBER_MODULE, REDIS_MODULE], and they will be initialized by
|
||||
# execute_command("MEMBER.CONNECT_TO_MASTER", node_ip_address, port)
|
||||
#
|
||||
# Currently we have num_redis_shards == 1, so only one chain will be
|
||||
# created, and the chain only contains master.
|
||||
|
||||
# TODO(suquark): Currently, this is not correct because we are
|
||||
# using the master replica as the primary shard. This should be
|
||||
# fixed later. I had tried to fix it but failed because of heartbeat
|
||||
# issues.
|
||||
primary_client = redis.StrictRedis(
|
||||
host=node_ip_address, port=port, password=password)
|
||||
shard_client = redis.StrictRedis(
|
||||
host=node_ip_address, port=redis_shard_port, password=password)
|
||||
primary_client.execute_command("MASTER.ADD", node_ip_address,
|
||||
redis_shard_port)
|
||||
shard_client.execute_command("MEMBER.CONNECT_TO_MASTER",
|
||||
node_ip_address, port)
|
||||
|
||||
return redis_address, redis_shards, processes
|
||||
|
||||
|
||||
|
||||
@@ -189,9 +189,6 @@ def test_exception_raised_when_actor_node_dies(ray_start_cluster_head):
|
||||
ray.get(x_id)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_actor_init_fails(ray_start_cluster_head):
|
||||
cluster = ray_start_cluster_head
|
||||
remote_node = cluster.add_node()
|
||||
@@ -347,9 +344,6 @@ def test_distributed_handle(ray_start_cluster_2_nodes):
|
||||
|
||||
|
||||
@pytest.mark.skip("This test does not work yet.")
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_remote_checkpoint_distributed_handle(ray_start_cluster_2_nodes):
|
||||
cluster = ray_start_cluster_2_nodes
|
||||
counter, ids = setup_counter_actor(test_checkpoint=True)
|
||||
|
||||
@@ -79,9 +79,6 @@ def test_actor_class_methods(ray_start_regular):
|
||||
assert ray.get(a.g.remote(2)) == 4
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Failing with new GCS API on Linux.")
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
|
||||
def test_actor_gpus(ray_start_cluster):
|
||||
cluster = ray_start_cluster
|
||||
|
||||
@@ -257,9 +257,6 @@ def test_not_logging_to_driver(shutdown_only):
|
||||
assert len(err_lines) == 0
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="New GCS API doesn't have a Python API yet.")
|
||||
def test_workers(shutdown_only):
|
||||
num_workers = 3
|
||||
ray.init(num_cpus=num_workers)
|
||||
|
||||
@@ -14,9 +14,6 @@ SIGKILL = signal.SIGKILL if sys.platform != "win32" else signal.SIGTERM
|
||||
|
||||
# This test checks that when a worker dies in the middle of a get, the plasma
|
||||
# store and raylet will not die.
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Not working with new GCS API.")
|
||||
def test_dying_worker_get(ray_start_2_cpus):
|
||||
@ray.remote
|
||||
def sleep_forever(signal):
|
||||
@@ -65,9 +62,6 @@ def test_dying_worker_get(ray_start_2_cpus):
|
||||
|
||||
# This test checks that when a driver dies in the middle of a get, the plasma
|
||||
# store and raylet will not die.
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Not working with new GCS API.")
|
||||
def test_dying_driver_get(ray_start_regular):
|
||||
# Start the Ray processes.
|
||||
address_info = ray_start_regular
|
||||
@@ -109,9 +103,6 @@ ray.get(ray.ObjectRef(ray.utils.hex_to_binary("{}")))
|
||||
|
||||
# This test checks that when a worker dies in the middle of a wait, the plasma
|
||||
# store and raylet will not die.
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Not working with new GCS API.")
|
||||
def test_dying_worker_wait(ray_start_2_cpus):
|
||||
@ray.remote
|
||||
def sleep_forever():
|
||||
@@ -150,9 +141,6 @@ def test_dying_worker_wait(ray_start_2_cpus):
|
||||
|
||||
# This test checks that when a driver dies in the middle of a wait, the plasma
|
||||
# store and raylet will not die.
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Not working with new GCS API.")
|
||||
def test_dying_driver_wait(ray_start_regular):
|
||||
# Start the Ray processes.
|
||||
address_info = ray_start_regular
|
||||
@@ -193,5 +181,4 @@ ray.wait([ray.ObjectRef(ray.utils.hex_to_binary("{}"))])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import pytest
|
||||
sys.exit(pytest.main(["-v", __file__]))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
@@ -75,9 +74,6 @@ def test_actor_creation_node_failure(ray_start_cluster):
|
||||
cluster.remove_node(get_other_nodes(cluster, True)[-1])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_driver_lives_sequential(ray_start_regular):
|
||||
ray.worker._global_node.kill_raylet()
|
||||
ray.worker._global_node.kill_plasma_store()
|
||||
@@ -88,9 +84,6 @@ def test_driver_lives_sequential(ray_start_regular):
|
||||
# If the driver can reach the tearDown method, then it is still alive.
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_driver_lives_parallel(ray_start_regular):
|
||||
all_processes = ray.worker._global_node.all_processes
|
||||
|
||||
|
||||
@@ -419,19 +419,18 @@ def test_calling_start_ray_head(call_ray_stop_only):
|
||||
check_call_ray(["start", "--head", "--redis-max-clients", "100"])
|
||||
check_call_ray(["stop"])
|
||||
|
||||
if "RAY_USE_NEW_GCS" not in os.environ:
|
||||
# Test starting Ray with redis shard ports specified.
|
||||
check_call_ray(
|
||||
["start", "--head", "--redis-shard-ports", "6380,6381,6382"])
|
||||
check_call_ray(["stop"])
|
||||
# Test starting Ray with redis shard ports specified.
|
||||
check_call_ray(
|
||||
["start", "--head", "--redis-shard-ports", "6380,6381,6382"])
|
||||
check_call_ray(["stop"])
|
||||
|
||||
# Test starting Ray with all arguments specified.
|
||||
check_call_ray([
|
||||
"start", "--head", "--redis-shard-ports", "6380,6381,6382",
|
||||
"--object-manager-port", "12345", "--num-cpus", "2", "--num-gpus",
|
||||
"0", "--redis-max-clients", "100", "--resources", "{\"Custom\": 1}"
|
||||
])
|
||||
check_call_ray(["stop"])
|
||||
# Test starting Ray with all arguments specified.
|
||||
check_call_ray([
|
||||
"start", "--head", "--redis-shard-ports", "6380,6381,6382",
|
||||
"--object-manager-port", "12345", "--num-cpus", "2", "--num-gpus", "0",
|
||||
"--redis-max-clients", "100", "--resources", "{\"Custom\": 1}"
|
||||
])
|
||||
check_call_ray(["stop"])
|
||||
|
||||
# Test starting Ray with invalid arguments.
|
||||
with pytest.raises(subprocess.CalledProcessError):
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
@@ -124,9 +123,6 @@ def test_actor_creation_node_failure(ray_start_cluster):
|
||||
cluster.remove_node(get_other_nodes(cluster, True)[-1])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_driver_lives_sequential(ray_start_regular):
|
||||
ray.worker._global_node.kill_raylet()
|
||||
ray.worker._global_node.kill_plasma_store()
|
||||
@@ -137,9 +133,6 @@ def test_driver_lives_sequential(ray_start_regular):
|
||||
# If the driver can reach the tearDown method, then it is still alive.
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Hanging with new GCS API.")
|
||||
def test_driver_lives_parallel(ray_start_regular):
|
||||
all_processes = ray.worker._global_node.all_processes
|
||||
|
||||
|
||||
@@ -15,9 +15,6 @@ def password():
|
||||
|
||||
|
||||
class TestRedisPassword:
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="New GCS API doesn't support Redis authentication yet.")
|
||||
def test_redis_password(self, password, shutdown_only):
|
||||
@ray.remote
|
||||
def f():
|
||||
@@ -42,9 +39,6 @@ class TestRedisPassword:
|
||||
host=redis_ip, port=redis_port, password=password)
|
||||
assert redis_client.ping()
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="New GCS API doesn't support Redis authentication yet.")
|
||||
def test_redis_password_cluster(self, password, shutdown_only):
|
||||
@ray.remote
|
||||
def f():
|
||||
|
||||
@@ -98,13 +98,6 @@ optional_ray_files += ray_autoscaler_files
|
||||
optional_ray_files += ray_project_files
|
||||
optional_ray_files += ray_dashboard_files
|
||||
|
||||
if os.getenv("RAY_USE_NEW_GCS") == "on":
|
||||
ray_files += [
|
||||
"ray/core/src/credis/build/src/libmember.so",
|
||||
"ray/core/src/credis/build/src/libmaster.so",
|
||||
"ray/core/src/credis/redis/src/redis-server" + exe_suffix,
|
||||
]
|
||||
|
||||
# If you're adding dependencies for ray extras, please
|
||||
# also update the matching section of requirements.txt
|
||||
# in this directory
|
||||
|
||||
Reference in New Issue
Block a user