mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 22:34:24 +08:00
Enable even more new scheduler tests (#12096)
This commit is contained in:
@@ -691,6 +691,7 @@ def stop(force, verbose, log_style, log_color):
|
||||
["log_monitor.py", False],
|
||||
["reporter.py", False],
|
||||
["dashboard.py", False],
|
||||
["new_dashboard/agent.py", False],
|
||||
["ray_process_reaper.py", False],
|
||||
]
|
||||
|
||||
|
||||
+18
-27
@@ -12,8 +12,10 @@ py_test_module_list(
|
||||
files = [
|
||||
"test_async.py",
|
||||
"test_actor.py",
|
||||
"test_actor_failures.py",
|
||||
"test_actor_advanced.py",
|
||||
"test_advanced_3.py",
|
||||
"test_actor_failures.py",
|
||||
"test_actor_resources.py",
|
||||
"test_advanced.py",
|
||||
"test_advanced_2.py",
|
||||
"test_array.py",
|
||||
@@ -22,10 +24,14 @@ py_test_module_list(
|
||||
"test_basic_2.py",
|
||||
"test_cancel.py",
|
||||
"test_cli.py",
|
||||
"test_component_failures_2.py",
|
||||
"test_component_failures_3.py",
|
||||
"test_error_ray_not_initialized.py",
|
||||
"test_gcs_fault_tolerance.py",
|
||||
"test_iter.py",
|
||||
"test_joblib.py",
|
||||
"test_global_state.py",
|
||||
"test_global_gc.py",
|
||||
"test_mldataset.py",
|
||||
],
|
||||
size = "medium",
|
||||
@@ -36,13 +42,7 @@ py_test_module_list(
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_actor_resources.py",
|
||||
"test_advanced_3.py",
|
||||
"test_component_failures_2.py",
|
||||
"test_dynres.py",
|
||||
"test_global_gc.py",
|
||||
"test_global_state.py",
|
||||
"test_joblib.py",
|
||||
"test_dynres.py", # dyn res not implemented
|
||||
],
|
||||
size = "medium",
|
||||
extra_srcs = SRCS,
|
||||
@@ -55,14 +55,22 @@ py_test_module_list(
|
||||
"test_memory_limits.py",
|
||||
"test_memory_scheduling.py",
|
||||
"test_metrics.py",
|
||||
"test_multi_node.py",
|
||||
"test_multi_node_2.py",
|
||||
"test_multi_tenancy.py",
|
||||
"test_multinode_failures.py",
|
||||
"test_multinode_failures_2.py",
|
||||
"test_multiprocessing.py",
|
||||
"test_object_manager.py",
|
||||
"test_object_spilling.py",
|
||||
"test_output.py",
|
||||
"test_reconstruction.py",
|
||||
"test_reference_counting.py",
|
||||
"test_reference_counting_2.py",
|
||||
"test_resource_demand_scheduler.py",
|
||||
"test_serialization.py",
|
||||
"test_stress.py",
|
||||
"test_stress_sharded.py",
|
||||
"test_tensorflow.py",
|
||||
"test_unreconstructable_errors.py",
|
||||
],
|
||||
@@ -72,23 +80,6 @@ py_test_module_list(
|
||||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_multinode_failures.py",
|
||||
"test_multi_node.py",
|
||||
"test_object_manager.py",
|
||||
"test_reconstruction.py",
|
||||
"test_reference_counting.py",
|
||||
"test_stress.py",
|
||||
"test_stress_sharded.py",
|
||||
"test_multi_tenancy.py",
|
||||
],
|
||||
size = "medium",
|
||||
extra_srcs = SRCS,
|
||||
tags = ["exclusive", "medium_size_python_tests_k_to_z", "new_scheduler_broken"],
|
||||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_actor_pool.py",
|
||||
@@ -125,6 +116,7 @@ py_test_module_list(
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_failure.py",
|
||||
"test_stress_failure.py",
|
||||
],
|
||||
size = "large",
|
||||
@@ -135,8 +127,7 @@ py_test_module_list(
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_failure.py",
|
||||
"test_placement_group.py",
|
||||
"test_placement_group.py", # placement groups not implemented
|
||||
],
|
||||
size = "large",
|
||||
extra_srcs = SRCS,
|
||||
|
||||
@@ -21,7 +21,8 @@ import setproctitle
|
||||
import subprocess
|
||||
|
||||
from ray.test_utils import (check_call_ray, RayTestTimeoutException,
|
||||
wait_for_condition, wait_for_num_actors)
|
||||
wait_for_condition, wait_for_num_actors,
|
||||
new_scheduler_enabled)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -93,6 +94,7 @@ def test_local_scheduling_first(ray_start_cluster):
|
||||
assert local()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="flakes more often")
|
||||
def test_load_balancing_with_dependencies(ray_start_cluster):
|
||||
# This test ensures that tasks are being assigned to all raylets in a
|
||||
# roughly equal manner even when the tasks have dependencies.
|
||||
|
||||
@@ -22,6 +22,7 @@ from ray.test_utils import (
|
||||
init_error_pubsub,
|
||||
get_error_message,
|
||||
Semaphore,
|
||||
new_scheduler_enabled,
|
||||
)
|
||||
|
||||
|
||||
@@ -662,6 +663,7 @@ def test_warning_for_resource_deadlock(error_pubsub, shutdown_only):
|
||||
assert errors[0].type == ray_constants.RESOURCE_DEADLOCK_ERROR
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_warning_for_infeasible_tasks(ray_start_regular, error_pubsub):
|
||||
p = error_pubsub
|
||||
# Check that we get warning messages for infeasible tasks.
|
||||
@@ -687,6 +689,7 @@ def test_warning_for_infeasible_tasks(ray_start_regular, error_pubsub):
|
||||
assert errors[0].type == ray_constants.INFEASIBLE_TASK_ERROR
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_warning_for_infeasible_zero_cpu_actor(shutdown_only):
|
||||
# Check that we cannot place an actor on a 0 CPU machine and that we get an
|
||||
# infeasibility warning (even though the actor creation task itself
|
||||
@@ -953,6 +956,7 @@ def test_raylet_crash_when_get(ray_start_regular):
|
||||
thread.join()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_connect_with_disconnected_node(shutdown_only):
|
||||
config = {
|
||||
"num_heartbeats_timeout": 50,
|
||||
|
||||
@@ -9,7 +9,7 @@ import pytest
|
||||
|
||||
import ray
|
||||
import ray.cluster_utils
|
||||
from ray.test_utils import wait_for_condition
|
||||
from ray.test_utils import wait_for_condition, new_scheduler_enabled
|
||||
from ray.internal.internal_api import global_gc
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -129,6 +129,7 @@ def test_global_gc_when_full(shutdown_only):
|
||||
gc.enable()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
|
||||
def test_global_gc_actors(shutdown_only):
|
||||
ray.init(num_cpus=1)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import time
|
||||
import ray
|
||||
import ray.ray_constants
|
||||
import ray.test_utils
|
||||
from ray.test_utils import new_scheduler_enabled
|
||||
|
||||
from ray._raylet import GlobalStateAccessor
|
||||
|
||||
@@ -143,6 +144,7 @@ def test_global_state_actor_entry(ray_start_regular):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("max_shapes", [0, 2, -1])
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_load_report(shutdown_only, max_shapes):
|
||||
resource1 = "A"
|
||||
resource2 = "B"
|
||||
@@ -213,6 +215,7 @@ def test_load_report(shutdown_only, max_shapes):
|
||||
global_state_accessor.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_placement_group_load_report(ray_start_cluster):
|
||||
cluster = ray_start_cluster
|
||||
# Add a head node that doesn't have gpu resource.
|
||||
@@ -281,6 +284,7 @@ def test_placement_group_load_report(ray_start_cluster):
|
||||
global_state_accessor.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
|
||||
def test_backlog_report(shutdown_only):
|
||||
cluster = ray.init(
|
||||
num_cpus=1, _system_config={
|
||||
|
||||
@@ -9,7 +9,7 @@ from ray.test_utils import (
|
||||
RayTestTimeoutException, check_call_ray, run_string_as_driver,
|
||||
run_string_as_driver_nonblocking, wait_for_children_of_pid,
|
||||
wait_for_children_of_pid_to_exit, wait_for_condition, kill_process_by_name,
|
||||
Semaphore, init_error_pubsub, get_error_message)
|
||||
Semaphore, init_error_pubsub, get_error_message, new_scheduler_enabled)
|
||||
|
||||
|
||||
def test_remote_raylet_cleanup(ray_start_cluster):
|
||||
@@ -139,6 +139,7 @@ print("success")
|
||||
assert "success" in out
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
|
||||
def test_driver_exiting_quickly(call_ray_start):
|
||||
# This test will create some drivers that submit some tasks and then
|
||||
# exit without waiting for the tasks to complete.
|
||||
@@ -304,6 +305,7 @@ ray.get([a.log.remote(), f.remote()])
|
||||
"--min-worker-port=0 --max-worker-port=0 --port 0"
|
||||
],
|
||||
indirect=True)
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
|
||||
def test_drivers_release_resources(call_ray_start):
|
||||
address = call_ray_start
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@ import ray
|
||||
import ray.ray_constants as ray_constants
|
||||
from ray.monitor import Monitor
|
||||
from ray.cluster_utils import Cluster
|
||||
from ray.test_utils import generate_system_config_map, SignalActor
|
||||
from ray.test_utils import generate_system_config_map, SignalActor, \
|
||||
new_scheduler_enabled
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -117,6 +118,7 @@ def verify_load_metrics(monitor, expected_resource_usage=None, timeout=30):
|
||||
"num_cpus": 2,
|
||||
}],
|
||||
indirect=True)
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="fails")
|
||||
def test_heartbeats_single(ray_start_cluster_head):
|
||||
"""Unit test for `Cluster.wait_for_nodes`.
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@ import ray
|
||||
import ray.test_utils
|
||||
from ray.core.generated import common_pb2
|
||||
from ray.core.generated import node_manager_pb2, node_manager_pb2_grpc
|
||||
from ray.test_utils import (wait_for_condition, wait_for_pid_to_exit,
|
||||
run_string_as_driver,
|
||||
run_string_as_driver_nonblocking)
|
||||
from ray.test_utils import (
|
||||
wait_for_condition, wait_for_pid_to_exit, run_string_as_driver,
|
||||
run_string_as_driver_nonblocking, new_scheduler_enabled)
|
||||
|
||||
|
||||
def get_workers():
|
||||
@@ -207,6 +207,7 @@ def test_worker_capping_run_chained_tasks(shutdown_only):
|
||||
assert len(get_workers()) == 2
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="fails")
|
||||
def test_worker_capping_fifo(shutdown_only):
|
||||
# Start 2 initial workers by setting num_cpus to 2.
|
||||
info = ray.init(num_cpus=2)
|
||||
@@ -250,6 +251,7 @@ ray.shutdown()
|
||||
assert worker2.pid == get_workers()[0].pid
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="raylet hang 100% cpu")
|
||||
def test_worker_registration_failure_after_driver_exit(shutdown_only):
|
||||
info = ray.init(num_cpus=1)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import ray
|
||||
from ray.test_utils import (
|
||||
wait_for_condition,
|
||||
wait_for_pid_to_exit,
|
||||
new_scheduler_enabled,
|
||||
)
|
||||
|
||||
SIGKILL = signal.SIGKILL if sys.platform != "win32" else signal.SIGTERM
|
||||
@@ -487,6 +488,7 @@ def test_reconstruction_chain(ray_start_cluster, reconstruction_enabled):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
|
||||
def test_reconstruction_stress(ray_start_cluster):
|
||||
config = {
|
||||
"num_heartbeats_timeout": 10,
|
||||
|
||||
@@ -10,7 +10,8 @@ import pytest
|
||||
|
||||
import ray
|
||||
import ray.cluster_utils
|
||||
from ray.test_utils import SignalActor, put_object, wait_for_condition
|
||||
from ray.test_utils import SignalActor, put_object, wait_for_condition, \
|
||||
new_scheduler_enabled
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -166,6 +167,7 @@ def test_dependency_refcounts(ray_start_regular):
|
||||
check_refcounts({})
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="dynres notimpl")
|
||||
def test_actor_creation_task(ray_start_regular):
|
||||
@ray.remote
|
||||
def large_object():
|
||||
|
||||
Reference in New Issue
Block a user