Enable even more new scheduler tests (#12096)

This commit is contained in:
Eric Liang
2020-11-19 16:47:18 -08:00
committed by GitHub
parent dac09bd569
commit e72abcd0aa
11 changed files with 48 additions and 35 deletions
+1
View File
@@ -691,6 +691,7 @@ def stop(force, verbose, log_style, log_color):
["log_monitor.py", False],
["reporter.py", False],
["dashboard.py", False],
["new_dashboard/agent.py", False],
["ray_process_reaper.py", False],
]
+18 -27
View File
@@ -12,8 +12,10 @@ py_test_module_list(
files = [
"test_async.py",
"test_actor.py",
"test_actor_failures.py",
"test_actor_advanced.py",
"test_advanced_3.py",
"test_actor_failures.py",
"test_actor_resources.py",
"test_advanced.py",
"test_advanced_2.py",
"test_array.py",
@@ -22,10 +24,14 @@ py_test_module_list(
"test_basic_2.py",
"test_cancel.py",
"test_cli.py",
"test_component_failures_2.py",
"test_component_failures_3.py",
"test_error_ray_not_initialized.py",
"test_gcs_fault_tolerance.py",
"test_iter.py",
"test_joblib.py",
"test_global_state.py",
"test_global_gc.py",
"test_mldataset.py",
],
size = "medium",
@@ -36,13 +42,7 @@ py_test_module_list(
py_test_module_list(
files = [
"test_actor_resources.py",
"test_advanced_3.py",
"test_component_failures_2.py",
"test_dynres.py",
"test_global_gc.py",
"test_global_state.py",
"test_joblib.py",
"test_dynres.py", # dyn res not implemented
],
size = "medium",
extra_srcs = SRCS,
@@ -55,14 +55,22 @@ py_test_module_list(
"test_memory_limits.py",
"test_memory_scheduling.py",
"test_metrics.py",
"test_multi_node.py",
"test_multi_node_2.py",
"test_multi_tenancy.py",
"test_multinode_failures.py",
"test_multinode_failures_2.py",
"test_multiprocessing.py",
"test_object_manager.py",
"test_object_spilling.py",
"test_output.py",
"test_reconstruction.py",
"test_reference_counting.py",
"test_reference_counting_2.py",
"test_resource_demand_scheduler.py",
"test_serialization.py",
"test_stress.py",
"test_stress_sharded.py",
"test_tensorflow.py",
"test_unreconstructable_errors.py",
],
@@ -72,23 +80,6 @@ py_test_module_list(
deps = ["//:ray_lib"],
)
py_test_module_list(
files = [
"test_multinode_failures.py",
"test_multi_node.py",
"test_object_manager.py",
"test_reconstruction.py",
"test_reference_counting.py",
"test_stress.py",
"test_stress_sharded.py",
"test_multi_tenancy.py",
],
size = "medium",
extra_srcs = SRCS,
tags = ["exclusive", "medium_size_python_tests_k_to_z", "new_scheduler_broken"],
deps = ["//:ray_lib"],
)
py_test_module_list(
files = [
"test_actor_pool.py",
@@ -125,6 +116,7 @@ py_test_module_list(
py_test_module_list(
files = [
"test_failure.py",
"test_stress_failure.py",
],
size = "large",
@@ -135,8 +127,7 @@ py_test_module_list(
py_test_module_list(
files = [
"test_failure.py",
"test_placement_group.py",
"test_placement_group.py", # placement groups not implemented
],
size = "large",
extra_srcs = SRCS,
+3 -1
View File
@@ -21,7 +21,8 @@ import setproctitle
import subprocess
from ray.test_utils import (check_call_ray, RayTestTimeoutException,
wait_for_condition, wait_for_num_actors)
wait_for_condition, wait_for_num_actors,
new_scheduler_enabled)
logger = logging.getLogger(__name__)
@@ -93,6 +94,7 @@ def test_local_scheduling_first(ray_start_cluster):
assert local()
@pytest.mark.skipif(new_scheduler_enabled(), reason="flakes more often")
def test_load_balancing_with_dependencies(ray_start_cluster):
# This test ensures that tasks are being assigned to all raylets in a
# roughly equal manner even when the tasks have dependencies.
+4
View File
@@ -22,6 +22,7 @@ from ray.test_utils import (
init_error_pubsub,
get_error_message,
Semaphore,
new_scheduler_enabled,
)
@@ -662,6 +663,7 @@ def test_warning_for_resource_deadlock(error_pubsub, shutdown_only):
assert errors[0].type == ray_constants.RESOURCE_DEADLOCK_ERROR
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_warning_for_infeasible_tasks(ray_start_regular, error_pubsub):
p = error_pubsub
# Check that we get warning messages for infeasible tasks.
@@ -687,6 +689,7 @@ def test_warning_for_infeasible_tasks(ray_start_regular, error_pubsub):
assert errors[0].type == ray_constants.INFEASIBLE_TASK_ERROR
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_warning_for_infeasible_zero_cpu_actor(shutdown_only):
# Check that we cannot place an actor on a 0 CPU machine and that we get an
# infeasibility warning (even though the actor creation task itself
@@ -953,6 +956,7 @@ def test_raylet_crash_when_get(ray_start_regular):
thread.join()
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_connect_with_disconnected_node(shutdown_only):
config = {
"num_heartbeats_timeout": 50,
+2 -1
View File
@@ -9,7 +9,7 @@ import pytest
import ray
import ray.cluster_utils
from ray.test_utils import wait_for_condition
from ray.test_utils import wait_for_condition, new_scheduler_enabled
from ray.internal.internal_api import global_gc
logger = logging.getLogger(__name__)
@@ -129,6 +129,7 @@ def test_global_gc_when_full(shutdown_only):
gc.enable()
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
def test_global_gc_actors(shutdown_only):
ray.init(num_cpus=1)
+4
View File
@@ -8,6 +8,7 @@ import time
import ray
import ray.ray_constants
import ray.test_utils
from ray.test_utils import new_scheduler_enabled
from ray._raylet import GlobalStateAccessor
@@ -143,6 +144,7 @@ def test_global_state_actor_entry(ray_start_regular):
@pytest.mark.parametrize("max_shapes", [0, 2, -1])
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_load_report(shutdown_only, max_shapes):
resource1 = "A"
resource2 = "B"
@@ -213,6 +215,7 @@ def test_load_report(shutdown_only, max_shapes):
global_state_accessor.disconnect()
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_placement_group_load_report(ray_start_cluster):
cluster = ray_start_cluster
# Add a head node that doesn't have gpu resource.
@@ -281,6 +284,7 @@ def test_placement_group_load_report(ray_start_cluster):
global_state_accessor.disconnect()
@pytest.mark.skipif(new_scheduler_enabled(), reason="broken")
def test_backlog_report(shutdown_only):
cluster = ray.init(
num_cpus=1, _system_config={
+3 -1
View File
@@ -9,7 +9,7 @@ from ray.test_utils import (
RayTestTimeoutException, check_call_ray, run_string_as_driver,
run_string_as_driver_nonblocking, wait_for_children_of_pid,
wait_for_children_of_pid_to_exit, wait_for_condition, kill_process_by_name,
Semaphore, init_error_pubsub, get_error_message)
Semaphore, init_error_pubsub, get_error_message, new_scheduler_enabled)
def test_remote_raylet_cleanup(ray_start_cluster):
@@ -139,6 +139,7 @@ print("success")
assert "success" in out
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
def test_driver_exiting_quickly(call_ray_start):
# This test will create some drivers that submit some tasks and then
# exit without waiting for the tasks to complete.
@@ -304,6 +305,7 @@ ray.get([a.log.remote(), f.remote()])
"--min-worker-port=0 --max-worker-port=0 --port 0"
],
indirect=True)
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
def test_drivers_release_resources(call_ray_start):
address = call_ray_start
+3 -1
View File
@@ -6,7 +6,8 @@ import ray
import ray.ray_constants as ray_constants
from ray.monitor import Monitor
from ray.cluster_utils import Cluster
from ray.test_utils import generate_system_config_map, SignalActor
from ray.test_utils import generate_system_config_map, SignalActor, \
new_scheduler_enabled
logger = logging.getLogger(__name__)
@@ -117,6 +118,7 @@ def verify_load_metrics(monitor, expected_resource_usage=None, timeout=30):
"num_cpus": 2,
}],
indirect=True)
@pytest.mark.skipif(new_scheduler_enabled(), reason="fails")
def test_heartbeats_single(ray_start_cluster_head):
"""Unit test for `Cluster.wait_for_nodes`.
+5 -3
View File
@@ -10,9 +10,9 @@ import ray
import ray.test_utils
from ray.core.generated import common_pb2
from ray.core.generated import node_manager_pb2, node_manager_pb2_grpc
from ray.test_utils import (wait_for_condition, wait_for_pid_to_exit,
run_string_as_driver,
run_string_as_driver_nonblocking)
from ray.test_utils import (
wait_for_condition, wait_for_pid_to_exit, run_string_as_driver,
run_string_as_driver_nonblocking, new_scheduler_enabled)
def get_workers():
@@ -207,6 +207,7 @@ def test_worker_capping_run_chained_tasks(shutdown_only):
assert len(get_workers()) == 2
@pytest.mark.skipif(new_scheduler_enabled(), reason="fails")
def test_worker_capping_fifo(shutdown_only):
# Start 2 initial workers by setting num_cpus to 2.
info = ray.init(num_cpus=2)
@@ -250,6 +251,7 @@ ray.shutdown()
assert worker2.pid == get_workers()[0].pid
@pytest.mark.skipif(new_scheduler_enabled(), reason="raylet hang 100% cpu")
def test_worker_registration_failure_after_driver_exit(shutdown_only):
info = ray.init(num_cpus=1)
+2
View File
@@ -9,6 +9,7 @@ import ray
from ray.test_utils import (
wait_for_condition,
wait_for_pid_to_exit,
new_scheduler_enabled,
)
SIGKILL = signal.SIGKILL if sys.platform != "win32" else signal.SIGTERM
@@ -487,6 +488,7 @@ def test_reconstruction_chain(ray_start_cluster, reconstruction_enabled):
raise e.as_instanceof_cause()
@pytest.mark.skipif(new_scheduler_enabled(), reason="hangs")
def test_reconstruction_stress(ray_start_cluster):
config = {
"num_heartbeats_timeout": 10,
+3 -1
View File
@@ -10,7 +10,8 @@ import pytest
import ray
import ray.cluster_utils
from ray.test_utils import SignalActor, put_object, wait_for_condition
from ray.test_utils import SignalActor, put_object, wait_for_condition, \
new_scheduler_enabled
logger = logging.getLogger(__name__)
@@ -166,6 +167,7 @@ def test_dependency_refcounts(ray_start_regular):
check_refcounts({})
@pytest.mark.skipif(new_scheduler_enabled(), reason="dynres notimpl")
def test_actor_creation_task(ray_start_regular):
@ray.remote
def large_object():