[Object Spilling] Remove retries and use a timer instead. (#13175)

This commit is contained in:
SangBin Cho
2021-01-19 11:01:45 -08:00
committed by GitHub
parent 86d5000047
commit 99375c4cfc
10 changed files with 88 additions and 75 deletions
-1
View File
@@ -210,7 +210,6 @@ class RayParams:
raise Exception(
"Object pinning cannot be enabled if using LRU eviction.")
self._system_config["object_pinning_enabled"] = False
-self._system_config["object_store_full_max_retries"] = -1
self._system_config["free_objects_period_milliseconds"] = 1000
# Set the internal config options for object reconstruction.
-1
View File
@@ -22,7 +22,6 @@ def get_default_fixure_system_config():
system_config = {
"object_timeout_milliseconds": 200,
"num_heartbeats_timeout": 10,
-"object_store_full_max_retries": 3,
"object_store_full_delay_ms": 100,
}
return system_config
+1 -7
View File
@@ -1023,9 +1023,6 @@ def test_connect_with_disconnected_node(shutdown_only):
"ray_start_cluster_head", [{
"num_cpus": 5,
"object_store_memory": 10**8,
-"_system_config": {
-"object_store_full_max_retries": 0
-}
}],
indirect=True)
def test_parallel_actor_fill_plasma_retry(ray_start_cluster_head):
@@ -1042,10 +1039,7 @@ def test_parallel_actor_fill_plasma_retry(ray_start_cluster_head):
def test_fill_object_store_exception(shutdown_only):
-ray.init(
-num_cpus=2,
-object_store_memory=10**8,
-_system_config={"object_store_full_max_retries": 0})
+ray.init(num_cpus=2, object_store_memory=10**8)
@ray.remote
def expensive_task():
-13
View File
@@ -84,7 +84,6 @@ def test_spilling_not_done_for_pinned_object(tmp_path, shutdown_only):
_system_config={
"max_io_workers": 4,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -117,7 +116,6 @@ def test_spilling_not_done_for_pinned_object(tmp_path, shutdown_only):
"object_store_memory": 75 * 1024 * 1024,
"_system_config": {
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"max_io_workers": 4,
"object_spilling_config": json.dumps({
@@ -170,7 +168,6 @@ def test_spill_objects_automatically(object_spilling_config, shutdown_only):
_system_config={
"max_io_workers": 4,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0
@@ -251,10 +248,6 @@ def test_spill_during_get(object_spilling_config, shutdown_only):
_system_config={
"automatic_object_spilling_enabled": True,
"object_store_full_delay_ms": 100,
-# NOTE(swang): Use infinite retries because the OOM timer can still
-# get accidentally triggered when objects are released too slowly
-# (see github.com/ray-project/ray/issues/12040).
-"object_store_full_max_retries": -1,
"max_io_workers": 1,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0,
@@ -286,7 +279,6 @@ def test_spill_deadlock(object_spilling_config, shutdown_only):
_system_config={
"max_io_workers": 1,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0,
@@ -320,7 +312,6 @@ def test_delete_objects(tmp_path, shutdown_only):
"max_io_workers": 1,
"min_spilling_size": 0,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -363,7 +354,6 @@ def test_delete_objects_delete_while_creating(tmp_path, shutdown_only):
"max_io_workers": 4,
"min_spilling_size": 0,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -413,7 +403,6 @@ def test_delete_objects_on_worker_failure(tmp_path, shutdown_only):
_system_config={
"max_io_workers": 4,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -489,7 +478,6 @@ def test_delete_objects_multi_node(tmp_path, ray_start_cluster):
"max_io_workers": 2,
"min_spilling_size": 20 * 1024 * 1024,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -563,7 +551,6 @@ def test_fusion_objects(tmp_path, shutdown_only):
_system_config={
"max_io_workers": 3,
"automatic_object_spilling_enabled": True,
-"object_store_full_max_retries": 4,
"object_store_full_delay_ms": 100,
"object_spilling_config": json.dumps({
"type": "filesystem",
@@ -20,7 +20,6 @@ logger = logging.getLogger(__name__)
@pytest.fixture
def one_worker_100MiB(request):
config = {
-"object_store_full_max_retries": 2,
"task_retry_delay_ms": 0,
"object_timeout_milliseconds": 1000,
}