mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 15:40:09 +08:00
Fix two types of eviction hangs (#5225)
This commit is contained in:
@@ -1330,7 +1330,7 @@ def test_actors_and_tasks_with_gpus_version_two(shutdown_only):
|
||||
|
||||
|
||||
def test_blocking_actor_task(shutdown_only):
|
||||
ray.init(num_cpus=1, num_gpus=1)
|
||||
ray.init(num_cpus=1, num_gpus=1, object_store_memory=int(10**8))
|
||||
|
||||
@ray.remote(num_gpus=1)
|
||||
def f():
|
||||
@@ -2023,7 +2023,7 @@ def test_lifetime_and_transient_resources(ray_start_regular):
|
||||
actor2s = [Actor2.remote() for _ in range(2)]
|
||||
results = [a.method.remote() for a in actor2s]
|
||||
ready_ids, remaining_ids = ray.wait(
|
||||
results, num_returns=len(results), timeout=1.0)
|
||||
results, num_returns=len(results), timeout=5.0)
|
||||
assert len(ready_ids) == 1
|
||||
|
||||
|
||||
|
||||
@@ -688,7 +688,7 @@ def test_raylet_crash_when_get(ray_start_regular):
|
||||
|
||||
thread = threading.Thread(target=sleep_to_kill_raylet)
|
||||
thread.start()
|
||||
with pytest.raises(Exception, match=r".*Connection closed unexpectedly.*"):
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
ray.get(nonexistent_id)
|
||||
thread.join()
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import json
|
||||
import numpy as np
|
||||
import os
|
||||
import pytest
|
||||
import sys
|
||||
import time
|
||||
|
||||
import ray
|
||||
@@ -479,6 +480,8 @@ def test_nondeterministic_task(ray_start_reconstruction):
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("RAY_USE_NEW_GCS") == "on",
|
||||
reason="Failing with new GCS API on Linux.")
|
||||
@pytest.mark.skipif(
|
||||
sys.version_info < (3, 0), reason="This test requires Python 3.")
|
||||
@pytest.mark.parametrize(
|
||||
"ray_start_object_store_memory", [10**9], indirect=True)
|
||||
def test_driver_put_errors(ray_start_object_store_memory):
|
||||
@@ -524,6 +527,7 @@ def test_driver_put_errors(ray_start_object_store_memory):
|
||||
|
||||
errors = wait_for_errors(error_check)
|
||||
assert all(error["type"] == ray_constants.PUT_RECONSTRUCTION_PUSH_ERROR
|
||||
or "ray.exceptions.UnreconstructableError" in error["message"]
|
||||
for error in errors)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import unittest
|
||||
|
||||
import ray
|
||||
|
||||
|
||||
class TestUnreconstructableErrors(unittest.TestCase):
    """Expect ``ray.get`` to raise once an object can no longer be rebuilt.

    Each test obtains an object ID, reads it once, submits a large amount of
    additional work, and then requires a second ``ray.get`` on the original
    ID to raise ``ray.exceptions.UnreconstructableError``.
    """

    def setUp(self):
        # Every test starts its own Ray instance with both the object store
        # and Redis capped at 10,000,000 bytes (presumably so the workloads
        # below overflow those caps and force eviction -- confirm against
        # the Ray documentation for these options).
        ray.init(object_store_memory=10000000, redis_max_memory=10000000)

    def tearDown(self):
        # Stop the Ray instance started in setUp so tests stay independent.
        ray.shutdown()

    def testDriverPutEvictedCannotReconstruct(self):
        # Put one array from the driver and confirm it is readable.
        first_id = ray.put(np.zeros(1 * 1024 * 1024))
        ray.get(first_id)

        # Put ten more arrays of the same size; per the test name this is
        # meant to push the first object out of the store.
        for _ in range(10):
            ray.put(np.zeros(1 * 1024 * 1024))

        # The first object must now fail with UnreconstructableError.
        with self.assertRaises(ray.exceptions.UnreconstructableError):
            ray.get(first_id)

    def testLineageEvictedReconstructionFails(self):
        @ray.remote
        def f(data):
            # The argument is ignored; only the task submission matters.
            return 0

        # Run one task and confirm its result is readable.
        first_id = f.remote(None)
        ray.get(first_id)

        # Submit 400 rounds of 50 tasks each, waiting for every round; per
        # the test name this is meant to evict the first task's lineage.
        for _ in range(400):
            pending = [f.remote(np.zeros(10000)) for _ in range(50)]
            ray.get(pending)

        # The first result must now fail with UnreconstructableError.
        with self.assertRaises(ray.exceptions.UnreconstructableError):
            ray.get(first_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed directly as a script: run this module's tests, reporting
    # each test individually (verbosity level 2).
    unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user