Fix two types of eviction hangs (#5225)

This commit is contained in:
Eric Liang
2019-07-23 21:20:17 -07:00
committed by GitHub
parent 97c43284a6
commit 5b76238bce
9 changed files with 126 additions and 45 deletions
+2 -2
View File
@@ -1330,7 +1330,7 @@ def test_actors_and_tasks_with_gpus_version_two(shutdown_only):
def test_blocking_actor_task(shutdown_only):
ray.init(num_cpus=1, num_gpus=1)
ray.init(num_cpus=1, num_gpus=1, object_store_memory=int(10**8))
@ray.remote(num_gpus=1)
def f():
@@ -2023,7 +2023,7 @@ def test_lifetime_and_transient_resources(ray_start_regular):
actor2s = [Actor2.remote() for _ in range(2)]
results = [a.method.remote() for a in actor2s]
ready_ids, remaining_ids = ray.wait(
results, num_returns=len(results), timeout=1.0)
results, num_returns=len(results), timeout=5.0)
assert len(ready_ids) == 1
+1 -1
View File
@@ -688,7 +688,7 @@ def test_raylet_crash_when_get(ray_start_regular):
thread = threading.Thread(target=sleep_to_kill_raylet)
thread.start()
with pytest.raises(Exception, match=r".*Connection closed unexpectedly.*"):
with pytest.raises(ray.exceptions.UnreconstructableError):
ray.get(nonexistent_id)
thread.join()
+4
View File
@@ -6,6 +6,7 @@ import json
import numpy as np
import os
import pytest
import sys
import time
import ray
@@ -479,6 +480,8 @@ def test_nondeterministic_task(ray_start_reconstruction):
@pytest.mark.skipif(
os.environ.get("RAY_USE_NEW_GCS") == "on",
reason="Failing with new GCS API on Linux.")
@pytest.mark.skipif(
sys.version_info < (3, 0), reason="This test requires Python 3.")
@pytest.mark.parametrize(
"ray_start_object_store_memory", [10**9], indirect=True)
def test_driver_put_errors(ray_start_object_store_memory):
@@ -524,6 +527,7 @@ def test_driver_put_errors(ray_start_object_store_memory):
errors = wait_for_errors(error_check)
assert all(error["type"] == ray_constants.PUT_RECONSTRUCTION_PUSH_ERROR
or "ray.exceptions.UnreconstructableError" in error["message"]
for error in errors)
@@ -0,0 +1,40 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import unittest
import ray
class TestUnreconstructableErrors(unittest.TestCase):
    """Check that `ray.get` raises `UnreconstructableError` for lost objects.

    Every test runs against a fresh Ray instance started with deliberately
    small `object_store_memory` and `redis_max_memory` settings, and asserts
    that fetching an early object after a burst of later work raises
    `ray.exceptions.UnreconstructableError`.
    """

    def setUp(self):
        """Start Ray with both memory limits set to 10000000."""
        # NOTE(review): both limits are presumably byte counts -- confirm
        # against the `ray.init` documentation.
        ray.init(object_store_memory=10000000, redis_max_memory=10000000)

    def tearDown(self):
        """Stop the Ray instance that `setUp` started."""
        ray.shutdown()

    def testDriverPutEvictedCannotReconstruct(self):
        """A driver `ray.put` object cannot be fetched after ten more puts."""
        payload_len = 1 * 1024 * 1024

        first_id = ray.put(np.zeros(payload_len))
        # The object must be retrievable right after it was stored.
        ray.get(first_id)

        # Ten more arrays of the same size; presumably this pushes the first
        # object out of the small object store -- TODO confirm.
        for _ in range(10):
            ray.put(np.zeros(payload_len))

        with self.assertRaises(ray.exceptions.UnreconstructableError):
            ray.get(first_id)

    def testLineageEvictedReconstructionFails(self):
        """A task result cannot be fetched after 400 rounds of 50 tasks."""

        @ray.remote
        def f(data):
            return 0

        first_id = f.remote(None)
        # The result must be retrievable right after the task ran.
        ray.get(first_id)

        # 400 rounds of 50 tasks, each given a 10000-element array; presumably
        # enough bookkeeping to exceed `redis_max_memory` and evict the first
        # task's lineage -- TODO confirm.
        for _ in range(400):
            pending = [f.remote(np.zeros(10000)) for _ in range(50)]
            ray.get(pending)

        with self.assertRaises(ray.exceptions.UnreconstructableError):
            ray.get(first_id)
if __name__ == "__main__":
    # Run this module's test cases directly; verbosity=2 prints one line per
    # test.
    unittest.main(verbosity=2)