From 305eaaabe9395ba49a8cc7896a29358bc023cd0d Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Tue, 11 Feb 2020 20:28:13 -0800 Subject: [PATCH] Fix hang if actor object id is returned from a task that exits (#6885) --- .gitignore | 1 + python/ray/tests/test_actor.py | 19 +++++++++++++++++++ src/ray/core_worker/core_worker.cc | 7 ++++--- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 34fc5fa99..696f59f4a 100644 --- a/.gitignore +++ b/.gitignore @@ -135,6 +135,7 @@ scripts/nodes.txt **/.pytest_cache **/.cache .benchmarks +python-driver-* # Vscode .vscode/ diff --git a/python/ray/tests/test_actor.py b/python/ray/tests/test_actor.py index f0d59c154..0a0fa8e8c 100644 --- a/python/ray/tests/test_actor.py +++ b/python/ray/tests/test_actor.py @@ -16,6 +16,25 @@ from ray.test_utils import run_string_as_driver from ray.experimental.internal_kv import _internal_kv_get, _internal_kv_put +def test_actor_exit_from_task(ray_start_regular): + @ray.remote + class Actor: + def __init__(self): + print("Actor created") + + def f(self): + return 0 + + @ray.remote + def f(): + a = Actor.remote() + x_id = a.f.remote() + return [x_id] + + x_id = ray.get(f.remote())[0] + print(ray.get(x_id)) # This should not hang. + + def test_actor_init_error_propagated(ray_start_regular): @ray.remote class Actor: diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc index 7ecf01794..554612ac2 100644 --- a/src/ray/core_worker/core_worker.cc +++ b/src/ray/core_worker/core_worker.cc @@ -288,13 +288,14 @@ void CoreWorker::SetCurrentTaskId(const TaskID &task_id) { absl::MutexLock lock(&mutex_); not_actor_task = actor_id_.IsNil(); } - // Clear all actor handles at the end of each non-actor task. if (not_actor_task && task_id.IsNil()) { absl::MutexLock lock(&actor_handles_mutex_); + // Reset the seqnos so that for the next task it start off at 0. for (const auto &handle : actor_handles_) { - RAY_CHECK_OK(gcs_client_->Actors().AsyncUnsubscribe(handle.first, nullptr)); + handle.second->Reset(); } - actor_handles_.clear(); + // TODO(ekl) we can't unsubscribe to actor notifications here due to + // https://github.com/ray-project/ray/pull/6885 } }