From 3aec683f6176232da5fe1aade908d839959461f0 Mon Sep 17 00:00:00 2001
From: Edward Oakes
Date: Fri, 1 May 2020 11:58:47 -0500
Subject: [PATCH] Avoid fate sharing with owner for detached actors (#8267)

---
 python/ray/serve/tests/conftest.py | 3 ---
 src/ray/raylet/node_manager.cc     | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/python/ray/serve/tests/conftest.py b/python/ray/serve/tests/conftest.py
index 5e5de4e14..2068f8c66 100644
--- a/python/ray/serve/tests/conftest.py
+++ b/python/ray/serve/tests/conftest.py
@@ -6,9 +6,6 @@ import pytest
 import ray
 from ray import serve
 
-# TODO(edoakes): the failure tests currently fail with the GCS service enabled.
-os.environ["RAY_GCS_SERVICE_ENABLED"] = "false"
-
 if os.environ.get("RAY_SERVE_INTENTIONALLY_CRASH", False):
     serve.master._CRASH_AFTER_CHECKPOINT_PROBABILITY = 0.5
 
diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc
index 1044091c4..d75152e60 100644
--- a/src/ray/raylet/node_manager.cc
+++ b/src/ray/raylet/node_manager.cc
@@ -2514,9 +2514,9 @@ void NodeManager::AssignTask(const std::shared_ptr &worker, const Task &
                                    : worker->GetTaskResourceIds());
 
   // If the owner has died since this task was queued, cancel the task by
-  // killing the worker.
-  if (failed_workers_cache_.count(owner_worker_id) > 0 ||
-      failed_nodes_cache_.count(owner_node_id) > 0) {
+  // killing the worker (unless this task is for a detached actor).
+  if (!worker->IsDetachedActor() && (failed_workers_cache_.count(owner_worker_id) > 0 ||
+                                     failed_nodes_cache_.count(owner_node_id) > 0)) {
     // TODO(swang): Skip assigning this task to this worker instead of
     // killing the worker?
     KillWorker(worker);