From f093ed1fc6b2421dde00dc5fa00a800989c12179 Mon Sep 17 00:00:00 2001 From: Stephanie Wang Date: Thu, 9 Aug 2018 14:46:46 -0700 Subject: [PATCH] [xray] Fix crash in case of spurious reconstruction (#2609) * Exit if task already queued * address comments --- src/ray/raylet/node_manager.cc | 7 +++++++ src/ray/raylet/scheduling_queue.cc | 7 +++++++ src/ray/raylet/scheduling_queue.h | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 441a58ed6..31b037b5c 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -824,6 +824,13 @@ void NodeManager::TreatTaskAsFailed(const TaskSpecification &spec) { void NodeManager::SubmitTask(const Task &task, const Lineage &uncommitted_lineage, bool forwarded) { + if (local_queues_.HasTask(task.GetTaskSpecification().TaskId())) { + RAY_LOG(WARNING) << "Submitted task " << task.GetTaskSpecification().TaskId() + << " is already queued and will not be reconstructed. This is most " + "likely due to spurious reconstruction."; + return; + } + // Add the task and its uncommitted lineage to the lineage cache. lineage_cache_.AddWaitingTask(task, uncommitted_lineage); diff --git a/src/ray/raylet/scheduling_queue.cc b/src/ray/raylet/scheduling_queue.cc index 1fd9f59c5..7458aedb3 100644 --- a/src/ray/raylet/scheduling_queue.cc +++ b/src/ray/raylet/scheduling_queue.cc @@ -223,6 +223,13 @@ void SchedulingQueue::QueueMethodsWaitingForActorCreation( QueueTasks(methods_waiting_for_actor_creation_, tasks); } +bool SchedulingQueue::HasTask(const TaskID &task_id) const { + return (methods_waiting_for_actor_creation_.HasTask(task_id) || + waiting_tasks_.HasTask(task_id) || placeable_tasks_.HasTask(task_id) || + ready_tasks_.HasTask(task_id) || running_tasks_.HasTask(task_id) || + blocked_tasks_.HasTask(task_id)); +} + void SchedulingQueue::QueueWaitingTasks(const std::vector &tasks) { QueueTasks(waiting_tasks_, tasks); } diff --git a/src/ray/raylet/scheduling_queue.h b/src/ray/raylet/scheduling_queue.h index 7845dfb6a..13dc903e2 100644 --- a/src/ray/raylet/scheduling_queue.h +++ b/src/ray/raylet/scheduling_queue.h @@ -26,6 +26,12 @@ class SchedulingQueue { /// SchedulingQueue destructor. virtual ~SchedulingQueue() {} + /// \brief Check if the queue contains a specific task id. + /// + /// \param task_id The task ID for the task. + /// \return Whether the task_id exists in the queue. + bool HasTask(const TaskID &task_id) const; + /// Get the queue of tasks that are destined for actors that have not yet /// been created. ///