mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 18:06:25 +08:00
[GCS Actor Management] Race condition around creating -> created phase. (#10035)
* Fix the issue.
* Address a code review.
This commit is contained in:
@@ -891,6 +891,14 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
|
||||
}
|
||||
actor->UpdateState(rpc::ActorTableData::ALIVE);
|
||||
auto actor_table_data = actor->GetActorTableData();
|
||||
|
||||
// We should register the entry to the in-memory index before flushing them to
|
||||
// GCS because otherwise, there could be timing problems due to asynchronous Put.
|
||||
auto worker_id = actor->GetWorkerID();
|
||||
auto node_id = actor->GetNodeID();
|
||||
RAY_CHECK(!worker_id.IsNil());
|
||||
RAY_CHECK(!node_id.IsNil());
|
||||
RAY_CHECK(created_actors_[node_id].emplace(worker_id, actor_id).second);
|
||||
// The backend storage is reliable in the future, so the status must be ok.
|
||||
RAY_CHECK_OK(gcs_table_storage_->ActorTable().Put(
|
||||
actor_id, actor_table_data,
|
||||
@@ -898,7 +906,6 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
|
||||
RAY_CHECK_OK(gcs_pub_sub_->Publish(ACTOR_CHANNEL, actor_id.Hex(),
|
||||
actor_table_data.SerializeAsString(),
|
||||
nullptr));
|
||||
|
||||
// Invoke all callbacks for all registration requests of this actor (duplicated
|
||||
// requests are included) and remove all of them from
|
||||
// actor_to_create_callbacks_.
|
||||
@@ -909,12 +916,6 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
|
||||
}
|
||||
actor_to_create_callbacks_.erase(iter);
|
||||
}
|
||||
|
||||
auto worker_id = actor->GetWorkerID();
|
||||
auto node_id = actor->GetNodeID();
|
||||
RAY_CHECK(!worker_id.IsNil());
|
||||
RAY_CHECK(!node_id.IsNil());
|
||||
RAY_CHECK(created_actors_[node_id].emplace(worker_id, actor_id).second);
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user