[GCS Actor Management] Race condition around creating -> created phase. (#10035)

* Fix the issue.

* Address a code review.
This commit is contained in:
SangBin Cho
2020-08-11 12:31:27 -07:00
parent 4fc0452e7b
commit 46af99ee25
+8 -7
View File
@@ -891,6 +891,14 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
}
actor->UpdateState(rpc::ActorTableData::ALIVE);
auto actor_table_data = actor->GetActorTableData();
// We should register the entry in the in-memory index before flushing it to
// GCS because otherwise, there could be timing problems due to asynchronous Put.
auto worker_id = actor->GetWorkerID();
auto node_id = actor->GetNodeID();
RAY_CHECK(!worker_id.IsNil());
RAY_CHECK(!node_id.IsNil());
RAY_CHECK(created_actors_[node_id].emplace(worker_id, actor_id).second);
// The backend storage is expected to be reliable in the future, so the status must be ok.
RAY_CHECK_OK(gcs_table_storage_->ActorTable().Put(
actor_id, actor_table_data,
@@ -898,7 +906,6 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
RAY_CHECK_OK(gcs_pub_sub_->Publish(ACTOR_CHANNEL, actor_id.Hex(),
actor_table_data.SerializeAsString(),
nullptr));
// Invoke all callbacks for all registration requests of this actor (duplicated
// requests are included) and remove all of them from
// actor_to_create_callbacks_.
@@ -909,12 +916,6 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
}
actor_to_create_callbacks_.erase(iter);
}
auto worker_id = actor->GetWorkerID();
auto node_id = actor->GetNodeID();
RAY_CHECK(!worker_id.IsNil());
RAY_CHECK(!node_id.IsNil());
RAY_CHECK(created_actors_[node_id].emplace(worker_id, actor_id).second);
}));
}