GCS-Based actor management implementation (#6763)

* add gcs actor manager

* fix test_metrics.py

* fix TestTaskInfo

* fix comment

* fix comment

* fix comment

* fix comment

* fix comment

* fix comment

* fix compile error

* fix merge error

Co-authored-by: senlin.zsl <senlin.zsl@antfin.com>
This commit is contained in:
ZhuSenlin
2020-04-14 00:48:48 +08:00
committed by GitHub
parent 1b0f6fd558
commit 4a81793ba5
38 changed files with 2636 additions and 116 deletions
@@ -40,7 +40,7 @@ public class RuntimeContextImpl implements RuntimeContext {
return false;
}
return runtime.getGcsClient().actorExists(getCurrentActorId());
return runtime.getGcsClient().wasCurrentActorReconstructed(getCurrentActorId());
}
@Override
@@ -9,6 +9,7 @@ import io.ray.api.id.JobId;
import io.ray.api.id.TaskId;
import io.ray.api.id.UniqueId;
import io.ray.api.runtimecontext.NodeInfo;
import io.ray.runtime.config.RayConfig;
import io.ray.runtime.generated.Gcs;
import io.ray.runtime.generated.Gcs.ActorCheckpointIdData;
import io.ray.runtime.generated.Gcs.GcsNodeInfo;
@@ -27,9 +28,7 @@ import org.slf4j.LoggerFactory;
* An implementation of GcsClient.
*/
public class GcsClient {
private static Logger LOGGER = LoggerFactory.getLogger(GcsClient.class);
private RedisClient primary;
private List<RedisClient> shards;
@@ -126,6 +125,29 @@ public class GcsClient {
return primary.exists(key);
}
/**
 * Query whether the given actor has been reconstructed.
 *
 * <p>When the GCS service is disabled, the result is simply whether the actor's key exists in
 * the primary Redis client. Otherwise the value stored under that key is parsed as
 * {@code Gcs.ActorTableData}, and the actor counts as reconstructed when its remaining
 * reconstructions differ from its maximum reconstructions.
 *
 * @param actorId ID of the actor to look up.
 * @return {@code true} if the actor is considered reconstructed; {@code false} otherwise,
 *     including when the GCS service is enabled and no value is stored for the actor.
 * @throws RuntimeException if the stored value cannot be parsed as {@code Gcs.ActorTableData};
 *     the underlying parse exception is attached as the cause.
 */
public boolean wasCurrentActorReconstructed(ActorId actorId) {
  byte[] key = ArrayUtils.addAll(TablePrefix.ACTOR.toString().getBytes(), actorId.getBytes());
  if (!RayConfig.getInstance().gcsServiceEnabled) {
    return primary.exists(key);
  }

  // TODO(ZhuSenlin): Get the actor table data from CoreWorker later.
  byte[] value = primary.get(key);
  if (value == null) {
    return false;
  }

  Gcs.ActorTableData actorTableData;
  try {
    actorTableData = Gcs.ActorTableData.parseFrom(value);
  } catch (InvalidProtocolBufferException e) {
    // Keep the original exception as the cause so the parse failure stays diagnosable.
    throw new RuntimeException("Received invalid protobuf data from GCS.", e);
  }

  long maxReconstructions = actorTableData.getMaxReconstructions();
  long remainingReconstructions = actorTableData.getRemainingReconstructions();
  // Any consumed reconstruction budget means the actor was restarted at least once.
  return maxReconstructions - remainingReconstructions != 0;
}
/**
* Query whether the raylet task exists in Gcs.
*/