mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 14:05:08 +08:00
Rename max_reconstructions to max_restarts and use -1 for infinite (#8274)
Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
This commit is contained in:
@@ -18,9 +18,9 @@ public interface BaseActor {
|
||||
* Kill the actor immediately. This will cause any outstanding tasks submitted to the actor to
|
||||
* fail and the actor to exit in the same way as if it crashed.
|
||||
*
|
||||
* @param noReconstruction If set to true, the killed actor will not be reconstructed anymore.
|
||||
* @param noRestart If set to true, the killed actor will not be restarted anymore.
|
||||
*/
|
||||
default void kill(boolean noReconstruction) {
|
||||
Ray.internal().killActor(this, noReconstruction);
|
||||
default void kill(boolean noRestart) {
|
||||
Ray.internal().killActor(this, noRestart);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ public interface Checkpointable {
|
||||
/**
|
||||
* Load actor's previous checkpoint, and restore actor's state.
|
||||
*
|
||||
* This method will be called when an actor is reconstructed, after actor's constructor. If the
|
||||
* This method will be called when an actor is restarted, after the actor's constructor. If the
|
||||
* actor needs to restore from previous checkpoint, this function should restore actor's state and
|
||||
* return the checkpoint ID. Otherwise, it should do nothing and return null.
|
||||
*
|
||||
|
||||
@@ -4,7 +4,7 @@ import io.ray.api.id.ObjectId;
|
||||
|
||||
/**
|
||||
* Indicates that an object is lost (either evicted or explicitly deleted) and cannot be
|
||||
* reconstructed.
|
||||
* restarted.
|
||||
*
|
||||
* Note, this exception only happens for actor objects. If actor's current state is after object's
|
||||
* creating task, the actor cannot re-run the task to reconstruct the object.
|
||||
|
||||
@@ -7,20 +7,16 @@ import java.util.Map;
|
||||
* The options for creating actor.
|
||||
*/
|
||||
public class ActorCreationOptions extends BaseTaskOptions {
|
||||
|
||||
public static final int NO_RECONSTRUCTION = 0;
|
||||
public static final int INFINITE_RECONSTRUCTION = (int) Math.pow(2, 30);
|
||||
|
||||
public final int maxReconstructions;
|
||||
public final int maxRestarts;
|
||||
|
||||
public final String jvmOptions;
|
||||
|
||||
public final int maxConcurrency;
|
||||
|
||||
private ActorCreationOptions(Map<String, Double> resources, int maxReconstructions,
|
||||
private ActorCreationOptions(Map<String, Double> resources, int maxRestarts,
|
||||
String jvmOptions, int maxConcurrency) {
|
||||
super(resources);
|
||||
this.maxReconstructions = maxReconstructions;
|
||||
this.maxRestarts = maxRestarts;
|
||||
this.jvmOptions = jvmOptions;
|
||||
this.maxConcurrency = maxConcurrency;
|
||||
}
|
||||
@@ -31,7 +27,7 @@ public class ActorCreationOptions extends BaseTaskOptions {
|
||||
public static class Builder {
|
||||
|
||||
private Map<String, Double> resources = new HashMap<>();
|
||||
private int maxReconstructions = NO_RECONSTRUCTION;
|
||||
private int maxRestarts = 0;
|
||||
private String jvmOptions = null;
|
||||
private int maxConcurrency = 1;
|
||||
|
||||
@@ -40,8 +36,8 @@ public class ActorCreationOptions extends BaseTaskOptions {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setMaxReconstructions(int maxReconstructions) {
|
||||
this.maxReconstructions = maxReconstructions;
|
||||
public Builder setMaxRestarts(int maxRestarts) {
|
||||
this.maxRestarts = maxRestarts;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -65,7 +61,7 @@ public class ActorCreationOptions extends BaseTaskOptions {
|
||||
|
||||
public ActorCreationOptions createActorCreationOptions() {
|
||||
return new ActorCreationOptions(
|
||||
resources, maxReconstructions, jvmOptions, maxConcurrency);
|
||||
resources, maxRestarts, jvmOptions, maxConcurrency);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -86,9 +86,9 @@ public interface RayRuntime {
|
||||
* Kill the actor immediately.
|
||||
*
|
||||
* @param actor The actor to be killed.
|
||||
* @param noReconstruction If set to true, the killed actor will not be reconstructed anymore.
|
||||
* @param noRestart If set to true, the killed actor will not be restarted anymore.
|
||||
*/
|
||||
void killActor(BaseActor actor, boolean noReconstruction);
|
||||
void killActor(BaseActor actor, boolean noRestart);
|
||||
|
||||
/**
|
||||
* Invoke a remote function.
|
||||
|
||||
@@ -22,11 +22,11 @@ public interface RuntimeContext {
|
||||
ActorId getCurrentActorId();
|
||||
|
||||
/**
|
||||
* Returns true if the current actor was reconstructed, false if it's created for the first time.
|
||||
* Returns true if the current actor was restarted, false if it's created for the first time.
|
||||
*
|
||||
* Note, this method should only be called from an actor creation task.
|
||||
*/
|
||||
boolean wasCurrentActorReconstructed();
|
||||
boolean wasCurrentActorRestarted();
|
||||
|
||||
/**
|
||||
* Get the raylet socket name.
|
||||
|
||||
@@ -62,7 +62,7 @@ public class RayDevRuntime extends AbstractRayRuntime {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void killActor(BaseActor actor, boolean noReconstruction) {
|
||||
public void killActor(BaseActor actor, boolean noRestart) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
||||
@@ -126,8 +126,8 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void killActor(BaseActor actor, boolean noReconstruction) {
|
||||
nativeKillActor(actor.getId().getBytes(), noReconstruction);
|
||||
public void killActor(BaseActor actor, boolean noRestart) {
|
||||
nativeKillActor(actor.getId().getBytes(), noRestart);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -160,7 +160,7 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
|
||||
|
||||
private static native void nativeSetResource(String resourceName, double capacity, byte[] nodeId);
|
||||
|
||||
private static native void nativeKillActor(byte[] actorId, boolean noReconstruction);
|
||||
private static native void nativeKillActor(byte[] actorId, boolean noRestart);
|
||||
|
||||
private static native void nativeSetCoreWorker(byte[] workerId);
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ public class RuntimeContextImpl implements RuntimeContext {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean wasCurrentActorReconstructed() {
|
||||
public boolean wasCurrentActorRestarted() {
|
||||
TaskType currentTaskType = runtime.getWorkerContext().getCurrentTaskType();
|
||||
Preconditions.checkState(currentTaskType == TaskType.ACTOR_CREATION_TASK,
|
||||
"This method can only be called from an actor creation task.");
|
||||
@@ -40,7 +40,7 @@ public class RuntimeContextImpl implements RuntimeContext {
|
||||
return false;
|
||||
}
|
||||
|
||||
return runtime.getGcsClient().wasCurrentActorReconstructed(getCurrentActorId());
|
||||
return runtime.getGcsClient().wasCurrentActorRestarted(getCurrentActorId());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -125,7 +125,7 @@ public class GcsClient {
|
||||
return primary.exists(key);
|
||||
}
|
||||
|
||||
public boolean wasCurrentActorReconstructed(ActorId actorId) {
|
||||
public boolean wasCurrentActorRestarted(ActorId actorId) {
|
||||
byte[] key = ArrayUtils.addAll(TablePrefix.ACTOR.toString().getBytes(), actorId.getBytes());
|
||||
if (!RayConfig.getInstance().gcsServiceEnabled) {
|
||||
return primary.exists(key);
|
||||
@@ -142,10 +142,7 @@ public class GcsClient {
|
||||
} catch (InvalidProtocolBufferException e) {
|
||||
throw new RuntimeException("Received invalid protobuf data from GCS.");
|
||||
}
|
||||
|
||||
long maxReconstructions = actorTableData.getMaxReconstructions();
|
||||
long remainingReconstructions = actorTableData.getRemainingReconstructions();
|
||||
return maxReconstructions - remainingReconstructions != 0;
|
||||
return actorTableData.getNumRestarts() != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
+10
-10
@@ -16,20 +16,20 @@ import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test
|
||||
public class ActorReconstructionTest extends BaseTest {
|
||||
public class ActorRestartTest extends BaseTest {
|
||||
|
||||
public static class Counter {
|
||||
|
||||
protected int value = 0;
|
||||
|
||||
private boolean wasCurrentActorReconstructed = false;
|
||||
private boolean wasCurrentActorRestarted = false;
|
||||
|
||||
public Counter() {
|
||||
wasCurrentActorReconstructed = Ray.getRuntimeContext().wasCurrentActorReconstructed();
|
||||
wasCurrentActorRestarted = Ray.getRuntimeContext().wasCurrentActorRestarted();
|
||||
}
|
||||
|
||||
public boolean wasCurrentActorReconstructed() {
|
||||
return wasCurrentActorReconstructed;
|
||||
public boolean wasCurrentActorRestarted() {
|
||||
return wasCurrentActorRestarted;
|
||||
}
|
||||
|
||||
public int increase() {
|
||||
@@ -42,17 +42,17 @@ public class ActorReconstructionTest extends BaseTest {
|
||||
}
|
||||
}
|
||||
|
||||
public void testActorReconstruction() throws InterruptedException, IOException {
|
||||
public void testActorRestart() throws InterruptedException, IOException {
|
||||
TestUtils.skipTestUnderSingleProcess();
|
||||
ActorCreationOptions options =
|
||||
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
|
||||
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
|
||||
RayActor<Counter> actor = Ray.createActor(Counter::new, options);
|
||||
// Call increase 3 times.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
actor.call(Counter::increase).get();
|
||||
}
|
||||
|
||||
Assert.assertFalse(actor.call(Counter::wasCurrentActorReconstructed).get());
|
||||
Assert.assertFalse(actor.call(Counter::wasCurrentActorRestarted).get());
|
||||
|
||||
// Kill the actor process.
|
||||
int pid = actor.call(Counter::getPid).get();
|
||||
@@ -63,7 +63,7 @@ public class ActorReconstructionTest extends BaseTest {
|
||||
int value = actor.call(Counter::increase).get();
|
||||
Assert.assertEquals(value, 1);
|
||||
|
||||
Assert.assertTrue(actor.call(Counter::wasCurrentActorReconstructed).get());
|
||||
Assert.assertTrue(actor.call(Counter::wasCurrentActorRestarted).get());
|
||||
|
||||
// Kill the actor process again.
|
||||
pid = actor.call(Counter::getPid).get();
|
||||
@@ -124,7 +124,7 @@ public class ActorReconstructionTest extends BaseTest {
|
||||
public void testActorCheckpointing() throws IOException, InterruptedException {
|
||||
TestUtils.skipTestUnderSingleProcess();
|
||||
ActorCreationOptions options =
|
||||
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
|
||||
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
|
||||
RayActor<CheckpointableCounter> actor = Ray.createActor(CheckpointableCounter::new, options);
|
||||
// Call increase 3 times.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
@@ -41,45 +41,45 @@ public class KillActorTest extends BaseTest {
|
||||
|
||||
public static class KillerActor {
|
||||
|
||||
public void kill(RayActor<?> actor, boolean noReconstruction) {
|
||||
actor.kill(noReconstruction);
|
||||
public void kill(RayActor<?> actor, boolean noRestart) {
|
||||
actor.kill(noRestart);
|
||||
}
|
||||
}
|
||||
|
||||
private static void localKill(RayActor<?> actor, boolean noReconstruction) {
|
||||
actor.kill(noReconstruction);
|
||||
private static void localKill(RayActor<?> actor, boolean noRestart) {
|
||||
actor.kill(noRestart);
|
||||
}
|
||||
|
||||
private static void remoteKill(RayActor<?> actor, boolean noReconstruction) {
|
||||
private static void remoteKill(RayActor<?> actor, boolean noRestart) {
|
||||
RayActor<KillerActor> killer = Ray.createActor(KillerActor::new);
|
||||
killer.call(KillerActor::kill, actor, noReconstruction);
|
||||
killer.call(KillerActor::kill, actor, noRestart);
|
||||
}
|
||||
|
||||
private void testKillActor(BiConsumer<RayActor<?>, Boolean> kill, boolean noReconstruction) {
|
||||
private void testKillActor(BiConsumer<RayActor<?>, Boolean> kill, boolean noRestart) {
|
||||
TestUtils.skipTestUnderSingleProcess();
|
||||
|
||||
ActorCreationOptions options =
|
||||
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
|
||||
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
|
||||
RayActor<HangActor> actor = Ray.createActor(HangActor::new, options);
|
||||
RayObject<Boolean> result = actor.call(HangActor::hang);
|
||||
// The actor will hang in this task.
|
||||
Assert.assertEquals(0, Ray.wait(ImmutableList.of(result), 1, 500).getReady().size());
|
||||
|
||||
// Kill the actor
|
||||
kill.accept(actor, noReconstruction);
|
||||
kill.accept(actor, noRestart);
|
||||
// The get operation will fail with RayActorException
|
||||
Assert.expectThrows(RayActorException.class, result::get);
|
||||
|
||||
try {
|
||||
// Sleep 1s here to make sure the driver has received the actor notification
|
||||
// (of state RECONSTRUCTING or DEAD).
|
||||
// (of state RESTARTING or DEAD).
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
if (noReconstruction) {
|
||||
// The actor should not be reconstructed.
|
||||
if (noRestart) {
|
||||
// The actor should not be restarted.
|
||||
Assert.expectThrows(RayActorException.class, () -> actor.call(HangActor::hang).get());
|
||||
} else {
|
||||
Assert.assertEquals(actor.call(HangActor::ping).get(), "pong");
|
||||
|
||||
Reference in New Issue
Block a user