Rename max_reconstructions to max_restarts and use -1 for infinite (#8274)

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
This commit is contained in:
Max Fitton
2020-05-14 08:30:29 -07:00
committed by GitHub
parent 5f4c196fed
commit 00325eb2b2
71 changed files with 403 additions and 393 deletions
@@ -18,9 +18,9 @@ public interface BaseActor {
* Kill the actor immediately. This will cause any outstanding tasks submitted to the actor to
* fail and the actor to exit in the same way as if it crashed.
*
* @param noReconstruction If set to true, the killed actor will not be reconstructed anymore.
* @param noRestart If set to true, the killed actor will not be restarted anymore.
*/
default void kill(boolean noReconstruction) {
Ray.internal().killActor(this, noReconstruction);
default void kill(boolean noRestart) {
Ray.internal().killActor(this, noRestart);
}
}
@@ -73,7 +73,7 @@ public interface Checkpointable {
/**
* Load actor's previous checkpoint, and restore actor's state.
*
* This method will be called when an actor is reconstructed, after actor's constructor. If the
* This method will be called when an actor is restarted, after the actor's constructor. If the
* actor needs to restore from previous checkpoint, this function should restore actor's state and
* return the checkpoint ID. Otherwise, it should do nothing and return null.
*
@@ -4,7 +4,7 @@ import io.ray.api.id.ObjectId;
/**
* Indicates that an object is lost (either evicted or explicitly deleted) and cannot be
* reconstructed.
* reconstructed.
*
* Note, this exception only happens for actor objects. If actor's current state is after object's
* creating task, the actor cannot re-run the task to reconstruct the object.
@@ -7,20 +7,16 @@ import java.util.Map;
* The options for creating actor.
*/
public class ActorCreationOptions extends BaseTaskOptions {
public static final int NO_RECONSTRUCTION = 0;
public static final int INFINITE_RECONSTRUCTION = (int) Math.pow(2, 30);
public final int maxReconstructions;
public final int maxRestarts;
public final String jvmOptions;
public final int maxConcurrency;
private ActorCreationOptions(Map<String, Double> resources, int maxReconstructions,
private ActorCreationOptions(Map<String, Double> resources, int maxRestarts,
String jvmOptions, int maxConcurrency) {
super(resources);
this.maxReconstructions = maxReconstructions;
this.maxRestarts = maxRestarts;
this.jvmOptions = jvmOptions;
this.maxConcurrency = maxConcurrency;
}
@@ -31,7 +27,7 @@ public class ActorCreationOptions extends BaseTaskOptions {
public static class Builder {
private Map<String, Double> resources = new HashMap<>();
private int maxReconstructions = NO_RECONSTRUCTION;
private int maxRestarts = 0;
private String jvmOptions = null;
private int maxConcurrency = 1;
@@ -40,8 +36,8 @@ public class ActorCreationOptions extends BaseTaskOptions {
return this;
}
public Builder setMaxReconstructions(int maxReconstructions) {
this.maxReconstructions = maxReconstructions;
public Builder setMaxRestarts(int maxRestarts) {
this.maxRestarts = maxRestarts;
return this;
}
@@ -65,7 +61,7 @@ public class ActorCreationOptions extends BaseTaskOptions {
public ActorCreationOptions createActorCreationOptions() {
return new ActorCreationOptions(
resources, maxReconstructions, jvmOptions, maxConcurrency);
resources, maxRestarts, jvmOptions, maxConcurrency);
}
}
@@ -86,9 +86,9 @@ public interface RayRuntime {
* Kill the actor immediately.
*
* @param actor The actor to be killed.
* @param noReconstruction If set to true, the killed actor will not be reconstructed anymore.
* @param noRestart If set to true, the killed actor will not be restarted anymore.
*/
void killActor(BaseActor actor, boolean noReconstruction);
void killActor(BaseActor actor, boolean noRestart);
/**
* Invoke a remote function.
@@ -22,11 +22,11 @@ public interface RuntimeContext {
ActorId getCurrentActorId();
/**
* Returns true if the current actor was reconstructed, false if it's created for the first time.
* Returns true if the current actor was restarted, false if it's created for the first time.
*
* Note, this method should only be called from an actor creation task.
*/
boolean wasCurrentActorReconstructed();
boolean wasCurrentActorRestarted();
/**
* Get the raylet socket name.
@@ -62,7 +62,7 @@ public class RayDevRuntime extends AbstractRayRuntime {
}
@Override
public void killActor(BaseActor actor, boolean noReconstruction) {
public void killActor(BaseActor actor, boolean noRestart) {
throw new UnsupportedOperationException();
}
@@ -126,8 +126,8 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
}
@Override
public void killActor(BaseActor actor, boolean noReconstruction) {
nativeKillActor(actor.getId().getBytes(), noReconstruction);
public void killActor(BaseActor actor, boolean noRestart) {
nativeKillActor(actor.getId().getBytes(), noRestart);
}
@Override
@@ -160,7 +160,7 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
private static native void nativeSetResource(String resourceName, double capacity, byte[] nodeId);
private static native void nativeKillActor(byte[] actorId, boolean noReconstruction);
private static native void nativeKillActor(byte[] actorId, boolean noRestart);
private static native void nativeSetCoreWorker(byte[] workerId);
@@ -32,7 +32,7 @@ public class RuntimeContextImpl implements RuntimeContext {
}
@Override
public boolean wasCurrentActorReconstructed() {
public boolean wasCurrentActorRestarted() {
TaskType currentTaskType = runtime.getWorkerContext().getCurrentTaskType();
Preconditions.checkState(currentTaskType == TaskType.ACTOR_CREATION_TASK,
"This method can only be called from an actor creation task.");
@@ -40,7 +40,7 @@ public class RuntimeContextImpl implements RuntimeContext {
return false;
}
return runtime.getGcsClient().wasCurrentActorReconstructed(getCurrentActorId());
return runtime.getGcsClient().wasCurrentActorRestarted(getCurrentActorId());
}
@Override
@@ -125,7 +125,7 @@ public class GcsClient {
return primary.exists(key);
}
public boolean wasCurrentActorReconstructed(ActorId actorId) {
public boolean wasCurrentActorRestarted(ActorId actorId) {
byte[] key = ArrayUtils.addAll(TablePrefix.ACTOR.toString().getBytes(), actorId.getBytes());
if (!RayConfig.getInstance().gcsServiceEnabled) {
return primary.exists(key);
@@ -142,10 +142,7 @@ public class GcsClient {
} catch (InvalidProtocolBufferException e) {
throw new RuntimeException("Received invalid protobuf data from GCS.");
}
long maxReconstructions = actorTableData.getMaxReconstructions();
long remainingReconstructions = actorTableData.getRemainingReconstructions();
return maxReconstructions - remainingReconstructions != 0;
return actorTableData.getNumRestarts() != 0;
}
/**
@@ -16,20 +16,20 @@ import org.testng.Assert;
import org.testng.annotations.Test;
@Test
public class ActorReconstructionTest extends BaseTest {
public class ActorRestartTest extends BaseTest {
public static class Counter {
protected int value = 0;
private boolean wasCurrentActorReconstructed = false;
private boolean wasCurrentActorRestarted = false;
public Counter() {
wasCurrentActorReconstructed = Ray.getRuntimeContext().wasCurrentActorReconstructed();
wasCurrentActorRestarted = Ray.getRuntimeContext().wasCurrentActorRestarted();
}
public boolean wasCurrentActorReconstructed() {
return wasCurrentActorReconstructed;
public boolean wasCurrentActorRestarted() {
return wasCurrentActorRestarted;
}
public int increase() {
@@ -42,17 +42,17 @@ public class ActorReconstructionTest extends BaseTest {
}
}
public void testActorReconstruction() throws InterruptedException, IOException {
public void testActorRestart() throws InterruptedException, IOException {
TestUtils.skipTestUnderSingleProcess();
ActorCreationOptions options =
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
RayActor<Counter> actor = Ray.createActor(Counter::new, options);
// Call increase 3 times.
for (int i = 0; i < 3; i++) {
actor.call(Counter::increase).get();
}
Assert.assertFalse(actor.call(Counter::wasCurrentActorReconstructed).get());
Assert.assertFalse(actor.call(Counter::wasCurrentActorRestarted).get());
// Kill the actor process.
int pid = actor.call(Counter::getPid).get();
@@ -63,7 +63,7 @@ public class ActorReconstructionTest extends BaseTest {
int value = actor.call(Counter::increase).get();
Assert.assertEquals(value, 1);
Assert.assertTrue(actor.call(Counter::wasCurrentActorReconstructed).get());
Assert.assertTrue(actor.call(Counter::wasCurrentActorRestarted).get());
// Kill the actor process again.
pid = actor.call(Counter::getPid).get();
@@ -124,7 +124,7 @@ public class ActorReconstructionTest extends BaseTest {
public void testActorCheckpointing() throws IOException, InterruptedException {
TestUtils.skipTestUnderSingleProcess();
ActorCreationOptions options =
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
RayActor<CheckpointableCounter> actor = Ray.createActor(CheckpointableCounter::new, options);
// Call increase 3 times.
for (int i = 0; i < 3; i++) {
@@ -41,45 +41,45 @@ public class KillActorTest extends BaseTest {
public static class KillerActor {
public void kill(RayActor<?> actor, boolean noReconstruction) {
actor.kill(noReconstruction);
public void kill(RayActor<?> actor, boolean noRestart) {
actor.kill(noRestart);
}
}
private static void localKill(RayActor<?> actor, boolean noReconstruction) {
actor.kill(noReconstruction);
private static void localKill(RayActor<?> actor, boolean noRestart) {
actor.kill(noRestart);
}
private static void remoteKill(RayActor<?> actor, boolean noReconstruction) {
private static void remoteKill(RayActor<?> actor, boolean noRestart) {
RayActor<KillerActor> killer = Ray.createActor(KillerActor::new);
killer.call(KillerActor::kill, actor, noReconstruction);
killer.call(KillerActor::kill, actor, noRestart);
}
private void testKillActor(BiConsumer<RayActor<?>, Boolean> kill, boolean noReconstruction) {
private void testKillActor(BiConsumer<RayActor<?>, Boolean> kill, boolean noRestart) {
TestUtils.skipTestUnderSingleProcess();
ActorCreationOptions options =
new ActorCreationOptions.Builder().setMaxReconstructions(1).createActorCreationOptions();
new ActorCreationOptions.Builder().setMaxRestarts(1).createActorCreationOptions();
RayActor<HangActor> actor = Ray.createActor(HangActor::new, options);
RayObject<Boolean> result = actor.call(HangActor::hang);
// The actor will hang in this task.
Assert.assertEquals(0, Ray.wait(ImmutableList.of(result), 1, 500).getReady().size());
// Kill the actor
kill.accept(actor, noReconstruction);
kill.accept(actor, noRestart);
// The get operation will fail with RayActorException
Assert.expectThrows(RayActorException.class, result::get);
try {
// Sleep 1s here to make sure the driver has received the actor notification
// (of state RECONSTRUCTING or DEAD).
// (of state RESTARTING or DEAD).
Thread.sleep(1000);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
if (noReconstruction) {
// The actor should not be reconstructed.
if (noRestart) {
// The actor should not be restarted.
Assert.expectThrows(RayActorException.class, () -> actor.call(HangActor::hang).get());
} else {
Assert.assertEquals(actor.call(HangActor::ping).get(), "pong");