[Core]Remove checkpoint table (#12235)

* Delete an actor entry from node manager.

* Remove checkpoint table

* remote checkpoint interface

* remove checkpoint interface

* fix ExitActorTest

Co-authored-by: chaokunyang <shawn.ck.yang@gmail.com>
This commit is contained in:
SangBin Cho
2020-12-01 08:58:36 -08:00
committed by GitHub
parent 9021f15b2a
commit f6f3cc9af1
52 changed files with 6 additions and 2012 deletions
@@ -1,14 +1,10 @@
package io.ray.test;
import io.ray.api.ActorHandle;
import io.ray.api.Checkpointable;
import io.ray.api.Ray;
import io.ray.api.id.ActorId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.exception.RayActorException;
import io.ray.runtime.util.SystemUtil;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -73,68 +69,5 @@ public class ActorRestartTest extends BaseTest {
// We should receive a RayActorException because the actor is dead.
}
}
/**
 * A {@code Counter} that additionally implements {@code Checkpointable}.
 *
 * <p>It asks for a checkpoint when an {@link #increase()} call has brought the value to 3, and
 * records whether {@link #loadCheckpoint} has been invoked on this instance.
 */
public static class CheckpointableCounter extends Counter implements Checkpointable {

  /** Becomes {@code true} once {@link #loadCheckpoint} has run on this instance. */
  private boolean resumedFromCheckpoint = false;

  /** Set by {@link #increase()}; cleared again by {@link #shouldCheckpoint}. */
  private boolean increaseCalled = false;

  /** Returns whether {@link #loadCheckpoint} has been invoked on this instance. */
  public boolean wasResumedFromCheckpoint() {
    return resumedFromCheckpoint;
  }

  /** Marks that an increase happened, then delegates to the parent implementation. */
  @Override
  public int increase() {
    increaseCalled = true;
    return super.increase();
  }

  /**
   * Requests a checkpoint only when {@link #increase()} was called since the previous check and
   * the value is exactly 3. The "increase called" marker is always left cleared afterwards.
   */
  @Override
  public boolean shouldCheckpoint(CheckpointContext checkpointContext) {
    if (!increaseCalled) {
      // Nothing happened since the last check; the marker is already cleared.
      return false;
    }
    increaseCalled = false;
    return value == 3;
  }

  /** Deliberately does nothing. */
  @Override
  public void saveCheckpoint(ActorId actorId, UniqueId checkpointId) {
    // In practice, user should save the checkpoint id and data to a persistent store.
    // But for simplicity, we don't do that in this unit test.
  }

  /**
   * Resets the value to 3, flags this instance as resumed, and returns the id of the last entry
   * in {@code availableCheckpoints}.
   */
  @Override
  public UniqueId loadCheckpoint(ActorId actorId, List<Checkpoint> availableCheckpoints) {
    // 3 is the value at which shouldCheckpoint() requests a checkpoint.
    this.value = 3;
    this.resumedFromCheckpoint = true;
    int lastIndex = availableCheckpoints.size() - 1;
    Checkpoint chosen = availableCheckpoints.get(lastIndex);
    return chosen.checkpointId;
  }

  /** Deliberately does nothing. */
  @Override
  public void checkpointExpired(ActorId actorId, UniqueId checkpointId) {
  }
}
/**
 * Kills a {@code CheckpointableCounter} actor after three increases and checks that the
 * restarted actor reports having been resumed from a checkpoint.
 *
 * @throws IOException if the {@code kill} command cannot be started
 * @throws InterruptedException if the pause after killing the actor is interrupted
 */
public void testActorCheckpointing() throws IOException, InterruptedException {
  ActorHandle<CheckpointableCounter> counterActor =
      Ray.actor(CheckpointableCounter::new).setMaxRestarts(1).remote();

  // Call increase three times; CheckpointableCounter requests a checkpoint at value 3.
  for (int call = 1; call <= 3; call++) {
    counterActor.task(CheckpointableCounter::increase).remote().get();
  }

  // So far the actor must not have been resumed from a checkpoint.
  boolean resumedBeforeKill =
      counterActor.task(CheckpointableCounter::wasResumedFromCheckpoint).remote().get();
  Assert.assertFalse(resumedBeforeKill);

  // Kill the actor's process and give it a moment to die.
  int actorPid = counterActor.task(CheckpointableCounter::getPid).remote().get();
  Runtime.getRuntime().exec("kill -9 " + actorPid);
  TimeUnit.SECONDS.sleep(1);

  // One more increase on the same handle is expected to yield 4.
  int valueAfterRestart = counterActor.task(CheckpointableCounter::increase).remote().get();
  Assert.assertEquals(valueAfterRestart, 4);

  // This time the actor must report that it was resumed from a checkpoint.
  boolean resumedAfterKill =
      counterActor.task(CheckpointableCounter::wasResumedFromCheckpoint).remote().get();
  Assert.assertTrue(resumedAfterKill);
}
}
@@ -3,17 +3,13 @@ package io.ray.test;
import static io.ray.runtime.util.SystemUtil.pid;
import io.ray.api.ActorHandle;
import io.ray.api.Checkpointable;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.id.ActorId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.exception.RayActorException;
import io.ray.runtime.task.TaskExecutor;
import io.ray.runtime.util.SystemUtil;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.testng.Assert;
@@ -22,7 +18,7 @@ import org.testng.annotations.Test;
@Test(groups = {"cluster"})
public class ExitActorTest extends BaseTest {
private static class ExitingActor implements Checkpointable {
private static class ExitingActor {
int counter = 0;
@@ -45,26 +41,6 @@ public class ExitActorTest extends BaseTest {
}
}
/**
 * Unconditionally answers {@code true}; the supplied context is not inspected.
 */
@Override
public boolean shouldCheckpoint(CheckpointContext checkpointContext) {
return true;
}
/**
 * Empty implementation: neither argument is used and nothing is stored.
 */
@Override
public void saveCheckpoint(ActorId actorId, UniqueId checkpointId) {
}
/**
 * Dummy restore: sets {@code counter} to 1 and returns the id of the last entry in
 * {@code availableCheckpoints}.
 */
@Override
public UniqueId loadCheckpoint(ActorId actorId, List<Checkpoint> availableCheckpoints) {
  // No real state is read back; the counter is simply set to a fixed value.
  this.counter = 1;
  final int lastIndex = availableCheckpoints.size() - 1;
  return availableCheckpoints.get(lastIndex).checkpointId;
}
/**
 * Empty implementation: neither argument is used and no cleanup is performed.
 */
@Override
public void checkpointExpired(ActorId actorId, UniqueId checkpointId) {
}
public boolean exit() {
Ray.exitActor();
return false;
@@ -79,7 +55,7 @@ public class ExitActorTest extends BaseTest {
Runtime.getRuntime().exec("kill -9 " + pid);
TimeUnit.SECONDS.sleep(1);
// Make sure this actor can be reconstructed.
Assert.assertEquals(2, (int) actor.task(ExitingActor::incr).remote().get());
Assert.assertEquals(1, (int) actor.task(ExitingActor::incr).remote().get());
// `exitActor` will exit the actor without reconstructing.
ObjectRef<Boolean> obj = actor.task(ExitingActor::exit).remote();