mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 01:09:50 +08:00
[Core]Remove checkpoint table (#12235)
* Delete an actor entry from node manager. * Remove checkpoint table * remote checkpoint interface * remove checkpoint interface * fix ExitActorTest Co-authored-by: chaokunyang <shawn.ck.yang@gmail.com>
This commit is contained in:
@@ -1,14 +1,10 @@
|
||||
package io.ray.test;
|
||||
|
||||
import io.ray.api.ActorHandle;
|
||||
import io.ray.api.Checkpointable;
|
||||
import io.ray.api.Ray;
|
||||
import io.ray.api.id.ActorId;
|
||||
import io.ray.api.id.UniqueId;
|
||||
import io.ray.runtime.exception.RayActorException;
|
||||
import io.ray.runtime.util.SystemUtil;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
@@ -73,68 +69,5 @@ public class ActorRestartTest extends BaseTest {
|
||||
// We should receive a RayActorException because the actor is dead.
|
||||
}
|
||||
}
|
||||
|
||||
public static class CheckpointableCounter extends Counter implements Checkpointable {
|
||||
|
||||
private boolean resumedFromCheckpoint = false;
|
||||
private boolean increaseCalled = false;
|
||||
|
||||
@Override
|
||||
public int increase() {
|
||||
increaseCalled = true;
|
||||
return super.increase();
|
||||
}
|
||||
|
||||
public boolean wasResumedFromCheckpoint() {
|
||||
return resumedFromCheckpoint;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldCheckpoint(CheckpointContext checkpointContext) {
|
||||
// Checkpoint the actor when value is increased to 3.
|
||||
boolean shouldCheckpoint = increaseCalled && value == 3;
|
||||
increaseCalled = false;
|
||||
return shouldCheckpoint;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void saveCheckpoint(ActorId actorId, UniqueId checkpointId) {
|
||||
// In practice, user should save the checkpoint id and data to a persistent store.
|
||||
// But for simplicity, we don't do that in this unit test.
|
||||
}
|
||||
|
||||
@Override
|
||||
public UniqueId loadCheckpoint(ActorId actorId, List<Checkpoint> availableCheckpoints) {
|
||||
// Restore previous value and return checkpoint id.
|
||||
this.value = 3;
|
||||
this.resumedFromCheckpoint = true;
|
||||
return availableCheckpoints.get(availableCheckpoints.size() - 1).checkpointId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkpointExpired(ActorId actorId, UniqueId checkpointId) {
|
||||
}
|
||||
}
|
||||
|
||||
public void testActorCheckpointing() throws IOException, InterruptedException {
|
||||
ActorHandle<CheckpointableCounter> actor = Ray.actor(CheckpointableCounter::new)
|
||||
.setMaxRestarts(1).remote();
|
||||
// Call increase 3 times.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
actor.task(CheckpointableCounter::increase).remote().get();
|
||||
}
|
||||
// Assert that the actor wasn't resumed from a checkpoint.
|
||||
Assert.assertFalse(actor.task(CheckpointableCounter::wasResumedFromCheckpoint).remote().get());
|
||||
int pid = actor.task(CheckpointableCounter::getPid).remote().get();
|
||||
Runtime.getRuntime().exec("kill -9 " + pid);
|
||||
// Wait for the actor to be killed.
|
||||
TimeUnit.SECONDS.sleep(1);
|
||||
|
||||
// Try calling increase on this actor again and check the value is now 4.
|
||||
int value = actor.task(CheckpointableCounter::increase).remote().get();
|
||||
Assert.assertEquals(value, 4);
|
||||
// Assert that the actor was resumed from a checkpoint.
|
||||
Assert.assertTrue(actor.task(CheckpointableCounter::wasResumedFromCheckpoint).remote().get());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,17 +3,13 @@ package io.ray.test;
|
||||
import static io.ray.runtime.util.SystemUtil.pid;
|
||||
|
||||
import io.ray.api.ActorHandle;
|
||||
import io.ray.api.Checkpointable;
|
||||
import io.ray.api.ObjectRef;
|
||||
import io.ray.api.Ray;
|
||||
import io.ray.api.id.ActorId;
|
||||
import io.ray.api.id.UniqueId;
|
||||
import io.ray.runtime.exception.RayActorException;
|
||||
import io.ray.runtime.task.TaskExecutor;
|
||||
import io.ray.runtime.util.SystemUtil;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.testng.Assert;
|
||||
@@ -22,7 +18,7 @@ import org.testng.annotations.Test;
|
||||
@Test(groups = {"cluster"})
|
||||
public class ExitActorTest extends BaseTest {
|
||||
|
||||
private static class ExitingActor implements Checkpointable {
|
||||
private static class ExitingActor {
|
||||
|
||||
int counter = 0;
|
||||
|
||||
@@ -45,26 +41,6 @@ public class ExitActorTest extends BaseTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldCheckpoint(CheckpointContext checkpointContext) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void saveCheckpoint(ActorId actorId, UniqueId checkpointId) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public UniqueId loadCheckpoint(ActorId actorId, List<Checkpoint> availableCheckpoints) {
|
||||
// Dummy load checkpoint.
|
||||
this.counter = 1;
|
||||
return availableCheckpoints.get(availableCheckpoints.size() - 1).checkpointId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkpointExpired(ActorId actorId, UniqueId checkpointId) {
|
||||
}
|
||||
|
||||
public boolean exit() {
|
||||
Ray.exitActor();
|
||||
return false;
|
||||
@@ -79,7 +55,7 @@ public class ExitActorTest extends BaseTest {
|
||||
Runtime.getRuntime().exec("kill -9 " + pid);
|
||||
TimeUnit.SECONDS.sleep(1);
|
||||
// Make sure this actor can be reconstructed.
|
||||
Assert.assertEquals(2, (int) actor.task(ExitingActor::incr).remote().get());
|
||||
Assert.assertEquals(1, (int) actor.task(ExitingActor::incr).remote().get());
|
||||
|
||||
// `exitActor` will exit the actor without reconstructing.
|
||||
ObjectRef<Boolean> obj = actor.task(ExitingActor::exit).remote();
|
||||
|
||||
Reference in New Issue
Block a user