[Java] Release actor instance reference when Ray.exitActor() is invoked (#11324)

This commit is contained in:
Kai Yang
2020-10-14 13:12:59 +08:00
committed by GitHub
parent c926838411
commit abc6126814
11 changed files with 71 additions and 0 deletions
@@ -282,6 +282,11 @@ public abstract class AbstractRayRuntime implements RayRuntimeInternal {
return objectStore;
}
/** Returns the {@code taskExecutor} held by this runtime. */
@Override
public TaskExecutor getTaskExecutor() {
  return this.taskExecutor;
}
@Override
public FunctionManager getFunctionManager() {
return functionManager;
@@ -6,6 +6,7 @@ import io.ray.runtime.context.WorkerContext;
import io.ray.runtime.functionmanager.FunctionManager;
import io.ray.runtime.gcs.GcsClient;
import io.ray.runtime.object.ObjectStore;
import io.ray.runtime.task.TaskExecutor;
/**
* This interface is required to make {@link RayRuntimeProxy} work.
@@ -21,6 +22,8 @@ public interface RayRuntimeInternal extends RayRuntime {
/** Returns the {@link ObjectStore} of this runtime. */
ObjectStore getObjectStore();
/** Returns the {@link TaskExecutor} of this runtime. */
TaskExecutor getTaskExecutor();
/** Returns the {@link FunctionManager} of this runtime. */
FunctionManager getFunctionManager();
/** Returns the {@link RayConfig} of this runtime. */
RayConfig getRayConfig();
@@ -45,6 +45,11 @@ public class NativeTaskExecutor extends TaskExecutor<NativeTaskExecutor.NativeAc
return new NativeActorContext();
}
/**
 * Drops the actor context registered for a worker that is shutting down.
 *
 * @param workerIdBytes raw bytes from which the worker's {@link UniqueId} is built
 */
public void onWorkerShutdown(byte[] workerIdBytes) {
  // Releasing the context here prevents a memory leak when `Ray.exitActor()` is called.
  final UniqueId workerId = new UniqueId(workerIdBytes);
  removeActorContext(workerId);
}
@Override
protected void maybeSaveCheckpoint(Object actor, ActorId actorId) {
if (!(actor instanceof Checkpointable)) {
@@ -65,6 +65,10 @@ public abstract class TaskExecutor<T extends TaskExecutor.ActorContext> {
this.actorContextMap.put(runtime.getWorkerContext().getCurrentWorkerId(), actorContext);
}
/**
 * Removes the entry stored in {@code actorContextMap} under the given worker id, if any.
 *
 * @param workerId key of the actor context to discard
 */
protected void removeActorContext(UniqueId workerId) {
  actorContextMap.remove(workerId);
}
private RayFunction getRayFunction(List<String> rayFunctionInfo) {
JobId jobId = runtime.getWorkerContext().getCurrentJobId();
JavaFunctionDescriptor functionDescriptor = parseFunctionDescriptor(rayFunctionInfo);
@@ -9,9 +9,12 @@ import io.ray.api.Ray;
import io.ray.api.id.ActorId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.exception.RayActorException;
import io.ray.runtime.task.TaskExecutor;
import io.ray.runtime.util.SystemUtil;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -31,6 +34,17 @@ public class ExitActorTest extends BaseTest {
return pid();
}
/**
 * Reports how many entries the task executor's {@code actorContextMap} currently holds.
 *
 * <p>The field is looked up by name on {@link TaskExecutor} via reflection and made
 * accessible before it is read.
 *
 * @return the size of the {@code actorContextMap}
 * @throws RuntimeException if the field cannot be found or accessed reflectively
 */
public int getSizeOfActorContextMap() {
  // Wildcard instead of a raw type: the concrete ActorContext parameter is irrelevant here.
  TaskExecutor<?> taskExecutor = TestUtils.getRuntime().getTaskExecutor();
  try {
    Field field = TaskExecutor.class.getDeclaredField("actorContextMap");
    field.setAccessible(true);
    return ((Map<?, ?>) field.get(taskExecutor)).size();
  } catch (ReflectiveOperationException e) {
    // Only reflective lookup/access failures are wrapped; the cause is preserved.
    throw new RuntimeException("Failed to read TaskExecutor.actorContextMap via reflection", e);
  }
}
@Override
public boolean shouldCheckpoint(CheckpointContext checkpointContext) {
return true;
@@ -77,6 +91,8 @@ public class ExitActorTest extends BaseTest {
ActorHandle<ExitingActor> actor1 = Ray.actor(ExitingActor::new)
.setMaxRestarts(10000).remote();
int pid = actor1.task(ExitingActor::getPid).remote().get();
Assert.assertEquals(
1, (int) actor1.task(ExitingActor::getSizeOfActorContextMap).remote().get());
ActorHandle<ExitingActor> actor2;
while (true) {
// Create another actor which shares the same process as actor 1.
@@ -86,11 +102,17 @@ public class ExitActorTest extends BaseTest {
break;
}
}
Assert.assertEquals(
2, (int) actor1.task(ExitingActor::getSizeOfActorContextMap).remote().get());
Assert.assertEquals(
2, (int) actor2.task(ExitingActor::getSizeOfActorContextMap).remote().get());
ObjectRef<Boolean> obj1 = actor1.task(ExitingActor::exit).remote();
Assert.assertThrows(RayActorException.class, obj1::get);
Assert.assertTrue(SystemUtil.isProcessAlive(pid));
// Actor 2 shouldn't exit or be reconstructed.
Assert.assertEquals(1, (int) actor2.task(ExitingActor::incr).remote().get());
Assert.assertEquals(
1, (int) actor2.task(ExitingActor::getSizeOfActorContextMap).remote().get());
Assert.assertEquals(pid, (int) actor2.task(ExitingActor::getPid).remote().get());
Assert.assertTrue(SystemUtil.isProcessAlive(pid));
}