Propagate backend error to worker (#4039)

This commit is contained in:
Hao Chen
2019-02-16 11:39:15 +08:00
committed by GitHub
parent 4be3d0c5d3
commit de17443dc2
21 changed files with 635 additions and 258 deletions
@@ -10,6 +10,7 @@ import org.ray.api.Checkpointable;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.annotation.RayRemote;
import org.ray.api.exception.RayActorException;
import org.ray.api.id.UniqueId;
import org.ray.api.options.ActorCreationOptions;
import org.testng.Assert;
@@ -60,11 +61,8 @@ public class ActorReconstructionTest extends BaseTest {
try {
Ray.call(Counter::increase, actor).get();
Assert.fail("The above task didn't fail.");
} catch (StringIndexOutOfBoundsException e) {
// Raylet backend will put invalid data in task's result to indicate the task has failed.
// Thus, Java deserialization will fail and throw `StringIndexOutOfBoundsException`.
// TODO(hchen): we should use object's metadata to indicate task failure,
// instead of throwing this exception.
} catch (RayActorException e) {
// We should receive a RayActorException because the actor is dead.
}
}
@@ -1,11 +1,16 @@
package org.ray.api.test;
import com.google.common.collect.ImmutableList;
import java.util.concurrent.TimeUnit;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.RayObject;
import org.ray.api.annotation.RayRemote;
import org.ray.api.exception.UnreconstructableException;
import org.ray.api.id.UniqueId;
import org.ray.runtime.AbstractRayRuntime;
import org.ray.runtime.RayActorImpl;
import org.ray.runtime.objectstore.ObjectStoreProxy.GetResult;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -83,4 +88,30 @@ public class ActorTest extends BaseTest {
Assert.assertEquals(Integer.valueOf(103), Ray.call(Counter::increase, counter2, 2).get());
}
/**
 * Verifies that getting an actor task's return object after it has been freed from the
 * object store throws an {@link UnreconstructableException} carrying that object's id.
 *
 * @throws InterruptedException if the thread is interrupted while polling for the deletion.
 */
@Test
public void testUnreconstructableActorObject() throws InterruptedException {
  RayActor<Counter> counter = Ray.createActor(Counter::new, 100);
  // Call an actor method.
  RayObject<Integer> value = Ray.call(Counter::getValue, counter);
  // Box the expected value explicitly so the (Object, Object) overload of assertEquals is
  // selected unambiguously.
  Assert.assertEquals(Integer.valueOf(100), value.get());
  // Delete the object from the object store.
  Ray.internal().free(ImmutableList.of(value.getId()), false);
  // Wait until the object is deleted, because the above free operation is async.
  // The wait is bounded so that a free that never completes fails the test instead of
  // hanging the whole suite.
  final long deadlineNanos = System.nanoTime() + TimeUnit.SECONDS.toNanos(60);
  while (true) {
    GetResult<Integer> result = ((AbstractRayRuntime)
        Ray.internal()).getObjectStoreProxy().get(value.getId(), 0);
    if (!result.exists) {
      break;
    }
    if (System.nanoTime() - deadlineNanos >= 0) {
      Assert.fail("Timed out waiting for the object to be deleted from the object store.");
    }
    TimeUnit.MILLISECONDS.sleep(100);
  }
  try {
    // Try getting the object again, this should throw an UnreconstructableException.
    value.get();
    Assert.fail("This line should not be reachable.");
  } catch (UnreconstructableException e) {
    // The exception must identify the object that could not be reconstructed.
    Assert.assertEquals(value.getId(), e.objectId);
  }
}
}
@@ -3,7 +3,9 @@ package org.ray.api.test;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.RayObject;
import org.ray.api.exception.RayException;
import org.ray.api.exception.RayActorException;
import org.ray.api.exception.RayTaskException;
import org.ray.api.exception.RayWorkerException;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -15,6 +17,11 @@ public class FailureTest extends BaseTest {
throw new RuntimeException(EXCEPTION_MESSAGE);
}
/**
 * A remote function that kills the process executing it by exiting the JVM with status -1.
 * Used by the tests in this class to simulate a worker process dying in the middle of a task.
 *
 * @return nothing in practice; the trailing return only exists to satisfy the compiler.
 */
public static int badFunc2() {
  final int exitStatus = -1;
  System.exit(exitStatus);
  return 0;
}
public static class BadActor {
public BadActor(boolean failOnCreation) {
@@ -23,17 +30,21 @@ public class FailureTest extends BaseTest {
}
}
public int func() {
public int badMethod() {
throw new RuntimeException(EXCEPTION_MESSAGE);
}
/**
 * An actor method that kills the process executing it by exiting the JVM with status -1.
 * Used by the tests in this class to simulate an actor process dying in the middle of a task.
 *
 * @return nothing in practice; the trailing return only exists to satisfy the compiler.
 */
public int badMethod2() {
  final int exitStatus = -1;
  System.exit(exitStatus);
  return 0;
}
}
private static void assertTaskFail(RayObject<?> rayObject) {
private static void assertTaskFailedWithRayTaskException(RayObject<?> rayObject) {
try {
rayObject.get();
Assert.fail("Task didn't fail.");
} catch (RayException e) {
e.printStackTrace();
} catch (RayTaskException e) {
Throwable rootCause = e.getCause();
while (rootCause.getCause() != null) {
rootCause = rootCause.getCause();
@@ -45,19 +56,49 @@ public class FailureTest extends BaseTest {
@Test
public void testNormalTaskFailure() {
assertTaskFail(Ray.call(FailureTest::badFunc));
assertTaskFailedWithRayTaskException(Ray.call(FailureTest::badFunc));
}
@Test
public void testActorCreationFailure() {
RayActor<BadActor> actor = Ray.createActor(BadActor::new, true);
assertTaskFail(Ray.call(BadActor::func, actor));
assertTaskFailedWithRayTaskException(Ray.call(BadActor::badMethod, actor));
}
@Test
public void testActorTaskFailure() {
RayActor<BadActor> actor = Ray.createActor(BadActor::new, false);
assertTaskFail(Ray.call(BadActor::func, actor));
assertTaskFailedWithRayTaskException(Ray.call(BadActor::badMethod, actor));
}
/**
 * Submits a normal task whose body exits the JVM and checks that fetching its result
 * surfaces a {@link RayWorkerException}.
 */
@Test
public void testWorkerProcessDying() {
  try {
    RayObject<?> pendingResult = Ray.call(FailureTest::badFunc2);
    pendingResult.get();
    Assert.fail("This line shouldn't be reached.");
  } catch (RayWorkerException expected) {
    // Expected: the worker process died while executing the task, so the caller
    // should receive a RayWorkerException.
  }
}
/**
 * Kills an actor's process from inside one of its methods and checks that both that call
 * and a later call to the same actor surface a {@link RayActorException}.
 */
@Test
public void testActorProcessDying() {
  final RayActor<BadActor> doomedActor = Ray.createActor(BadActor::new, false);

  try {
    RayObject<?> killingCall = Ray.call(BadActor::badMethod2, doomedActor);
    killingCall.get();
    Assert.fail("This line shouldn't be reached.");
  } catch (RayActorException expected) {
    // Expected: the actor process died while executing this task, so the caller
    // should receive a RayActorException.
  }

  try {
    RayObject<?> callAfterDeath = Ray.call(BadActor::badMethod, doomedActor);
    callAfterDeath.get();
    Assert.fail("This line shouldn't be reached.");
  } catch (RayActorException expected) {
    // Expected: a task submitted to an already-dead actor should also fail with
    // a RayActorException.
  }
}
}