Cross language exception (#10023)

This commit is contained in:
fyrestone
2020-08-26 10:46:05 +08:00
committed by GitHub
parent 1e99b814f0
commit 08adbb371f
30 changed files with 441 additions and 137 deletions
+33 -11
View File
@@ -23,17 +23,39 @@ from custom_directives import CustomGalleryItemDirective
# These lines added to enable Sphinx to work without installing Ray.
import mock
MOCK_MODULES = [
"blist", "gym", "gym.spaces", "psutil", "ray._raylet",
"ray.core.generated", "ray.core.generated.gcs_pb2",
"ray.core.generated.ray.protocol.Task", "scipy.signal", "scipy.stats",
"setproctitle", "tensorflow_probability", "tensorflow",
"tensorflow.contrib", "tensorflow.contrib.all_reduce", "tree",
"tensorflow.contrib.all_reduce.python", "tensorflow.contrib.layers",
"tensorflow.contrib.rnn", "tensorflow.contrib.slim", "tensorflow.core",
"tensorflow.core.util", "tensorflow.python", "tensorflow.python.client",
"tensorflow.python.util", "torch", "torch.distributed", "torch.nn",
"torch.nn.parallel", "torch.utils.data", "torch.utils.data.distributed",
"zoopt"
"blist",
"gym",
"gym.spaces",
"psutil",
"ray._raylet",
"ray.core.generated",
"ray.core.generated.common_pb2",
"ray.core.generated.gcs_pb2",
"ray.core.generated.ray.protocol.Task",
"scipy.signal",
"scipy.stats",
"setproctitle",
"tensorflow_probability",
"tensorflow",
"tensorflow.contrib",
"tensorflow.contrib.all_reduce",
"tree",
"tensorflow.contrib.all_reduce.python",
"tensorflow.contrib.layers",
"tensorflow.contrib.rnn",
"tensorflow.contrib.slim",
"tensorflow.core",
"tensorflow.core.util",
"tensorflow.python",
"tensorflow.python.client",
"tensorflow.python.util",
"torch",
"torch.distributed",
"torch.nn",
"torch.nn.parallel",
"torch.utils.data",
"torch.utils.data.distributed",
"zoopt",
]
import scipy.stats
import scipy.linalg
+1
View File
@@ -105,6 +105,7 @@ define_java_module(
":io_ray_ray_runtime",
"@maven//:com_google_code_gson_gson",
"@maven//:com_google_guava_guava",
"@maven//:com_google_protobuf_protobuf_java",
"@maven//:com_sun_xml_bind_jaxb_core",
"@maven//:com_sun_xml_bind_jaxb_impl",
"@maven//:commons_io_commons_io",
@@ -1,15 +0,0 @@
package io.ray.api.exception;
/**
* Base class of all ray exceptions.
*/
public class RayException extends RuntimeException {
public RayException(String message) {
super(message);
}
public RayException(String message, Throwable cause) {
super(message, cause);
}
}
@@ -1,15 +0,0 @@
package io.ray.api.exception;
/**
* Indicates that a task threw an exception during execution.
*
* If a task throws an exception during execution, a RayTaskException is stored in the object store
* as the task's output. Then when the object is retrieved from the object store, this exception
* will be thrown and propagate the error message.
*/
public class RayTaskException extends RayException {
public RayTaskException(String message, Throwable cause) {
super(message, cause);
}
}
@@ -8,7 +8,6 @@ import io.ray.api.BaseActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.PyActorHandle;
import io.ray.api.WaitResult;
import io.ray.api.exception.RayException;
import io.ray.api.function.PyActorClass;
import io.ray.api.function.PyActorMethod;
import io.ray.api.function.PyFunction;
@@ -81,7 +80,7 @@ public abstract class AbstractRayRuntime implements RayRuntimeInternal {
}
@Override
public <T> T get(ObjectRef<T> objectRef) throws RayException {
public <T> T get(ObjectRef<T> objectRef) throws RuntimeException {
List<T> ret = get(ImmutableList.of(objectRef));
return ret.get(0);
}
@@ -1,8 +1,8 @@
package io.ray.runtime;
import io.ray.api.exception.RayException;
import io.ray.api.runtime.RayRuntime;
import io.ray.runtime.config.RunMode;
import io.ray.runtime.exception.RayException;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
@@ -0,0 +1,18 @@
package io.ray.runtime.exception;
import io.ray.runtime.generated.Common.Language;
public class CrossLanguageException extends RayException {
private Language language;
public CrossLanguageException(io.ray.runtime.generated.Common.RayException exception) {
super(String.format("An exception raised from %s:\n%s", exception.getLanguage().name(),
exception.getFormattedExceptionString()));
this.language = exception.getLanguage();
}
public Language getLanguage() {
return this.language;
}
}
@@ -1,4 +1,4 @@
package io.ray.api.exception;
package io.ray.runtime.exception;
/**
* Indicates that the actor died unexpectedly before finishing a task.
@@ -0,0 +1,40 @@
package io.ray.runtime.exception;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import io.ray.runtime.generated.Common.Language;
import io.ray.runtime.serializer.Serializer;
public class RayException extends RuntimeException {
public RayException(String message) {
super(message);
}
public RayException(String message, Throwable cause) {
super(message, cause);
}
public byte[] toBytes() {
String formattedException = org.apache.commons.lang3.exception.ExceptionUtils
.getStackTrace(this);
io.ray.runtime.generated.Common.RayException.Builder builder =
io.ray.runtime.generated.Common.RayException.newBuilder();
builder.setLanguage(Language.JAVA);
builder.setFormattedExceptionString(formattedException);
builder.setSerializedException(ByteString.copyFrom(Serializer.encode(this).getLeft()));
return builder.build().toByteArray();
}
public static RayException fromBytes(byte[] serialized)
throws InvalidProtocolBufferException {
io.ray.runtime.generated.Common.RayException exception =
io.ray.runtime.generated.Common.RayException.parseFrom(serialized);
if (exception.getLanguage() == Language.JAVA) {
return Serializer
.decode(exception.getSerializedException().toByteArray(), RayException.class);
} else {
return new CrossLanguageException(exception);
}
}
}
@@ -0,0 +1,13 @@
package io.ray.runtime.exception;
import io.ray.runtime.util.NetworkUtil;
import io.ray.runtime.util.SystemUtil;
public class RayTaskException extends RayException {
public RayTaskException(String message, Throwable cause) {
super(String.format("(pid=%d, ip=%s) %s",
SystemUtil.pid(), NetworkUtil.getIpAddress(null), message), cause);
}
}
@@ -1,4 +1,4 @@
package io.ray.api.exception;
package io.ray.runtime.exception;
/**
* Indicates that the worker died unexpectedly while executing a task.
@@ -1,4 +1,4 @@
package io.ray.api.exception;
package io.ray.runtime.exception;
import io.ray.api.id.ObjectId;
@@ -1,11 +1,12 @@
package io.ray.runtime.object;
import io.ray.api.exception.RayActorException;
import io.ray.api.exception.RayTaskException;
import io.ray.api.exception.RayWorkerException;
import io.ray.api.exception.UnreconstructableException;
import com.google.protobuf.InvalidProtocolBufferException;
import io.ray.api.id.ObjectId;
import io.ray.runtime.generated.Gcs.ErrorType;
import io.ray.runtime.exception.RayActorException;
import io.ray.runtime.exception.RayTaskException;
import io.ray.runtime.exception.RayWorkerException;
import io.ray.runtime.exception.UnreconstructableException;
import io.ray.runtime.generated.Common.ErrorType;
import io.ray.runtime.serializer.Serializer;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@@ -70,7 +71,21 @@ public class ObjectSerializer {
} else if (Arrays.equals(meta, UNRECONSTRUCTABLE_EXCEPTION_META)) {
return new UnreconstructableException(objectId);
} else if (Arrays.equals(meta, TASK_EXECUTION_EXCEPTION_META)) {
return Serializer.decode(data, objectType);
// Serialization logic of task execution exception: an instance of
// `io.ray.runtime.exception.RayTaskException`
// -> a `RayException` protobuf message
// -> protobuf-serialized bytes
// -> MessagePack-serialized bytes.
// So here the `data` variable is MessagePack-serialized bytes, and the `serialized`
// variable is protobuf-serialized bytes. They are not the same.
byte[] serialized = Serializer.decode(data, byte[].class);
try {
return RayTaskException.fromBytes(serialized);
} catch (InvalidProtocolBufferException e) {
throw new IllegalArgumentException(
"Can't deserialize RayTaskException object: " + objectId
.toString());
}
} else if (Arrays.equals(meta, OBJECT_METADATA_TYPE_PYTHON)) {
throw new IllegalArgumentException("Can't deserialize Python object: " + objectId
.toString());
@@ -107,7 +122,12 @@ public class ObjectSerializer {
}
return new NativeRayObject(bytes, OBJECT_METADATA_TYPE_RAW);
} else if (object instanceof RayTaskException) {
byte[] serializedBytes = Serializer.encode(object).getLeft();
RayTaskException taskException = (RayTaskException) object;
byte[] serializedBytes = Serializer.encode(taskException.toBytes()).getLeft();
// serializedBytes is MessagePack serialized bytes
// taskException.toBytes() is protobuf serialized bytes
// Only OBJECT_METADATA_TYPE_RAW is raw bytes,
// any other type should be the MessagePack serialized bytes.
return new NativeRayObject(serializedBytes, TASK_EXECUTION_EXCEPTION_META);
} else {
try {
@@ -3,10 +3,10 @@ package io.ray.runtime.object;
import com.google.common.base.Preconditions;
import io.ray.api.ObjectRef;
import io.ray.api.WaitResult;
import io.ray.api.exception.RayException;
import io.ray.api.id.ObjectId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.context.WorkerContext;
import io.ray.runtime.exception.RayException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -160,8 +160,7 @@ public abstract class ObjectStore {
* Delete a list of objects from the object store.
*
* @param objectIds IDs of the objects to delete.
* @param localOnly Whether only delete the objects in local node, or all nodes in the
* cluster.
* @param localOnly Whether only delete the objects in local node, or all nodes in the cluster.
* @param deleteCreatingTasks Whether also delete the tasks that created these objects.
*/
public abstract void delete(List<ObjectId> objectIds, boolean localOnly,
@@ -169,6 +168,7 @@ public abstract class ObjectStore {
/**
* Increase the local reference count for this object ID.
*
* @param workerId The ID of the worker to increase on.
* @param objectId The object ID to increase the reference count for.
*/
@@ -176,6 +176,7 @@ public abstract class ObjectStore {
/**
* Decrease the reference count for this object ID.
*
* @param workerId The ID of the worker to decrease on.
* @param objectId The object ID to decrease the reference count for.
*/
@@ -1,12 +1,12 @@
package io.ray.runtime.task;
import com.google.common.base.Preconditions;
import io.ray.api.exception.RayTaskException;
import io.ray.api.id.ActorId;
import io.ray.api.id.JobId;
import io.ray.api.id.TaskId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.RayRuntimeInternal;
import io.ray.runtime.exception.RayTaskException;
import io.ray.runtime.functionmanager.JavaFunctionDescriptor;
import io.ray.runtime.functionmanager.RayFunction;
import io.ray.runtime.generated.Common.TaskType;
@@ -3,7 +3,7 @@ package io.ray.test;
import io.ray.api.ActorHandle;
import io.ray.api.Checkpointable;
import io.ray.api.Ray;
import io.ray.api.exception.RayActorException;
import io.ray.runtime.exception.RayActorException;
import io.ray.api.id.ActorId;
import io.ray.api.id.UniqueId;
import io.ray.runtime.util.SystemUtil;
@@ -5,7 +5,7 @@ import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.PyActorHandle;
import io.ray.api.Ray;
import io.ray.api.exception.UnreconstructableException;
import io.ray.runtime.exception.UnreconstructableException;
import io.ray.api.id.ActorId;
import io.ray.api.id.UniqueId;
import java.util.Collections;
@@ -11,6 +11,9 @@ import io.ray.api.function.PyActorMethod;
import io.ray.api.function.PyFunction;
import io.ray.runtime.actor.NativeActorHandle;
import io.ray.runtime.actor.NativePyActorHandle;
import io.ray.runtime.exception.CrossLanguageException;
import io.ray.runtime.exception.RayException;
import io.ray.runtime.generated.Common.Language;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -19,14 +22,11 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
public class CrossLanguageInvocationTest extends BaseMultiLanguageTest {
private static final Logger LOGGER = LoggerFactory.getLogger(CrossLanguageInvocationTest.class);
private static final String PYTHON_MODULE = "test_cross_language_invocation";
@Override
@@ -151,7 +151,6 @@ public class CrossLanguageInvocationTest extends BaseMultiLanguageTest {
PyFunction.of(PYTHON_MODULE, "py_func_call_java_actor", byte[].class),
"1".getBytes()).remote();
Assert.assertEquals(res.get(), "Counter1".getBytes());
}
@Test
@@ -188,6 +187,91 @@ public class CrossLanguageInvocationTest extends BaseMultiLanguageTest {
Assert.assertEquals(res.get(), "3".getBytes());
}
@Test
public void testExceptionSerialization() throws IOException {
try {
throw new RayException("Test Exception");
} catch (RayException e) {
String formattedException = org.apache.commons.lang3.exception.ExceptionUtils
.getStackTrace(e);
io.ray.runtime.generated.Common.RayException exception = io.ray.runtime.generated.Common.RayException
.parseFrom(e.toBytes());
Assert.assertEquals(exception.getFormattedExceptionString(), formattedException);
}
}
@Test
public void testRaiseExceptionFromPython() {
ObjectRef<Object> res = Ray.task(PyFunction.of(
PYTHON_MODULE, "py_func_python_raise_exception", Object.class)).remote();
try {
res.get();
} catch (RuntimeException ex) {
// ex is a Python exception(py_func_python_raise_exception) with no cause.
Assert.assertTrue(ex instanceof CrossLanguageException);
CrossLanguageException e = (CrossLanguageException) ex;
Assert.assertEquals(e.getLanguage(), Language.PYTHON);
// ex.cause is null.
Assert.assertNull(ex.getCause());
Assert.assertTrue(ex.getMessage().contains("ZeroDivisionError: division by zero"),
ex.getMessage());
return;
}
Assert.fail();
}
@Test
public void testThrowExceptionFromJava() {
ObjectRef<Object> res = Ray.task(PyFunction.of(
PYTHON_MODULE, "py_func_java_throw_exception", Object.class)).remote();
try {
res.get();
} catch (RuntimeException ex) {
final String message = ex.getMessage();
Assert.assertTrue(message.contains("py_func_java_throw_exception"), message);
Assert.assertTrue(message.contains("io.ray.test.CrossLanguageInvocationTest.throwException"),
message);
Assert.assertTrue(message.contains("java.lang.ArithmeticException: / by zero"), message);
return;
}
Assert.fail();
}
@Test
public void testRaiseExceptionFromNestPython() {
ObjectRef<Object> res = Ray.task(
PyFunction.of(PYTHON_MODULE, "py_func_nest_python_raise_exception", Object.class)).remote();
try {
res.get();
} catch (RuntimeException ex) {
final String message = ex.getMessage();
Assert.assertTrue(message.contains("py_func_nest_python_raise_exception"), message);
Assert.assertTrue(message.contains("io.ray.runtime.task.TaskExecutor.execute"), message);
Assert.assertTrue(message.contains("py_func_python_raise_exception"), message);
Assert.assertTrue(message.contains("ZeroDivisionError: division by zero"), message);
return;
}
Assert.fail();
}
@Test
public void testThrowExceptionFromNestJava() {
ObjectRef<Object> res = Ray.task(
PyFunction.of(PYTHON_MODULE, "py_func_nest_java_throw_exception", Object.class)).remote();
try {
res.get();
} catch (RuntimeException ex) {
final String message = ex.getMessage();
Assert.assertTrue(message.contains("py_func_nest_java_throw_exception"), message);
Assert.assertEquals(org.apache.commons.lang3.StringUtils
.countMatches(message, "io.ray.runtime.exception.RayTaskException"), 2);
Assert.assertTrue(message.contains("py_func_java_throw_exception"), message);
Assert.assertTrue(message.contains("java.lang.ArithmeticException: / by zero"), message);
return;
}
Assert.fail();
}
public static Object[] pack(int i, String s, double f, Object[] o) {
// This function will be called from test_cross_language_invocation.py
return new Object[]{i, s, f, o};
@@ -227,6 +311,23 @@ public class CrossLanguageInvocationTest extends BaseMultiLanguageTest {
return (byte[]) res.get();
}
@SuppressWarnings("ConstantOverflow")
public static Object throwException() {
return 1 / 0;
}
public static Object throwJavaException() {
ObjectRef<Object> res = Ray.task(
PyFunction.of(PYTHON_MODULE, "py_func_java_throw_exception", Object.class)).remote();
return res.get();
}
public static Object raisePythonException() {
ObjectRef<Object> res = Ray.task(
PyFunction.of(PYTHON_MODULE, "py_func_python_raise_exception", Object.class)).remote();
return res.get();
}
public static class TestActor {
public TestActor(byte[] v) {
@@ -3,11 +3,10 @@ package io.ray.test;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.exception.RayActorException;
import io.ray.api.exception.RayException;
import io.ray.api.exception.RayTaskException;
import io.ray.api.exception.RayWorkerException;
import io.ray.runtime.exception.RayActorException;
import io.ray.runtime.exception.RayWorkerException;
import io.ray.api.function.RayFunc0;
import io.ray.runtime.exception.RayTaskException;
import java.time.Duration;
import java.time.Instant;
import java.util.Arrays;
@@ -138,7 +137,7 @@ public class FailureTest extends BaseTest {
try {
Ray.get(Arrays.asList(obj1, obj2));
Assert.fail("Should throw RayException.");
} catch (RayException e) {
} catch (RuntimeException e) {
Instant end = Instant.now();
long duration = Duration.between(start, end).toMillis();
Assert.assertTrue(duration < 5000, "Should fail quickly. " +
@@ -4,7 +4,7 @@ import com.google.common.collect.ImmutableList;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.exception.RayActorException;
import io.ray.runtime.exception.RayActorException;
import java.util.function.BiConsumer;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
@@ -5,7 +5,6 @@ import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.WaitResult;
import io.ray.api.exception.RayException;
import io.ray.api.id.ActorId;
import java.util.ArrayList;
import java.util.List;
@@ -189,7 +188,7 @@ public class MultiThreadingTest extends BaseTest {
try {
// It wouldn't be OK to run them in another thread if not wrapped the runnable.
for (Runnable runnable : runnables) {
Assert.expectThrows(RayException.class, runnable::run);
Assert.expectThrows(RuntimeException.class, runnable::run);
}
} catch (Throwable ex) {
throwable[0] = ex;
@@ -83,6 +83,35 @@ def py_func_pass_python_actor_handle():
return ray.get(r)
@ray.remote
def py_func_python_raise_exception():
1 / 0
@ray.remote
def py_func_java_throw_exception():
f = ray.java_function("io.ray.test.CrossLanguageInvocationTest",
"throwException")
r = f.remote()
return ray.get(r)
@ray.remote
def py_func_nest_python_raise_exception():
f = ray.java_function("io.ray.test.CrossLanguageInvocationTest",
"raisePythonException")
r = f.remote()
return ray.get(r)
@ray.remote
def py_func_nest_java_throw_exception():
f = ray.java_function("io.ray.test.CrossLanguageInvocationTest",
"throwJavaException")
r = f.remote()
return ray.get(r)
@ray.remote
class Counter(object):
def __init__(self, value):
+3
View File
@@ -222,6 +222,9 @@ cdef class Language:
cdef from_native(const CLanguage& lang):
return Language(<int32_t>lang)
def value(self):
return <int32_t>self.lang
def __eq__(self, other):
return (isinstance(other, Language) and
(<int32_t>self.lang) == (<int32_t>(<Language>other).lang))
+31 -1
View File
@@ -1,14 +1,44 @@
import os
from traceback import format_exception
import colorama
import ray
import ray.cloudpickle as pickle
from ray.core.generated.common_pb2 import RayException, Language
import setproctitle
class RayError(Exception):
"""Super class of all ray exception types."""
pass
def to_bytes(self):
# Extract exc_info from exception object.
exc_info = (type(self), self, self.__traceback__)
formatted_exception_string = "\n".join(format_exception(*exc_info))
return RayException(
language=ray.Language.PYTHON.value(),
serialized_exception=pickle.dumps(self),
formatted_exception_string=formatted_exception_string
).SerializeToString()
@staticmethod
def from_bytes(b):
ray_exception = RayException()
ray_exception.ParseFromString(b)
if ray_exception.language == ray.Language.PYTHON.value():
return pickle.loads(ray_exception.serialized_exception)
else:
return CrossLanguageError(ray_exception)
class CrossLanguageError(RayError):
"""Raised from another language."""
def __init__(self, ray_exception):
super().__init__("An exception raised from {}:\n{}".format(
Language.Name(ray_exception.language),
ray_exception.formatted_exception_string))
class RayConnectionError(RayError):
+44 -13
View File
@@ -1,19 +1,50 @@
from ray.core.generated.common_pb2 import ErrorType
from ray.core.generated.gcs_pb2 import (
ActorCheckpointIdData, ActorTableData, GcsNodeInfo, JobTableData,
JobConfig, ErrorTableData, ErrorType, GcsEntry, HeartbeatBatchTableData,
HeartbeatTableData, ObjectTableData, ProfileTableData, TablePrefix,
TablePubsub, TaskTableData, ResourceMap, ResourceTableData,
ObjectLocationInfo, PubSubMessage, WorkerTableData,
PlacementGroupTableData)
ActorCheckpointIdData,
ActorTableData,
GcsNodeInfo,
JobTableData,
JobConfig,
ErrorTableData,
GcsEntry,
HeartbeatBatchTableData,
HeartbeatTableData,
ObjectTableData,
ProfileTableData,
TablePrefix,
TablePubsub,
TaskTableData,
ResourceMap,
ResourceTableData,
ObjectLocationInfo,
PubSubMessage,
WorkerTableData,
PlacementGroupTableData,
)
__all__ = [
"ActorCheckpointIdData", "ActorTableData", "GcsNodeInfo", "JobTableData",
"JobConfig", "ErrorTableData", "ErrorType", "GcsEntry",
"HeartbeatBatchTableData", "HeartbeatTableData", "ObjectTableData",
"ProfileTableData", "TablePrefix", "TablePubsub", "TaskTableData",
"ResourceMap", "ResourceTableData", "construct_error_message",
"ObjectLocationInfo", "PubSubMessage", "WorkerTableData",
"PlacementGroupTableData"
"ActorCheckpointIdData",
"ActorTableData",
"GcsNodeInfo",
"JobTableData",
"JobConfig",
"ErrorTableData",
"ErrorType",
"GcsEntry",
"HeartbeatBatchTableData",
"HeartbeatTableData",
"ObjectTableData",
"ProfileTableData",
"TablePrefix",
"TablePubsub",
"TaskTableData",
"ResourceMap",
"ResourceTableData",
"construct_error_message",
"ObjectLocationInfo",
"PubSubMessage",
"WorkerTableData",
"PlacementGroupTableData",
]
FUNCTION_PREFIX = "RemoteFunction:"
+17 -16
View File
@@ -9,6 +9,7 @@ import ray.utils
from ray.utils import _random_string
from ray.gcs_utils import ErrorType
from ray.exceptions import (
RayError,
PlasmaObjectNotAvailable,
RayTaskError,
RayActorError,
@@ -221,10 +222,10 @@ class SerializationContext:
def _deserialize_msgpack_data(self, data, metadata):
msgpack_data, pickle5_data = split_buffer(data)
if metadata == ray_constants.OBJECT_METADATA_TYPE_CROSS_LANGUAGE:
python_objects = []
else:
if metadata == ray_constants.OBJECT_METADATA_TYPE_PYTHON:
python_objects = self._deserialize_pickle5_data(pickle5_data)
else:
python_objects = []
try:
@@ -262,8 +263,7 @@ class SerializationContext:
# independent.
if error_type == ErrorType.Value("TASK_EXECUTION_EXCEPTION"):
obj = self._deserialize_msgpack_data(data, metadata)
assert isinstance(obj, RayTaskError)
return obj
return RayError.from_bytes(obj)
elif error_type == ErrorType.Value("WORKER_DIED"):
return RayWorkerError()
elif error_type == ErrorType.Value("ACTOR_DIED"):
@@ -347,7 +347,16 @@ class SerializationContext:
metadata, inband, writer,
self.get_and_clear_contained_object_refs())
def _serialize_to_msgpack(self, metadata, value):
def _serialize_to_msgpack(self, value):
# Only RayTaskError is possible to be serialized here. We don't
# need to deal with other exception types here.
if isinstance(value, RayTaskError):
metadata = str(
ErrorType.Value("TASK_EXECUTION_EXCEPTION")).encode("ascii")
value = value.to_bytes()
else:
metadata = ray_constants.OBJECT_METADATA_TYPE_CROSS_LANGUAGE
python_objects = []
def _python_serializer(o):
@@ -358,10 +367,10 @@ class SerializationContext:
msgpack_data = MessagePackSerializer.dumps(value, _python_serializer)
if python_objects:
metadata = ray_constants.OBJECT_METADATA_TYPE_PYTHON
pickle5_serialized_object = \
self._serialize_to_pickle5(metadata, python_objects)
else:
metadata = ray_constants.OBJECT_METADATA_TYPE_CROSS_LANGUAGE
pickle5_serialized_object = None
return MessagePackSerializedObject(metadata, msgpack_data,
@@ -379,15 +388,7 @@ class SerializationContext:
# that this object can also be read by Java.
return RawSerializedObject(value)
else:
# Only RayTaskError is possible to be serialized here. We don't
# need to deal with other exception types here.
if isinstance(value, RayTaskError):
metadata = str(ErrorType.Value(
"TASK_EXECUTION_EXCEPTION")).encode("ascii")
else:
metadata = ray_constants.OBJECT_METADATA_TYPE_PYTHON
return self._serialize_to_msgpack(metadata, value)
return self._serialize_to_msgpack(value)
def register_custom_serializer(self,
cls,
+17
View File
@@ -12,6 +12,7 @@ import redis
import ray
import ray.ray_constants as ray_constants
from ray.exceptions import RayTaskError
from ray.cluster_utils import Cluster
from ray.test_utils import (
wait_for_condition,
@@ -452,6 +453,22 @@ def test_actor_scope_or_intentionally_killed_message(ray_start_regular,
errors)
def test_exception_chain(ray_start_regular):
@ray.remote
def bar():
return 1 / 0
@ray.remote
def foo():
return ray.get(bar.remote())
r = foo.remote()
try:
ray.get(r)
except ZeroDivisionError as ex:
assert isinstance(ex, RayTaskError)
@pytest.mark.skip("This test does not work yet.")
@pytest.mark.parametrize(
"ray_start_object_store_memory", [10**6], indirect=True)
+1 -1
View File
@@ -167,7 +167,7 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
java_system_class = LoadClass(env, "java/lang/System");
java_system_gc = env->GetStaticMethodID(java_system_class, "gc", "()V");
java_ray_exception_class = LoadClass(env, "io/ray/api/exception/RayException");
java_ray_exception_class = LoadClass(env, "io/ray/runtime/exception/RayException");
java_jni_exception_util_class = LoadClass(env, "io/ray/runtime/util/JniExceptionUtil");
java_jni_exception_util_get_stack_trace = env->GetStaticMethodID(
+44
View File
@@ -100,6 +100,50 @@ message FunctionDescriptor {
}
}
// This enum type is used as object's metadata to indicate the object's
// creating task has failed because of a certain error.
// TODO(hchen): We may want to make these errors more specific. E.g., we may
// want to distinguish between intentional and expected actor failures, and
// between worker process failure and node failure.
enum ErrorType {
// Indicates that a task failed because the worker died unexpectedly while
// executing it.
WORKER_DIED = 0;
// Indicates that a task failed because the actor died unexpectedly before
// finishing it.
ACTOR_DIED = 1;
// Indicates that an object is lost and cannot be restarted.
// Note, this currently only happens to actor objects. When the actor's
// state is already after the object's creating task, the actor cannot
// re-run the task.
// TODO(hchen): we may want to reuse this error type for more cases. E.g.,
// 1) A object that was put by the driver.
// 2) The object's creating task is already cleaned up from GCS (this
// currently crashes raylet).
OBJECT_UNRECONSTRUCTABLE = 2;
// Indicates that a task failed due to user code failure.
TASK_EXECUTION_EXCEPTION = 3;
// Indicates that the object has been placed in plasma. This error shouldn't
// ever be exposed to user code; it is only used internally to indicate the
// result of a direct call has been placed in plasma.
OBJECT_IN_PLASMA = 4;
// Indicates that an object has been cancelled.
TASK_CANCELLED = 5;
// Inidicates that creating the GCS service failed to create the actor.
ACTOR_CREATION_FAILED = 6;
}
/// The task exception encapsulates all information about task
/// execution execeptions.
message RayException {
// Language of this exception.
Language language = 1;
// The serialized exception.
bytes serialized_exception = 2;
// The formatted exception string.
string formatted_exception_string = 3;
}
/// The task specification encapsulates all immutable information about the
/// task. These fields are determined at submission time, converse to the
/// `TaskExecutionSpec` may change at execution time.
-34
View File
@@ -422,37 +422,3 @@ message PubSubMessage {
bytes id = 1;
bytes data = 2;
}
// This enum type is used as object's metadata to indicate the object's
// creating task has failed because of a certain error.
// TODO(hchen): We may want to make these errors more specific. E.g., we may
// want to distinguish between intentional and expected actor failures, and
// between worker process failure and node failure.
enum ErrorType {
// Indicates that a task failed because the worker died unexpectedly while
// executing it.
WORKER_DIED = 0;
// Indicates that a task failed because the actor died unexpectedly before
// finishing it.
ACTOR_DIED = 1;
// Indicates that an object is lost and cannot be restarted.
// Note, this currently only happens to actor objects. When the actor's
// state is already after the object's creating task, the actor cannot
// re-run the task.
// TODO(hchen): we may want to reuse this error type for more cases. E.g.,
// 1) A object that was put by the driver.
// 2) The object's creating task is already cleaned up from GCS (this
// currently
// crashes raylet).
OBJECT_UNRECONSTRUCTABLE = 2;
// Indicates that a task failed due to user code failure.
TASK_EXECUTION_EXCEPTION = 3;
// Indicates that the object has been placed in plasma. This error shouldn't
// ever be exposed to user code; it is only used internally to indicate the
// result of a direct call has been placed in plasma.
OBJECT_IN_PLASMA = 4;
// Indicates that an object has been cancelled.
TASK_CANCELLED = 5;
// Inidicates that creating the GCS service failed to create the actor.
ACTOR_CREATION_FAILED = 6;
}