mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 04:55:04 +08:00
[api] Second round of 1.0 API changes: exceptions, num_return_vals (#10377)
This commit is contained in:
+13
-13
@@ -101,11 +101,11 @@ import ray.ray_constants as ray_constants
|
||||
from ray import profiling
|
||||
from ray.exceptions import (
|
||||
RayError,
|
||||
RayletError,
|
||||
RaySystemError,
|
||||
RayTaskError,
|
||||
ObjectStoreFullError,
|
||||
RayTimeoutError,
|
||||
RayCancellationError
|
||||
GetTimeoutError,
|
||||
TaskCancelledError
|
||||
)
|
||||
from ray.utils import decode
|
||||
import gc
|
||||
@@ -143,11 +143,11 @@ cdef int check_status(const CRayStatus& status) nogil except -1:
|
||||
elif status.IsInterrupted():
|
||||
raise KeyboardInterrupt()
|
||||
elif status.IsTimedOut():
|
||||
raise RayTimeoutError(message)
|
||||
raise GetTimeoutError(message)
|
||||
elif status.IsNotFound():
|
||||
raise ValueError(message)
|
||||
else:
|
||||
raise RayletError(message)
|
||||
raise RaySystemError(message)
|
||||
|
||||
cdef RayObjectsToDataMetadataPairs(
|
||||
const c_vector[shared_ptr[CRayObject]] objects):
|
||||
@@ -481,7 +481,7 @@ cdef execute_task(
|
||||
outputs = function_executor(*args, **kwargs)
|
||||
task_exception = False
|
||||
except KeyboardInterrupt as e:
|
||||
raise RayCancellationError(
|
||||
raise TaskCancelledError(
|
||||
core_worker.get_current_task_id())
|
||||
if c_return_ids.size() == 1:
|
||||
outputs = (outputs,)
|
||||
@@ -489,7 +489,7 @@ cdef execute_task(
|
||||
# was exiting and was raised after the except block.
|
||||
if not check_signals().ok():
|
||||
task_exception = True
|
||||
raise RayCancellationError(
|
||||
raise TaskCancelledError(
|
||||
core_worker.get_current_task_id())
|
||||
# Store the outputs in the object store.
|
||||
with core_worker.profile_event(b"task:store_outputs"):
|
||||
@@ -976,7 +976,7 @@ cdef class CoreWorker:
|
||||
Language language,
|
||||
FunctionDescriptor function_descriptor,
|
||||
args,
|
||||
int num_return_vals,
|
||||
int num_returns,
|
||||
resources,
|
||||
int max_retries,
|
||||
PlacementGroupID placement_group_id,
|
||||
@@ -993,7 +993,7 @@ cdef class CoreWorker:
|
||||
with self.profile_event(b"submit_task"):
|
||||
prepare_resources(resources, &c_resources)
|
||||
task_options = CTaskOptions(
|
||||
num_return_vals, c_resources)
|
||||
num_returns, c_resources)
|
||||
ray_function = CRayFunction(
|
||||
language.lang, function_descriptor.descriptor)
|
||||
prepare_args(self, language, args, &args_vector)
|
||||
@@ -1103,7 +1103,7 @@ cdef class CoreWorker:
|
||||
ActorID actor_id,
|
||||
FunctionDescriptor function_descriptor,
|
||||
args,
|
||||
int num_return_vals,
|
||||
int num_returns,
|
||||
double num_method_cpus):
|
||||
|
||||
cdef:
|
||||
@@ -1117,7 +1117,7 @@ cdef class CoreWorker:
|
||||
with self.profile_event(b"submit_task"):
|
||||
if num_method_cpus > 0:
|
||||
c_resources[b"CPU"] = num_method_cpus
|
||||
task_options = CTaskOptions(num_return_vals, c_resources)
|
||||
task_options = CTaskOptions(num_returns, c_resources)
|
||||
ray_function = CRayFunction(
|
||||
language.lang, function_descriptor.descriptor)
|
||||
prepare_args(self, language, args, &args_vector)
|
||||
@@ -1209,7 +1209,7 @@ cdef class CoreWorker:
|
||||
return ray.actor.ActorHandle(language, actor_id,
|
||||
method_meta.decorators,
|
||||
method_meta.signatures,
|
||||
method_meta.num_return_vals,
|
||||
method_meta.num_returns,
|
||||
actor_method_cpu,
|
||||
actor_creation_function_descriptor,
|
||||
worker.current_session_and_job)
|
||||
@@ -1217,7 +1217,7 @@ cdef class CoreWorker:
|
||||
return ray.actor.ActorHandle(language, actor_id,
|
||||
{}, # method decorators
|
||||
{}, # method signatures
|
||||
{}, # method num_return_vals
|
||||
{}, # method num_returns
|
||||
0, # actor method cpu
|
||||
actor_creation_function_descriptor,
|
||||
worker.current_session_and_job)
|
||||
|
||||
+29
-30
@@ -23,7 +23,7 @@ def method(*args, **kwargs):
|
||||
|
||||
@ray.remote
|
||||
class Foo:
|
||||
@ray.method(num_return_vals=2)
|
||||
@ray.method(num_returns=2)
|
||||
def bar(self):
|
||||
return 1, 2
|
||||
|
||||
@@ -32,16 +32,16 @@ def method(*args, **kwargs):
|
||||
_, _ = f.bar.remote()
|
||||
|
||||
Args:
|
||||
num_return_vals: The number of object refs that should be returned by
|
||||
num_returns: The number of object refs that should be returned by
|
||||
invocations of this actor method.
|
||||
"""
|
||||
assert len(args) == 0
|
||||
assert len(kwargs) == 1
|
||||
assert "num_return_vals" in kwargs
|
||||
num_return_vals = kwargs["num_return_vals"]
|
||||
assert "num_returns" in kwargs
|
||||
num_returns = kwargs["num_returns"]
|
||||
|
||||
def annotate_method(method):
|
||||
method.__ray_num_return_vals__ = num_return_vals
|
||||
method.__ray_num_returns__ = num_returns
|
||||
return method
|
||||
|
||||
return annotate_method
|
||||
@@ -58,7 +58,7 @@ class ActorMethod:
|
||||
Attributes:
|
||||
_actor: A handle to the actor.
|
||||
_method_name: The name of the actor method.
|
||||
_num_return_vals: The default number of return values that the method
|
||||
_num_returns: The default number of return values that the method
|
||||
invocation should return.
|
||||
_decorator: An optional decorator that should be applied to the actor
|
||||
method invocation (as opposed to the actor method execution) before
|
||||
@@ -72,12 +72,12 @@ class ActorMethod:
|
||||
def __init__(self,
|
||||
actor,
|
||||
method_name,
|
||||
num_return_vals,
|
||||
num_returns,
|
||||
decorator=None,
|
||||
hardref=False):
|
||||
self._actor_ref = weakref.ref(actor)
|
||||
self._method_name = method_name
|
||||
self._num_return_vals = num_return_vals
|
||||
self._num_returns = num_returns
|
||||
# This is a decorator that is used to wrap the function invocation (as
|
||||
# opposed to the function execution). The decorator must return a
|
||||
# function that takes in two arguments ("args" and "kwargs"). In most
|
||||
@@ -100,9 +100,9 @@ class ActorMethod:
|
||||
def remote(self, *args, **kwargs):
|
||||
return self._remote(args, kwargs)
|
||||
|
||||
def _remote(self, args=None, kwargs=None, num_return_vals=None):
|
||||
if num_return_vals is None:
|
||||
num_return_vals = self._num_return_vals
|
||||
def _remote(self, args=None, kwargs=None, num_returns=None):
|
||||
if num_returns is None:
|
||||
num_returns = self._num_returns
|
||||
|
||||
def invocation(args, kwargs):
|
||||
actor = self._actor_hard_ref or self._actor_ref()
|
||||
@@ -112,7 +112,7 @@ class ActorMethod:
|
||||
self._method_name,
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
num_return_vals=num_return_vals)
|
||||
num_returns=num_returns)
|
||||
|
||||
# Apply the decorator if there is one.
|
||||
if self._decorator is not None:
|
||||
@@ -124,7 +124,7 @@ class ActorMethod:
|
||||
return {
|
||||
"actor": self._actor_ref(),
|
||||
"method_name": self._method_name,
|
||||
"num_return_vals": self._num_return_vals,
|
||||
"num_returns": self._num_returns,
|
||||
"decorator": self._decorator,
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ class ActorMethod:
|
||||
self.__init__(
|
||||
state["actor"],
|
||||
state["method_name"],
|
||||
state["num_return_vals"],
|
||||
state["num_returns"],
|
||||
state["decorator"],
|
||||
hardref=True)
|
||||
|
||||
@@ -147,7 +147,7 @@ class ActorClassMethodMetadata(object):
|
||||
can be set by attaching the attribute
|
||||
"__ray_invocation_decorator__" to the actor method.
|
||||
signatures: The signatures of the methods.
|
||||
num_return_vals: The default number of return values for
|
||||
num_returns: The default number of return values for
|
||||
each actor method.
|
||||
"""
|
||||
|
||||
@@ -182,7 +182,7 @@ class ActorClassMethodMetadata(object):
|
||||
# arguments.
|
||||
self.decorators = {}
|
||||
self.signatures = {}
|
||||
self.num_return_vals = {}
|
||||
self.num_returns = {}
|
||||
for method_name, method in actor_methods:
|
||||
# Whether or not this method requires binding of its first
|
||||
# argument. For class and static methods, we do not want to bind
|
||||
@@ -198,11 +198,10 @@ class ActorClassMethodMetadata(object):
|
||||
self.signatures[method_name] = signature.extract_signature(
|
||||
method, ignore_first=not is_bound)
|
||||
# Set the default number of return values for this method.
|
||||
if hasattr(method, "__ray_num_return_vals__"):
|
||||
self.num_return_vals[method_name] = (
|
||||
method.__ray_num_return_vals__)
|
||||
if hasattr(method, "__ray_num_returns__"):
|
||||
self.num_returns[method_name] = (method.__ray_num_returns__)
|
||||
else:
|
||||
self.num_return_vals[method_name] = (
|
||||
self.num_returns[method_name] = (
|
||||
ray_constants.DEFAULT_ACTOR_METHOD_NUM_RETURN_VALS)
|
||||
|
||||
if hasattr(method, "__ray_invocation_decorator__"):
|
||||
@@ -589,7 +588,7 @@ class ActorClass:
|
||||
actor_id,
|
||||
meta.method_meta.decorators,
|
||||
meta.method_meta.signatures,
|
||||
meta.method_meta.num_return_vals,
|
||||
meta.method_meta.num_returns,
|
||||
actor_method_cpu,
|
||||
meta.actor_creation_function_descriptor,
|
||||
worker.current_session_and_job,
|
||||
@@ -617,7 +616,7 @@ class ActorHandle:
|
||||
invocation side, whereas a regular decorator can be used to change
|
||||
the behavior on the execution side.
|
||||
_ray_method_signatures: The signatures of the actor methods.
|
||||
_ray_method_num_return_vals: The default number of return values for
|
||||
_ray_method_num_returns: The default number of return values for
|
||||
each method.
|
||||
_ray_actor_method_cpus: The number of CPUs required by actor methods.
|
||||
_ray_original_handle: True if this is the original actor handle for a
|
||||
@@ -633,7 +632,7 @@ class ActorHandle:
|
||||
actor_id,
|
||||
method_decorators,
|
||||
method_signatures,
|
||||
method_num_return_vals,
|
||||
method_num_returns,
|
||||
actor_method_cpus,
|
||||
actor_creation_function_descriptor,
|
||||
session_and_job,
|
||||
@@ -643,7 +642,7 @@ class ActorHandle:
|
||||
self._ray_original_handle = original_handle
|
||||
self._ray_method_decorators = method_decorators
|
||||
self._ray_method_signatures = method_signatures
|
||||
self._ray_method_num_return_vals = method_num_return_vals
|
||||
self._ray_method_num_returns = method_num_returns
|
||||
self._ray_actor_method_cpus = actor_method_cpus
|
||||
self._ray_session_and_job = session_and_job
|
||||
self._ray_is_cross_language = language != Language.PYTHON
|
||||
@@ -664,7 +663,7 @@ class ActorHandle:
|
||||
method = ActorMethod(
|
||||
self,
|
||||
method_name,
|
||||
self._ray_method_num_return_vals[method_name],
|
||||
self._ray_method_num_returns[method_name],
|
||||
decorator=self._ray_method_decorators.get(method_name))
|
||||
setattr(self, method_name, method)
|
||||
|
||||
@@ -680,7 +679,7 @@ class ActorHandle:
|
||||
method_name,
|
||||
args=None,
|
||||
kwargs=None,
|
||||
num_return_vals=None):
|
||||
num_returns=None):
|
||||
"""Method execution stub for an actor handle.
|
||||
|
||||
This is the function that executes when
|
||||
@@ -692,7 +691,7 @@ class ActorHandle:
|
||||
method_name: The name of the actor method to execute.
|
||||
args: A list of arguments for the actor method.
|
||||
kwargs: A dictionary of keyword arguments for the actor method.
|
||||
num_return_vals (int): The number of return values for the method.
|
||||
num_returns (int): The number of return values for the method.
|
||||
|
||||
Returns:
|
||||
object_refs: A list of object refs returned by the remote actor
|
||||
@@ -725,7 +724,7 @@ class ActorHandle:
|
||||
|
||||
object_refs = worker.core_worker.submit_actor_task(
|
||||
self._ray_actor_language, self._ray_actor_id, function_descriptor,
|
||||
list_args, num_return_vals, self._ray_actor_method_cpus)
|
||||
list_args, num_returns, self._ray_actor_method_cpus)
|
||||
|
||||
if len(object_refs) == 1:
|
||||
object_refs = object_refs[0]
|
||||
@@ -795,7 +794,7 @@ class ActorHandle:
|
||||
"actor_id": self._ray_actor_id,
|
||||
"method_decorators": self._ray_method_decorators,
|
||||
"method_signatures": self._ray_method_signatures,
|
||||
"method_num_return_vals": self._ray_method_num_return_vals,
|
||||
"method_num_returns": self._ray_method_num_returns,
|
||||
"actor_method_cpus": self._ray_actor_method_cpus,
|
||||
"actor_creation_function_descriptor": self.
|
||||
_ray_actor_creation_function_descriptor,
|
||||
@@ -830,7 +829,7 @@ class ActorHandle:
|
||||
state["actor_id"],
|
||||
state["method_decorators"],
|
||||
state["method_signatures"],
|
||||
state["method_num_return_vals"],
|
||||
state["method_num_returns"],
|
||||
state["actor_method_cpus"],
|
||||
state["actor_creation_function_descriptor"],
|
||||
worker.current_session_and_job)
|
||||
|
||||
@@ -66,7 +66,7 @@ def java_function(class_name, function_name):
|
||||
None, # memory,
|
||||
None, # object_store_memory,
|
||||
None, # resources,
|
||||
None, # num_return_vals,
|
||||
None, # num_returns,
|
||||
None, # max_calls,
|
||||
None, # max_retries
|
||||
placement_group=None,
|
||||
|
||||
+15
-35
@@ -41,12 +41,7 @@ class CrossLanguageError(RayError):
|
||||
ray_exception.formatted_exception_string))
|
||||
|
||||
|
||||
class RayConnectionError(RayError):
|
||||
"""Raised when ray is not yet connected but needs to be."""
|
||||
pass
|
||||
|
||||
|
||||
class RayCancellationError(RayError):
|
||||
class TaskCancelledError(RayError):
|
||||
"""Raised when this task is cancelled.
|
||||
|
||||
Attributes:
|
||||
@@ -143,7 +138,7 @@ class RayTaskError(RayError):
|
||||
return "\n".join(out)
|
||||
|
||||
|
||||
class RayWorkerError(RayError):
|
||||
class WorkerCrashedError(RayError):
|
||||
"""Indicates that the worker died unexpectedly while executing a task."""
|
||||
|
||||
def __str__(self):
|
||||
@@ -161,8 +156,8 @@ class RayActorError(RayError):
|
||||
return "The actor died unexpectedly before finishing this task."
|
||||
|
||||
|
||||
class RayletError(RayError):
|
||||
"""Indicates that the Raylet client has errored.
|
||||
class RaySystemError(RayError):
|
||||
"""Indicates that Ray encountered a system error.
|
||||
|
||||
This exception can be thrown when the raylet is killed.
|
||||
"""
|
||||
@@ -171,7 +166,7 @@ class RayletError(RayError):
|
||||
self.client_exc = client_exc
|
||||
|
||||
def __str__(self):
|
||||
return f"The Raylet died with this message: {self.client_exc}"
|
||||
return f"System error: {self.client_exc}"
|
||||
|
||||
|
||||
class ObjectStoreFullError(RayError):
|
||||
@@ -184,21 +179,13 @@ class ObjectStoreFullError(RayError):
|
||||
def __str__(self):
|
||||
return super(ObjectStoreFullError, self).__str__() + (
|
||||
"\n"
|
||||
"The local object store is full of objects that are still in scope"
|
||||
" and cannot be evicted. Try increasing the object store memory "
|
||||
"available with ray.init(object_store_memory=<bytes>). "
|
||||
"You can also try setting an option to fallback to LRU eviction "
|
||||
"when the object store is full by calling "
|
||||
"ray.init(lru_evict=True). See also: "
|
||||
"https://docs.ray.io/en/latest/memory-management.html.")
|
||||
"The local object store is full of objects that are still in "
|
||||
"scope and cannot be evicted. Tip: Use the `ray memory` command "
|
||||
"to list active objects in the cluster.")
|
||||
|
||||
|
||||
class UnreconstructableError(RayError):
|
||||
"""Indicates that an object is lost and cannot be reconstructed.
|
||||
|
||||
Note, this exception only happens for actor objects. If actor's current
|
||||
state is after object's creating task, the actor cannot re-run the task to
|
||||
reconstruct the object.
|
||||
class ObjectLostError(RayError):
|
||||
"""Indicates that an object has been lost due to node failure.
|
||||
|
||||
Attributes:
|
||||
object_ref: ID of the object.
|
||||
@@ -208,17 +195,10 @@ class UnreconstructableError(RayError):
|
||||
self.object_ref = object_ref
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
f"Object {self.object_ref.hex()} is lost "
|
||||
"(either LRU evicted or deleted by user) and "
|
||||
"cannot be reconstructed. Try increasing the object store "
|
||||
"memory available with ray.init(object_store_memory=<bytes>) "
|
||||
"or setting object store limits with "
|
||||
"ray.remote(object_store_memory=<bytes>). "
|
||||
"See also: https://docs.ray.io/en/latest/memory-management.html")
|
||||
return (f"Object {self.object_ref.hex()} is lost due to node failure.")
|
||||
|
||||
|
||||
class RayTimeoutError(RayError):
|
||||
class GetTimeoutError(RayError):
|
||||
"""Indicates that a call to the worker timed out."""
|
||||
pass
|
||||
|
||||
@@ -232,9 +212,9 @@ RAY_EXCEPTION_TYPES = [
|
||||
PlasmaObjectNotAvailable,
|
||||
RayError,
|
||||
RayTaskError,
|
||||
RayWorkerError,
|
||||
WorkerCrashedError,
|
||||
RayActorError,
|
||||
ObjectStoreFullError,
|
||||
UnreconstructableError,
|
||||
RayTimeoutError,
|
||||
ObjectLostError,
|
||||
GetTimeoutError,
|
||||
]
|
||||
|
||||
@@ -7,7 +7,7 @@ from . import core
|
||||
__all__ = ["tsqr", "modified_lu", "tsqr_hr", "qr"]
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def tsqr(a):
|
||||
"""Perform a QR decomposition of a tall-skinny matrix.
|
||||
|
||||
@@ -83,7 +83,7 @@ def tsqr(a):
|
||||
# TODO(rkn): This is unoptimized, we really want a block version of this.
|
||||
# This is Algorithm 5 from
|
||||
# http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf.
|
||||
@ray.remote(num_return_vals=3)
|
||||
@ray.remote(num_returns=3)
|
||||
def modified_lu(q):
|
||||
"""Perform a modified LU decomposition of a matrix.
|
||||
|
||||
@@ -121,7 +121,7 @@ def modified_lu(q):
|
||||
return ray.get(core.numpy_to_dist.remote(ray.put(L))), U, S
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def tsqr_hr_helper1(u, s, y_top_block, b):
|
||||
y_top = y_top_block[:b, :b]
|
||||
s_full = np.diag(s)
|
||||
@@ -137,7 +137,7 @@ def tsqr_hr_helper2(s, r_temp):
|
||||
|
||||
# This is Algorithm 6 from
|
||||
# http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf.
|
||||
@ray.remote(num_return_vals=4)
|
||||
@ray.remote(num_returns=4)
|
||||
def tsqr_hr(a):
|
||||
q, r_temp = tsqr.remote(a)
|
||||
y, u, s = modified_lu.remote(q)
|
||||
@@ -160,7 +160,7 @@ def qr_helper2(y_ri, a_rc):
|
||||
|
||||
# This is Algorithm 7 from
|
||||
# http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf.
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def qr(a):
|
||||
|
||||
m, n = a.shape[0], a.shape[1]
|
||||
|
||||
@@ -18,12 +18,12 @@ def solve(a, b):
|
||||
return np.linalg.solve(a, b)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def tensorsolve(a):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def tensorinv(a):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -63,22 +63,22 @@ def det(a):
|
||||
return np.linalg.det(a)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=3)
|
||||
@ray.remote(num_returns=3)
|
||||
def svd(a):
|
||||
return np.linalg.svd(a)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def eig(a):
|
||||
return np.linalg.eig(a)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def eigh(a):
|
||||
return np.linalg.eigh(a)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=4)
|
||||
@ray.remote(num_returns=4)
|
||||
def lstsq(a, b):
|
||||
return np.linalg.lstsq(a)
|
||||
|
||||
@@ -88,7 +88,7 @@ def norm(x):
|
||||
return np.linalg.norm(x)
|
||||
|
||||
|
||||
@ray.remote(num_return_vals=2)
|
||||
@ray.remote(num_returns=2)
|
||||
def qr(a):
|
||||
return np.linalg.qr(a)
|
||||
|
||||
|
||||
@@ -130,7 +130,7 @@ DASHBOARD_DIED_ERROR = "dashboard_died"
|
||||
RAYLET_CONNECTION_ERROR = "raylet_connection_error"
|
||||
|
||||
# Used in gpu detection
|
||||
RESOURCE_CONSTRAINT_PREFIX = "GPUType:"
|
||||
RESOURCE_CONSTRAINT_PREFIX = "gpu_type:"
|
||||
|
||||
RESOURCES_ENVIRONMENT_VARIABLE = "RAY_OVERRIDE_RESOURCES"
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ class RemoteFunction:
|
||||
_object_store_memory: The object store memory request for this task.
|
||||
_resources: The default custom resource requirements for invocations of
|
||||
this remote function.
|
||||
_num_return_vals: The default number of return values for invocations
|
||||
_num_returns: The default number of return values for invocations
|
||||
of this remote function.
|
||||
_max_calls: The number of times a worker can execute this function
|
||||
before exiting.
|
||||
@@ -61,8 +61,8 @@ class RemoteFunction:
|
||||
"""
|
||||
|
||||
def __init__(self, language, function, function_descriptor, num_cpus,
|
||||
num_gpus, memory, object_store_memory, resources,
|
||||
num_return_vals, max_calls, max_retries, placement_group,
|
||||
num_gpus, memory, object_store_memory, resources, num_returns,
|
||||
max_calls, max_retries, placement_group,
|
||||
placement_group_bundle_index):
|
||||
self._language = language
|
||||
self._function = function
|
||||
@@ -79,8 +79,8 @@ class RemoteFunction:
|
||||
"setting object_store_memory is not implemented for tasks")
|
||||
self._object_store_memory = None
|
||||
self._resources = resources
|
||||
self._num_return_vals = (DEFAULT_REMOTE_FUNCTION_NUM_RETURN_VALS if
|
||||
num_return_vals is None else num_return_vals)
|
||||
self._num_returns = (DEFAULT_REMOTE_FUNCTION_NUM_RETURN_VALS
|
||||
if num_returns is None else num_returns)
|
||||
self._max_calls = (DEFAULT_REMOTE_FUNCTION_MAX_CALLS
|
||||
if max_calls is None else max_calls)
|
||||
self._max_retries = (DEFAULT_REMOTE_FUNCTION_NUM_TASK_RETRIES
|
||||
@@ -107,7 +107,7 @@ class RemoteFunction:
|
||||
def _submit(self,
|
||||
args=None,
|
||||
kwargs=None,
|
||||
num_return_vals=None,
|
||||
num_returns=None,
|
||||
num_cpus=None,
|
||||
num_gpus=None,
|
||||
resources=None):
|
||||
@@ -116,7 +116,7 @@ class RemoteFunction:
|
||||
return self._remote(
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
num_return_vals=num_return_vals,
|
||||
num_returns=num_returns,
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus,
|
||||
resources=resources)
|
||||
@@ -144,7 +144,7 @@ class RemoteFunction:
|
||||
def _remote(self,
|
||||
args=None,
|
||||
kwargs=None,
|
||||
num_return_vals=None,
|
||||
num_returns=None,
|
||||
num_cpus=None,
|
||||
num_gpus=None,
|
||||
memory=None,
|
||||
@@ -182,8 +182,8 @@ class RemoteFunction:
|
||||
kwargs = {} if kwargs is None else kwargs
|
||||
args = [] if args is None else args
|
||||
|
||||
if num_return_vals is None:
|
||||
num_return_vals = self._num_return_vals
|
||||
if num_returns is None:
|
||||
num_returns = self._num_returns
|
||||
if max_retries is None:
|
||||
max_retries = self._max_retries
|
||||
|
||||
@@ -213,7 +213,7 @@ class RemoteFunction:
|
||||
"cannot be executed locally."
|
||||
object_refs = worker.core_worker.submit_task(
|
||||
self._language, self._function_descriptor, list_args,
|
||||
num_return_vals, resources, max_retries, placement_group.id,
|
||||
num_returns, resources, max_retries, placement_group.id,
|
||||
placement_group_bundle_index)
|
||||
|
||||
if len(object_refs) == 1:
|
||||
|
||||
@@ -13,9 +13,9 @@ from ray.exceptions import (
|
||||
PlasmaObjectNotAvailable,
|
||||
RayTaskError,
|
||||
RayActorError,
|
||||
RayCancellationError,
|
||||
RayWorkerError,
|
||||
UnreconstructableError,
|
||||
TaskCancelledError,
|
||||
WorkerCrashedError,
|
||||
ObjectLostError,
|
||||
)
|
||||
from ray._raylet import (
|
||||
split_buffer,
|
||||
@@ -265,14 +265,13 @@ class SerializationContext:
|
||||
obj = self._deserialize_msgpack_data(data, metadata)
|
||||
return RayError.from_bytes(obj)
|
||||
elif error_type == ErrorType.Value("WORKER_DIED"):
|
||||
return RayWorkerError()
|
||||
return WorkerCrashedError()
|
||||
elif error_type == ErrorType.Value("ACTOR_DIED"):
|
||||
return RayActorError()
|
||||
elif error_type == ErrorType.Value("TASK_CANCELLED"):
|
||||
return RayCancellationError()
|
||||
return TaskCancelledError()
|
||||
elif error_type == ErrorType.Value("OBJECT_UNRECONSTRUCTABLE"):
|
||||
return UnreconstructableError(
|
||||
ray.ObjectRef(object_ref.binary()))
|
||||
return ObjectLostError(ray.ObjectRef(object_ref.binary()))
|
||||
else:
|
||||
assert error_type != ErrorType.Value("OBJECT_IN_PLASMA"), \
|
||||
"Tried to get object that has been promoted to plasma."
|
||||
|
||||
@@ -191,7 +191,7 @@ async def test_router_use_max_concurrency(serve_instance):
|
||||
second_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)
|
||||
|
||||
# Neither queries should be available
|
||||
with pytest.raises(ray.exceptions.RayTimeoutError):
|
||||
with pytest.raises(ray.exceptions.GetTimeoutError):
|
||||
ray.get([first_query, second_query], timeout=0.2)
|
||||
|
||||
# Let's retrieve the router internal state
|
||||
|
||||
+1
-1
@@ -48,7 +48,7 @@ class GlobalState:
|
||||
"""
|
||||
if (self.redis_client is None or self.redis_clients is None
|
||||
or self.global_state_accessor is None):
|
||||
raise ray.exceptions.RayConnectionError(
|
||||
raise ray.exceptions.RaySystemError(
|
||||
"Ray has not been started yet. You can start Ray with "
|
||||
"'ray.init()'.")
|
||||
|
||||
|
||||
@@ -646,15 +646,15 @@ def test_multiple_return_values(ray_start_regular_shared):
|
||||
def method0(self):
|
||||
return 1
|
||||
|
||||
@ray.method(num_return_vals=1)
|
||||
@ray.method(num_returns=1)
|
||||
def method1(self):
|
||||
return 1
|
||||
|
||||
@ray.method(num_return_vals=2)
|
||||
@ray.method(num_returns=2)
|
||||
def method2(self):
|
||||
return 1, 2
|
||||
|
||||
@ray.method(num_return_vals=3)
|
||||
@ray.method(num_returns=3)
|
||||
def method3(self):
|
||||
return 1, 2, 3
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ def test_actor_eviction(ray_start_regular):
|
||||
val = ray.get(obj)
|
||||
assert isinstance(val, np.ndarray), val
|
||||
num_success += 1
|
||||
except ray.exceptions.UnreconstructableError:
|
||||
except ray.exceptions.ObjectLostError:
|
||||
num_evicted += 1
|
||||
# Some objects should have been evicted, and some should still be in the
|
||||
# object store.
|
||||
|
||||
@@ -64,12 +64,12 @@ def test_submit_api(shutdown_only):
|
||||
def g():
|
||||
return ray.get_gpu_ids()
|
||||
|
||||
assert f._remote([0], num_return_vals=0) is None
|
||||
id1 = f._remote(args=[1], num_return_vals=1)
|
||||
assert f._remote([0], num_returns=0) is None
|
||||
id1 = f._remote(args=[1], num_returns=1)
|
||||
assert ray.get(id1) == [0]
|
||||
id1, id2 = f._remote(args=[2], num_return_vals=2)
|
||||
id1, id2 = f._remote(args=[2], num_returns=2)
|
||||
assert ray.get([id1, id2]) == [0, 1]
|
||||
id1, id2, id3 = f._remote(args=[3], num_return_vals=3)
|
||||
id1, id2, id3 = f._remote(args=[3], num_returns=3)
|
||||
assert ray.get([id1, id2, id3]) == [0, 1, 2]
|
||||
assert ray.get(
|
||||
g._remote(args=[], num_cpus=1, num_gpus=1,
|
||||
@@ -107,7 +107,7 @@ def test_submit_api(shutdown_only):
|
||||
ray.get(a2.method._remote())
|
||||
|
||||
id1, id2, id3, id4 = a.method._remote(
|
||||
args=["test"], kwargs={"b": 2}, num_return_vals=4)
|
||||
args=["test"], kwargs={"b": 2}, num_returns=4)
|
||||
assert ray.get([id1, id2, id3, id4]) == [0, 1, "test", 2]
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from unittest.mock import MagicMock, patch
|
||||
import ray
|
||||
import ray.cluster_utils
|
||||
import ray.test_utils
|
||||
from ray.exceptions import RayTimeoutError
|
||||
from ray.exceptions import GetTimeoutError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -351,7 +351,7 @@ def test_system_config_when_connecting(ray_start_cluster):
|
||||
ray.put(np.zeros(40 * 1024 * 1024, dtype=np.uint8))
|
||||
|
||||
# This would not raise an exception if object pinning was enabled.
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
ray.get(obj_ref)
|
||||
|
||||
|
||||
@@ -377,7 +377,7 @@ def test_get_with_timeout(ray_start_regular_shared):
|
||||
# Check that get() raises a TimeoutError after the timeout if the object
|
||||
# is not ready yet.
|
||||
result_id = signal.wait.remote()
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(result_id, timeout=0.1)
|
||||
|
||||
# Check that a subsequent get() returns early.
|
||||
|
||||
@@ -5,18 +5,18 @@ import time
|
||||
import pytest
|
||||
|
||||
import ray
|
||||
from ray.exceptions import RayCancellationError, RayTaskError, \
|
||||
RayTimeoutError, RayWorkerError, \
|
||||
UnreconstructableError
|
||||
from ray.exceptions import TaskCancelledError, RayTaskError, \
|
||||
GetTimeoutError, WorkerCrashedError, \
|
||||
ObjectLostError
|
||||
from ray.test_utils import SignalActor
|
||||
|
||||
|
||||
def valid_exceptions(use_force):
|
||||
if use_force:
|
||||
return (RayTaskError, RayCancellationError, RayWorkerError,
|
||||
UnreconstructableError)
|
||||
return (RayTaskError, TaskCancelledError, WorkerCrashedError,
|
||||
ObjectLostError)
|
||||
else:
|
||||
return (RayTaskError, RayCancellationError)
|
||||
return (RayTaskError, TaskCancelledError)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_force", [True, False])
|
||||
@@ -50,10 +50,10 @@ def test_cancel_chain(ray_start_regular, use_force):
|
||||
with pytest.raises(valid_exceptions(use_force)):
|
||||
ray.get(ob)
|
||||
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(obj1, timeout=.1)
|
||||
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(obj2, timeout=.1)
|
||||
|
||||
signaler2.send.remote()
|
||||
@@ -249,7 +249,7 @@ def test_remote_cancel(ray_start_regular, use_force):
|
||||
outer = remote_wait.remote([sig])
|
||||
inner = ray.get(outer)[0]
|
||||
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(inner, timeout=1)
|
||||
|
||||
ray.cancel(inner, force=use_force)
|
||||
|
||||
@@ -70,7 +70,8 @@ def test_worker_failed(ray_start_workers_separate_multinode):
|
||||
for object_ref in object_refs:
|
||||
try:
|
||||
ray.get(object_ref)
|
||||
except (ray.exceptions.RayTaskError, ray.exceptions.RayWorkerError):
|
||||
except (ray.exceptions.RayTaskError,
|
||||
ray.exceptions.WorkerCrashedError):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ def test_errors_before_initializing_ray():
|
||||
for api_method in api_methods:
|
||||
print(api_method)
|
||||
with pytest.raises(
|
||||
ray.exceptions.RayConnectionError,
|
||||
ray.exceptions.RaySystemError,
|
||||
match="Ray has not been started yet."):
|
||||
api_method()
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ def test_failed_task(ray_start_regular, error_pubsub):
|
||||
def throw_exception_fct2():
|
||||
raise Exception("Test function 2 intentionally failed.")
|
||||
|
||||
@ray.remote(num_return_vals=3)
|
||||
@ray.remote(num_returns=3)
|
||||
def throw_exception_fct3(x):
|
||||
raise Exception("Test function 3 intentionally failed.")
|
||||
|
||||
@@ -362,7 +362,7 @@ def test_worker_dying(ray_start_regular, error_pubsub):
|
||||
def f():
|
||||
eval("exit()")
|
||||
|
||||
with pytest.raises(ray.exceptions.RayWorkerError):
|
||||
with pytest.raises(ray.exceptions.WorkerCrashedError):
|
||||
ray.get(f.remote())
|
||||
|
||||
errors = get_error_message(p, 1, ray_constants.WORKER_DIED_PUSH_ERROR)
|
||||
@@ -901,7 +901,7 @@ def test_raylet_crash_when_get(ray_start_regular):
|
||||
|
||||
thread = threading.Thread(target=sleep_to_kill_raylet)
|
||||
thread.start()
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
ray.get(object_ref)
|
||||
thread.join()
|
||||
|
||||
@@ -1062,7 +1062,7 @@ def test_eviction(ray_start_cluster):
|
||||
# Evict the object.
|
||||
ray.internal.free([obj])
|
||||
# ray.get throws an exception.
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
ray.get(obj)
|
||||
|
||||
@ray.remote
|
||||
|
||||
@@ -5,7 +5,7 @@ import ray
|
||||
|
||||
MB = 1024 * 1024
|
||||
|
||||
OBJECT_EVICTED = ray.exceptions.UnreconstructableError
|
||||
OBJECT_EVICTED = ray.exceptions.ObjectLostError
|
||||
OBJECT_TOO_LARGE = ray.exceptions.ObjectStoreFullError
|
||||
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_basic_task_api(ray_start_regular):
|
||||
|
||||
# Test multiple return values.
|
||||
|
||||
@ray.remote(num_return_vals=3)
|
||||
@ray.remote(num_returns=3)
|
||||
def f_multiple_returns():
|
||||
return 1, 2, 3
|
||||
|
||||
|
||||
@@ -70,7 +70,8 @@ def test_worker_failed(ray_start_workers_separate_multinode):
|
||||
for object_ref in object_refs:
|
||||
try:
|
||||
ray.get(object_ref)
|
||||
except (ray.exceptions.RayTaskError, ray.exceptions.RayWorkerError):
|
||||
except (ray.exceptions.RayTaskError,
|
||||
ray.exceptions.WorkerCrashedError):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -355,7 +355,7 @@ def test_remove_placement_group(ray_start_cluster):
|
||||
# That means this request should fail.
|
||||
with pytest.raises(ray.exceptions.RayActorError, match="actor died"):
|
||||
ray.get(a.f.remote(), timeout=3.0)
|
||||
with pytest.raises(ray.exceptions.RayWorkerError):
|
||||
with pytest.raises(ray.exceptions.WorkerCrashedError):
|
||||
ray.get(task_ref)
|
||||
|
||||
|
||||
@@ -576,7 +576,7 @@ def test_pending_placement_group_wait(ray_start_cluster):
|
||||
assert len(ready) == 0
|
||||
table = ray.experimental.placement_group_table(placement_group)
|
||||
assert table["state"] == "PENDING"
|
||||
with pytest.raises(ray.exceptions.RayTimeoutError):
|
||||
with pytest.raises(ray.exceptions.GetTimeoutError):
|
||||
ray.get(placement_group.ready(), timeout=0.1)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
|
||||
import ray
|
||||
from ray.exceptions import RayTimeoutError
|
||||
from ray.exceptions import GetTimeoutError
|
||||
from ray.experimental.queue import Queue, Empty, Full
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ def test_async_get(ray_start_regular):
|
||||
with pytest.raises(Empty):
|
||||
q.get_nowait()
|
||||
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(future, timeout=0.1) # task not canceled on timeout.
|
||||
|
||||
q.put(1)
|
||||
@@ -95,7 +95,7 @@ def test_async_put(ray_start_regular):
|
||||
with pytest.raises(Full):
|
||||
q.put_nowait(3)
|
||||
|
||||
with pytest.raises(RayTimeoutError):
|
||||
with pytest.raises(GetTimeoutError):
|
||||
ray.get(future, timeout=0.1) # task not canceled on timeout.
|
||||
|
||||
assert q.get() == 1
|
||||
|
||||
@@ -110,7 +110,7 @@ def test_reconstruction_cached_dependency(ray_start_cluster,
|
||||
else:
|
||||
with pytest.raises(ray.exceptions.RayTaskError) as e:
|
||||
ray.get(dependent_task.remote(obj))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ def test_basic_reconstruction(ray_start_cluster, reconstruction_enabled):
|
||||
else:
|
||||
with pytest.raises(ray.exceptions.RayTaskError) as e:
|
||||
ray.get(dependent_task.remote(obj))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
@@ -215,7 +215,7 @@ def test_basic_reconstruction_put(ray_start_cluster, reconstruction_enabled):
|
||||
# been evicted.
|
||||
try:
|
||||
ray.get(result)
|
||||
except ray.exceptions.UnreconstructableError:
|
||||
except ray.exceptions.ObjectLostError:
|
||||
pass
|
||||
|
||||
|
||||
@@ -284,7 +284,7 @@ def test_basic_reconstruction_actor_task(ray_start_cluster,
|
||||
else:
|
||||
with pytest.raises(ray.exceptions.RayTaskError) as e:
|
||||
ray.get(dependent_task.remote(obj))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
# Make sure the actor handle is still usable.
|
||||
@@ -356,8 +356,7 @@ def test_basic_reconstruction_actor_constructor(ray_start_cluster,
|
||||
return True
|
||||
except ray.exceptions.RayActorError:
|
||||
return False
|
||||
except (ray.exceptions.RayTaskError,
|
||||
ray.exceptions.UnreconstructableError):
|
||||
except (ray.exceptions.RayTaskError, ray.exceptions.ObjectLostError):
|
||||
return True
|
||||
|
||||
wait_for_condition(probe)
|
||||
@@ -369,7 +368,7 @@ def test_basic_reconstruction_actor_constructor(ray_start_cluster,
|
||||
x = a.dependent_task.remote(obj)
|
||||
print(x)
|
||||
ray.get(x)
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
@@ -429,7 +428,7 @@ def test_multiple_downstream_tasks(ray_start_cluster, reconstruction_enabled):
|
||||
dependent_task.options(resources={
|
||||
"node1": 1
|
||||
}).remote(obj))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
@@ -480,7 +479,7 @@ def test_reconstruction_chain(ray_start_cluster, reconstruction_enabled):
|
||||
else:
|
||||
with pytest.raises(ray.exceptions.RayTaskError) as e:
|
||||
ray.get(dependent_task.remote(obj))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
raise e.as_instanceof_cause()
|
||||
|
||||
|
||||
|
||||
@@ -354,7 +354,7 @@ def test_basic_serialized_reference(one_worker_100MiB, use_ray_put, failure):
|
||||
try:
|
||||
ray.get(obj_ref)
|
||||
assert not failure
|
||||
except ray.exceptions.RayWorkerError:
|
||||
except ray.exceptions.WorkerCrashedError:
|
||||
assert failure
|
||||
|
||||
# Reference should be gone, check that array gets evicted.
|
||||
@@ -403,7 +403,7 @@ def test_recursive_serialized_reference(one_worker_100MiB, use_ray_put,
|
||||
assert ray.get(tail_oid) is None
|
||||
assert not failure
|
||||
# TODO(edoakes): this should raise WorkerError.
|
||||
except ray.exceptions.UnreconstructableError:
|
||||
except ray.exceptions.ObjectLostError:
|
||||
assert failure
|
||||
|
||||
# Reference should be gone, check that array gets evicted.
|
||||
@@ -501,8 +501,7 @@ def test_worker_holding_serialized_reference(one_worker_100MiB, use_ray_put,
|
||||
try:
|
||||
ray.get(child_return_id)
|
||||
assert not failure
|
||||
except (ray.exceptions.RayWorkerError,
|
||||
ray.exceptions.UnreconstructableError):
|
||||
except (ray.exceptions.WorkerCrashedError, ray.exceptions.ObjectLostError):
|
||||
assert failure
|
||||
del child_return_id
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ def test_recursively_nest_ids(one_worker_100MiB, use_ray_put, failure):
|
||||
ray.get(tail_oid)
|
||||
assert not failure
|
||||
# TODO(edoakes): this should raise WorkerError.
|
||||
except ray.exceptions.UnreconstructableError:
|
||||
except ray.exceptions.ObjectLostError:
|
||||
assert failure
|
||||
|
||||
# Reference should be gone, check that array gets evicted.
|
||||
@@ -130,7 +130,7 @@ def test_return_object_ref(one_worker_100MiB, use_ray_put, failure):
|
||||
# Check that the owner dying unpins the object. This should execute on
|
||||
# the same worker because there is only one started and the other tasks
|
||||
# have finished.
|
||||
with pytest.raises(ray.exceptions.RayWorkerError):
|
||||
with pytest.raises(ray.exceptions.WorkerCrashedError):
|
||||
ray.get(exit.remote())
|
||||
else:
|
||||
# Check that removing the inner ID unpins the object.
|
||||
@@ -173,7 +173,7 @@ def test_pass_returned_object_ref(one_worker_100MiB, use_ray_put, failure):
|
||||
# Should succeed because inner_oid is pinned if no failure.
|
||||
ray.get(pending_oid)
|
||||
assert not failure
|
||||
except ray.exceptions.RayWorkerError:
|
||||
except ray.exceptions.WorkerCrashedError:
|
||||
assert failure
|
||||
|
||||
def ref_not_exists():
|
||||
@@ -232,7 +232,7 @@ def test_recursively_pass_returned_object_ref(one_worker_100MiB, use_ray_put,
|
||||
_fill_object_store_and_get(inner_oid)
|
||||
assert not failure
|
||||
# TODO(edoakes): this should raise WorkerError.
|
||||
except ray.exceptions.UnreconstructableError:
|
||||
except ray.exceptions.ObjectLostError:
|
||||
assert failure
|
||||
|
||||
inner_oid_bytes = inner_oid.binary()
|
||||
@@ -311,7 +311,7 @@ def test_borrowed_id_failure(one_worker_100MiB, failure):
|
||||
def resolve_ref(self):
|
||||
assert self.ref is not None
|
||||
if failure:
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
with pytest.raises(ray.exceptions.ObjectLostError):
|
||||
ray.get(self.ref)
|
||||
else:
|
||||
ray.get(self.ref)
|
||||
|
||||
@@ -422,7 +422,7 @@ def test_register_class(ray_start_2_cpus):
|
||||
assert ray.get(h2.remote(10)).value == 10
|
||||
|
||||
# Test registering multiple classes with the same name.
|
||||
@ray.remote(num_return_vals=3)
|
||||
@ray.remote(num_returns=3)
|
||||
def j():
|
||||
class Class0:
|
||||
def method0(self):
|
||||
|
||||
@@ -335,10 +335,9 @@ def test_driver_put_errors(ray_start_object_store_memory, error_pubsub):
|
||||
return len(errors) > 1
|
||||
|
||||
errors = wait_for_errors(p, error_check)
|
||||
assert all(
|
||||
error.type == ray_constants.PUT_RECONSTRUCTION_PUSH_ERROR
|
||||
or "ray.exceptions.UnreconstructableError" in error.error_messages
|
||||
for error in errors)
|
||||
assert all(error.type == ray_constants.PUT_RECONSTRUCTION_PUSH_ERROR
|
||||
or "ray.exceptions.ObjectLostError" in error.error_messages
|
||||
for error in errors)
|
||||
|
||||
|
||||
# NOTE(swang): This test tries to launch 1000 workers and breaks.
|
||||
|
||||
@@ -4,7 +4,7 @@ import unittest
|
||||
import ray
|
||||
|
||||
|
||||
class TestUnreconstructableErrors(unittest.TestCase):
|
||||
class TestObjectLostErrors(unittest.TestCase):
|
||||
def setUp(self):
|
||||
ray.init(
|
||||
num_cpus=1,
|
||||
@@ -20,7 +20,7 @@ class TestUnreconstructableErrors(unittest.TestCase):
|
||||
ray.get(x_id)
|
||||
for _ in range(20):
|
||||
ray.put(np.zeros(10 * 1024 * 1024))
|
||||
self.assertRaises(ray.exceptions.UnreconstructableError,
|
||||
self.assertRaises(ray.exceptions.ObjectLostError,
|
||||
lambda: ray.get(x_id))
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import traceback
|
||||
from contextlib import contextmanager
|
||||
|
||||
import ray
|
||||
from ray.exceptions import RayTimeoutError
|
||||
from ray.exceptions import GetTimeoutError
|
||||
from ray import ray_constants
|
||||
from ray.resource_spec import ResourceSpec
|
||||
from ray.tune.durable_trainable import DurableTrainable
|
||||
@@ -397,7 +397,7 @@ class RayTrialExecutor(TrialExecutor):
|
||||
reset_val = ray.get(
|
||||
trainable.reset.remote(new_config, trial.logdir),
|
||||
timeout=DEFAULT_GET_TIMEOUT)
|
||||
except RayTimeoutError:
|
||||
except GetTimeoutError:
|
||||
logger.exception("Trial %s: reset timed out.", trial)
|
||||
return False
|
||||
return reset_val
|
||||
|
||||
+21
-22
@@ -41,7 +41,7 @@ from ray import import_thread
|
||||
from ray import profiling
|
||||
|
||||
from ray.exceptions import (
|
||||
RayConnectionError,
|
||||
RaySystemError,
|
||||
RayError,
|
||||
RayTaskError,
|
||||
ObjectStoreFullError,
|
||||
@@ -202,8 +202,8 @@ class Worker:
|
||||
Exception: An exception is raised if the worker is not connected.
|
||||
"""
|
||||
if not self.connected:
|
||||
raise RayConnectionError("Ray has not been started yet. You can "
|
||||
"start Ray with 'ray.init()'.")
|
||||
raise RaySystemError("Ray has not been started yet. You can "
|
||||
"start Ray with 'ray.init()'.")
|
||||
|
||||
def set_mode(self, mode):
|
||||
"""Set the mode of the worker.
|
||||
@@ -568,7 +568,7 @@ def init(
|
||||
the distributed plasma store is lost due to node failure, Ray will
|
||||
attempt to reconstruct the object by re-executing the task that
|
||||
created the object. Arguments to the task will be recursively
|
||||
reconstructed. If False, then ray.UnreconstructableError will be
|
||||
reconstructed. If False, then ray.ObjectLostError will be
|
||||
thrown.
|
||||
_redis_max_memory: Redis max memory.
|
||||
_node_ip_address (str): The IP address of the node that we are on.
|
||||
@@ -589,7 +589,7 @@ def init(
|
||||
_java_worker_options: Overwrite the options to start Java workers.
|
||||
_lru_evict (bool): If True, when an object store is full, it will evict
|
||||
objects in LRU order to make more space and when under memory
|
||||
pressure, ray.UnreconstructableError may be thrown. If False, then
|
||||
pressure, ray.ObjectLostError may be thrown. If False, then
|
||||
reference counting will be used to decide which objects are safe
|
||||
to evict and when under memory pressure, ray.ObjectStoreFullError
|
||||
may be thrown.
|
||||
@@ -1383,7 +1383,7 @@ def get(object_refs, *, timeout=None):
|
||||
A Python object or a list of Python objects.
|
||||
|
||||
Raises:
|
||||
RayTimeoutError: A RayTimeoutError is raised if a timeout is set and
|
||||
GetTimeoutError: A GetTimeoutError is raised if a timeout is set and
|
||||
the get takes longer than timeout to return.
|
||||
Exception: An exception is raised if the task that created the object
|
||||
or that created one of the objects raised an exception.
|
||||
@@ -1417,7 +1417,7 @@ def get(object_refs, *, timeout=None):
|
||||
for i, value in enumerate(values):
|
||||
if isinstance(value, RayError):
|
||||
last_task_error_raise_time = time.time()
|
||||
if isinstance(value, ray.exceptions.UnreconstructableError):
|
||||
if isinstance(value, ray.exceptions.ObjectLostError):
|
||||
worker.core_worker.dump_object_store_memory_usage()
|
||||
if isinstance(value, RayTaskError):
|
||||
raise value.as_instanceof_cause()
|
||||
@@ -1611,7 +1611,7 @@ def cancel(object_ref, *, force=False):
|
||||
Only non-actor tasks can be canceled. Canceled tasks will not be
|
||||
retried (max_retries will not be respected).
|
||||
|
||||
Calling ray.get on a canceled task will raise a RayCancellationError.
|
||||
Calling ray.get on a canceled task will raise a TaskCancelledError.
|
||||
|
||||
Args:
|
||||
object_ref (ObjectRef): ObjectRef returned by the task
|
||||
@@ -1642,7 +1642,7 @@ def _mode(worker=global_worker):
|
||||
return worker.mode
|
||||
|
||||
|
||||
def make_decorator(num_return_vals=None,
|
||||
def make_decorator(num_returns=None,
|
||||
num_cpus=None,
|
||||
num_gpus=None,
|
||||
memory=None,
|
||||
@@ -1682,13 +1682,12 @@ def make_decorator(num_return_vals=None,
|
||||
" integer")
|
||||
return ray.remote_function.RemoteFunction(
|
||||
Language.PYTHON, function_or_class, None, num_cpus, num_gpus,
|
||||
memory, object_store_memory, resources, num_return_vals,
|
||||
max_calls, max_retries, placement_group,
|
||||
placement_group_bundle_index)
|
||||
memory, object_store_memory, resources, num_returns, max_calls,
|
||||
max_retries, placement_group, placement_group_bundle_index)
|
||||
|
||||
if inspect.isclass(function_or_class):
|
||||
if num_return_vals is not None:
|
||||
raise TypeError("The keyword 'num_return_vals' is not "
|
||||
if num_returns is not None:
|
||||
raise TypeError("The keyword 'num_returns' is not "
|
||||
"allowed for actors.")
|
||||
if max_calls is not None:
|
||||
raise TypeError("The keyword 'max_calls' is not "
|
||||
@@ -1732,7 +1731,7 @@ def remote(*args, **kwargs):
|
||||
|
||||
It can also be used with specific keyword arguments:
|
||||
|
||||
* **num_return_vals:** This is only for *remote functions*. It specifies
|
||||
* **num_returns:** This is only for *remote functions*. It specifies
|
||||
the number of object refs returned by the remote function invocation.
|
||||
* **num_cpus:** The quantity of CPU cores to reserve for this task or for
|
||||
the lifetime of the actor.
|
||||
@@ -1774,7 +1773,7 @@ def remote(*args, **kwargs):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ray.remote(num_gpus=1, max_calls=1, num_return_vals=2)
|
||||
@ray.remote(num_gpus=1, max_calls=1, num_returns=2)
|
||||
def f():
|
||||
return 1, 2
|
||||
|
||||
@@ -1789,7 +1788,7 @@ def remote(*args, **kwargs):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ray.remote(num_gpus=1, max_calls=1, num_return_vals=2)
|
||||
@ray.remote(num_gpus=1, max_calls=1, num_returns=2)
|
||||
def f():
|
||||
return 1, 2
|
||||
g = f.options(num_gpus=2, max_calls=None)
|
||||
@@ -1815,15 +1814,15 @@ def remote(*args, **kwargs):
|
||||
error_string = ("The @ray.remote decorator must be applied either "
|
||||
"with no arguments and no parentheses, for example "
|
||||
"'@ray.remote', or it must be applied using some of "
|
||||
"the arguments 'num_return_vals', 'num_cpus', 'num_gpus', "
|
||||
"the arguments 'num_returns', 'num_cpus', 'num_gpus', "
|
||||
"'memory', 'object_store_memory', 'resources', "
|
||||
"'max_calls', or 'max_restarts', like "
|
||||
"'@ray.remote(num_return_vals=2, "
|
||||
"'@ray.remote(num_returns=2, "
|
||||
"resources={\"CustomResource\": 1})'.")
|
||||
assert len(args) == 0 and len(kwargs) > 0, error_string
|
||||
for key in kwargs:
|
||||
assert key in [
|
||||
"num_return_vals",
|
||||
"num_returns",
|
||||
"num_cpus",
|
||||
"num_gpus",
|
||||
"memory",
|
||||
@@ -1848,7 +1847,7 @@ def remote(*args, **kwargs):
|
||||
assert "GPU" not in resources, "Use the 'num_gpus' argument."
|
||||
|
||||
# Handle other arguments.
|
||||
num_return_vals = kwargs.get("num_return_vals")
|
||||
num_returns = kwargs.get("num_returns")
|
||||
max_calls = kwargs.get("max_calls")
|
||||
max_restarts = kwargs.get("max_restarts")
|
||||
max_task_retries = kwargs.get("max_task_retries")
|
||||
@@ -1857,7 +1856,7 @@ def remote(*args, **kwargs):
|
||||
max_retries = kwargs.get("max_retries")
|
||||
|
||||
return make_decorator(
|
||||
num_return_vals=num_return_vals,
|
||||
num_returns=num_returns,
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus,
|
||||
memory=memory,
|
||||
|
||||
Reference in New Issue
Block a user