mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 04:42:11 +08:00
[direct task] Retry tasks on failure and turn on RAY_FORCE_DIRECT for test_multinode_failures.py (#6306)
* multinode failures direct
* Add number of retries allowed for tasks
* Retry tasks
* Add failing test for object reconstruction
* Handle return status and debug
* update
* Retry task unit test
* update
* update
* todo
* Fix max_retries decorator, fix test
* Fix test that flaked
* lint
* comments
This commit is contained in:
@@ -1621,6 +1621,7 @@ def make_decorator(num_return_vals=None,
|
||||
object_store_memory=None,
|
||||
resources=None,
|
||||
max_calls=None,
|
||||
max_retries=None,
|
||||
max_reconstructions=None,
|
||||
worker=None):
|
||||
def decorator(function_or_class):
|
||||
@@ -1633,7 +1634,8 @@ def make_decorator(num_return_vals=None,
|
||||
|
||||
return ray.remote_function.RemoteFunction(
|
||||
function_or_class, num_cpus, num_gpus, memory,
|
||||
object_store_memory, resources, num_return_vals, max_calls)
|
||||
object_store_memory, resources, num_return_vals, max_calls,
|
||||
max_retries)
|
||||
|
||||
if inspect.isclass(function_or_class):
|
||||
if num_return_vals is not None:
|
||||
@@ -1732,6 +1734,7 @@ def remote(*args, **kwargs):
|
||||
"resources",
|
||||
"max_calls",
|
||||
"max_reconstructions",
|
||||
"max_retries",
|
||||
], error_string
|
||||
|
||||
num_cpus = kwargs["num_cpus"] if "num_cpus" in kwargs else None
|
||||
@@ -1751,6 +1754,7 @@ def remote(*args, **kwargs):
|
||||
max_reconstructions = kwargs.get("max_reconstructions")
|
||||
memory = kwargs.get("memory")
|
||||
object_store_memory = kwargs.get("object_store_memory")
|
||||
max_retries = kwargs.get("max_retries")
|
||||
|
||||
return make_decorator(
|
||||
num_return_vals=num_return_vals,
|
||||
@@ -1761,4 +1765,5 @@ def remote(*args, **kwargs):
|
||||
resources=resources,
|
||||
max_calls=max_calls,
|
||||
max_reconstructions=max_reconstructions,
|
||||
max_retries=max_retries,
|
||||
worker=worker)
|
||||
|
||||
Reference in New Issue
Block a user