mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 00:29:38 +08:00
[tune] Added possibility to execute infinite recovery retries for a trial (#3901)
Allows a trial to retry recovery indefinitely by setting _max_failures_ to a negative number.
This commit is contained in:
@@ -74,7 +74,8 @@ class Experiment(object):
|
||||
experiment regardless of the checkpoint_freq. Default is False.
|
||||
max_failures (int): Try to recover a trial from its last
|
||||
checkpoint at least this many times. Only applies if
|
||||
checkpointing is enabled. Defaults to 3.
|
||||
checkpointing is enabled. Setting to -1 will lead to infinite
|
||||
recovery retries. Defaults to 3.
|
||||
restore (str): Path to checkpoint. Only makes sense to set if
|
||||
running 1 trial. Defaults to None.
|
||||
repeat: Deprecated and will be removed in future versions of
|
||||
|
||||
@@ -356,7 +356,8 @@ class Trial(object):
|
||||
be a checkpoint.
|
||||
"""
|
||||
return (self.checkpoint_freq > 0
|
||||
and self.num_failures < self.max_failures)
|
||||
and (self.num_failures < self.max_failures
|
||||
or self.max_failures < 0))
|
||||
|
||||
def update_last_result(self, result, terminate=False):
|
||||
if terminate:
|
||||
|
||||
Reference in New Issue
Block a user