[Tune] Better warnings/exceptions for fail_fast='raise' (#11842)

This commit is contained in:
Amog Kamsetty
2020-11-06 15:01:55 -08:00
committed by GitHub
parent 045fed5cd2
commit 900a48c19c
+7 -3
View File
@@ -6,6 +6,7 @@ import os
import time
import traceback
import types
import warnings
import ray.cloudpickle as cloudpickle
from ray.services import get_node_ip_address
@@ -157,7 +158,7 @@ class TrialRunner:
if isinstance(self._fail_fast, str):
self._fail_fast = self._fail_fast.upper()
if self._fail_fast == TrialRunner.RAISE:
logger.warning(
warnings.warn(
"fail_fast='raise' detected. Be careful when using this "
"mode as resources (such as Ray processes, "
"file descriptors, and temporary files) may not be "
@@ -474,7 +475,7 @@ class TrialRunner:
wait_for_trial = trials_done and not self._search_alg.is_finished()
# Only fetch a new trial if we have no pending trial
if not any(trial.status == Trial.PENDING for trial in self._trials) \
or wait_for_trial:
or wait_for_trial:
self._update_trial_queue(blocking=wait_for_trial)
with warn_if_slow("choose_trial_to_run"):
trial = self._scheduler_alg.choose_trial_to_run(self)
@@ -625,9 +626,12 @@ class TrialRunner:
else:
self._execute_action(trial, decision)
except Exception:
logger.exception("Trial %s: Error processing event.", trial)
error_msg = "Trial %s: Error processing event." % trial
if self._fail_fast == TrialRunner.RAISE:
logger.error(error_msg)
raise
else:
logger.exception(error_msg)
self._process_trial_failure(trial, traceback.format_exc())
def _validate_result_metrics(self, result):