Mirror of https://github.com/wassname/ray.git
(synced 2026-06-28 01:16:06 +08:00)
[tune] Improve error message when Ray crashes (#3795)
This commit is contained in:
@@ -9,6 +9,7 @@ import time
|
||||
import traceback
|
||||
|
||||
import ray
|
||||
from ray.tune.error import TuneError
|
||||
from ray.tune.logger import NoopLogger
|
||||
from ray.tune.trial import Trial, Resources, Checkpoint
|
||||
from ray.tune.trial_executor import TrialExecutor
|
||||
@@ -270,6 +271,11 @@ class RayTrialExecutor(TrialExecutor):
|
||||
logger.warning("Cluster resources not detected. Retrying...")
|
||||
time.sleep(0.5)
|
||||
|
||||
if not resources or "CPU" not in resources:
|
||||
raise TuneError("Cluster resources cannot be detected. "
|
||||
"You can resume this experiment by passing in "
|
||||
"`resume=True` to `run_experiments`.")
|
||||
|
||||
resources = resources.copy()
|
||||
num_cpus = resources.pop("CPU")
|
||||
num_gpus = resources.pop("GPU")
|
||||
|
||||
Reference in New Issue
Block a user