[tune] Improve error message when Ray crashes (#3795)

This commit is contained in:
Richard Liaw
2019-02-15 01:04:17 -08:00
committed by GitHub
parent 7cf62a10cd
commit bb7c4ce9c4
+6
View File
@@ -9,6 +9,7 @@ import time
import traceback
import ray
from ray.tune.error import TuneError
from ray.tune.logger import NoopLogger
from ray.tune.trial import Trial, Resources, Checkpoint
from ray.tune.trial_executor import TrialExecutor
@@ -270,6 +271,11 @@ class RayTrialExecutor(TrialExecutor):
logger.warning("Cluster resources not detected. Retrying...")
time.sleep(0.5)
if not resources or "CPU" not in resources:
raise TuneError("Cluster resources cannot be detected. "
"You can resume this experiment by passing in "
"`resume=True` to `run_experiments`.")
resources = resources.copy()
num_cpus = resources.pop("CPU")
num_gpus = resources.pop("GPU")