From df9beb71236b77870da27a34e082c0cd6df0b184 Mon Sep 17 00:00:00 2001 From: Kristian Hartikainen Date: Mon, 4 Mar 2019 14:26:10 -0800 Subject: [PATCH] [tune] Fix trial result fetching (#4219) * Fix trial results wait in RayTrialExecutor.get_next_available_trial * Add comment for the results shuffling * Remove timeout from the wait * Change random.sample to random.shuffle --- python/ray/tune/ray_trial_executor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py index 38b41d4a4..c9bb976ac 100644 --- a/python/ray/tune/ray_trial_executor.py +++ b/python/ray/tune/ray_trial_executor.py @@ -5,6 +5,7 @@ from __future__ import print_function import logging import os +import random import time import traceback @@ -230,7 +231,13 @@ class RayTrialExecutor(TrialExecutor): return list(self._running.values()) def get_next_available_trial(self): - [result_id], _ = ray.wait(list(self._running)) + shuffled_results = list(self._running.keys()) + random.shuffle(shuffled_results) + # Note: We shuffle the results because `ray.wait` by default returns + # the first available result, and we want to guarantee that slower + # trials (i.e. trials that run remotely) also get fairly reported. + # See https://github.com/ray-project/ray/issues/4211 for details. + [result_id], _ = ray.wait(shuffled_results) return self._running[result_id] def fetch_result(self, trial):