From 88ab887cc4ada214a8bab30cff69568f1cb98017 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Tue, 2 Feb 2021 00:10:35 -0800 Subject: [PATCH] Unconditionally retry all RPC errors on client connect (#13845) * wip * Update python/ray/util/client/worker.py Co-authored-by: fangfengbin <869218239a@zju.edu.cn> Co-authored-by: fangfengbin <869218239a@zju.edu.cn> --- python/ray/util/client/worker.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/python/ray/util/client/worker.py b/python/ray/util/client/worker.py index b0a4b78f5..a97ccaca7 100644 --- a/python/ray/util/client/worker.py +++ b/python/ray/util/client/worker.py @@ -101,17 +101,11 @@ class Worker: # Note that channel_ready_future constitutes its own timeout, # which is why we do not sleep here. except grpc.RpcError as e: - if e.code() == grpc.StatusCode.UNAVAILABLE: - # UNAVAILABLE is gRPC's retryable error, - # so we do that here. - logger.info("Ray client server unavailable, " - f"retrying in {timeout}s...") - logger.debug(f"Received when checking init: {e.details()}") - # Ray is not ready yet, wait a timeout - time.sleep(timeout) - else: - # Any other gRPC error gets a reraise - raise e + logger.info("Ray client server unavailable, " + f"retrying in {timeout}s...") + logger.debug(f"Received when checking init: {e.details()}") + # Ray is not ready yet, wait a timeout. + time.sleep(timeout) # Fallthrough, backoff, and retry at the top of the loop logger.info("Waiting for Ray to become ready on the server, " f"retry in {timeout}s...")