From cf98b1bb1321598c1b77d31cf8d715fc0a6bd1f7 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 19 Aug 2019 16:18:08 -0700 Subject: [PATCH] [autoscaler] Fix ssh control path length issue (#5476) --- python/ray/autoscaler/updater.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index c5c3d3c4c..146fe9dfb 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -6,6 +6,7 @@ try: # py3 from shlex import quote except ImportError: # py2 from pipes import quote +import hashlib import logging import os import subprocess @@ -23,7 +24,7 @@ logger = logging.getLogger(__name__) # How long to wait for a node to start, in seconds NODE_START_WAIT_S = 300 SSH_CHECK_INTERVAL = 5 -CONTROL_PATH_MAX_LENGTH = 70 +HASH_MAX_LENGTH = 10 def get_default_ssh_options(private_key, connect_timeout, ssh_control_path): @@ -57,8 +58,11 @@ class NodeUpdater(object): exit_on_update_fail=False, use_internal_ip=False): - ssh_control_path = "/tmp/{}_ray_ssh_sockets/{}".format( - getuser(), cluster_name)[:CONTROL_PATH_MAX_LENGTH] + ssh_control_hash = hashlib.md5(cluster_name.encode()).hexdigest() + ssh_user_hash = hashlib.md5(getuser().encode()).hexdigest() + ssh_control_path = "/tmp/ray_ssh_{}/{}".format( + ssh_user_hash[:HASH_MAX_LENGTH], + ssh_control_hash[:HASH_MAX_LENGTH]) self.daemon = True self.process_runner = process_runner