[autoscaler] Fix ssh control path length issue (#5476)

This commit is contained in:
Philipp Moritz
2019-08-19 16:18:08 -07:00
committed by GitHub
parent 0916603e61
commit cf98b1bb13
+7 -3
View File
@@ -6,6 +6,7 @@ try: # py3
from shlex import quote
except ImportError: # py2
from pipes import quote
import hashlib
import logging
import os
import subprocess
@@ -23,7 +24,7 @@ logger = logging.getLogger(__name__)
# How long to wait for a node to start, in seconds
NODE_START_WAIT_S = 300
# NOTE(review): presumably the pause, in seconds, between SSH connectivity
# checks -- the use site is not visible here; confirm.
SSH_CHECK_INTERVAL = 5
# Maximum number of characters kept from the formatted ssh control path
# string (the path is sliced to this length).
CONTROL_PATH_MAX_LENGTH = 70
# Number of leading hex characters kept from each md5 digest (user name and
# cluster name) that makes up the ssh control path, keeping that path short.
HASH_MAX_LENGTH = 10
def get_default_ssh_options(private_key, connect_timeout, ssh_control_path):
@@ -57,8 +58,11 @@ class NodeUpdater(object):
exit_on_update_fail=False,
use_internal_ip=False):
ssh_control_path = "/tmp/{}_ray_ssh_sockets/{}".format(
getuser(), cluster_name)[:CONTROL_PATH_MAX_LENGTH]
ssh_control_hash = hashlib.md5(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.md5(getuser().encode()).hexdigest()
ssh_control_path = "/tmp/ray_ssh_{}/{}".format(
ssh_user_hash[:HASH_MAX_LENGTH],
ssh_control_hash[:HASH_MAX_LENGTH])
self.daemon = True
self.process_runner = process_runner