From c75038b94530eaa19afc82b5efc0c8c6525492cf Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Sun, 27 Jan 2019 17:41:37 -0800
Subject: [PATCH] [autoscaler] Updating a file in file mounts causes all worker nodes to get restarted

---
 python/ray/autoscaler/autoscaler.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py
index aa4391827..a01b7e7a0 100644
--- a/python/ray/autoscaler/autoscaler.py
+++ b/python/ray/autoscaler/autoscaler.py
@@ -675,6 +675,12 @@ def hash_launch_conf(node_conf, auth):
     return hasher.hexdigest()
 
 
+# Cache the file hashes to avoid rescanning it each time. Also, this avoids
+# inadvertently restarting workers if the file mount content is mutated on the
+# head node.
+_hash_cache = {}
+
+
 def hash_runtime_conf(file_mounts, extra_objs):
     hasher = hashlib.sha1()
 
@@ -699,9 +705,15 @@ def hash_runtime_conf(file_mounts, extra_objs):
             with open(path, "rb") as f:
                 hasher.update(binascii.hexlify(f.read()))
 
-    hasher.update(json.dumps(sorted(file_mounts.items())).encode("utf-8"))
-    hasher.update(json.dumps(extra_objs, sort_keys=True).encode("utf-8"))
-    for local_path in sorted(file_mounts.values()):
-        add_content_hashes(local_path)
+    conf_str = (json.dumps(sorted(file_mounts.items())).encode("utf-8") +
+                json.dumps(extra_objs, sort_keys=True).encode("utf-8"))
 
-    return hasher.hexdigest()
+    # Important: only hash the files once. Otherwise, we can end up restarting
+    # workers if the files were changed and we re-hashed them.
+    if conf_str not in _hash_cache:
+        hasher.update(conf_str)
+        for local_path in sorted(file_mounts.values()):
+            add_content_hashes(local_path)
+        _hash_cache[conf_str] = hasher.hexdigest()
+
+    return _hash_cache[conf_str]