From f1b4ea69a3bb6305fda8257aa6494035c39de3e0 Mon Sep 17 00:00:00 2001 From: Steve Severance Date: Sat, 28 Jul 2018 23:29:09 -0700 Subject: [PATCH] Prevent hasher from running out of memory on large files (#2451) * Prevent hasher from running out of memory on large files * dump out keys * only print if failed * remove debugging * Fix lint error. Reverse adding newline. --- python/ray/autoscaler/autoscaler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py index 182d1cdae..873aa48ad 100644 --- a/python/ray/autoscaler/autoscaler.py +++ b/python/ray/autoscaler/autoscaler.py @@ -653,7 +653,12 @@ def hash_runtime_conf(file_mounts, extra_objs): for name in filenames: hasher.update(name.encode("utf-8")) with open(os.path.join(dirpath, name), "rb") as f: - hasher.update(binascii.hexlify(f.read())) + if os.path.getsize(os.path.join(dirpath, + name)) < 1000000000: + hasher.update(binascii.hexlify(f.read())) + else: + for chunk in iter(lambda: f.read(8192), b''): + hasher.update(binascii.hexlify(chunk)) else: with open(path, "rb") as f: hasher.update(binascii.hexlify(f.read()))