def _memory_debug_string(self):
    """Return a one-line summary of memory usage on this node.

    "Used" memory is the total reported by ``psutil.virtual_memory()``
    minus the amount it reports as available. Both figures are shown in
    decimal gigabytes (1 GB = 1e9 bytes), rounded to one decimal place.

    Returns:
        str: ``"Memory usage on this node: <used>/<total> GB"``, followed
            by a ``***LOW MEMORY***`` warning when more than 90% of the
            total is in use. If ``psutil`` cannot be imported, a message
            explaining how to install it is returned instead.
    """
    # Guard only the import: an ImportError raised anywhere else should
    # surface rather than be misreported as "psutil is not installed".
    try:
        import psutil
    except ImportError:
        return "Unknown memory usage (`pip install psutil` to resolve)"

    # Query the OS once and derive both figures from the same snapshot.
    vmem = psutil.virtual_memory()
    total_gb = vmem.total / 1e9
    used_gb = total_gb - vmem.available / 1e9

    if used_gb > total_gb * 0.9:
        warn = (": ***LOW MEMORY*** less than 10% of the memory on "
                "this node is available for use. This can cause "
                "unexpected crashes. Consider "
                "reducing the memory used by your application "
                "or reducing the Ray object store size by setting "
                "`object_store_memory` when calling `ray.init`.")
    else:
        warn = ""
    return "Memory usage on this node: {}/{} GB{}".format(
        round(used_gb, 1), round(total_gb, 1), warn)