From 51559c08b975b8f5a32a7ea33f88c355617f109b Mon Sep 17 00:00:00 2001 From: yncxcw Date: Wed, 22 Apr 2020 15:32:35 -0600 Subject: [PATCH] Fix mis-memory counting in memory monitor for container environment (#8113) Co-authored-by: weich --- python/ray/memory_monitor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ray/memory_monitor.py b/python/ray/memory_monitor.py index aea7c939c..e383ea9a6 100644 --- a/python/ray/memory_monitor.py +++ b/python/ray/memory_monitor.py @@ -114,6 +114,13 @@ class MemoryMonitor: with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "rb") as f: used_gb = int(f.read()) / (1024**3) + # Exclude the page cache + with open("/sys/fs/cgroup/memory/memory.stat", "r") as f: + for line in f.readlines(): + if line.split(" ")[0] == "cache": + used_gb = \ + used_gb - int(line.split(" ")[1]) / (1024**3) + assert used_gb >= 0 if used_gb > total_gb * self.error_threshold: raise RayOutOfMemoryError( RayOutOfMemoryError.get_message(used_gb, total_gb,