Change os.uname()[1] and socket.gethostname() to the portable and faster platform.node() (#8839)

Co-authored-by: Mehrdad <noreply@github.com>
This commit is contained in:
mehrdadn
2020-06-08 21:29:46 -07:00
committed by SangBin Cho
parent 51ba6d5112
commit e6215d224c
15 changed files with 31 additions and 27 deletions
+2 -2
View File
@@ -12,7 +12,7 @@ import errno
import json
import logging
import os
import socket
import platform
import threading
import time
import traceback
@@ -965,7 +965,7 @@ if __name__ == "__main__":
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The dashboard on node {} failed with the following "
"error:\n{}".format(socket.gethostname(), traceback_str))
"error:\n{}".format(platform.node(), traceback_str))
ray.utils.push_error_to_driver_through_redis(
redis_client, ray_constants.DASHBOARD_DIED_ERROR, message)
if isinstance(e, OSError) and e.errno == errno.ENOENT:
+2 -2
View File
@@ -4,8 +4,8 @@ import glob
import json
import logging
import os
import platform
import shutil
import socket
import time
import traceback
@@ -302,7 +302,7 @@ if __name__ == "__main__":
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The log monitor on node {} failed with the following "
"error:\n{}".format(socket.gethostname(), traceback_str))
"error:\n{}".format(platform.node(), traceback_str))
ray.utils.push_error_to_driver_through_redis(
redis_client, ray_constants.LOG_MONITOR_DIED_ERROR, message)
raise e
+2 -1
View File
@@ -1,5 +1,6 @@
import logging
import os
import platform
import sys
import time
@@ -47,7 +48,7 @@ class RayOutOfMemoryError(Exception):
" ".join(cmdline)[:100].strip())
return ("More than {}% of the memory on ".format(int(
100 * threshold)) + "node {} is used ({} / {} GB). ".format(
os.uname()[1], round(used_gb, 2), round(total_gb, 2)) +
platform.node(), round(used_gb, 2), round(total_gb, 2)) +
"The top 10 memory consumers are:\n\n{}".format(proc_str) +
"\n\nIn addition, up to {} GiB of shared memory is ".format(
round(get_shared(psutil.virtual_memory()) / (1024**3), 2))
+3 -3
View File
@@ -6,7 +6,7 @@ import traceback
import time
import datetime
import grpc
import socket
import platform
import subprocess
import sys
from concurrent import futures
@@ -92,7 +92,7 @@ class Reporter:
"""Initialize the reporter object."""
self.cpu_counts = (psutil.cpu_count(), psutil.cpu_count(logical=False))
self.ip = ray.services.get_node_ip_address()
self.hostname = socket.gethostname()
self.hostname = platform.node()
_ = psutil.cpu_percent() # For initialization
@@ -252,7 +252,7 @@ if __name__ == "__main__":
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The reporter on node {} failed with the following "
"error:\n{}".format(socket.gethostname(), traceback_str))
"error:\n{}".format(platform.node(), traceback_str))
ray.utils.push_error_to_driver_through_redis(
redis_client, ray_constants.REPORTER_DIED_ERROR, message)
raise e
+2 -1
View File
@@ -6,6 +6,7 @@ import logging
import glob
import os
import pickle
import platform
import pandas as pd
from six import string_types
import shutil
@@ -308,7 +309,7 @@ class Trainable:
time_this_iter_s=time_this_iter,
time_total_s=self._time_total,
pid=os.getpid(),
hostname=os.uname()[1],
hostname=platform.node(),
node_ip=self._local_ip,
config=self.config,
time_since_restore=self._time_since_restore,
+2 -1
View File
@@ -3,6 +3,7 @@ from collections import deque
import copy
from datetime import datetime
import logging
import platform
import shutil
import uuid
import time
@@ -42,7 +43,7 @@ class Location:
def __str__(self):
if not self.pid:
return ""
elif self.hostname == os.uname()[1]:
elif self.hostname == platform.node():
return "pid={}".format(self.pid)
else:
return "{}:{}".format(self.hostname, self.pid)