mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 17:49:47 +08:00
Change os.uname()[1] and socket.gethostname() to the portable and faster platform.node() (#8839)
Co-authored-by: Mehrdad <noreply@github.com>
This commit is contained in:
@@ -7,10 +7,10 @@ import ray
|
||||
|
||||
@ray.remote
|
||||
def gethostname(x):
|
||||
import platform
|
||||
import time
|
||||
import socket
|
||||
time.sleep(0.01)
|
||||
return x + (socket.gethostname(), )
|
||||
return x + (platform.node(), )
|
||||
|
||||
|
||||
def wait_for_nodes(expected):
|
||||
|
||||
@@ -211,7 +211,7 @@ program:
|
||||
.. code-block:: python
|
||||
|
||||
from collections import Counter
|
||||
import socket
|
||||
import platform
|
||||
import time
|
||||
import ray
|
||||
|
||||
@@ -220,7 +220,7 @@ program:
|
||||
@ray.remote
|
||||
def f(x):
|
||||
time.sleep(0.01)
|
||||
return x + (socket.gethostname(), )
|
||||
return x + (platform.node(), )
|
||||
|
||||
# Check that objects can be transferred from each node to each other node.
|
||||
%time Counter(ray.get([f.remote(f.remote(())) for _ in range(100)]))
|
||||
|
||||
+2
-2
@@ -6,10 +6,10 @@ import ray
|
||||
|
||||
@ray.remote
|
||||
def gethostname(x):
|
||||
import platform
|
||||
import time
|
||||
import socket
|
||||
time.sleep(0.01)
|
||||
return x + (socket.gethostname(), )
|
||||
return x + (platform.node(), )
|
||||
|
||||
|
||||
def wait_for_nodes(expected):
|
||||
|
||||
@@ -12,7 +12,7 @@ import errno
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import platform
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
@@ -965,7 +965,7 @@ if __name__ == "__main__":
|
||||
args.redis_address, password=args.redis_password)
|
||||
traceback_str = ray.utils.format_error_message(traceback.format_exc())
|
||||
message = ("The dashboard on node {} failed with the following "
|
||||
"error:\n{}".format(socket.gethostname(), traceback_str))
|
||||
"error:\n{}".format(platform.node(), traceback_str))
|
||||
ray.utils.push_error_to_driver_through_redis(
|
||||
redis_client, ray_constants.DASHBOARD_DIED_ERROR, message)
|
||||
if isinstance(e, OSError) and e.errno == errno.ENOENT:
|
||||
|
||||
@@ -4,8 +4,8 @@ import glob
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import socket
|
||||
import time
|
||||
import traceback
|
||||
|
||||
@@ -302,7 +302,7 @@ if __name__ == "__main__":
|
||||
args.redis_address, password=args.redis_password)
|
||||
traceback_str = ray.utils.format_error_message(traceback.format_exc())
|
||||
message = ("The log monitor on node {} failed with the following "
|
||||
"error:\n{}".format(socket.gethostname(), traceback_str))
|
||||
"error:\n{}".format(platform.node(), traceback_str))
|
||||
ray.utils.push_error_to_driver_through_redis(
|
||||
redis_client, ray_constants.LOG_MONITOR_DIED_ERROR, message)
|
||||
raise e
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
|
||||
@@ -47,7 +48,7 @@ class RayOutOfMemoryError(Exception):
|
||||
" ".join(cmdline)[:100].strip())
|
||||
return ("More than {}% of the memory on ".format(int(
|
||||
100 * threshold)) + "node {} is used ({} / {} GB). ".format(
|
||||
os.uname()[1], round(used_gb, 2), round(total_gb, 2)) +
|
||||
platform.node(), round(used_gb, 2), round(total_gb, 2)) +
|
||||
"The top 10 memory consumers are:\n\n{}".format(proc_str) +
|
||||
"\n\nIn addition, up to {} GiB of shared memory is ".format(
|
||||
round(get_shared(psutil.virtual_memory()) / (1024**3), 2))
|
||||
|
||||
@@ -6,7 +6,7 @@ import traceback
|
||||
import time
|
||||
import datetime
|
||||
import grpc
|
||||
import socket
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from concurrent import futures
|
||||
@@ -92,7 +92,7 @@ class Reporter:
|
||||
"""Initialize the reporter object."""
|
||||
self.cpu_counts = (psutil.cpu_count(), psutil.cpu_count(logical=False))
|
||||
self.ip = ray.services.get_node_ip_address()
|
||||
self.hostname = socket.gethostname()
|
||||
self.hostname = platform.node()
|
||||
|
||||
_ = psutil.cpu_percent() # For initialization
|
||||
|
||||
@@ -252,7 +252,7 @@ if __name__ == "__main__":
|
||||
args.redis_address, password=args.redis_password)
|
||||
traceback_str = ray.utils.format_error_message(traceback.format_exc())
|
||||
message = ("The reporter on node {} failed with the following "
|
||||
"error:\n{}".format(socket.gethostname(), traceback_str))
|
||||
"error:\n{}".format(platform.node(), traceback_str))
|
||||
ray.utils.push_error_to_driver_through_redis(
|
||||
redis_client, ray_constants.REPORTER_DIED_ERROR, message)
|
||||
raise e
|
||||
|
||||
@@ -6,6 +6,7 @@ import logging
|
||||
import glob
|
||||
import os
|
||||
import pickle
|
||||
import platform
|
||||
import pandas as pd
|
||||
from six import string_types
|
||||
import shutil
|
||||
@@ -308,7 +309,7 @@ class Trainable:
|
||||
time_this_iter_s=time_this_iter,
|
||||
time_total_s=self._time_total,
|
||||
pid=os.getpid(),
|
||||
hostname=os.uname()[1],
|
||||
hostname=platform.node(),
|
||||
node_ip=self._local_ip,
|
||||
config=self.config,
|
||||
time_since_restore=self._time_since_restore,
|
||||
|
||||
@@ -3,6 +3,7 @@ from collections import deque
|
||||
import copy
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import platform
|
||||
import shutil
|
||||
import uuid
|
||||
import time
|
||||
@@ -42,7 +43,7 @@ class Location:
|
||||
def __str__(self):
|
||||
if not self.pid:
|
||||
return ""
|
||||
elif self.hostname == os.uname()[1]:
|
||||
elif self.hostname == platform.node():
|
||||
return "pid={}".format(self.pid)
|
||||
else:
|
||||
return "{}:{}".format(self.hostname, self.pid)
|
||||
|
||||
@@ -3,6 +3,7 @@ import numpy as np
|
||||
import gym
|
||||
import logging
|
||||
import pickle
|
||||
import platform
|
||||
import os
|
||||
|
||||
import ray
|
||||
@@ -891,7 +892,7 @@ class RolloutWorker(ParallelIteratorWorker):
|
||||
def get_host(self):
|
||||
"""Returns the hostname of the process running this evaluator."""
|
||||
|
||||
return os.uname()[1]
|
||||
return platform.node()
|
||||
|
||||
@DeveloperAPI
|
||||
def apply(self, func, *args):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
import random
|
||||
import os
|
||||
import collections
|
||||
import platform
|
||||
import sys
|
||||
|
||||
import ray
|
||||
@@ -343,7 +343,7 @@ class LocalReplayBuffer(ParallelIteratorWorker):
|
||||
return _local_replay_buffer
|
||||
|
||||
def get_host(self):
|
||||
return os.uname()[1]
|
||||
return platform.node()
|
||||
|
||||
def add_batch(self, batch):
|
||||
# Make a copy so the replay buffer doesn't pin plasma memory.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
from typing import List
|
||||
|
||||
import ray
|
||||
@@ -58,7 +58,7 @@ class Aggregator(ParallelIteratorWorker):
|
||||
super().__init__(generator, repeat=False)
|
||||
|
||||
def get_host(self):
|
||||
return os.uname()[1]
|
||||
return platform.node()
|
||||
|
||||
def set_weights(self, weights, global_vars):
|
||||
self.weights = weights
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import time
|
||||
|
||||
import ray
|
||||
@@ -172,4 +172,4 @@ class AggregationWorker(AggregationWorkerBase):
|
||||
return result
|
||||
|
||||
def get_host(self):
|
||||
return os.uname()[1]
|
||||
return platform.node()
|
||||
|
||||
@@ -5,7 +5,7 @@ https://arxiv.org/abs/1803.00933"""
|
||||
import collections
|
||||
import logging
|
||||
import numpy as np
|
||||
import os
|
||||
import platform
|
||||
import random
|
||||
from six.moves import queue
|
||||
import threading
|
||||
@@ -340,7 +340,7 @@ class LocalReplayBuffer(ParallelIteratorWorker):
|
||||
return _local_replay_buffer
|
||||
|
||||
def get_host(self):
|
||||
return os.uname()[1]
|
||||
return platform.node()
|
||||
|
||||
def add_batch(self, batch):
|
||||
# Make a copy so the replay buffer doesn't pin plasma memory.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import ray
|
||||
from collections import deque
|
||||
|
||||
@@ -73,7 +73,7 @@ def drop_colocated(actors):
|
||||
|
||||
|
||||
def split_colocated(actors):
|
||||
localhost = os.uname()[1]
|
||||
localhost = platform.node()
|
||||
hosts = ray.get([a.get_host.remote() for a in actors])
|
||||
local = []
|
||||
non_local = []
|
||||
|
||||
Reference in New Issue
Block a user