Start moving ray internal files to _private module (#10994)

This commit is contained in:
Eric Liang
2020-09-24 22:46:35 -07:00
committed by GitHub
parent 3b6fe72029
commit 609c1b8acd
39 changed files with 1825 additions and 1831 deletions
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -19,7 +19,7 @@ except ImportError: # py2
from pipes import quote
from ray.experimental.internal_kv import _internal_kv_get
import ray.services as services
import ray._private.services as services
from ray.ray_constants import AUTOSCALER_RESOURCE_REQUEST_CHANNEL
from ray.autoscaler._private.util import validate_config, hash_runtime_conf, \
hash_launch_conf, prepare_config, DEBUG_AUTOSCALING_ERROR, \
@@ -2,7 +2,7 @@ import logging
import time
import numpy as np
import ray.services as services
import ray._private.services as services
from ray.ray_constants import MEMORY_RESOURCE_UNIT_BYTES
logger = logging.getLogger(__name__)
+1 -1
View File
@@ -7,7 +7,7 @@ import threading
from typing import Any, Dict
import ray
import ray.services as services
import ray._private.services as services
from ray.autoscaler.node_provider import _get_default_config
from ray.autoscaler._private.docker import validate_docker_config
+6 -5
View File
@@ -484,7 +484,7 @@ class Dashboard:
metrics_export_address=None):
self.host = host
self.port = port
self.redis_client = ray.services.create_redis_client(
self.redis_client = ray._private.services.create_redis_client(
redis_address, password=redis_password)
self.temp_dir = temp_dir
self.dashboard_id = str(uuid.uuid4())
@@ -553,14 +553,15 @@ class Dashboard:
def _start_exporting_metrics(self):
result, error = self.metrics_export_client.start_exporting_metrics()
if not result and error:
url = ray.services.get_webui_url_from_redis(self.redis_client)
url = ray._private.services.get_webui_url_from_redis(
self.redis_client)
error += (" Please reenable the metrics export by going to "
"the url: {}/api/metrics/enable".format(url))
ray.utils.push_error_to_driver_through_redis(
self.redis_client, "metrics export failed", error)
def log_dashboard_url(self):
url = ray.services.get_webui_url_from_redis(self.redis_client)
url = ray._private.services.get_webui_url_from_redis(self.redis_client)
if url is None:
raise ValueError("WebUI URL is not present in GCS.")
with open(os.path.join(self.temp_dir, "dashboard_url"), "w") as f:
@@ -582,7 +583,7 @@ class RayletStats(threading.Thread):
self.nodes = []
self.stubs = {}
self.reporter_stubs = {}
self.redis_client = ray.services.create_redis_client(
self.redis_client = ray._private.services.create_redis_client(
redis_address, password=redis_password)
self._raylet_stats_lock = threading.Lock()
@@ -961,7 +962,7 @@ if __name__ == "__main__":
dashboard.run()
except Exception as e:
# Something went wrong, so push an error to all drivers.
redis_client = ray.services.create_redis_client(
redis_client = ray._private.services.create_redis_client(
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The dashboard on node {} failed with the following "
+1 -1
View File
@@ -72,7 +72,7 @@ def _get_actor_group_stats(group):
class NodeStats(threading.Thread):
def __init__(self, redis_address, redis_password=None):
self.redis_key = "{}.*".format(ray.gcs_utils.REPORTER_CHANNEL)
self.redis_client = ray.services.create_redis_client(
self.redis_client = ray._private.services.create_redis_client(
redis_address, password=redis_password)
self._node_stats = {}
+1 -1
View File
@@ -86,7 +86,7 @@ class RayTaskError(RayError):
else:
self.proctitle = setproctitle.getproctitle()
self.pid = pid or os.getpid()
self.ip = ip or ray.services.get_node_ip_address()
self.ip = ip or ray._private.services.get_node_ip_address()
self.function_name = function_name
self.traceback_str = traceback_str
self.cause_cls = cause_cls
+3 -3
View File
@@ -11,7 +11,7 @@ import time
import traceback
import ray.ray_constants as ray_constants
import ray.services as services
import ray._private.services as services
import ray.utils
# Logger for this module. It should be configured at the entry point
@@ -78,7 +78,7 @@ class LogMonitor:
"""Initialize the log monitor object."""
self.ip = services.get_node_ip_address()
self.logs_dir = logs_dir
self.redis_client = ray.services.create_redis_client(
self.redis_client = ray._private.services.create_redis_client(
redis_address, password=redis_password)
self.log_filenames = set()
self.open_file_infos = []
@@ -319,7 +319,7 @@ if __name__ == "__main__":
log_monitor.run()
except Exception as e:
# Something went wrong, so push an error to all drivers.
redis_client = ray.services.create_redis_client(
redis_client = ray._private.services.create_redis_client(
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = (f"The log monitor on node {platform.node()} "
+2 -2
View File
@@ -35,7 +35,7 @@ class Monitor:
# Initialize the Redis clients.
ray.state.state._initialize_global_state(
redis_address, redis_password=redis_password)
self.redis = ray.services.create_redis_client(
self.redis = ray._private.services.create_redis_client(
redis_address, password=redis_password)
# Set the redis client and mode so _internal_kv works for autoscaler.
worker = ray.worker.global_worker
@@ -372,7 +372,7 @@ if __name__ == "__main__":
monitor.destroy_autoscaler_workers()
# Something went wrong, so push an error to all drivers.
redis_client = ray.services.create_redis_client(
redis_client = ray._private.services.create_redis_client(
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The monitor failed with the "
+20 -19
View File
@@ -15,7 +15,7 @@ import time
import ray
import ray.ray_constants as ray_constants
import ray.services
import ray._private.services
import ray.utils
from ray.resource_spec import ResourceSpec
from ray.utils import try_to_create_directory, try_to_symlink, open_log
@@ -77,10 +77,10 @@ class Node:
if ray_params.node_ip_address:
node_ip_address = ray_params.node_ip_address
elif ray_params.redis_address:
node_ip_address = ray.services.get_node_ip_address(
node_ip_address = ray._private.services.get_node_ip_address(
ray_params.redis_address)
else:
node_ip_address = ray.services.get_node_ip_address()
node_ip_address = ray._private.services.get_node_ip_address()
self._node_ip_address = node_ip_address
if ray_params.raylet_ip_address:
@@ -156,10 +156,11 @@ class Node:
or self._ray_params.node_manager_port is None):
# Get the address info of the processes to connect to
# from Redis.
address_info = ray.services.get_address_info_from_redis(
self.redis_address,
self._raylet_ip_address,
redis_password=self.redis_password)
address_info = (
ray._private.services.get_address_info_from_redis(
self.redis_address,
self._raylet_ip_address,
redis_password=self.redis_password))
self._plasma_store_socket_name = address_info[
"object_store_address"]
self._raylet_socket_name = address_info["raylet_socket_name"]
@@ -178,7 +179,7 @@ class Node:
self._webui_url = None
else:
self._webui_url = (
ray.services.get_webui_url_from_redis(redis_client))
ray._private.services.get_webui_url_from_redis(redis_client))
if head or not connect_only:
# We need to start a local raylet.
@@ -384,7 +385,7 @@ class Node:
def create_redis_client(self):
"""Create a redis client."""
return ray.services.create_redis_client(
return ray._private.services.create_redis_client(
self._redis_address, self._ray_params.redis_password)
def get_temp_dir_path(self):
@@ -550,7 +551,7 @@ class Node:
"""
assert not self.kernel_fate_share, (
"a reaper should not be used with kernel fate-sharing")
process_info = ray.services.start_reaper(fate_share=False)
process_info = ray._private.services.start_reaper(fate_share=False)
assert ray_constants.PROCESS_TYPE_REAPER not in self.all_processes
if process_info is not None:
self.all_processes[ray_constants.PROCESS_TYPE_REAPER] = [
@@ -566,7 +567,7 @@ class Node:
self.get_log_file_handles(f"redis-shard_{i}", unique=True))
(self._redis_address, redis_shards,
process_infos) = ray.services.start_redis(
process_infos) = ray._private.services.start_redis(
self._node_ip_address,
redis_log_files,
self.get_resource_spec(),
@@ -586,7 +587,7 @@ class Node:
"""Start the log monitor."""
stdout_file, stderr_file = self.get_log_file_handles(
"log_monitor", unique=True)
process_info = ray.services.start_log_monitor(
process_info = ray._private.services.start_log_monitor(
self.redis_address,
self._logs_dir,
stdout_file=stdout_file,
@@ -603,7 +604,7 @@ class Node:
stdout_file, stderr_file = self.get_log_file_handles(
"reporter", unique=True)
process_info = ray.services.start_reporter(
process_info = ray._private.services.start_reporter(
self.redis_address,
self._ray_params.metrics_agent_port,
self._metrics_export_port,
@@ -630,7 +631,7 @@ class Node:
else:
stdout_file, stderr_file = self.get_log_file_handles(
"dashboard", unique=True)
self._webui_url, process_info = ray.services.start_dashboard(
self._webui_url, process_info = ray._private.services.start_dashboard(
require_dashboard,
self._ray_params.dashboard_host,
self.redis_address,
@@ -653,7 +654,7 @@ class Node:
"""Start the plasma store."""
stdout_file, stderr_file = self.get_log_file_handles(
"plasma_store", unique=True)
process_info = ray.services.start_plasma_store(
process_info = ray._private.services.start_plasma_store(
self.get_resource_spec(),
plasma_directory,
object_store_memory,
@@ -674,7 +675,7 @@ class Node:
"""
stdout_file, stderr_file = self.get_log_file_handles(
"gcs_server", unique=True)
process_info = ray.services.start_gcs_server(
process_info = ray._private.services.start_gcs_server(
self._redis_address,
stdout_file=stdout_file,
stderr_file=stderr_file,
@@ -705,7 +706,7 @@ class Node:
"""
stdout_file, stderr_file = self.get_log_file_handles(
"raylet", unique=True)
process_info = ray.services.start_raylet(
process_info = ray._private.services.start_raylet(
self._redis_address,
self._node_ip_address,
self._ray_params.node_manager_port,
@@ -784,7 +785,7 @@ class Node:
"""Start the monitor."""
stdout_file, stderr_file = self.get_log_file_handles(
"monitor", unique=True)
process_info = ray.services.start_monitor(
process_info = ray._private.services.start_monitor(
self._redis_address,
stdout_file=stdout_file,
stderr_file=stderr_file,
@@ -820,7 +821,7 @@ class Node:
# times to avoid printing multiple warnings.
resource_spec = self.get_resource_spec()
plasma_directory, object_store_memory = \
ray.services.determine_plasma_store_config(
ray._private.services.determine_plasma_store_config(
resource_spec.object_store_memory,
plasma_directory=self._ray_params.plasma_directory,
huge_pages=self._ray_params.huge_pages
+4 -4
View File
@@ -15,7 +15,7 @@ import ray
import psutil
import ray.ray_constants as ray_constants
import ray.services
import ray._private.services
import ray.utils
from ray.core.generated import reporter_pb2
from ray.core.generated import reporter_pb2_grpc
@@ -114,7 +114,7 @@ class Reporter:
redis_password=None):
"""Initialize the reporter object."""
self.cpu_counts = (psutil.cpu_count(), psutil.cpu_count(logical=False))
self.ip = ray.services.get_node_ip_address()
self.ip = ray._private.services.get_node_ip_address()
self.hostname = platform.node()
self.port = port
self.metrics_agent = MetricsAgent(metrics_export_port)
@@ -123,7 +123,7 @@ class Reporter:
_ = psutil.cpu_percent() # For initialization
self.redis_key = f"{ray.gcs_utils.REPORTER_CHANNEL}.{self.hostname}"
self.redis_client = ray.services.create_redis_client(
self.redis_client = ray._private.services.create_redis_client(
redis_address, password=redis_password)
self.network_stats_hist = [(0, (0.0, 0.0))] # time, (sent, recv)
@@ -309,7 +309,7 @@ if __name__ == "__main__":
reporter.run()
except Exception as e:
# Something went wrong, so push an error to all drivers.
redis_client = ray.services.create_redis_client(
redis_client = ray._private.services.create_redis_client(
args.redis_address, password=args.redis_password)
traceback_str = ray.utils.format_error_message(traceback.format_exc())
message = ("The reporter on node {} failed with the following "
+1 -1
View File
@@ -141,7 +141,7 @@ class ResourceSpec(
assert "object_store_memory" not in resources, resources
if node_ip_address is None:
node_ip_address = ray.services.get_node_ip_address()
node_ip_address = ray._private.services.get_node_ip_address()
# Automatically create a node id resource on each node. This is
# queryable with ray.state.node_ids() and ray.state.current_node_id().
+1 -1
View File
@@ -14,7 +14,7 @@ from socket import socket
import ray
import psutil
import ray.services as services
import ray._private.services as services
from ray.autoscaler._private.commands import (
attach_cluster, exec_cluster, create_or_update_cluster, monitor_cluster,
rsync, teardown_cluster, get_head_node_ip, kill_node, get_worker_node_ips,
+1 -1
View File
@@ -15,7 +15,7 @@ from ray.serve.constants import SERVE_PROXY_NAME
from ray.serve.utils import (block_until_http_ready, get_all_node_ids,
format_actor_name)
from ray.test_utils import wait_for_condition
from ray.services import new_port
from ray._private.services import new_port
@pytest.mark.skipif(
+2 -1747
View File
File diff suppressed because it is too large Load Diff
+4 -6
View File
@@ -6,11 +6,9 @@ import time
import ray
from ray import gcs_utils
from google.protobuf.json_format import MessageToDict
from ray import (
gcs_utils,
services,
)
from ray._private import services
from ray.utils import (decode, binary_to_hex, hex_to_binary)
from ray._raylet import GlobalStateAccessor
@@ -893,8 +891,8 @@ def current_node_id():
Returns:
Id of the current node.
"""
return ray.resource_spec.NODE_ID_PREFIX + ray.services.get_node_ip_address(
)
return (ray.resource_spec.NODE_ID_PREFIX +
ray._private.services.get_node_ip_address())
def node_ids():
+2 -2
View File
@@ -12,7 +12,7 @@ import math
from contextlib import redirect_stdout, redirect_stderr
import ray
import ray.services
import ray._private.services
import ray.utils
from ray.scripts.scripts import main as ray_main
@@ -150,7 +150,7 @@ def wait_for_children_of_pid_to_exit(pid, timeout=20):
def kill_process_by_name(name, SIGKILL=False):
for p in psutil.process_iter(attrs=["name"]):
if p.info["name"] == name + ray.services.EXE_SUFFIX:
if p.info["name"] == name + ray._private.services.EXE_SUFFIX:
if SIGKILL:
p.kill()
else:
+2 -2
View File
@@ -545,7 +545,7 @@ def test_invalid_unicode_in_worker_log(shutdown_only):
time.sleep(1.0)
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
@pytest.mark.skip(reason="This test is too expensive to run.")
@@ -580,7 +580,7 @@ def test_move_log_files_to_old(shutdown_only):
break
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
def test_lease_request_leak(shutdown_only):
+1 -1
View File
@@ -11,7 +11,7 @@ import sys
from jsonschema.exceptions import ValidationError
import ray
import ray.services as services
import ray._private.services as services
from ray.autoscaler._private.util import prepare_config, validate_config
from ray.autoscaler._private.commands import get_or_create_head_node
from ray.autoscaler._private.docker import DOCKER_MOUNT_PREFIX
+1 -1
View File
@@ -630,7 +630,7 @@ def test_get_correct_node_ip():
node_mock = MagicMock()
node_mock.node_ip_address = "10.0.0.111"
worker_mock._global_node = node_mock
found_ip = ray.services.get_node_ip_address()
found_ip = ray._private.services.get_node_ip_address()
assert found_ip == "10.0.0.111"
+4 -4
View File
@@ -57,7 +57,7 @@ def test_dying_worker_get(ray_start_2_cpus):
time.sleep(0.1)
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
# This test checks that when a driver dies in the middle of a get, the plasma
@@ -98,7 +98,7 @@ ray.get(ray.ObjectRef(ray.utils.hex_to_binary("{}")))
time.sleep(0.1)
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
# This test checks that when a worker dies in the middle of a wait, the plasma
@@ -136,7 +136,7 @@ def test_dying_worker_wait(ray_start_2_cpus):
time.sleep(0.1)
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
# This test checks that when a driver dies in the middle of a wait, the plasma
@@ -177,7 +177,7 @@ ray.wait([ray.ObjectRef(ray.utils.hex_to_binary("{}"))])
time.sleep(0.1)
# Make sure that nothing has died.
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
if __name__ == "__main__":
+1 -1
View File
@@ -87,7 +87,7 @@ def test_failed_task(ray_start_regular, error_pubsub):
def test_push_error_to_driver_through_redis(ray_start_regular, error_pubsub):
address_info = ray_start_regular
address = address_info["redis_address"]
redis_client = ray.services.create_redis_client(
redis_client = ray._private.services.create_redis_client(
address, password=ray.ray_constants.REDIS_DEFAULT_PASSWORD)
error_message = "Test error message"
ray.utils.push_error_to_driver_through_redis(
+1 -1
View File
@@ -142,7 +142,7 @@ def test_load_report(shutdown_only, max_shapes):
_system_config={
"max_resource_shapes_per_load_report": max_shapes,
})
redis = ray.services.create_redis_client(
redis = ray._private.services.create_redis_client(
cluster["redis_address"],
password=ray.ray_constants.REDIS_DEFAULT_PASSWORD)
client = redis.pubsub(ignore_subscribe_messages=True)
+3 -3
View File
@@ -34,7 +34,7 @@ def test_submitting_many_tasks(ray_start_sharded):
return x
ray.get([g(100) for _ in range(100)])
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
def test_submitting_many_actors_to_one(ray_start_sharded):
@@ -72,7 +72,7 @@ def test_getting_and_putting(ray_start_sharded):
for _ in range(1000):
ray.get(x_id)
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
def test_getting_many_objects(ray_start_sharded):
@@ -84,7 +84,7 @@ def test_getting_many_objects(ray_start_sharded):
lst = ray.get([f.remote() for _ in range(n)])
assert lst == n * [1]
assert ray.services.remaining_processes_alive()
assert ray._private.services.remaining_processes_alive()
if __name__ == "__main__":
+3 -2
View File
@@ -113,7 +113,8 @@ class TestSyncFunctionality(unittest.TestCase):
}).trials
with patch.object(CommandBasedClient, "_execute") as mock_fn:
with patch("ray.services.get_node_ip_address") as mock_sync:
with patch(
"ray._private.services.get_node_ip_address") as mock_sync:
sync_config = tune.SyncConfig(
sync_to_driver="echo {source} {target}")
mock_sync.return_value = "0.0.0.0"
@@ -209,7 +210,7 @@ class TestSyncFunctionality(unittest.TestCase):
test_file_path = os.path.join(trial.logdir, "test.log2")
self.assertFalse(os.path.exists(test_file_path))
with patch("ray.services.get_node_ip_address") as mock_sync:
with patch("ray._private.services.get_node_ip_address") as mock_sync:
mock_sync.return_value = "0.0.0.0"
sync_config = tune.SyncConfig(sync_to_driver=sync_func_driver)
[trial] = tune.run(
+1 -1
View File
@@ -289,7 +289,7 @@ class Trainable:
return ""
def get_current_ip(self):
self._local_ip = ray.services.get_node_ip_address()
self._local_ip = ray._private.services.get_node_ip_address()
return self._local_ip
def train(self):
+1 -1
View File
@@ -8,7 +8,7 @@ import traceback
import types
import ray.cloudpickle as cloudpickle
from ray.services import get_node_ip_address
from ray._private.services import get_node_ip_address
from ray.tune import TuneError
from ray.tune.stopper import NoopStopper
from ray.tune.progress_reporter import trial_progress_str
+2 -2
View File
@@ -3,7 +3,7 @@ import json
import os
import ray
import ray.services
import ray._private.services
from ray.util.sgd import utils
logger = logging.getLogger(__name__)
@@ -148,7 +148,7 @@ class TFRunner:
def get_node_ip(self):
"""Returns the IP address of the current node."""
return ray.services.get_node_ip_address()
return ray._private.services.get_node_ip_address()
def find_free_port(self):
"""Finds a free port on the current node."""
@@ -184,7 +184,7 @@ def clear_dummy_actor():
def reserve_resources(num_cpus, num_gpus, retries=20):
ip = ray.services.get_node_ip_address()
ip = ray._private.services.get_node_ip_address()
reserved_cuda_device = None
+1 -1
View File
@@ -9,7 +9,7 @@ logger = logging.getLogger(__name__)
def setup_address():
ip = ray.services.get_node_ip_address()
ip = ray._private.services.get_node_ip_address()
port = find_free_port()
return f"tcp://{ip}:{port}"
+2 -2
View File
@@ -25,7 +25,7 @@ import ray.parameter
import ray.ray_constants as ray_constants
import ray.remote_function
import ray.serialization as serialization
import ray.services as services
import ray._private.services as services
import ray
import setproctitle
import ray.signature
@@ -1169,7 +1169,7 @@ def connect(node,
# For driver's check that the version information matches the version
# information that the Ray cluster was started with.
try:
ray.services.check_version_info(worker.redis_client)
ray._private.services.check_version_info(worker.redis_client)
except Exception as e:
if mode == SCRIPT_MODE:
raise e