mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:38:18 +08:00
[serve] Rename to Controller (#9566)
This commit is contained in:
@@ -7,7 +7,7 @@ py_library(
|
||||
|
||||
serve_tests_srcs = glob(["tests/*.py"],
|
||||
exclude=["tests/test_nonblocking.py",
|
||||
"tests/test_master_crashes.py",
|
||||
"tests/test_controller_crashes.py",
|
||||
"tests/test_serve.py",
|
||||
])
|
||||
|
||||
@@ -108,12 +108,12 @@ py_test(
|
||||
)
|
||||
|
||||
|
||||
# Runs test_api and test_failure with injected failures in the master actor.
|
||||
# Runs test_api and test_failure with injected failures in the controller.
|
||||
# TODO(edoakes): reenable this once we're using GCS actor fault tolerance.
|
||||
# py_test(
|
||||
# name = "test_master_crashes",
|
||||
# name = "test_controller_crashes",
|
||||
# size = "medium",
|
||||
# srcs = glob(["tests/test_master_crashes.py",
|
||||
# srcs = glob(["tests/test_controller_crashes.py",
|
||||
# "tests/test_api.py",
|
||||
# "tests/test_failure.py"],
|
||||
# exclude=["tests/test_nonblocking.py",
|
||||
|
||||
+35
-36
@@ -2,32 +2,32 @@ from functools import wraps
|
||||
|
||||
import ray
|
||||
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
|
||||
SERVE_MASTER_NAME, HTTP_PROXY_TIMEOUT)
|
||||
from ray.serve.master import ServeMaster
|
||||
SERVE_CONTROLLER_NAME, HTTP_PROXY_TIMEOUT)
|
||||
from ray.serve.controller import ServeController
|
||||
from ray.serve.handle import RayServeHandle
|
||||
from ray.serve.utils import (block_until_http_ready, format_actor_name)
|
||||
from ray.serve.exceptions import RayServeException
|
||||
from ray.serve.config import BackendConfig, ReplicaConfig
|
||||
from ray.serve.metric import InMemoryExporter
|
||||
|
||||
master_actor = None
|
||||
controller = None
|
||||
|
||||
|
||||
def _get_master_actor():
|
||||
def _get_controller():
|
||||
"""Used for internal purpose because using just import serve.global_state
|
||||
will always reference the original None object.
|
||||
"""
|
||||
global master_actor
|
||||
if master_actor is None:
|
||||
global controller
|
||||
if controller is None:
|
||||
raise RayServeException("Please run serve.init to initialize or "
|
||||
"connect to existing ray serve cluster.")
|
||||
return master_actor
|
||||
return controller
|
||||
|
||||
|
||||
def _ensure_connected(f):
|
||||
@wraps(f)
|
||||
def check(*args, **kwargs):
|
||||
_get_master_actor()
|
||||
_get_controller()
|
||||
return f(*args, **kwargs)
|
||||
|
||||
return check
|
||||
@@ -85,11 +85,11 @@ def init(name=None,
|
||||
if not ray.is_initialized():
|
||||
ray.init()
|
||||
|
||||
# Try to get serve master actor if it exists
|
||||
global master_actor
|
||||
master_actor_name = format_actor_name(SERVE_MASTER_NAME, name)
|
||||
# Try to get serve controller if it exists
|
||||
global controller
|
||||
controller_name = format_actor_name(SERVE_CONTROLLER_NAME, name)
|
||||
try:
|
||||
master_actor = ray.get_actor(master_actor_name)
|
||||
controller = ray.get_actor(controller_name)
|
||||
return
|
||||
except ValueError:
|
||||
pass
|
||||
@@ -98,8 +98,8 @@ def init(name=None,
|
||||
# serve.init() was run on. We should consider making this configurable
|
||||
# in the future.
|
||||
http_node_id = ray.state.current_node_id()
|
||||
master_actor = ServeMaster.options(
|
||||
name=master_actor_name,
|
||||
controller = ServeController.options(
|
||||
name=controller_name,
|
||||
max_restarts=-1,
|
||||
max_task_retries=-1,
|
||||
).remote(name, http_node_id, http_host, http_port, metric_exporter)
|
||||
@@ -116,10 +116,10 @@ def shutdown():
|
||||
Shuts down all processes and deletes all state associated with the Serve
|
||||
instance that's currently connected to (via serve.init).
|
||||
"""
|
||||
global master_actor
|
||||
ray.get(master_actor.shutdown.remote())
|
||||
ray.kill(master_actor, no_restart=True)
|
||||
master_actor = None
|
||||
global controller
|
||||
ray.get(controller.shutdown.remote())
|
||||
ray.kill(controller, no_restart=True)
|
||||
controller = None
|
||||
|
||||
|
||||
def create_endpoint(endpoint_name,
|
||||
@@ -163,8 +163,8 @@ def create_endpoint(endpoint_name,
|
||||
upper_methods.append(method.upper())
|
||||
|
||||
ray.get(
|
||||
master_actor.create_endpoint.remote(endpoint_name, {backend: 1.0},
|
||||
route, upper_methods))
|
||||
controller.create_endpoint.remote(endpoint_name, {backend: 1.0}, route,
|
||||
upper_methods))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -173,7 +173,7 @@ def delete_endpoint(endpoint):
|
||||
|
||||
Does not delete any associated backends.
|
||||
"""
|
||||
ray.get(master_actor.delete_endpoint.remote(endpoint))
|
||||
ray.get(controller.delete_endpoint.remote(endpoint))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -183,7 +183,7 @@ def list_endpoints():
|
||||
The dictionary keys are endpoint names and values are dictionaries
|
||||
of the form: {"methods": List[str], "traffic": Dict[str, float]}.
|
||||
"""
|
||||
return ray.get(master_actor.get_all_endpoints.remote())
|
||||
return ray.get(controller.get_all_endpoints.remote())
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -210,7 +210,7 @@ def update_backend_config(backend_tag, config_options):
|
||||
if not isinstance(config_options, dict):
|
||||
raise ValueError("config_options must be a dictionary.")
|
||||
ray.get(
|
||||
master_actor.update_backend_config.remote(backend_tag, config_options))
|
||||
controller.update_backend_config.remote(backend_tag, config_options))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -220,7 +220,7 @@ def get_backend_config(backend_tag):
|
||||
Args:
|
||||
backend_tag(str): A registered backend.
|
||||
"""
|
||||
return ray.get(master_actor.get_backend_config.remote(backend_tag))
|
||||
return ray.get(controller.get_backend_config.remote(backend_tag))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -265,8 +265,8 @@ def create_backend(backend_tag,
|
||||
replica_config.is_blocking)
|
||||
|
||||
ray.get(
|
||||
master_actor.create_backend.remote(backend_tag, backend_config,
|
||||
replica_config))
|
||||
controller.create_backend.remote(backend_tag, backend_config,
|
||||
replica_config))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -275,7 +275,7 @@ def list_backends():
|
||||
|
||||
Dictionary maps backend tags to backend configs.
|
||||
"""
|
||||
return ray.get(master_actor.get_all_backends.remote())
|
||||
return ray.get(controller.get_all_backends.remote())
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -284,7 +284,7 @@ def delete_backend(backend_tag):
|
||||
|
||||
The backend must not currently be used by any endpoints.
|
||||
"""
|
||||
ray.get(master_actor.delete_backend.remote(backend_tag))
|
||||
ray.get(controller.delete_backend.remote(backend_tag))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -304,8 +304,8 @@ def set_traffic(endpoint_name, traffic_policy_dictionary):
|
||||
to their traffic weights. The weights must sum to 1.
|
||||
"""
|
||||
ray.get(
|
||||
master_actor.set_traffic.remote(endpoint_name,
|
||||
traffic_policy_dictionary))
|
||||
controller.set_traffic.remote(endpoint_name,
|
||||
traffic_policy_dictionary))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -329,8 +329,8 @@ def shadow_traffic(endpoint_name, backend_tag, proportion):
|
||||
raise TypeError("proportion must be a float from 0 to 1.")
|
||||
|
||||
ray.get(
|
||||
master_actor.shadow_traffic.remote(endpoint_name, backend_tag,
|
||||
proportion))
|
||||
controller.shadow_traffic.remote(endpoint_name, backend_tag,
|
||||
proportion))
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
@@ -353,11 +353,10 @@ def get_handle(endpoint_name,
|
||||
RayServeHandle
|
||||
"""
|
||||
if not missing_ok:
|
||||
assert endpoint_name in ray.get(
|
||||
master_actor.get_all_endpoints.remote())
|
||||
assert endpoint_name in ray.get(controller.get_all_endpoints.remote())
|
||||
|
||||
return RayServeHandle(
|
||||
ray.get(master_actor.get_http_proxy.remote())[0],
|
||||
ray.get(controller.get_http_proxy.remote())[0],
|
||||
endpoint_name,
|
||||
relative_slo_ms,
|
||||
absolute_slo_ms,
|
||||
@@ -387,5 +386,5 @@ def stat():
|
||||
For PrometheusExporter, it returns the metrics in prometheus format
|
||||
in plain text.
|
||||
"""
|
||||
[metric_exporter] = ray.get(master_actor.get_metric_exporter.remote())
|
||||
[metric_exporter] = ray.get(controller.get_metric_exporter.remote())
|
||||
return ray.get(metric_exporter.inspect_metrics.remote())
|
||||
|
||||
@@ -110,8 +110,9 @@ def create_backend_worker(func_or_class):
|
||||
else:
|
||||
_callable = func_or_class(*init_args)
|
||||
|
||||
master = serve.api._get_master_actor()
|
||||
[metric_exporter] = ray.get(master.get_metric_exporter.remote())
|
||||
controller = serve.api._get_controller()
|
||||
[metric_exporter] = ray.get(
|
||||
controller.get_metric_exporter.remote())
|
||||
metric_client = MetricClient(
|
||||
metric_exporter, default_labels={"backend": backend_tag})
|
||||
self.backend = RayServeWorker(backend_tag, replica_tag, _callable,
|
||||
|
||||
@@ -8,9 +8,9 @@ import click
|
||||
|
||||
from ray import serve
|
||||
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
|
||||
from ray.serve import master
|
||||
from ray.serve import controller
|
||||
|
||||
master._TRACING_ENABLED = True
|
||||
controller._TRACING_ENABLED = True
|
||||
|
||||
|
||||
def block_until_ready(url):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#: Actor name used to register master actor
|
||||
SERVE_MASTER_NAME = "SERVE_MASTER_ACTOR"
|
||||
#: Actor name used to register controller
|
||||
SERVE_CONTROLLER_NAME = "SERVE_CONTROLLER_ACTOR"
|
||||
|
||||
#: Actor name used to register HTTP proxy actor
|
||||
SERVE_PROXY_NAME = "SERVE_PROXY_ACTOR"
|
||||
|
||||
@@ -18,12 +18,12 @@ from ray.serve.utils import (format_actor_name, get_random_letters, logger,
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Used for testing purposes only. If this is set, the master actor will crash
|
||||
# Used for testing purposes only. If this is set, the controller will crash
|
||||
# after writing each checkpoint with the specified probability.
|
||||
_CRASH_AFTER_CHECKPOINT_PROBABILITY = 0.0
|
||||
CHECKPOINT_KEY = "serve-master-checkpoint"
|
||||
CHECKPOINT_KEY = "serve-controller-checkpoint"
|
||||
|
||||
# Feature flag for master actor resource checking. If true, master actor will
|
||||
# Feature flag for controller resource checking. If true, controller will
|
||||
# error if the desired replicas exceed current resource availability.
|
||||
_RESOURCE_CHECK_ENABLED = True
|
||||
|
||||
@@ -62,10 +62,10 @@ BackendInfo = namedtuple("BackendInfo",
|
||||
|
||||
|
||||
@ray.remote
|
||||
class ServeMaster:
|
||||
class ServeController:
|
||||
"""Responsible for managing the state of the serving system.
|
||||
|
||||
The master actor implements fault tolerance by persisting its state in
|
||||
The controller implements fault tolerance by persisting its state in
|
||||
a new checkpoint each time a state change is made. If the actor crashes,
|
||||
the latest checkpoint is loaded and the state is recovered. Checkpoints
|
||||
are written/read using a provided KV-store interface.
|
||||
@@ -75,13 +75,13 @@ class ServeMaster:
|
||||
those actors from this actor on startup and updates are pushed out from
|
||||
this actor.
|
||||
|
||||
All other actors started by the master actor are named, detached actors
|
||||
so they will not fate share with the master if it crashes.
|
||||
All other actors started by the controller are named, detached actors
|
||||
so they will not fate share with the controller if it crashes.
|
||||
|
||||
The following guarantees are provided for state-changing calls to the
|
||||
master actor:
|
||||
controller:
|
||||
- If the call succeeds, the change was made and will be reflected in
|
||||
the system even if the master actor or other actors die unexpectedly.
|
||||
the system even if the controller or other actors die unexpectedly.
|
||||
- If the call fails, the change may have been made but isn't guaranteed
|
||||
to have been. The client should retry in this case. Note that this
|
||||
requires all implementations here to be idempotent.
|
||||
@@ -111,9 +111,9 @@ class RayServeHandle:
|
||||
)
|
||||
|
||||
def get_traffic_policy(self):
|
||||
master_actor = serve.api._get_master_actor()
|
||||
controller = serve.api._get_controller()
|
||||
return ray.get(
|
||||
master_actor.get_traffic_policy.remote(self.endpoint_name))
|
||||
controller.get_traffic_policy.remote(self.endpoint_name))
|
||||
|
||||
def __repr__(self):
|
||||
return """
|
||||
|
||||
@@ -26,14 +26,14 @@ class HTTPProxy:
|
||||
# blocks forever
|
||||
"""
|
||||
|
||||
async def fetch_config_from_master(self, instance_name=None):
|
||||
async def fetch_config_from_controller(self, instance_name=None):
|
||||
assert ray.is_initialized()
|
||||
master = serve.api._get_master_actor()
|
||||
controller = serve.api._get_controller()
|
||||
|
||||
self.route_table = await master.get_http_proxy_config.remote()
|
||||
self.route_table = await controller.get_http_proxy_config.remote()
|
||||
|
||||
# The exporter is required to return results for /-/metrics endpoint.
|
||||
[self.metric_exporter] = await master.get_metric_exporter.remote()
|
||||
[self.metric_exporter] = await controller.get_metric_exporter.remote()
|
||||
|
||||
self.metric_client = MetricClient(self.metric_exporter)
|
||||
self.request_counter = self.metric_client.new_counter(
|
||||
@@ -172,7 +172,7 @@ class HTTPProxyActor:
|
||||
async def __init__(self, host, port, instance_name=None):
|
||||
serve.init(name=instance_name)
|
||||
self.app = HTTPProxy()
|
||||
await self.app.fetch_config_from_master(instance_name)
|
||||
await self.app.fetch_config_from_controller(instance_name)
|
||||
self.host = host
|
||||
self.port = port
|
||||
|
||||
|
||||
@@ -133,27 +133,27 @@ class Router:
|
||||
|
||||
# -- State Restoration -- #
|
||||
# Fetch the worker handles, traffic policies, and backend configs from
|
||||
# the master actor. We use a "pull-based" approach instead of pushing
|
||||
# them from the master so that the router can transparently recover
|
||||
# the controller. We use a "pull-based" approach instead of pushing
|
||||
# them from the controller so that the router can transparently recover
|
||||
# from failure.
|
||||
serve.init(name=instance_name)
|
||||
master_actor = serve.api._get_master_actor()
|
||||
controller = serve.api._get_controller()
|
||||
|
||||
traffic_policies = ray.get(master_actor.get_traffic_policies.remote())
|
||||
traffic_policies = ray.get(controller.get_traffic_policies.remote())
|
||||
for endpoint, traffic_policy in traffic_policies.items():
|
||||
await self.set_traffic(endpoint, traffic_policy)
|
||||
|
||||
backend_dict = ray.get(master_actor.get_all_worker_handles.remote())
|
||||
backend_dict = ray.get(controller.get_all_worker_handles.remote())
|
||||
for backend_tag, replica_dict in backend_dict.items():
|
||||
for replica_tag, worker in replica_dict.items():
|
||||
await self.add_new_worker(backend_tag, replica_tag, worker)
|
||||
|
||||
backend_configs = ray.get(master_actor.get_backend_configs.remote())
|
||||
backend_configs = ray.get(controller.get_backend_configs.remote())
|
||||
for backend, backend_config in backend_configs.items():
|
||||
await self.set_backend_config(backend, backend_config)
|
||||
|
||||
# -- Metric Registration -- #
|
||||
[metric_exporter] = ray.get(master_actor.get_metric_exporter.remote())
|
||||
[metric_exporter] = ray.get(controller.get_metric_exporter.remote())
|
||||
self.metric_client = MetricClient(metric_exporter)
|
||||
self.num_router_requests = self.metric_client.new_counter(
|
||||
"num_router_requests",
|
||||
|
||||
@@ -6,7 +6,7 @@ import ray
|
||||
from ray import serve
|
||||
|
||||
if os.environ.get("RAY_SERVE_INTENTIONALLY_CRASH", False):
|
||||
serve.master._CRASH_AFTER_CHECKPOINT_PROBABILITY = 0.5
|
||||
serve.controller._CRASH_AFTER_CHECKPOINT_PROBABILITY = 0.5
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -22,9 +22,9 @@ def serve_instance(_shared_serve_instance):
|
||||
yield
|
||||
# Re-init if necessary.
|
||||
serve.init()
|
||||
master = serve.api._get_master_actor()
|
||||
controller = serve.api._get_controller()
|
||||
# Clear all state between tests to avoid naming collisions.
|
||||
for endpoint in ray.get(master.get_all_endpoints.remote()):
|
||||
for endpoint in ray.get(controller.get_all_endpoints.remote()):
|
||||
serve.delete_endpoint(endpoint)
|
||||
for backend in ray.get(master.get_all_backends.remote()):
|
||||
for backend in ray.get(controller.get_all_backends.remote()):
|
||||
serve.delete_backend(backend)
|
||||
|
||||
@@ -228,16 +228,16 @@ def test_updating_config(serve_instance):
|
||||
})
|
||||
serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")
|
||||
|
||||
master_actor = serve.api._get_master_actor()
|
||||
controller = serve.api._get_controller()
|
||||
old_replica_tag_list = ray.get(
|
||||
master_actor._list_replicas.remote("bsimple:v1"))
|
||||
controller._list_replicas.remote("bsimple:v1"))
|
||||
|
||||
serve.update_backend_config("bsimple:v1", {"max_batch_size": 5})
|
||||
new_replica_tag_list = ray.get(
|
||||
master_actor._list_replicas.remote("bsimple:v1"))
|
||||
controller._list_replicas.remote("bsimple:v1"))
|
||||
new_all_tag_list = []
|
||||
for worker_dict in ray.get(
|
||||
master_actor.get_all_worker_handles.remote()).values():
|
||||
controller.get_all_worker_handles.remote()).values():
|
||||
new_all_tag_list.extend(list(worker_dict.keys()))
|
||||
|
||||
# the old and new replica tag list should be identical
|
||||
@@ -550,7 +550,7 @@ def test_create_infeasible_error(serve_instance):
|
||||
config={"num_replicas": current_cpus + 20})
|
||||
|
||||
# No replica should be created!
|
||||
replicas = ray.get(serve.api.master_actor._list_replicas.remote("f1"))
|
||||
replicas = ray.get(serve.api.controller._list_replicas.remote("f1"))
|
||||
assert len(replicas) == 0
|
||||
|
||||
|
||||
@@ -569,7 +569,7 @@ def test_shutdown(serve_instance):
|
||||
|
||||
def check_dead():
|
||||
for actor_name in [
|
||||
constants.SERVE_MASTER_NAME, constants.SERVE_PROXY_NAME,
|
||||
constants.SERVE_CONTROLLER_NAME, constants.SERVE_PROXY_NAME,
|
||||
constants.SERVE_METRIC_SINK_NAME
|
||||
]:
|
||||
try:
|
||||
|
||||
@@ -7,7 +7,7 @@ import ray
|
||||
from ray import serve
|
||||
import ray.serve.context as context
|
||||
from ray.serve.backend_worker import create_backend_worker, wrap_to_ray_error
|
||||
from ray.serve.master import TrafficPolicy
|
||||
from ray.serve.controller import TrafficPolicy
|
||||
from ray.serve.request_params import RequestMetadata
|
||||
from ray.serve.router import Router
|
||||
from ray.serve.config import BackendConfig
|
||||
|
||||
@@ -19,62 +19,64 @@ def request_with_retries(endpoint, timeout=30):
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
def test_master_failure(serve_instance):
|
||||
def test_controller_failure(serve_instance):
|
||||
serve.init()
|
||||
|
||||
def function():
|
||||
return "hello1"
|
||||
|
||||
serve.create_backend("master_failure:v1", function)
|
||||
serve.create_backend("controller_failure:v1", function)
|
||||
serve.create_endpoint(
|
||||
"master_failure", backend="master_failure:v1", route="/master_failure")
|
||||
"controller_failure",
|
||||
backend="controller_failure:v1",
|
||||
route="/controller_failure")
|
||||
|
||||
assert request_with_retries("/master_failure", timeout=1).text == "hello1"
|
||||
assert request_with_retries(
|
||||
"/controller_failure", timeout=1).text == "hello1"
|
||||
|
||||
for _ in range(10):
|
||||
response = request_with_retries("/master_failure", timeout=30)
|
||||
response = request_with_retries("/controller_failure", timeout=30)
|
||||
assert response.text == "hello1"
|
||||
|
||||
ray.kill(serve.api._get_master_actor(), no_restart=False)
|
||||
ray.kill(serve.api._get_controller(), no_restart=False)
|
||||
|
||||
for _ in range(10):
|
||||
response = request_with_retries("/master_failure", timeout=30)
|
||||
response = request_with_retries("/controller_failure", timeout=30)
|
||||
assert response.text == "hello1"
|
||||
|
||||
def function():
|
||||
return "hello2"
|
||||
|
||||
ray.kill(serve.api._get_master_actor(), no_restart=False)
|
||||
ray.kill(serve.api._get_controller(), no_restart=False)
|
||||
|
||||
serve.create_backend("master_failure:v2", function)
|
||||
serve.set_traffic("master_failure", {"master_failure:v2": 1.0})
|
||||
serve.create_backend("controller_failure:v2", function)
|
||||
serve.set_traffic("controller_failure", {"controller_failure:v2": 1.0})
|
||||
|
||||
for _ in range(10):
|
||||
response = request_with_retries("/master_failure", timeout=30)
|
||||
response = request_with_retries("/controller_failure", timeout=30)
|
||||
assert response.text == "hello2"
|
||||
|
||||
def function():
|
||||
return "hello3"
|
||||
|
||||
ray.kill(serve.api._get_master_actor(), no_restart=False)
|
||||
serve.create_backend("master_failure_2", function)
|
||||
ray.kill(serve.api._get_master_actor(), no_restart=False)
|
||||
ray.kill(serve.api._get_controller(), no_restart=False)
|
||||
serve.create_backend("controller_failure_2", function)
|
||||
ray.kill(serve.api._get_controller(), no_restart=False)
|
||||
serve.create_endpoint(
|
||||
"master_failure_2",
|
||||
backend="master_failure_2",
|
||||
route="/master_failure_2")
|
||||
ray.kill(serve.api._get_master_actor(), no_restart=False)
|
||||
"controller_failure_2",
|
||||
backend="controller_failure_2",
|
||||
route="/controller_failure_2")
|
||||
ray.kill(serve.api._get_controller(), no_restart=False)
|
||||
|
||||
for _ in range(10):
|
||||
response = request_with_retries("/master_failure", timeout=30)
|
||||
response = request_with_retries("/controller_failure", timeout=30)
|
||||
assert response.text == "hello2"
|
||||
response = request_with_retries("/master_failure_2", timeout=30)
|
||||
response = request_with_retries("/controller_failure_2", timeout=30)
|
||||
assert response.text == "hello3"
|
||||
|
||||
|
||||
def _kill_http_proxy():
|
||||
[http_proxy] = ray.get(
|
||||
serve.api._get_master_actor().get_http_proxy.remote())
|
||||
[http_proxy] = ray.get(serve.api._get_controller().get_http_proxy.remote())
|
||||
ray.kill(http_proxy, no_restart=False)
|
||||
|
||||
|
||||
@@ -108,8 +110,8 @@ def test_http_proxy_failure(serve_instance):
|
||||
|
||||
|
||||
def _get_worker_handles(backend):
|
||||
master_actor = serve.api._get_master_actor()
|
||||
backend_dict = ray.get(master_actor.get_all_worker_handles.remote())
|
||||
controller = serve.api._get_controller()
|
||||
backend_dict = ray.get(controller.get_all_worker_handles.remote())
|
||||
|
||||
return list(backend_dict[backend].values())
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from collections import defaultdict
|
||||
import pytest
|
||||
import ray
|
||||
|
||||
from ray.serve.master import TrafficPolicy
|
||||
from ray.serve.controller import TrafficPolicy
|
||||
from ray.serve.router import Router, Query
|
||||
from ray.serve.request_params import RequestMetadata
|
||||
from ray.serve.utils import get_random_letters
|
||||
|
||||
Reference in New Issue
Block a user