mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:10:40 +08:00
Remove blocking flag from serve.init() (#8654)
This commit is contained in:
@@ -4,7 +4,7 @@ from multiprocessing import cpu_count
|
||||
|
||||
import ray
|
||||
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
|
||||
SERVE_MASTER_NAME)
|
||||
SERVE_MASTER_NAME, HTTP_PROXY_TIMEOUT)
|
||||
from ray.serve.master import ServeMaster
|
||||
from ray.serve.handle import RayServeHandle
|
||||
from ray.serve.utils import (block_until_http_ready, format_actor_name,
|
||||
@@ -61,7 +61,6 @@ def accept_batch(f):
|
||||
|
||||
|
||||
def init(cluster_name=None,
|
||||
blocking=False,
|
||||
http_host=DEFAULT_HTTP_HOST,
|
||||
http_port=DEFAULT_HTTP_PORT,
|
||||
ray_init_kwargs={
|
||||
@@ -81,8 +80,6 @@ def init(cluster_name=None,
|
||||
cluster_name (str): A unique name for this serve cluster. This allows
|
||||
multiple serve clusters to run on the same ray cluster. Must be
|
||||
specified in all subsequent serve.init() calls.
|
||||
blocking (bool): If true, the function will wait for the HTTP server to
|
||||
be healthy, and other components to be ready before returns.
|
||||
http_host (str): Host for HTTP server. Defaults to "0.0.0.0".
|
||||
http_port (int): Port for HTTP server. Defaults to 8000.
|
||||
ray_init_kwargs (dict): Arguments passed to ray.init, if there is no ray
|
||||
@@ -125,9 +122,9 @@ def init(cluster_name=None,
|
||||
max_restarts=-1,
|
||||
).remote(cluster_name, http_node_id, http_host, http_port, metric_exporter)
|
||||
|
||||
if blocking:
|
||||
block_until_http_ready("http://{}:{}/-/routes".format(
|
||||
http_host, http_port))
|
||||
block_until_http_ready(
|
||||
"http://{}:{}/-/routes".format(http_host, http_port),
|
||||
timeout=HTTP_PROXY_TIMEOUT)
|
||||
|
||||
|
||||
@_ensure_connected
|
||||
|
||||
@@ -27,3 +27,6 @@ DEFAULT_LATENCY_SLO_MS = 1e9
|
||||
|
||||
#: Interval for metric client to push metrics to exporters
|
||||
METRIC_PUSH_INTERVAL_S = 2
|
||||
|
||||
#: Time in seconds to wait for the HTTP proxy to become ready in `serve.init()`
|
||||
HTTP_PROXY_TIMEOUT = 60
|
||||
|
||||
@@ -5,7 +5,7 @@ import time
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
|
||||
def noop(_):
|
||||
|
||||
@@ -14,7 +14,7 @@ def echo(flask_request):
|
||||
return "hello " + flask_request.args.get("name", "serve!")
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
serve.create_backend("echo:v1", echo)
|
||||
|
||||
@@ -24,7 +24,7 @@ class MagicCounter:
|
||||
return base_number + self.increment
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
serve.create_endpoint("magic_counter", "/counter")
|
||||
serve.create_backend("counter:v1", MagicCounter, 42) # increment=42
|
||||
serve.set_traffic("magic_counter", {"counter:v1": 1.0})
|
||||
|
||||
@@ -35,7 +35,7 @@ class MagicCounter:
|
||||
return result
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
serve.create_endpoint("magic_counter", "/counter")
|
||||
serve.create_backend(
|
||||
"counter:v1", MagicCounter, 42,
|
||||
|
||||
@@ -26,7 +26,7 @@ class MagicCounter:
|
||||
return ""
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
serve.create_endpoint("magic_counter", "/counter")
|
||||
# specify max_batch_size in BackendConfig
|
||||
backend_config = {"max_batch_size": 5}
|
||||
|
||||
@@ -26,7 +26,7 @@ def echo(_):
|
||||
raise Exception("Something went wrong...")
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
serve.create_backend("echo:v1", echo)
|
||||
|
||||
@@ -24,7 +24,6 @@ def echo_v2(_):
|
||||
|
||||
# specify the router policy as FixedPacking with packing num as 5
|
||||
serve.init(
|
||||
blocking=True,
|
||||
queueing_policy=serve.RoutePolicy.FixedPacking,
|
||||
policy_kwargs={"packing_num": 5})
|
||||
|
||||
|
||||
@@ -11,8 +11,7 @@ import ray.serve as serve
|
||||
from ray.serve.utils import pformat_color_json
|
||||
|
||||
# initialize ray serve system.
|
||||
# blocking=True will wait for HTTP server to be ready to serve request.
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
# an endpoint is associated with an http URL.
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
|
||||
@@ -6,8 +6,7 @@ import ray.serve as serve
|
||||
import time
|
||||
|
||||
# initialize ray serve system.
|
||||
# blocking=True will wait for HTTP server to be ready to serve request.
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
|
||||
# a backend can be a function or class.
|
||||
|
||||
@@ -19,7 +19,7 @@ def echo_v2(_):
|
||||
|
||||
|
||||
# specify the router policy as RoundRobin
|
||||
serve.init(blocking=True, queueing_policy=serve.RoutePolicy.RoundRobin)
|
||||
serve.init(queueing_policy=serve.RoutePolicy.RoundRobin)
|
||||
|
||||
# create a service
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
|
||||
@@ -10,8 +10,7 @@ import ray
|
||||
import ray.serve as serve
|
||||
|
||||
# initialize ray serve system.
|
||||
# blocking=True will wait for HTTP server to be ready to serve request.
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
# an endpoint is associated with an http URL.
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
|
||||
@@ -18,7 +18,7 @@ def echo_v2(_):
|
||||
return "v2"
|
||||
|
||||
|
||||
serve.init(blocking=True)
|
||||
serve.init()
|
||||
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
serve.create_backend("echo:v1", echo_v1)
|
||||
|
||||
@@ -11,7 +11,7 @@ if os.environ.get("RAY_SERVE_INTENTIONALLY_CRASH", False):
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def _shared_serve_instance():
|
||||
serve.init(blocking=True, ray_init_kwargs={"num_cpus": 36})
|
||||
serve.init(ray_init_kwargs={"num_cpus": 36})
|
||||
yield
|
||||
|
||||
|
||||
|
||||
@@ -354,7 +354,7 @@ def test_cluster_name():
|
||||
backend = "backend"
|
||||
endpoint = "endpoint"
|
||||
|
||||
serve.init(cluster_name="cluster1", blocking=True, http_port=8001)
|
||||
serve.init(cluster_name="cluster1", http_port=8001)
|
||||
serve.create_endpoint(endpoint, route=route)
|
||||
|
||||
def function():
|
||||
@@ -367,7 +367,7 @@ def test_cluster_name():
|
||||
|
||||
# Create a second cluster on port 8002. Create an endpoint and backend with
|
||||
# the same names and check that they don't collide.
|
||||
serve.init(cluster_name="cluster2", blocking=True, http_port=8002)
|
||||
serve.init(cluster_name="cluster2", http_port=8002)
|
||||
serve.create_endpoint(endpoint, route=route)
|
||||
|
||||
def function():
|
||||
|
||||
@@ -12,6 +12,7 @@ import os
|
||||
import ray
|
||||
import requests
|
||||
from pygments import formatters, highlight, lexers
|
||||
from ray.serve.constants import HTTP_PROXY_TIMEOUT
|
||||
from ray.serve.context import FakeFlaskRequest, TaskContext
|
||||
from ray.serve.http_util import build_flask_request
|
||||
import numpy as np
|
||||
@@ -87,9 +88,11 @@ def pformat_color_json(d):
|
||||
return colorful_json
|
||||
|
||||
|
||||
def block_until_http_ready(http_endpoint, num_retries=6, backoff_time_s=1):
|
||||
def block_until_http_ready(http_endpoint,
|
||||
backoff_time_s=1,
|
||||
timeout=HTTP_PROXY_TIMEOUT):
|
||||
http_is_ready = False
|
||||
retries = num_retries
|
||||
start_time = time.time()
|
||||
|
||||
while not http_is_ready:
|
||||
try:
|
||||
@@ -99,14 +102,11 @@ def block_until_http_ready(http_endpoint, num_retries=6, backoff_time_s=1):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Exponential backoff
|
||||
time.sleep(backoff_time_s)
|
||||
backoff_time_s *= 2
|
||||
if 0 < timeout < time.time() - start_time:
|
||||
raise TimeoutError(
|
||||
"HTTP proxy not ready after {} seconds.".format(timeout))
|
||||
|
||||
retries -= 1
|
||||
if retries == 0:
|
||||
raise Exception(
|
||||
"HTTP proxy not ready after {} retries.".format(num_retries))
|
||||
time.sleep(backoff_time_s)
|
||||
|
||||
|
||||
def get_random_letters(length=6):
|
||||
|
||||
Reference in New Issue
Block a user