Remove blocking flag from serve.init() (#8654)

2026-06-28 12:10:40 +08:00 · 2020-05-29 22:25:35 +02:00
parent 457a66ae9c
commit e5b6566d28
19 changed files with 32 additions and 36 deletions
@@ -4,7 +4,7 @@ from multiprocessing import cpu_count

 import ray
 from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
-                                 SERVE_MASTER_NAME)
+                                 SERVE_MASTER_NAME, HTTP_PROXY_TIMEOUT)
 from ray.serve.master import ServeMaster
 from ray.serve.handle import RayServeHandle
 from ray.serve.utils import (block_until_http_ready, format_actor_name,
@@ -61,7 +61,6 @@ def accept_batch(f):


 def init(cluster_name=None,
-         blocking=False,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
@@ -81,8 +80,6 @@ def init(cluster_name=None,
        cluster_name (str): A unique name for this serve cluster. This allows
            multiple serve clusters to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
-        blocking (bool): If true, the function will wait for the HTTP server to
-            be healthy, and other components to be ready before returns.
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        ray_init_kwargs (dict): Argument passed to ray.init, if there is no ray
@@ -125,9 +122,9 @@ def init(cluster_name=None,
        max_restarts=-1,
    ).remote(cluster_name, http_node_id, http_host, http_port, metric_exporter)

-    if blocking:
-        block_until_http_ready("http://{}:{}/-/routes".format(
-            http_host, http_port))
+    block_until_http_ready(
+        "http://{}:{}/-/routes".format(http_host, http_port),
+        timeout=HTTP_PROXY_TIMEOUT)


@_ensure_connected
@@ -27,3 +27,6 @@ DEFAULT_LATENCY_SLO_MS = 1e9

 #: Interval for metric client to push metrics to exporters
 METRIC_PUSH_INTERVAL_S = 2
+
+#: Seconds to wait for the HTTP proxy to be ready in `serve.init()`
+HTTP_PROXY_TIMEOUT = 60
@@ -5,7 +5,7 @@ import time
 import pandas as pd
 from tqdm import tqdm

-serve.init(blocking=True)
+serve.init()


 def noop(_):
@@ -14,7 +14,7 @@ def echo(flask_request):
    return "hello " + flask_request.args.get("name", "serve!")


-serve.init(blocking=True)
+serve.init()

 serve.create_endpoint("my_endpoint", "/echo")
 serve.create_backend("echo:v1", echo)
@@ -24,7 +24,7 @@ class MagicCounter:
        return base_number + self.increment


-serve.init(blocking=True)
+serve.init()
 serve.create_endpoint("magic_counter", "/counter")
 serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
 serve.set_traffic("magic_counter", {"counter:v1": 1.0})
@@ -35,7 +35,7 @@ class MagicCounter:
            return result


-serve.init(blocking=True)
+serve.init()
 serve.create_endpoint("magic_counter", "/counter")
 serve.create_backend(
    "counter:v1", MagicCounter, 42,
@@ -26,7 +26,7 @@ class MagicCounter:
        return ""


-serve.init(blocking=True)
+serve.init()
 serve.create_endpoint("magic_counter", "/counter")
 # specify max_batch_size in BackendConfig
 backend_config = {"max_batch_size": 5}
@@ -26,7 +26,7 @@ def echo(_):
    raise Exception("Something went wrong...")


-serve.init(blocking=True)
+serve.init()

 serve.create_endpoint("my_endpoint", "/echo")
 serve.create_backend("echo:v1", echo)
@@ -24,7 +24,6 @@ def echo_v2(_):

 # specify the router policy as FixedPacking with packing num as 5
 serve.init(
-    blocking=True,
    queueing_policy=serve.RoutePolicy.FixedPacking,
    policy_kwargs={"packing_num": 5})

@@ -11,8 +11,7 @@ import ray.serve as serve
 from ray.serve.utils import pformat_color_json

 # initialize ray serve system.
-# blocking=True will wait for HTTP server to be ready to serve request.
-serve.init(blocking=True)
+serve.init()

 # an endpoint is associated with an http URL.
 serve.create_endpoint("my_endpoint", "/echo")
@@ -6,8 +6,7 @@ import ray.serve as serve
 import time

 # initialize ray serve system.
-# blocking=True will wait for HTTP server to be ready to serve request.
-serve.init(blocking=True)
+serve.init()


 # a backend can be a function or class.
@@ -19,7 +19,7 @@ def echo_v2(_):


 # specify the router policy as RoundRobin
-serve.init(blocking=True, queueing_policy=serve.RoutePolicy.RoundRobin)
+serve.init(queueing_policy=serve.RoutePolicy.RoundRobin)

 # create a service
 serve.create_endpoint("my_endpoint", "/echo")
@@ -10,8 +10,7 @@ import ray
 import ray.serve as serve

 # initialize ray serve system.
-# blocking=True will wait for HTTP server to be ready to serve request.
-serve.init(blocking=True)
+serve.init()

 # an endpoint is associated with an http URL.
 serve.create_endpoint("my_endpoint", "/echo")
@@ -18,7 +18,7 @@ def echo_v2(_):
    return "v2"


-serve.init(blocking=True)
+serve.init()

 serve.create_endpoint("my_endpoint", "/echo")
 serve.create_backend("echo:v1", echo_v1)
@@ -11,7 +11,7 @@ if os.environ.get("RAY_SERVE_INTENTIONALLY_CRASH", False):

@pytest.fixture(scope="session")
 def _shared_serve_instance():
-    serve.init(blocking=True, ray_init_kwargs={"num_cpus": 36})
+    serve.init(ray_init_kwargs={"num_cpus": 36})
    yield


@@ -354,7 +354,7 @@ def test_cluster_name():
    backend = "backend"
    endpoint = "endpoint"

-    serve.init(cluster_name="cluster1", blocking=True, http_port=8001)
+    serve.init(cluster_name="cluster1", http_port=8001)
    serve.create_endpoint(endpoint, route=route)

    def function():
@@ -367,7 +367,7 @@ def test_cluster_name():

    # Create a second cluster on port 8002. Create an endpoint and backend with
    # the same names and check that they don't collide.
-    serve.init(cluster_name="cluster2", blocking=True, http_port=8002)
+    serve.init(cluster_name="cluster2", http_port=8002)
    serve.create_endpoint(endpoint, route=route)

    def function():
@@ -12,6 +12,7 @@ import os
 import ray
 import requests
 from pygments import formatters, highlight, lexers
+from ray.serve.constants import HTTP_PROXY_TIMEOUT
 from ray.serve.context import FakeFlaskRequest, TaskContext
 from ray.serve.http_util import build_flask_request
 import numpy as np
@@ -87,9 +88,11 @@ def pformat_color_json(d):
    return colorful_json


-def block_until_http_ready(http_endpoint, num_retries=6, backoff_time_s=1):
+def block_until_http_ready(http_endpoint,
+                           backoff_time_s=1,
+                           timeout=HTTP_PROXY_TIMEOUT):
    http_is_ready = False
-    retries = num_retries
+    start_time = time.time()

    while not http_is_ready:
        try:
@@ -99,14 +102,11 @@ def block_until_http_ready(http_endpoint, num_retries=6, backoff_time_s=1):
        except Exception:
            pass

-        # Exponential backoff
-        time.sleep(backoff_time_s)
-        backoff_time_s *= 2
+        if 0 < timeout < time.time() - start_time:
+            raise TimeoutError(
+                "HTTP proxy not ready after {} seconds.".format(timeout))

-        retries -= 1
-        if retries == 0:
-            raise Exception(
-                "HTTP proxy not ready after {} retries.".format(num_retries))
+        time.sleep(backoff_time_s)


 def get_random_letters(length=6):