[Serve] Performance: Use uvloop when possible (#9216)

2026-06-28 17:34:51 +08:00 · 2020-06-30 15:59:13 -07:00
parent 77933c922d
commit be647b69ab
8 changed files with 118 additions and 47 deletions
@@ -82,7 +82,8 @@ from ray.includes.ray_config cimport RayConfig
 from ray.includes.global_state_accessor cimport CGlobalStateAccessor

 import ray
-from ray.async_compat import (sync_to_async, AsyncGetResponse)
+from ray.async_compat import (
+    sync_to_async, AsyncGetResponse, get_new_event_loop)
 import ray.memory_monitor as memory_monitor
 import ray.ray_constants as ray_constants
 from ray import profiling
@@ -1187,7 +1188,7 @@ cdef class CoreWorker:

    def create_or_get_event_loop(self):
        if self.async_event_loop is None:
-            self.async_event_loop = asyncio.new_event_loop()
+            self.async_event_loop = get_new_event_loop()
            asyncio.set_event_loop(self.async_event_loop)
            # Initialize the async plasma connection.
            # Delayed import due to async_api depends on _raylet.
@@ -7,9 +7,22 @@ from collections import namedtuple
 import time
 import inspect

+try:
+    import uvloop
+except ImportError:
+    uvloop = None
+
 import ray


+def get_new_event_loop():
+    """Construct a new event loop. Ray will use uvloop if it is installed."""
+    if uvloop:
+        return uvloop.new_event_loop()
+    else:
+        return asyncio.new_event_loop()
+
+
 def sync_to_async(func):
    """Convert a blocking function to async function"""

@@ -0,0 +1,12 @@
+noop:
+	@echo "please specify which baseline to run"
+
+uvicorn:
+	uvicorn uvicorn_app:app --no-access-log --workers 1
+
+fastapi:
+	uvicorn fastapi_app:app --no-access-log --workers 1
+
+bench:
+	wrk -c 100 -t 10 -d 10s http://127.0.0.1:8000
+
@@ -0,0 +1,8 @@
+from fastapi import FastAPI
+
+app = FastAPI()
+
+
+@app.get("/")
+async def read_root():
+    return "Hello world"
@@ -0,0 +1,13 @@
+async def app(scope, receive, send):
+    assert scope["type"] == "http"
+    await send({
+        "type": "http.response.start",
+        "status": 200,
+        "headers": [
+            [b"content-type", b"text/plain"],
+        ]
+    })
+    await send({
+        "type": "http.response.body",
+        "body": b"Hello, world!",
+    })
@@ -1,35 +1,70 @@
-from ray import serve
-from ray.serve.constants import DEFAULT_HTTP_ADDRESS
-import requests
 import time
+from typing import Optional
+
+import requests
 import pandas as pd
 from tqdm import tqdm
+import click

-serve.init()
+from ray import serve
+from ray.serve.constants import DEFAULT_HTTP_ADDRESS
+from ray.serve import master
+
+master._TRACING_ENABLED = True


-def noop(_):
-    return ""
+def block_until_ready(url):
+    while requests.get(url).status_code == 404:
+        time.sleep(1)
+        print("Waiting for noop route to showup.")


-serve.create_backend("noop", noop)
-serve.create_endpoint("noop", backend="noop", route="/noop")
+def run_http_benchmark(url, num_queries):
+    latency = []
+    for _ in tqdm(range(num_queries + 200)):
+        start = time.perf_counter()
+        requests.get(url)
+        end = time.perf_counter()
+        latency.append(end - start)

-url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
-while requests.get(url).status_code == 404:
-    time.sleep(1)
-    print("Waiting for noop route to showup.")
+    # Remove initial samples
+    latency = latency[200:]

-latency = []
-for _ in tqdm(range(5200)):
-    start = time.perf_counter()
-    resp = requests.get(url)
-    end = time.perf_counter()
-    latency.append(end - start)
+    series = pd.Series(latency) * 1000
+    print("Latency for single noop backend (ms)")
+    print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))

-# Remove initial samples
-latency = latency[200:]

-series = pd.Series(latency) * 1000
-print("Latency for single noop backend (ms)")
-print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))
+@click.command()
+@click.option("--blocking", is_flag=True, required=False, help="Block forever")
+@click.option("--num-queries", type=int, required=False)
+@click.option("--num-replicas", type=int, default=1)
+@click.option("--max-concurrent-queries", type=int, required=False)
+def main(num_replicas: int, num_queries: Optional[int],
+         max_concurrent_queries: Optional[int], blocking: bool):
+    serve.init()
+
+    def noop(_):
+        return "hello world"
+
+    config = {
+        "num_replicas": num_replicas,
+        "max_concurrent_queries": max_concurrent_queries
+    }
+    print("Using config", config)
+    serve.create_backend("noop", noop, config=config)
+    serve.create_endpoint("noop", backend="noop", route="/noop")
+
+    url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
+    block_until_ready(url)
+
+    if num_queries:
+        run_http_benchmark(url, num_queries)
+    if blocking:
+        print("Endpoint {} is ready.".format(url))
+        while True:
+            time.sleep(5)
+
+
+if __name__ == "__main__":
+    main()
@@ -1,5 +1,4 @@
 import asyncio
-import socket

 import uvicorn

@@ -46,15 +45,6 @@ class HTTPProxy:
    def set_route_table(self, route_table):
        self.route_table = route_table

-    async def handle_lifespan_message(self, scope, receive, send):
-        assert scope["type"] == "lifespan"
-
-        message = await receive()
-        if message["type"] == "lifespan.startup":
-            await send({"type": "lifespan.startup.complete"})
-        elif message["type"] == "lifespan.shutdown":
-            await send({"type": "lifespan.shutdown.complete"})
-
    async def receive_http_body(self, scope, receive, send):
        body_buffer = []
        more_body = True
@@ -116,10 +106,6 @@ class HTTPProxy:
        # NOTE: This implements ASGI protocol specified in
        #       https://asgi.readthedocs.io/en/latest/specs/index.html

-        if scope["type"] == "lifespan":
-            await self.handle_lifespan_message(scope, receive, send)
-            return
-
        error_sender = self._make_error_sender(scope, receive, send)

        assert self.route_table is not None, (
@@ -202,18 +188,21 @@ class HTTPProxyActor:
        asyncio.get_event_loop().create_task(self.run())

    async def run(self):
-        sock = socket.socket()
-        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        sock.bind((self.host, self.port))
-        sock.set_inheritable(True)
-
-        config = uvicorn.Config(self.app, lifespan="on", access_log=False)
+        # Note(simon): we have to use lower-level uvicorn Config and Server
+        # classes because we want to run the server as a coroutine. The only
+        # alternative is to call uvicorn.run, which is blocking.
+        config = uvicorn.Config(
+            self.app,
+            host=self.host,
+            port=self.port,
+            lifespan="off",
+            access_log=False)
        server = uvicorn.Server(config=config)
        # TODO(edoakes): we need to override install_signal_handlers here
        # because the existing implementation fails if it isn't running in
        # the main thread and uvicorn doesn't expose a way to configure it.
        server.install_signal_handlers = lambda: None
-        await server.serve(sockets=[sock])
+        await server.serve()

    async def set_route_table(self, route_table):
        self.app.set_route_table(route_table)
@@ -559,7 +559,7 @@ def test_shutdown(serve_instance):
        pass

    instance_name = "shutdown"
-    serve.init(name=instance_name, http_port=8002)
+    serve.init(name=instance_name, http_port=8003)
    serve.create_backend("backend", f)
    serve.create_endpoint("endpoint", backend="backend")