mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:34:51 +08:00
[Serve] Performance: Use uvloop when possible (#9216)
This commit is contained in:
@@ -82,7 +82,8 @@ from ray.includes.ray_config cimport RayConfig
|
||||
from ray.includes.global_state_accessor cimport CGlobalStateAccessor
|
||||
|
||||
import ray
|
||||
from ray.async_compat import (sync_to_async, AsyncGetResponse)
|
||||
from ray.async_compat import (
|
||||
sync_to_async, AsyncGetResponse, get_new_event_loop)
|
||||
import ray.memory_monitor as memory_monitor
|
||||
import ray.ray_constants as ray_constants
|
||||
from ray import profiling
|
||||
@@ -1187,7 +1188,7 @@ cdef class CoreWorker:
|
||||
|
||||
def create_or_get_event_loop(self):
|
||||
if self.async_event_loop is None:
|
||||
self.async_event_loop = asyncio.new_event_loop()
|
||||
self.async_event_loop = get_new_event_loop()
|
||||
asyncio.set_event_loop(self.async_event_loop)
|
||||
# Initialize the async plasma connection.
|
||||
# Delayed import due to async_api depends on _raylet.
|
||||
|
||||
@@ -7,9 +7,22 @@ from collections import namedtuple
|
||||
import time
|
||||
import inspect
|
||||
|
||||
try:
|
||||
import uvloop
|
||||
except ImportError:
|
||||
uvloop = None
|
||||
|
||||
import ray
|
||||
|
||||
|
||||
def get_new_event_loop():
|
||||
"""Construct a new event loop. Ray will use uvloop if it exists"""
|
||||
if uvloop:
|
||||
return uvloop.new_event_loop()
|
||||
else:
|
||||
return asyncio.new_event_loop()
|
||||
|
||||
|
||||
def sync_to_async(func):
|
||||
"""Convert a blocking function to async function"""
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
noop:
|
||||
@echo "please specify which baseline to run"
|
||||
|
||||
uvicorn:
|
||||
uvicorn uvicorn_app:app --no-access-log --workers 1
|
||||
|
||||
fastapi:
|
||||
uvicorn fastapi_app:app --no-access-log --workers 1
|
||||
|
||||
bench:
|
||||
wrk -c 100 -t 10 -d 10s http://127.0.0.1:8000
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
from fastapi import FastAPI
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def read_root():
|
||||
return "Hello world"
|
||||
@@ -0,0 +1,13 @@
|
||||
async def app(scope, receive, send):
|
||||
assert scope["type"] == "http"
|
||||
await send({
|
||||
"type": "http.response.start",
|
||||
"status": 200,
|
||||
"headers": [
|
||||
[b"content-type", b"text/plain"],
|
||||
]
|
||||
})
|
||||
await send({
|
||||
"type": "http.response.body",
|
||||
"body": b"Hello, world!",
|
||||
})
|
||||
@@ -1,35 +1,70 @@
|
||||
from ray import serve
|
||||
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
|
||||
import requests
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import click
|
||||
|
||||
serve.init()
|
||||
from ray import serve
|
||||
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
|
||||
from ray.serve import master
|
||||
|
||||
master._TRACING_ENABLED = True
|
||||
|
||||
|
||||
def noop(_):
|
||||
return ""
|
||||
def block_until_ready(url):
|
||||
while requests.get(url).status_code == 404:
|
||||
time.sleep(1)
|
||||
print("Waiting for noop route to showup.")
|
||||
|
||||
|
||||
serve.create_backend("noop", noop)
|
||||
serve.create_endpoint("noop", backend="noop", route="/noop")
|
||||
def run_http_benchmark(url, num_queries):
|
||||
latency = []
|
||||
for _ in tqdm(range(num_queries + 200)):
|
||||
start = time.perf_counter()
|
||||
requests.get(url)
|
||||
end = time.perf_counter()
|
||||
latency.append(end - start)
|
||||
|
||||
url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
|
||||
while requests.get(url).status_code == 404:
|
||||
time.sleep(1)
|
||||
print("Waiting for noop route to showup.")
|
||||
# Remove initial samples
|
||||
latency = latency[200:]
|
||||
|
||||
latency = []
|
||||
for _ in tqdm(range(5200)):
|
||||
start = time.perf_counter()
|
||||
resp = requests.get(url)
|
||||
end = time.perf_counter()
|
||||
latency.append(end - start)
|
||||
series = pd.Series(latency) * 1000
|
||||
print("Latency for single noop backend (ms)")
|
||||
print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))
|
||||
|
||||
# Remove initial samples
|
||||
latency = latency[200:]
|
||||
|
||||
series = pd.Series(latency) * 1000
|
||||
print("Latency for single noop backend (ms)")
|
||||
print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))
|
||||
@click.command()
|
||||
@click.option("--blocking", is_flag=True, required=False, help="Block forever")
|
||||
@click.option("--num-queries", type=int, required=False)
|
||||
@click.option("--num-replicas", type=int, default=1)
|
||||
@click.option("--max-concurrent-queries", type=int, required=False)
|
||||
def main(num_replicas: int, num_queries: Optional[int],
|
||||
max_concurrent_queries: Optional[int], blocking: bool):
|
||||
serve.init()
|
||||
|
||||
def noop(_):
|
||||
return "hello world"
|
||||
|
||||
config = {
|
||||
"num_replicas": num_replicas,
|
||||
"max_concurrent_queries": max_concurrent_queries
|
||||
}
|
||||
print("Using config", config)
|
||||
serve.create_backend("noop", noop, config=config)
|
||||
serve.create_endpoint("noop", backend="noop", route="/noop")
|
||||
|
||||
url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
|
||||
block_until_ready(url)
|
||||
|
||||
if num_queries:
|
||||
run_http_benchmark(url, num_queries)
|
||||
if blocking:
|
||||
print("Endpoint {} is ready.".format(url))
|
||||
while True:
|
||||
time.sleep(5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import socket
|
||||
|
||||
import uvicorn
|
||||
|
||||
@@ -46,15 +45,6 @@ class HTTPProxy:
|
||||
def set_route_table(self, route_table):
|
||||
self.route_table = route_table
|
||||
|
||||
async def handle_lifespan_message(self, scope, receive, send):
|
||||
assert scope["type"] == "lifespan"
|
||||
|
||||
message = await receive()
|
||||
if message["type"] == "lifespan.startup":
|
||||
await send({"type": "lifespan.startup.complete"})
|
||||
elif message["type"] == "lifespan.shutdown":
|
||||
await send({"type": "lifespan.shutdown.complete"})
|
||||
|
||||
async def receive_http_body(self, scope, receive, send):
|
||||
body_buffer = []
|
||||
more_body = True
|
||||
@@ -116,10 +106,6 @@ class HTTPProxy:
|
||||
# NOTE: This implements ASGI protocol specified in
|
||||
# https://asgi.readthedocs.io/en/latest/specs/index.html
|
||||
|
||||
if scope["type"] == "lifespan":
|
||||
await self.handle_lifespan_message(scope, receive, send)
|
||||
return
|
||||
|
||||
error_sender = self._make_error_sender(scope, receive, send)
|
||||
|
||||
assert self.route_table is not None, (
|
||||
@@ -202,18 +188,21 @@ class HTTPProxyActor:
|
||||
asyncio.get_event_loop().create_task(self.run())
|
||||
|
||||
async def run(self):
|
||||
sock = socket.socket()
|
||||
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
sock.bind((self.host, self.port))
|
||||
sock.set_inheritable(True)
|
||||
|
||||
config = uvicorn.Config(self.app, lifespan="on", access_log=False)
|
||||
# Note(simon): we have to use lower level uvicorn Config and Server
|
||||
# class because we want to run the server as a coroutine. The only
|
||||
# alternative is to call uvicorn.run which is blocking.
|
||||
config = uvicorn.Config(
|
||||
self.app,
|
||||
host=self.host,
|
||||
port=self.port,
|
||||
lifespan="off",
|
||||
access_log=False)
|
||||
server = uvicorn.Server(config=config)
|
||||
# TODO(edoakes): we need to override install_signal_handlers here
|
||||
# because the existing implementation fails if it isn't running in
|
||||
# the main thread and uvicorn doesn't expose a way to configure it.
|
||||
server.install_signal_handlers = lambda: None
|
||||
await server.serve(sockets=[sock])
|
||||
await server.serve()
|
||||
|
||||
async def set_route_table(self, route_table):
|
||||
self.app.set_route_table(route_table)
|
||||
|
||||
@@ -559,7 +559,7 @@ def test_shutdown(serve_instance):
|
||||
pass
|
||||
|
||||
instance_name = "shutdown"
|
||||
serve.init(name=instance_name, http_port=8002)
|
||||
serve.init(name=instance_name, http_port=8003)
|
||||
serve.create_backend("backend", f)
|
||||
serve.create_endpoint("endpoint", backend="backend")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user