[Serve] Performance: Use uvloop when possible (#9216)

This commit is contained in:
Simon Mo
2020-06-30 15:59:13 -07:00
committed by GitHub
parent 77933c922d
commit be647b69ab
8 changed files with 118 additions and 47 deletions
+3 -2
View File
@@ -82,7 +82,8 @@ from ray.includes.ray_config cimport RayConfig
from ray.includes.global_state_accessor cimport CGlobalStateAccessor
import ray
from ray.async_compat import (sync_to_async, AsyncGetResponse)
from ray.async_compat import (
sync_to_async, AsyncGetResponse, get_new_event_loop)
import ray.memory_monitor as memory_monitor
import ray.ray_constants as ray_constants
from ray import profiling
@@ -1187,7 +1188,7 @@ cdef class CoreWorker:
def create_or_get_event_loop(self):
if self.async_event_loop is None:
self.async_event_loop = asyncio.new_event_loop()
self.async_event_loop = get_new_event_loop()
asyncio.set_event_loop(self.async_event_loop)
# Initialize the async plasma connection.
# Delayed import due to async_api depends on _raylet.
+13
View File
@@ -7,9 +7,22 @@ from collections import namedtuple
import time
import inspect
try:
import uvloop
except ImportError:
uvloop = None
import ray
def get_new_event_loop():
    """Create and return a brand-new event loop.

    The loop is built by ``uvloop`` whenever the module-level ``uvloop``
    name is truthy; otherwise the standard ``asyncio`` implementation is
    used.
    """
    loop_factory = (uvloop.new_event_loop
                    if uvloop else asyncio.new_event_loop)
    return loop_factory()
def sync_to_async(func):
"""Convert a blocking function to async function"""
@@ -0,0 +1,12 @@
# Baseline servers and a load generator.
# None of these targets produce a file of the same name, so declare them
# phony; otherwise a stray file or directory called e.g. "bench" would make
# the recipe silently stop running.
.PHONY: noop uvicorn fastapi bench

# First regular target, hence the default goal: a bare `make` prints a hint.
noop:
	@echo "please specify which baseline to run"

# Serve uvicorn_app:app with one worker and access logging disabled.
uvicorn:
	uvicorn uvicorn_app:app --no-access-log --workers 1

# Serve fastapi_app:app under uvicorn with the same settings.
fastapi:
	uvicorn fastapi_app:app --no-access-log --workers 1

# Load test with wrk: 100 connections, 10 threads, 10 seconds.
bench:
	wrk -c 100 -t 10 -d 10s http://127.0.0.1:8000
@@ -0,0 +1,8 @@
from fastapi import FastAPI
app = FastAPI()
@app.get("/")
async def read_root():
    """Answer GET requests on "/" with a fixed greeting string."""
    greeting = "Hello world"
    return greeting
@@ -0,0 +1,13 @@
async def app(scope, receive, send):
    """Bare ASGI application that replies to every HTTP request with text.

    Args:
        scope: ASGI connection scope; its "type" must be "http".
        receive: ASGI receive callable (never awaited here).
        send: ASGI send callable used to emit the two response events.

    Raises:
        AssertionError: if the scope type is anything other than "http".
    """
    assert scope["type"] == "http"

    content_type_header = [b"content-type", b"text/plain"]
    response_start = {
        "type": "http.response.start",
        "status": 200,
        "headers": [content_type_header],
    }
    response_body = {
        "type": "http.response.body",
        "body": b"Hello, world!",
    }

    # Start event first, then the body, exactly in this order.
    for event in (response_start, response_body):
        await send(event)
+58 -23
View File
@@ -1,35 +1,70 @@
from ray import serve
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
import requests
import time
from typing import Optional
import requests
import pandas as pd
from tqdm import tqdm
import click
serve.init()
from ray import serve
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
from ray.serve import master
master._TRACING_ENABLED = True
def noop(_):
    """Ignore the single argument and return an empty string."""
    empty_reply = ""
    return empty_reply
def block_until_ready(url):
    """Poll ``url`` once per second until it stops answering with 404.

    Each time a 404 is seen, the function sleeps for one second and prints
    a progress message before trying again.
    """
    while True:
        response = requests.get(url)
        if response.status_code != 404:
            return
        time.sleep(1)
        print("Waiting for noop route to showup.")
serve.create_backend("noop", noop)
serve.create_endpoint("noop", backend="noop", route="/noop")
def run_http_benchmark(url, num_queries):
latency = []
for _ in tqdm(range(num_queries + 200)):
start = time.perf_counter()
requests.get(url)
end = time.perf_counter()
latency.append(end - start)
url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
while requests.get(url).status_code == 404:
time.sleep(1)
print("Waiting for noop route to showup.")
# Remove initial samples
latency = latency[200:]
latency = []
for _ in tqdm(range(5200)):
start = time.perf_counter()
resp = requests.get(url)
end = time.perf_counter()
latency.append(end - start)
series = pd.Series(latency) * 1000
print("Latency for single noop backend (ms)")
print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))
# Remove initial samples
latency = latency[200:]
series = pd.Series(latency) * 1000
print("Latency for single noop backend (ms)")
print(series.describe(percentiles=[0.5, 0.9, 0.95, 0.99]))
# Command-line entry point: starts serve, registers a "noop" backend behind
# the "/noop" route, waits for the route to respond, then optionally runs
# the HTTP latency benchmark and/or blocks forever.
# NOTE: documented with comments rather than a docstring on purpose; click
# would expose a docstring as the command's --help text.
@click.command()
@click.option("--blocking", is_flag=True, required=False, help="Block forever")
@click.option("--num-queries", type=int, required=False)
@click.option("--num-replicas", type=int, default=1)
@click.option("--max-concurrent-queries", type=int, required=False)
def main(num_replicas: int, num_queries: Optional[int],
         max_concurrent_queries: Optional[int], blocking: bool):
    serve.init()

    def noop(_):
        return "hello world"

    config = dict(
        num_replicas=num_replicas,
        max_concurrent_queries=max_concurrent_queries)
    print("Using config", config)
    serve.create_backend("noop", noop, config=config)
    serve.create_endpoint("noop", backend="noop", route="/noop")

    url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
    block_until_ready(url)

    # A missing (None) or zero query count skips the benchmark entirely.
    if num_queries:
        run_http_benchmark(url, num_queries)

    if not blocking:
        return

    # Keep the process alive so the endpoint can be hit externally.
    print("Endpoint {} is ready.".format(url))
    while True:
        time.sleep(5)
if __name__ == "__main__":
main()
+10 -21
View File
@@ -1,5 +1,4 @@
import asyncio
import socket
import uvicorn
@@ -46,15 +45,6 @@ class HTTPProxy:
def set_route_table(self, route_table):
    """Store ``route_table`` on this instance as ``self.route_table``."""
    setattr(self, "route_table", route_table)
async def handle_lifespan_message(self, scope, receive, send):
    """Acknowledge the ASGI lifespan events delivered on one scope.

    The lifespan protocol sends "lifespan.startup" and, later,
    "lifespan.shutdown" over the same scope, so this keeps receiving until
    the shutdown event has been acknowledged instead of returning after
    the first message.

    Args:
        scope: ASGI scope; its "type" must be "lifespan".
        receive: ASGI receive callable yielding lifespan event dicts.
        send: ASGI send callable used for the "*.complete" replies.

    Raises:
        AssertionError: if the scope type is not "lifespan".
    """
    assert scope["type"] == "lifespan"

    while True:
        message = await receive()
        if message["type"] == "lifespan.startup":
            await send({"type": "lifespan.startup.complete"})
        elif message["type"] == "lifespan.shutdown":
            await send({"type": "lifespan.shutdown.complete"})
            # Shutdown is the final event of the protocol; stop serving.
            return
async def receive_http_body(self, scope, receive, send):
body_buffer = []
more_body = True
@@ -116,10 +106,6 @@ class HTTPProxy:
# NOTE: This implements ASGI protocol specified in
# https://asgi.readthedocs.io/en/latest/specs/index.html
if scope["type"] == "lifespan":
await self.handle_lifespan_message(scope, receive, send)
return
error_sender = self._make_error_sender(scope, receive, send)
assert self.route_table is not None, (
@@ -202,18 +188,21 @@ class HTTPProxyActor:
asyncio.get_event_loop().create_task(self.run())
async def run(self):
sock = socket.socket()
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((self.host, self.port))
sock.set_inheritable(True)
config = uvicorn.Config(self.app, lifespan="on", access_log=False)
# Note(simon): we have to use lower level uvicorn Config and Server
# class because we want to run the server as a coroutine. The only
# alternative is to call uvicorn.run which is blocking.
config = uvicorn.Config(
self.app,
host=self.host,
port=self.port,
lifespan="off",
access_log=False)
server = uvicorn.Server(config=config)
# TODO(edoakes): we need to override install_signal_handlers here
# because the existing implementation fails if it isn't running in
# the main thread and uvicorn doesn't expose a way to configure it.
server.install_signal_handlers = lambda: None
await server.serve(sockets=[sock])
await server.serve()
async def set_route_table(self, route_table):
self.app.set_route_table(route_table)
+1 -1
View File
@@ -559,7 +559,7 @@ def test_shutdown(serve_instance):
pass
instance_name = "shutdown"
serve.init(name=instance_name, http_port=8002)
serve.init(name=instance_name, http_port=8003)
serve.create_backend("backend", f)
serve.create_endpoint("endpoint", backend="backend")