[Serve] [Doc] Add existing web server integration ServeHandle tutorial (#13127)

This commit is contained in:
architkulkarni
2021-01-04 08:28:34 -08:00
committed by GitHub
parent 61c3b6d3bf
commit a95275bdd9
8 changed files with 189 additions and 7 deletions
+3 -3
View File
@@ -523,7 +523,7 @@ class Client:
def start(detached: bool = False,
http_host: str = DEFAULT_HTTP_HOST,
http_host: Optional[str] = DEFAULT_HTTP_HOST,
http_port: int = DEFAULT_HTTP_PORT,
http_middlewares: List[Any] = []) -> Client:
"""Initialize a serve instance.
@@ -537,8 +537,8 @@ def start(detached: bool = False,
Args:
detached (bool): Whether or not the instance should be detached from this
script.
http_host (str): Host for HTTP servers to listen on. Defaults to
"127.0.0.1". To expose Serve publicly, you probably want to set
http_host (str, optional): Host for HTTP servers to listen on. Defaults
to "127.0.0.1". To expose Serve publicly, you probably want to set
this to "0.0.0.0". One HTTP server will be started on each node in
the Ray cluster. To not start HTTP servers, set this to None.
http_port (int): Port for HTTP server. Defaults to 8000.
@@ -0,0 +1,28 @@
# File name: aiohttp_app.py
from aiohttp import web
import ray
from ray import serve
# Connect to the running Ray cluster.
ray.init(address="auto")
# Connect to the running Ray Serve instance. This script does not start
# Serve itself; it expects an instance to be running already.
client = serve.connect()
# Look up the endpoint by name so it can be queried from Python code in this
# process.
my_handle = client.get_handle("my_endpoint")  # Returns a ServeHandle object.
# Define our AIOHTTP request handler.
async def handle_request(request):
    """Answer an HTTP request with the output of the Ray Serve endpoint.

    The incoming ``request`` is not inspected: a fixed placeholder string is
    sent to the endpoint through ``my_handle`` and the awaited result is
    returned as the text body of the HTTP response.
    """
    # Offload the computation to our Ray Serve backend.
    model_output = await my_handle.remote("dummy input")
    return web.Response(text=model_output)
# Set up an HTTP endpoint: GET requests to /dummy-model go to handle_request.
app = web.Application()
app.add_routes([web.get("/dummy-model", handle_request)])

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    web.run_app(app)
@@ -0,0 +1,21 @@
# File name: deploy_serve.py
import ray
from ray import serve
# Connect to the running Ray cluster.
ray.init(address="auto")
# Start a detached Ray Serve instance. It will persist after the script exits.
# Passing http_host=None tells Serve not to start its own HTTP servers.
client = serve.start(http_host=None, detached=True)
# Define a function to serve. Alternatively, you could define a stateful class.
async def my_model(request):
    """Echo the request body back inside a short message.

    Args:
        request: Object exposing an awaitable ``body()`` method that yields
            the data sent to the endpoint.

    Returns:
        The string ``"Model received data: "`` followed by the body data.
    """
    data = await request.body()
    return f"Model received data: {data}"
# Set up a backend with the desired number of replicas and set up an endpoint.
# The backend must be created before the endpoint that refers to it by name.
backend_config = serve.BackendConfig(num_replicas=2)
# Register my_model under the backend name "my_backend".
client.create_backend("my_backend", my_model, config=backend_config)
# Expose that backend as the endpoint "my_endpoint".
client.create_endpoint("my_endpoint", backend="my_backend")
@@ -0,0 +1,12 @@
from fastapi import FastAPI
from transformers import pipeline # A simple API for NLP tasks.
# Create the FastAPI application that the route below is registered on.
app = FastAPI()
# The pipeline is built once, at import time, and reused by every request.
nlp_model = pipeline("text-generation", model="gpt2")  # Load the model.
# The function below handles GET requests to the URL `/generate`.
@app.get("/generate")
def generate(query: str):
    """Run the text-generation pipeline on ``query`` and return its output.

    Args:
        query: Prompt text, taken from the ``query`` request parameter.

    Returns:
        Whatever the pipeline returns for the prompt; ``max_length=50`` caps
        the length of the generated sequence.
    """
    generated = nlp_model(query, max_length=50)
    return generated
@@ -0,0 +1,37 @@
import ray
from ray import serve
from fastapi import FastAPI
from transformers import pipeline
# Create the FastAPI application that the hooks and routes below attach to.
app = FastAPI()
# Placeholder for the Ray Serve handle. It stays None until the startup hook
# assigns the real handle, which the /generate route then uses.
serve_handle = None
@app.on_event("startup")  # Code to be run when the server starts.
async def startup_event():
    """Start Ray Serve, deploy the GPT-2 backend, and publish its handle.

    Connects to the running Ray cluster, starts a Ray Serve client without
    HTTP servers, registers the ``GPT2`` backend and the ``generate``
    endpoint, and stores the endpoint's handle in the module-level
    ``serve_handle`` for the route handlers to use.
    """
    # The handle is shared with the route handlers through a module global.
    global serve_handle

    ray.init(address="auto")  # Connect to the running Ray cluster.
    serve_client = serve.start(http_host=None)  # Start the Ray Serve client.

    # Define a callable class to use for our Ray Serve backend.
    class GPT2:
        def __init__(self):
            # Each instance loads its own text-generation pipeline.
            self.nlp_model = pipeline("text-generation", model="gpt2")

        async def __call__(self, request):
            prompt = await request.body()
            return self.nlp_model(prompt, max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    replica_config = serve.BackendConfig(num_replicas=2)
    serve_client.create_backend("gpt-2", GPT2, config=replica_config)
    serve_client.create_endpoint("generate", backend="gpt-2")

    # Get a handle to our Ray Serve endpoint so we can query it in Python.
    serve_handle = serve_client.get_handle("generate")
@app.get("/generate")
async def generate(query: str):
    """Forward ``query`` to the Ray Serve endpoint and return its result.

    Args:
        query: Prompt text, taken from the ``query`` request parameter.

    Returns:
        The awaited result of calling the endpoint through ``serve_handle``.
    """
    generated = await serve_handle.remote(query)
    return generated