mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 20:22:39 +08:00
[Serve] [Doc] Add existing web server integration ServeHandle tutorial (#13127)
This commit is contained in:
@@ -523,7 +523,7 @@ class Client:
|
||||
|
||||
|
||||
def start(detached: bool = False,
|
||||
http_host: str = DEFAULT_HTTP_HOST,
|
||||
http_host: Optional[str] = DEFAULT_HTTP_HOST,
|
||||
http_port: int = DEFAULT_HTTP_PORT,
|
||||
http_middlewares: List[Any] = []) -> Client:
|
||||
"""Initialize a serve instance.
|
||||
@@ -537,8 +537,8 @@ def start(detached: bool = False,
|
||||
Args:
|
||||
detached (bool): Whether or not the instance should be detached from this
|
||||
script.
|
||||
http_host (str): Host for HTTP servers to listen on. Defaults to
|
||||
"127.0.0.1". To expose Serve publicly, you probably want to set
|
||||
http_host (str, optional): Host for HTTP servers to listen on. Defaults
|
||||
to "127.0.0.1". To expose Serve publicly, you probably want to set
|
||||
this to "0.0.0.0". One HTTP server will be started on each node in
|
||||
the Ray cluster. To not start HTTP servers, set this to None.
|
||||
http_port (int): Port for HTTP server. Defaults to 8000.
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
# File name: aiohttp_app.py
|
||||
from aiohttp import web
|
||||
|
||||
import ray
|
||||
from ray import serve
|
||||
|
||||
# Connect to the running Ray cluster.
|
||||
ray.init(address="auto")
|
||||
|
||||
# Connect to the running Ray Serve instance.
|
||||
client = serve.connect()
|
||||
|
||||
my_handle = client.get_handle("my_endpoint") # Returns a ServeHandle object.
|
||||
|
||||
|
||||
# Define our AIOHTTP request handler.
|
||||
async def handle_request(request):
    """Answer an AIOHTTP request by querying the Ray Serve endpoint.

    The incoming ``request`` is not inspected: a fixed ``"dummy input"``
    string is sent through the module-level ``my_handle`` and the awaited
    result is returned as the text of the HTTP response.
    """
    # Awaiting the handle call offloads the computation to the Ray Serve
    # backend instead of doing the work inside the web server.
    model_output = await my_handle.remote("dummy input")
    response = web.Response(text=model_output)
    return response
|
||||
|
||||
|
||||
# Set up an HTTP endpoint.
|
||||
app = web.Application()
|
||||
app.add_routes([web.get("/dummy-model", handle_request)])
|
||||
|
||||
if __name__ == "__main__":
|
||||
web.run_app(app)
|
||||
@@ -0,0 +1,21 @@
|
||||
# File name: deploy_serve.py
|
||||
import ray
|
||||
from ray import serve
|
||||
|
||||
# Connect to the running Ray cluster.
|
||||
ray.init(address="auto")
|
||||
|
||||
# Start a detached Ray Serve instance. It will persist after the script exits.
|
||||
client = serve.start(http_host=None, detached=True)
|
||||
|
||||
|
||||
# Define a function to serve. Alternatively, you could define a stateful class.
|
||||
async def my_model(request):
    """Echo the request body back inside a fixed message.

    Args:
        request: Object exposing an awaitable ``body()`` method.

    Returns:
        The string ``"Model received data: "`` followed by the formatted body.
    """
    payload = await request.body()
    return f"Model received data: {payload}"
|
||||
|
||||
|
||||
# Set up a backend with the desired number of replicas and set up an endpoint.
|
||||
backend_config = serve.BackendConfig(num_replicas=2)
|
||||
client.create_backend("my_backend", my_model, config=backend_config)
|
||||
client.create_endpoint("my_endpoint", backend="my_backend")
|
||||
@@ -0,0 +1,12 @@
|
||||
from fastapi import FastAPI
|
||||
from transformers import pipeline # A simple API for NLP tasks.
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
nlp_model = pipeline("text-generation", model="gpt2") # Load the model.
|
||||
|
||||
|
||||
# The function below handles GET requests to the URL `/generate`.
|
||||
@app.get("/generate")
def generate(query: str):
    """Generate text from ``query`` with the module-level ``nlp_model``.

    Args:
        query: Prompt text taken from the request's query parameters.

    Returns:
        Whatever the text-generation pipeline returns for the prompt.
    """
    # max_length=50 caps the generated sequence length; the limit is counted
    # in tokens, not words.
    generated = nlp_model(query, max_length=50)
    return generated
|
||||
@@ -0,0 +1,37 @@
|
||||
import ray
|
||||
from ray import serve
|
||||
|
||||
from fastapi import FastAPI
|
||||
from transformers import pipeline
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
serve_handle = None
|
||||
|
||||
|
||||
@app.on_event("startup")  # Code to be run when the server starts.
async def startup_event():
    """Connect to Ray, deploy the GPT-2 backend and publish its handle.

    Stores the handle for the "generate" endpoint in the module-level
    ``serve_handle`` so request handlers can query the backend from Python.
    """
    # Declared up front: this function rebinds the module-level handle.
    global serve_handle

    ray.init(address="auto")  # Connect to the running Ray cluster.
    # http_host=None asks Serve not to start its own HTTP servers.
    serve_client = serve.start(http_host=None)  # Start the Ray Serve client.

    class GPT2:
        """Callable class used as the Ray Serve backend."""

        def __init__(self):
            self.nlp_model = pipeline("text-generation", model="gpt2")

        async def __call__(self, request):
            request_body = await request.body()
            return self.nlp_model(request_body, max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    replica_config = serve.BackendConfig(num_replicas=2)
    serve_client.create_backend("gpt-2", GPT2, config=replica_config)
    serve_client.create_endpoint("generate", backend="gpt-2")

    # Get a handle to our Ray Serve endpoint so we can query it in Python.
    serve_handle = serve_client.get_handle("generate")
|
||||
|
||||
|
||||
@app.get("/generate")
async def generate(query: str):
    """Forward ``query`` to the Ray Serve endpoint and return its result.

    Relies on the module-level ``serve_handle``, which is ``None`` until it
    has been assigned a handle elsewhere in the file.
    """
    reply = await serve_handle.remote(query)
    return reply
|
||||
Reference in New Issue
Block a user