[Serve] [Doc] Add existing web server integration ServeHandle tutorial (#13127)

2026-06-27 20:22:39 +08:00 · 2021-01-04 08:28:34 -08:00
parent 61c3b6d3bf
commit a95275bdd9
8 changed files with 189 additions and 7 deletions
@@ -523,7 +523,7 @@ class Client:


 def start(detached: bool = False,
-          http_host: str = DEFAULT_HTTP_HOST,
+          http_host: Optional[str] = DEFAULT_HTTP_HOST,
          http_port: int = DEFAULT_HTTP_PORT,
          http_middlewares: List[Any] = []) -> Client:
    """Initialize a serve instance.
@@ -537,8 +537,8 @@ def start(detached: bool = False,
    Args:
        detached (bool): Whether or not the instance should be detached from
            this script.
-        http_host (str): Host for HTTP servers to listen on. Defaults to
-            "127.0.0.1". To expose Serve publicly, you probably want to set
+        http_host (str, optional): Host for HTTP servers to listen on. Defaults
+            to "127.0.0.1". To expose Serve publicly, you probably want to set
            this to "0.0.0.0". One HTTP server will be started on each node in
            the Ray cluster. To not start HTTP servers, set this to None.
        http_port (int): Port for HTTP server. Defaults to 8000.
@@ -0,0 +1,28 @@
+# File name: aiohttp_app.py
+from aiohttp import web
+
+import ray
+from ray import serve
+
+# Attach to the Ray cluster that is already running.
+ray.init(address="auto")
+
+# Connect to the Ray Serve instance that is already running.
+client = serve.connect()
+
+my_handle = client.get_handle("my_endpoint")  # Returns a ServeHandle object.
+
+
+# Define our AIOHTTP request handler.
+async def handle_request(request):
+    # Offload the computation to our Ray Serve backend.
+    result = await my_handle.remote("dummy input")
+    return web.Response(text=result)
+
+
+# Create the AIOHTTP app and route GET /dummy-model to handle_request.
+app = web.Application()
+app.add_routes([web.get("/dummy-model", handle_request)])
+
+if __name__ == "__main__":
+    web.run_app(app)  # Start the web server when run as a script.
@@ -0,0 +1,21 @@
+# File name: deploy_serve.py
+import ray
+from ray import serve
+
+# Connect to the running Ray cluster.
+ray.init(address="auto")
+
+# Start a detached Ray Serve instance; it persists after the script exits.
+client = serve.start(http_host=None, detached=True)  # No Serve HTTP servers.
+
+
+# Define a function to serve. Alternatively, you could define a stateful class.
+async def my_model(request):
+    data = await request.body()
+    return f"Model received data: {data}"
+
+
+# Create a backend with two replicas of my_model and an endpoint for it.
+backend_config = serve.BackendConfig(num_replicas=2)
+client.create_backend("my_backend", my_model, config=backend_config)
+client.create_endpoint("my_endpoint", backend="my_backend")
@@ -0,0 +1,12 @@
+from fastapi import FastAPI
+from transformers import pipeline  # A simple API for NLP tasks.
+
+app = FastAPI()  # Web application; routes are registered on it below.
+
+nlp_model = pipeline("text-generation", model="gpt2")  # Load the model.
+
+
+# The function below handles GET requests to the URL `/generate`.
+@app.get("/generate")
+def generate(query: str):
+    return nlp_model(query, max_length=50)  # Output is capped at 50 tokens.
@@ -0,0 +1,37 @@
+import ray
+from ray import serve
+
+from fastapi import FastAPI
+from transformers import pipeline
+
+app = FastAPI()
+
+serve_handle = None  # Assigned in startup_event() once the endpoint exists.
+
+
+@app.on_event("startup")  # Code to be run when the server starts.
+async def startup_event():
+    ray.init(address="auto")  # Connect to the running Ray cluster.
+    client = serve.start(http_host=None)  # Start the Ray Serve client.
+
+    # Define a callable class to use for our Ray Serve backend.
+    class GPT2:
+        def __init__(self):
+            self.nlp_model = pipeline("text-generation", model="gpt2")
+
+        async def __call__(self, request):
+            return self.nlp_model(await request.body(), max_length=50)
+
+    # Set up a Ray Serve backend with the desired number of replicas.
+    backend_config = serve.BackendConfig(num_replicas=2)
+    client.create_backend("gpt-2", GPT2, config=backend_config)
+    client.create_endpoint("generate", backend="gpt-2")
+
+    # Get a handle to our Ray Serve endpoint so we can query it in Python.
+    global serve_handle
+    serve_handle = client.get_handle("generate")
+
+
+@app.get("/generate")
+async def generate(query: str):
+    return await serve_handle.remote(query)  # Query the Ray Serve endpoint.