[Serve] Add doc for model composition (#8871)

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
2026-07-04 16:14:43 +08:00 · 2020-06-10 11:09:22 -07:00
parent 0ba7472da9
commit cf53b35147
3 changed files with 34 additions and 2 deletions
@@ -1,11 +1,17 @@
 from random import random
-
 import requests
-
 from ray import serve

 serve.init()

+# Our pipeline will be structured as follows:
+# - Input comes in; the composed model sends it to model_one.
+# - model_one outputs a random number between 0 and 1. If the value is
+#   greater than 0.5, the data is sent to model_two.
+# - Otherwise, the data is returned to the user.
+
+# Let's define two models that just print out the data they received.
+

 def model_one(_unused_flask_request, data=None):
    print("Model 1 called with data ", data)
@@ -22,6 +28,7 @@ class ComposedModel:
        self.model_one = serve.get_handle("model_one")
        self.model_two = serve.get_handle("model_two")

+    # This method can be called concurrently!
    async def __call__(self, flask_request):
        data = flask_request.data

@@ -41,6 +48,8 @@ serve.create_endpoint("model_one", backend="model_one")
 serve.create_backend("model_two", model_two)
 serve.create_endpoint("model_two", backend="model_two")

+# max_concurrent_queries is optional. By default, if you pass in an async
+# function, Ray Serve sets the limit to a high number.
 serve.create_backend(
    "composed_backend", ComposedModel, config={"max_concurrent_queries": 10})
 serve.create_endpoint(