[Serve] Add doc for model composition (#8871)

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
This commit is contained in:
Simon Mo
2020-06-10 11:09:22 -07:00
committed by GitHub
parent 0ba7472da9
commit cf53b35147
3 changed files with 34 additions and 2 deletions
@@ -1,11 +1,17 @@
from random import random
import requests
from ray import serve
serve.init()
# Our pipeline will be structured as follows:
# - Input comes in, the composed model sends it to model_one
# - model_one outputs a random number between 0 and 1. If the value is
# greater than 0.5, the data is sent to model_two.
# - Otherwise, the data is returned to the user.
# Let's define two models that just print out the data they received.
def model_one(_unused_flask_request, data=None):
print("Model 1 called with data ", data)
@@ -22,6 +28,7 @@ class ComposedModel:
self.model_one = serve.get_handle("model_one")
self.model_two = serve.get_handle("model_two")
# This method can be called concurrently!
async def __call__(self, flask_request):
data = flask_request.data
@@ -41,6 +48,8 @@ serve.create_endpoint("model_one", backend="model_one")
serve.create_backend("model_two", model_two)
serve.create_endpoint("model_two", backend="model_two")
# max_concurrent_queries is optional. By default, if you pass in an async
# function, Ray Serve sets the limit to a high number.
serve.create_backend(
"composed_backend", ComposedModel, config={"max_concurrent_queries": 10})
serve.create_endpoint(