mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 16:14:43 +08:00
[Serve] Add doc for model composition (#8871)
Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
from random import random
|
||||
|
||||
import requests
|
||||
|
||||
from ray import serve
|
||||
|
||||
serve.init()
|
||||
|
||||
# Our pipeline will be structured as follows:
|
||||
# - Input comes in, and the composed model sends it to model_one
|
||||
# - model_one outputs a random number between 0 and 1; if the value is
|
||||
# greater than 0.5, then the data is sent to model_two
|
||||
# - otherwise, the data is returned to the user.
|
||||
|
||||
# Let's define two models that just print out the data they received.
|
||||
|
||||
|
||||
def model_one(_unused_flask_request, data=None):
|
||||
print("Model 1 called with data ", data)
|
||||
@@ -22,6 +28,7 @@ class ComposedModel:
|
||||
self.model_one = serve.get_handle("model_one")
|
||||
self.model_two = serve.get_handle("model_two")
|
||||
|
||||
# This method can be called concurrently!
|
||||
async def __call__(self, flask_request):
|
||||
data = flask_request.data
|
||||
|
||||
@@ -41,6 +48,8 @@ serve.create_endpoint("model_one", backend="model_one")
|
||||
serve.create_backend("model_two", model_two)
|
||||
serve.create_endpoint("model_two", backend="model_two")
|
||||
|
||||
# max_concurrent_queries is optional. By default, if you pass in an async
|
||||
# function, Ray Serve sets the limit to a high number.
|
||||
serve.create_backend(
|
||||
"composed_backend", ComposedModel, config={"max_concurrent_queries": 10})
|
||||
serve.create_endpoint(
|
||||
|
||||
Reference in New Issue
Block a user