From 5d124489a953e97344b7aca06a9cb33d518fbf1b Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Sat, 6 Jun 2020 21:10:42 -0500 Subject: [PATCH] [serve] Require backend when creating endpoint (#8764) --- doc/source/serve/advanced.rst | 19 ++- doc/source/serve/deployment.rst | 27 ++-- doc/source/serve/key-concepts.rst | 85 +++++------ python/ray/serve/api.py | 43 +++++- python/ray/serve/examples/benchmark.py | 3 +- .../serve/examples/doc/quickstart_class.py | 3 +- .../serve/examples/doc/quickstart_function.py | 3 +- .../ray/serve/examples/doc/tutorial_batch.py | 4 +- .../ray/serve/examples/doc/tutorial_deploy.py | 3 +- .../serve/examples/doc/tutorial_pytorch.py | 7 +- .../serve/examples/doc/tutorial_sklearn.py | 3 +- .../serve/examples/doc/tutorial_tensorflow.py | 3 +- python/ray/serve/examples/echo.py | 3 +- python/ray/serve/examples/echo_actor.py | 3 +- python/ray/serve/examples/echo_actor_batch.py | 3 +- python/ray/serve/examples/echo_batching.py | 3 +- python/ray/serve/examples/echo_error.py | 4 +- .../ray/serve/examples/echo_fixed_packing.py | 8 +- python/ray/serve/examples/echo_full.py | 9 +- python/ray/serve/examples/echo_pipeline.py | 12 +- python/ray/serve/examples/echo_round_robin.py | 8 +- python/ray/serve/examples/echo_slo_reverse.py | 6 +- python/ray/serve/examples/echo_split.py | 3 +- python/ray/serve/master.py | 77 +++++----- python/ray/serve/tests/test_api.py | 135 ++++++++++-------- python/ray/serve/tests/test_failure.py | 28 ++-- python/ray/serve/tests/test_handle.py | 14 +- python/ray/serve/tests/test_metric.py | 2 +- python/ray/serve/tests/test_nonblocking.py | 4 +- python/ray/serve/tests/test_persistence.py | 3 +- 30 files changed, 272 insertions(+), 256 deletions(-) diff --git a/doc/source/serve/advanced.rst b/doc/source/serve/advanced.rst index 499add2e9..f8e4f21f3 100644 --- a/doc/source/serve/advanced.rst +++ b/doc/source/serve/advanced.rst @@ -89,11 +89,9 @@ You can also have Ray Serve batch requests for performance. In order to do use t responses.append(request.json()) return responses - serve.create_endpoint("counter1", "/increment") - config = {"max_batch_size": 5} serve.create_backend("counter1", BatchingExample, config=config) - serve.set_traffic("counter1", {"counter1": 1.0}) + serve.create_endpoint("counter1", backend="counter1", route="/increment") Please take a look at :ref:`Batching Tutorial` for a deep dive. @@ -109,10 +107,11 @@ When calling ``set_traffic``, you provide a dictionary of backend name to a floa For example, here we split traffic 50/50 between two backends: .. code-block:: python - - serve.create_endpoint("fifty-fifty", "/fifty") + serve.create_backend("backend1", MyClass1) serve.create_backend("backend2", MyClass2) + + serve.create_endpoint("fifty-fifty", backend="backend1", route="/fifty") serve.set_traffic("fifty-fifty", {"backend1": 0.5, "backend2": 0.5}) Each request is routed randomly between the backends in the traffic dictionary according to the provided weights. @@ -125,11 +124,10 @@ A/B Testing .. code-block:: python - serve.create_endpoint("ab_endpoint", "/a-b-test") serve.create_backend("default_backend", MyClass) - + # Initially, set all traffic to be served by the "default" backend. - serve.set_traffic("ab_endpoint", {"default_backend": 1.0}) + serve.create_endpoint("ab_endpoint", backend="default_backend", route="/a-b-test") # Add a second backend and route 1% of the traffic to it. serve.create_backend("new_backend", MyNewClass) @@ -151,11 +149,10 @@ In the example below, we do this repeatedly in one script, but in practice this .. code-block:: python - serve.create_endpoint("incremental_endpoint", "/incremental") serve.create_backend("existing_backend", MyClass) - + # Initially, all traffic is served by the existing backend. - serve.set_traffic("incremental_endpoint", {"existing_backend": 1.0}) + serve.create_endpoint("incremental_endpoint", backend="existing_backend", route="/incremental") # Then we can slowly increase the proportion of traffic served by the new backend. serve.create_backend("new_backend", MyNewClass) diff --git a/doc/source/serve/deployment.rst b/doc/source/serve/deployment.rst index 9a1e9ec55..0a263a7b8 100644 --- a/doc/source/serve/deployment.rst +++ b/doc/source/serve/deployment.rst @@ -235,8 +235,7 @@ With the cluster now running, we can run a simple script to start Ray Serve and return "hello world" serve.create_backend("hello_backend", hello) - serve.create_endpoint("hello_endpoint", route="/hello") - serve.set_traffic("hello_endpoint", {"hello_backend": 1.0}) + serve.create_endpoint("hello_endpoint", backend="hello_backend", route="/hello") Save this script locally as ``deploy.py`` and run it on the head node using ``ray submit``: @@ -283,19 +282,17 @@ You can run multiple serve instances on the same Ray cluster by providing a ``na .. code-block:: python - # Create a first instance whose HTTP server listens on 8000. - serve.init(name="instance1", http_port=8000) - serve.create_endpoint("counter1", "/increment") + # Create a first cluster whose HTTP server listens on 8000. + serve.init(name="cluster1", http_port=8000) - # Create a second instance whose HTTP server listens on 8001. - serve.init(name="instance2", http_port=8001) - serve.create_endpoint("counter1", "/increment") + # Create a second cluster whose HTTP server listens on 8001. + serve.init(name="cluster2", http_port=8001) - # Create a backend that will be served on the second instance. - serve.create_backend("counter1", function) - serve.set_traffic("counter1", {"counter1": 1.0}) + # Create a backend that will be served on the second cluster. + serve.create_backend("backend2", function) + serve.create_endpoint("endpoint2", backend="backend2", route="/increment") - # Switch back the the first instance and create the same backend on it. - serve.init(name="instance1") - serve.create_backend("counter1", function) - serve.set_traffic("counter1", {"counter1": 1.0}) + # Switch back the the first cluster and create the same backend on it. + serve.init(name="cluster1") + serve.create_backend("backend1", function) + serve.create_endpoint("endpoint1", backend="backend1", route="/increment") diff --git a/doc/source/serve/key-concepts.rst b/doc/source/serve/key-concepts.rst index 00bbb5878..d28ecabf9 100644 --- a/doc/source/serve/key-concepts.rst +++ b/doc/source/serve/key-concepts.rst @@ -2,7 +2,7 @@ Key Concepts ============ -Ray Serve focuses on **simplicity** and only has two core concepts: endpoints and backends. +Ray Serve focuses on **simplicity** and only has two core concepts: backends and endpoints. To follow along, you'll need to make the necessary imports. @@ -11,51 +11,18 @@ To follow along, you'll need to make the necessary imports. from ray import serve serve.init() # Initializes Ray and Ray Serve. -.. _serve-endpoint: - -Endpoints -========= - -Endpoints allow you to name the "entity" that you'll be exposing, -the HTTP path that your application will expose. -Endpoints are "logical" and decoupled from the business logic or -model that you'll be serving. To create one, we'll simply specify the name, route, and methods. - -.. code-block:: python - - serve.create_endpoint("simple_endpoint", "/simple", methods=["GET"]) - -To view all of the existing endpoints that have created, use `serve.list_endpoints`. - -.. code-block:: python - - >>> serve.list_endpoints() - {'simple_endpoint': {'route': '/simple', 'methods': ['GET'], 'traffic': {}}} - -You can also delete an endpoint using ``serve.delete_endpoint``. -Endpoints and backends are independent, so deleting an endpoint will not delete its backends. -However, an endpoint must be deleted in order to delete the backends that serve its traffic. - -.. code-block:: python - - serve.delete_endpoint("simple_endpoint") - -.. _serve-backend: Backends ======== -Backends are the logical structures for your business logic or models and -how you specify what should happen when an endpoint is queried. +Backends define the implementation of your business logic or models that will handle requests when queries come in to :ref:`serve-endpoint`. To define a backend, first you must define the "handler" or the business logic you'd like to respond with. -The input to this request will be a `Flask Request object `_. -Use a function when your response is stateless and a class when you -might need to maintain some state (like a model). -For both functions and classes (that take as input Flask Requests), you'll need to -define them as backends to Ray Serve. -You can specify arguments to be passed to class constructors in ``serve.create_backend``, shown below. +The handler should take as input a `Flask Request object `_ and return any JSON-serializable object as output. +A backend is defined using ``serve.create_backend``, and the implementation can be defined as either a function or a class. +Use a function when your response is stateless and a class when you might need to maintain some state (like a model). +When using a class, you can specify arguments to be passed to the constructor in ``serve.create_backend``, shown below. -It's important to note that Ray Serve places these backends in individual worker processes, which are replicas of the model. +A backend consists of a number of *replicas*, which are individual copies of the function or class that are started in separate worker processes. .. code-block:: python @@ -91,22 +58,40 @@ Note that a backend cannot be deleted while it is in use by an endpoint because 'simple_backend_class': {'accepts_batches': False, 'num_replicas': 1, 'max_batch_size': None}, } -Setting Traffic -=============== +.. _`serve-endpoint`: -Lastly, we need to route traffic the particular backend to the server endpoint. -To do that we'll use the ``set_traffic`` capability. -A link is essentially a load-balancer and allow you to define queuing policies -for how you would like backends to be served via an endpoint. -For instance, you can route 50% of traffic to Model A and 50% of traffic to Model B. +Endpoints +========= + +While backends define the implementation of your request handling logic, endpoints allow you to expose them via HTTP. +Endpoints are "logical" and can have one or multiple backends that serve requests to them +To create an endpoint, we simply need to specify a name for the endpoint, the name of a backend to handle requests to the endpoint, and the route and methods where it will be accesible. +By default endpoints are serviced only by the backend provided to ``serve.create_endpoint``, but in some cases you may want to specify multiple backends for an endpoint, e.g., for A/B testing or incremental rollout. +For information on how to do this, please see :ref:`serve-split-traffic`. .. code-block:: python - serve.set_traffic("simple_backend", {"simple_endpoint": 1.0}) + serve.create_endpoint("simple_endpoint", backend="simple_backend", route="/simple", methods=["GET"]) -Once we've done that, we can now query our endpoint via HTTP (we use `requests` to make HTTP calls here). +After creating the endpoint, it is now exposed by the HTTP server and handles requests using the specified backend. +We can query the model to verify that it's working. .. code-block:: python import requests - print(requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).text) + print(requests.get("http://127.0.0.1:8000/simple").text) + +To view all of the existing endpoints that have created, use `serve.list_endpoints`. + +.. code-block:: python + + >>> serve.list_endpoints() + {'simple_endpoint': {'route': '/simple', 'methods': ['GET'], 'traffic': {}}} + +You can also delete an endpoint using ``serve.delete_endpoint``. +Endpoints and backends are independent, so deleting an endpoint will not delete its backends. +However, an endpoint must be deleted in order to delete the backends that serve its traffic. + +.. code-block:: python + + serve.delete_endpoint("simple_endpoint") diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py index 358837edc..b59388363 100644 --- a/python/ray/serve/api.py +++ b/python/ray/serve/api.py @@ -119,18 +119,49 @@ def init(name=None, @_ensure_connected -def create_endpoint(endpoint_name, route=None, methods=["GET"]): +def create_endpoint(endpoint_name, + *, + backend=None, + route=None, + methods=["GET"]): """Create a service endpoint given route_expression. Args: - endpoint_name (str): A name to associate to the endpoint. It will be - used as key to set traffic policy. - route (str): A string begin with "/". HTTP server will use + endpoint_name (str): A name to associate to with the endpoint. + backend (str, required): The backend that will serve requests to + this endpoint. To change this or split traffic among backends, use + `serve.set_traffic`. + route (str, optional): A string begin with "/". HTTP server will use the string to match the path. + methods(List[str], optional): The HTTP methods that are valid for this + endpoint. """ + if backend is None: + raise TypeError("backend must be specified when creating " + "an endpoint.") + elif not isinstance(backend, str): + raise TypeError("backend must be a string, got {}.".format( + type(backend))) + + if route is not None: + if not isinstance(route, str) or not route.startswith("/"): + raise TypeError("route must be a string starting with '/'.") + + if not isinstance(methods, list): + raise TypeError( + "methods must be a list of strings, but got type {}".format( + type(methods))) + + upper_methods = [] + for method in methods: + if not isinstance(method, str): + raise TypeError("methods must be a list of strings, but contained " + "an element of type {}".format(type(method))) + upper_methods.append(method.upper()) + ray.get( - master_actor.create_endpoint.remote(route, endpoint_name, - [m.upper() for m in methods])) + master_actor.create_endpoint.remote(endpoint_name, {backend: 1.0}, + route, upper_methods)) @_ensure_connected diff --git a/python/ray/serve/examples/benchmark.py b/python/ray/serve/examples/benchmark.py index cf34c4d9d..3c70cef10 100644 --- a/python/ray/serve/examples/benchmark.py +++ b/python/ray/serve/examples/benchmark.py @@ -12,9 +12,8 @@ def noop(_): return "" -serve.create_endpoint("noop", "/noop") serve.create_backend("noop", noop) -serve.set_traffic("noop", {"noop": 1.0}) +serve.create_endpoint("noop", backend="noop", route="/noop") url = "{}/noop".format(DEFAULT_HTTP_ADDRESS) while requests.get(url).status_code == 404: diff --git a/python/ray/serve/examples/doc/quickstart_class.py b/python/ray/serve/examples/doc/quickstart_class.py index 5160ade01..5983bba13 100644 --- a/python/ray/serve/examples/doc/quickstart_class.py +++ b/python/ray/serve/examples/doc/quickstart_class.py @@ -12,9 +12,8 @@ class Counter: return {"current_counter": self.count} -serve.create_endpoint("counter", "/counter") serve.create_backend("counter", Counter) -serve.set_traffic("counter", {"counter": 1.0}) +serve.create_endpoint("counter", backend="counter", route="/counter") requests.get("http://127.0.0.1:8000/counter").json() # > {"current_counter": self.count} diff --git a/python/ray/serve/examples/doc/quickstart_function.py b/python/ray/serve/examples/doc/quickstart_function.py index d2f81d632..d1732f682 100644 --- a/python/ray/serve/examples/doc/quickstart_function.py +++ b/python/ray/serve/examples/doc/quickstart_function.py @@ -8,9 +8,8 @@ def echo(flask_request): return "hello " + flask_request.args.get("name", "serve!") -serve.create_endpoint("hello", "/hello") serve.create_backend("hello", echo) -serve.set_traffic("hello", {"hello": 1.0}) +serve.create_endpoint("hello", backend="hello", route="/hello") requests.get("http://127.0.0.1:8000/hello").text # > "hello serve!" diff --git a/python/ray/serve/examples/doc/tutorial_batch.py b/python/ray/serve/examples/doc/tutorial_batch.py index 1bfaa6e64..c703c8011 100644 --- a/python/ray/serve/examples/doc/tutorial_batch.py +++ b/python/ray/serve/examples/doc/tutorial_batch.py @@ -30,9 +30,9 @@ def batch_adder_v0(flask_requests: List): # __doc_deploy_begin__ serve.init() -serve.create_endpoint("adder", "/adder", methods=["GET"]) serve.create_backend("adder:v0", batch_adder_v0, config={"max_batch_size": 4}) -serve.set_traffic("adder", {"adder:v0": 1}) +serve.create_endpoint( + "adder", backend="adder:v0", route="/adder", methods=["GET"]) # __doc_deploy_end__ diff --git a/python/ray/serve/examples/doc/tutorial_deploy.py b/python/ray/serve/examples/doc/tutorial_deploy.py index 06960ed4d..6ba46f40a 100644 --- a/python/ray/serve/examples/doc/tutorial_deploy.py +++ b/python/ray/serve/examples/doc/tutorial_deploy.py @@ -70,9 +70,8 @@ ray.init(address="auto") # listen on 0.0.0.0 to make the HTTP server accessible from other machines. serve.init(http_host="0.0.0.0") -serve.create_endpoint("iris_classifier", "/regressor") serve.create_backend("lr:v1", BoostingModel) -serve.set_traffic("iris_classifier", {"lr:v1": 1, "version": "v1"}) +serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor") # __doc_create_deploy_end__ # __doc_query_begin__ diff --git a/python/ray/serve/examples/doc/tutorial_pytorch.py b/python/ray/serve/examples/doc/tutorial_pytorch.py index a55bc92ae..106eaf0ed 100644 --- a/python/ray/serve/examples/doc/tutorial_pytorch.py +++ b/python/ray/serve/examples/doc/tutorial_pytorch.py @@ -45,9 +45,12 @@ class ImageModel: # __doc_deploy_begin__ serve.init() -serve.create_endpoint("predictor", "/image_predict", methods=["POST"]) serve.create_backend("resnet18:v0", ImageModel) -serve.set_traffic("predictor", {"resnet18:v0": 1}) +serve.create_endpoint( + "predictor", + backend="resnet18:v0", + route="/image_predict", + methods=["POST"]) # __doc_deploy_end__ # __doc_query_begin__ diff --git a/python/ray/serve/examples/doc/tutorial_sklearn.py b/python/ray/serve/examples/doc/tutorial_sklearn.py index 57c812cdd..a0c401586 100644 --- a/python/ray/serve/examples/doc/tutorial_sklearn.py +++ b/python/ray/serve/examples/doc/tutorial_sklearn.py @@ -66,9 +66,8 @@ class BoostingModel: # __doc_deploy_begin__ serve.init() -serve.create_endpoint("iris_classifier", "/regressor") serve.create_backend("lr:v1", BoostingModel) -serve.set_traffic("iris_classifier", {"lr:v1": 1}) +serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor") # __doc_deploy_end__ # __doc_query_begin__ diff --git a/python/ray/serve/examples/doc/tutorial_tensorflow.py b/python/ray/serve/examples/doc/tutorial_tensorflow.py index 1dd800b7b..7c20aa9fd 100644 --- a/python/ray/serve/examples/doc/tutorial_tensorflow.py +++ b/python/ray/serve/examples/doc/tutorial_tensorflow.py @@ -69,9 +69,8 @@ class TFMnistModel: # __doc_deploy_begin__ serve.init() -serve.create_endpoint(endpoint_name="tf_classifier", route="/mnist") serve.create_backend("tf:v1", TFMnistModel, "/tmp/mnist_model.h5") -serve.set_traffic("tf_classifier", {"tf:v1": 1}) +serve.create_endpoint("tf_classifier", backend="tf:v1", route="/mnist") # __doc_deploy_end__ # __doc_query_begin__ diff --git a/python/ray/serve/examples/echo.py b/python/ray/serve/examples/echo.py index 73c326099..f722b63dd 100644 --- a/python/ray/serve/examples/echo.py +++ b/python/ray/serve/examples/echo.py @@ -16,9 +16,8 @@ def echo(flask_request): serve.init() -serve.create_endpoint("my_endpoint", "/echo") serve.create_backend("echo:v1", echo) -serve.set_traffic("my_endpoint", {"echo:v1": 1.0}) +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") while True: resp = requests.get("http://127.0.0.1:8000/echo").json() diff --git a/python/ray/serve/examples/echo_actor.py b/python/ray/serve/examples/echo_actor.py index dadf8628c..a919d962e 100644 --- a/python/ray/serve/examples/echo_actor.py +++ b/python/ray/serve/examples/echo_actor.py @@ -25,9 +25,8 @@ class MagicCounter: serve.init() -serve.create_endpoint("magic_counter", "/counter") serve.create_backend("counter:v1", MagicCounter, 42) # increment=42 -serve.set_traffic("magic_counter", {"counter:v1": 1.0}) +serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter") print("Sending ten queries via HTTP") for i in range(10): diff --git a/python/ray/serve/examples/echo_actor_batch.py b/python/ray/serve/examples/echo_actor_batch.py index cad989eb1..5173a7c84 100644 --- a/python/ray/serve/examples/echo_actor_batch.py +++ b/python/ray/serve/examples/echo_actor_batch.py @@ -36,11 +36,10 @@ class MagicCounter: serve.init() -serve.create_endpoint("magic_counter", "/counter") serve.create_backend( "counter:v1", MagicCounter, 42, config={"max_batch_size": 5}) # increment=42 -serve.set_traffic("magic_counter", {"counter:v1": 1.0}) +serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter") print("Sending ten queries via HTTP") for i in range(10): diff --git a/python/ray/serve/examples/echo_batching.py b/python/ray/serve/examples/echo_batching.py index a42fb0fb5..9220a93d7 100644 --- a/python/ray/serve/examples/echo_batching.py +++ b/python/ray/serve/examples/echo_batching.py @@ -27,14 +27,13 @@ class MagicCounter: serve.init() -serve.create_endpoint("magic_counter", "/counter") # specify max_batch_size in BackendConfig backend_config = {"max_batch_size": 5} serve.create_backend( "counter:v1", MagicCounter, 42, config=backend_config) # increment=42 print("Backend Config for backend: 'counter:v1'") print(backend_config) -serve.set_traffic("magic_counter", {"counter:v1": 1.0}) +serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter") handle = serve.get_handle("magic_counter") future_list = [] diff --git a/python/ray/serve/examples/echo_error.py b/python/ray/serve/examples/echo_error.py index dd4671685..670a0fb99 100644 --- a/python/ray/serve/examples/echo_error.py +++ b/python/ray/serve/examples/echo_error.py @@ -28,9 +28,7 @@ def echo(_): serve.init() -serve.create_endpoint("my_endpoint", "/echo") -serve.create_backend("echo:v1", echo) -serve.set_traffic("my_endpoint", {"echo:v1": 1.0}) +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") for _ in range(2): resp = requests.get("http://127.0.0.1:8000/echo").json() diff --git a/python/ray/serve/examples/echo_fixed_packing.py b/python/ray/serve/examples/echo_fixed_packing.py index d2e96d54f..7c294c8c7 100644 --- a/python/ray/serve/examples/echo_fixed_packing.py +++ b/python/ray/serve/examples/echo_fixed_packing.py @@ -27,16 +27,16 @@ serve.init( queueing_policy=serve.RoutePolicy.FixedPacking, policy_kwargs={"packing_num": 5}) -# create a service -serve.create_endpoint("my_endpoint", "/echo") - # create first backend serve.create_backend("echo:v1", echo_v1) +# create service backed by the first backend +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") + # create second backend serve.create_backend("echo:v2", echo_v2) -# link and split the service to two backends +# split the service between the two backends serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5}) while True: diff --git a/python/ray/serve/examples/echo_full.py b/python/ray/serve/examples/echo_full.py index 36f5fb11c..84d6cf60d 100644 --- a/python/ray/serve/examples/echo_full.py +++ b/python/ray/serve/examples/echo_full.py @@ -13,9 +13,6 @@ from ray.serve.utils import pformat_color_json # initialize ray serve system. serve.init() -# an endpoint is associated with an http URL. -serve.create_endpoint("my_endpoint", "/echo") - # a backend can be a function or class. # it can be made to be invoked from web as well as python. @@ -27,9 +24,9 @@ def echo_v1(flask_request, response="hello from python!"): serve.create_backend("echo:v1", echo_v1) -# We can link an endpoint to a backend, the means all the traffic -# goes to my_endpoint will now goes to echo:v1 backend. -serve.set_traffic("my_endpoint", {"echo:v1": 1.0}) +# An endpoint is associated with an HTTP path and traffic to the endpoint +# will be serviced by the echo:v1 backend. +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text) # The service will be reachable from http diff --git a/python/ray/serve/examples/echo_pipeline.py b/python/ray/serve/examples/echo_pipeline.py index 40582ae9a..24f2970fa 100644 --- a/python/ray/serve/examples/echo_pipeline.py +++ b/python/ray/serve/examples/echo_pipeline.py @@ -15,36 +15,32 @@ def echo_v1(_, response="hello from python!"): return f"echo_v1({response})" -serve.create_endpoint("echo_v1", "/echo_v1") serve.create_backend("echo_v1", echo_v1) -serve.set_traffic("echo_v1", {"echo_v1": 1.0}) +serve.create_endpoint("echo_v1", backend="echo_v1", route="/echo_v1") def echo_v2(_, relay=""): return f"echo_v2({relay})" -serve.create_endpoint("echo_v2", "/echo_v2") serve.create_backend("echo_v2", echo_v2) -serve.set_traffic("echo_v2", {"echo_v2": 1.0}) +serve.create_endpoint("echo_v2", backend="echo_v2", route="/echo_v2") def echo_v3(_, relay=""): return f"echo_v3({relay})" -serve.create_endpoint("echo_v3", "/echo_v3") serve.create_backend("echo_v3", echo_v3) -serve.set_traffic("echo_v3", {"echo_v3": 1.0}) +serve.create_endpoint("echo_v3", backend="echo_v3", route="/echo_v3") def echo_v4(_, relay1="", relay2=""): return f"echo_v4({relay1} , {relay2})" -serve.create_endpoint("echo_v4", "/echo_v4") serve.create_backend("echo_v4", echo_v4) -serve.set_traffic("echo_v4", {"echo_v4": 1.0}) +serve.create_endpoint("echo_v4", backend="echo_v4", route="/echo_v4") """ The pipeline created is as follows - "my_endpoint1" diff --git a/python/ray/serve/examples/echo_round_robin.py b/python/ray/serve/examples/echo_round_robin.py index 1dabeb4e1..4dc376ae4 100644 --- a/python/ray/serve/examples/echo_round_robin.py +++ b/python/ray/serve/examples/echo_round_robin.py @@ -21,16 +21,16 @@ def echo_v2(_): # specify the router policy as RoundRobin serve.init(queueing_policy=serve.RoutePolicy.RoundRobin) -# create a service -serve.create_endpoint("my_endpoint", "/echo") - # create first backend serve.create_backend("echo:v1", echo_v1) +# create a service backend by the first backend +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") + # create second backend serve.create_backend("echo:v2", echo_v2) -# link and split the service to two backends +# split the service between the two backends serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5}) while True: diff --git a/python/ray/serve/examples/echo_slo_reverse.py b/python/ray/serve/examples/echo_slo_reverse.py index f0bf33ab7..098ac5ddd 100644 --- a/python/ray/serve/examples/echo_slo_reverse.py +++ b/python/ray/serve/examples/echo_slo_reverse.py @@ -12,9 +12,6 @@ import ray.serve as serve # initialize ray serve system. serve.init() -# an endpoint is associated with an http URL. -serve.create_endpoint("my_endpoint", "/echo") - # a backend can be a function or class. # it can be made to be invoked from web as well as python. @@ -25,7 +22,8 @@ def echo_v1(flask_request, response="hello from python!"): serve.create_backend("echo:v1", echo_v1) -serve.set_traffic("my_endpoint", {"echo:v1": 1.0}) + +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") # wait for routing table to get populated time.sleep(2) diff --git a/python/ray/serve/examples/echo_split.py b/python/ray/serve/examples/echo_split.py index 66beb8211..8de676525 100644 --- a/python/ray/serve/examples/echo_split.py +++ b/python/ray/serve/examples/echo_split.py @@ -20,9 +20,8 @@ def echo_v2(_): serve.init() -serve.create_endpoint("my_endpoint", "/echo") serve.create_backend("echo:v1", echo_v1) -serve.set_traffic("my_endpoint", {"echo:v1": 1.0}) +serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") for _ in range(3): resp = requests.get("http://127.0.0.1:8000/echo").json() diff --git a/python/ray/serve/master.py b/python/ray/serve/master.py index 63598be05..1ded64026 100644 --- a/python/ray/serve/master.py +++ b/python/ray/serve/master.py @@ -465,44 +465,45 @@ class ServeMaster: } return endpoints - async def set_traffic(self, endpoint_name, traffic_policy_dictionary): + async def _set_traffic(self, endpoint_name, traffic_dict): + if endpoint_name not in self.get_all_endpoints(): + raise ValueError("Attempted to assign traffic for an endpoint '{}'" + " that is not registered.".format(endpoint_name)) + + assert isinstance(traffic_dict, + dict), "Traffic policy must be dictionary" + prob = 0 + for backend, weight in traffic_dict.items(): + if weight < 0: + raise ValueError( + "Attempted to assign a weight of {} to backend '{}'. " + "Weights cannot be negative.".format(weight, backend)) + prob += weight + if backend not in self.backends: + raise ValueError( + "Attempted to assign traffic to a backend '{}' that " + "is not registered.".format(backend)) + + # These weights will later be plugged into np.random.choice, which + # uses a tolerance of 1e-8. + assert np.isclose( + prob, 1, atol=1e-8 + ), "weights must sum to 1, currently they sum to {}".format(prob) + + self.traffic_policies[endpoint_name] = traffic_dict + + # NOTE(edoakes): we must write a checkpoint before pushing the + # update to avoid inconsistent state if we crash after pushing the + # update. + self._checkpoint() + await self.router.set_traffic.remote(endpoint_name, traffic_dict) + + async def set_traffic(self, endpoint_name, traffic_dict): """Sets the traffic policy for the specified endpoint.""" async with self.write_lock: - if endpoint_name not in self.get_all_endpoints(): - raise ValueError( - "Attempted to assign traffic for an endpoint '{}'" - " that is not registered.".format(endpoint_name)) + await self._set_traffic(endpoint_name, traffic_dict) - assert isinstance(traffic_policy_dictionary, - dict), "Traffic policy must be dictionary" - prob = 0 - for backend, weight in traffic_policy_dictionary.items(): - if weight < 0: - raise ValueError( - "Attempted to assign a weight of {} to backend '{}'. " - "Weights cannot be negative.".format(weight, backend)) - prob += weight - if backend not in self.backends: - raise ValueError( - "Attempted to assign traffic to a backend '{}' that " - "is not registered.".format(backend)) - - # These weights will later be plugged into np.random.choice, which - # uses a tolerance of 1e-8. - assert np.isclose( - prob, 1, atol=1e-8 - ), "weights must sum to 1, currently they sum to {}".format(prob) - - self.traffic_policies[endpoint_name] = traffic_policy_dictionary - - # NOTE(edoakes): we must write a checkpoint before pushing the - # update to avoid inconsistent state if we crash after pushing the - # update. - self._checkpoint() - await self.router.set_traffic.remote(endpoint_name, - traffic_policy_dictionary) - - async def create_endpoint(self, route, endpoint, methods): + async def create_endpoint(self, endpoint, traffic_dict, route, methods): """Create a new endpoint with the specified route and methods. If the route is None, this is a "headless" endpoint that will not @@ -537,10 +538,8 @@ class ServeMaster: self.routes[route] = (endpoint, methods) - # NOTE(edoakes): we must write a checkpoint before pushing the - # update to avoid inconsistent state if we crash after pushing the - # update. - self._checkpoint() + # NOTE(edoakes): checkpoint is written in self._set_traffic. + await self._set_traffic(endpoint, traffic_dict) await self.http_proxy.set_route_table.remote(self.routes) async def delete_endpoint(self, endpoint): diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py index 7ab86a2b3..5f524022b 100644 --- a/python/ray/serve/tests/test_api.py +++ b/python/ray/serve/tests/test_api.py @@ -11,7 +11,13 @@ from ray.serve.utils import get_random_letters def test_e2e(serve_instance): serve.init() - serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"]) + + def function(flask_request): + return {"method": flask_request.method} + + serve.create_backend("echo:v1", function) + serve.create_endpoint( + "endpoint", backend="echo:v1", route="/api", methods=["GET", "POST"]) retry_count = 5 timeout_sleep = 0.5 @@ -29,12 +35,6 @@ def test_e2e(serve_instance): assert False, ("Route table hasn't been updated after 3 tries." "The latest error was {}").format(e) - def function(flask_request): - return {"method": flask_request.method} - - serve.create_backend("echo:v1", function) - serve.set_traffic("endpoint", {"echo:v1": 1.0}) - resp = requests.get("http://127.0.0.1:8000/api").json()["method"] assert resp == "GET" @@ -43,14 +43,12 @@ def test_e2e(serve_instance): def test_call_method(serve_instance): - serve.create_endpoint("endpoint", "/api") - class CallMethod: def method(self, request): return "hello" serve.create_backend("backend", CallMethod) - serve.set_traffic("endpoint", {"backend": 1.0}) + serve.create_endpoint("endpoint", backend="backend", route="/api") # Test HTTP path. resp = requests.get( @@ -65,37 +63,46 @@ def test_call_method(serve_instance): def test_no_route(serve_instance): - serve.create_endpoint("noroute-endpoint") - def func(_, i=1): return 1 serve.create_backend("backend:1", func) - serve.set_traffic("noroute-endpoint", {"backend:1": 1.0}) + serve.create_endpoint("noroute-endpoint", backend="backend:1") service_handle = serve.get_handle("noroute-endpoint") result = ray.get(service_handle.remote(i=1)) assert result == 1 def test_reject_duplicate_route(serve_instance): + def f(): + pass + + serve.create_backend("backend", f) + route = "/foo" - serve.create_endpoint("bar", route=route) + serve.create_endpoint("bar", backend="backend", route=route) with pytest.raises(ValueError): - serve.create_endpoint("foo", route=route) + serve.create_endpoint("foo", backend="backend", route=route) def test_reject_duplicate_endpoint(serve_instance): + def f(): + pass + + serve.create_backend("backend", f) + endpoint_name = "foo" - serve.create_endpoint(endpoint_name, route="/ok") + serve.create_endpoint(endpoint_name, backend="backend", route="/ok") with pytest.raises(ValueError): - serve.create_endpoint(endpoint_name, route="/different") + serve.create_endpoint( + endpoint_name, backend="backend", route="/different") def test_set_traffic_missing_data(serve_instance): endpoint_name = "foobar" backend_name = "foo_backend" - serve.create_endpoint(endpoint_name) serve.create_backend(backend_name, lambda: 5) + serve.create_endpoint(endpoint_name, backend=backend_name) with pytest.raises(ValueError): serve.set_traffic(endpoint_name, {"nonexistent_backend": 1.0}) with pytest.raises(ValueError): @@ -111,16 +118,14 @@ def test_scaling_replicas(serve_instance): self.count += 1 return self.count - serve.create_endpoint("counter", "/increment") + serve.create_backend("counter:v1", Counter, config={"num_replicas": 2}) + serve.create_endpoint("counter", backend="counter:v1", route="/increment") # Keep checking the routing table until /increment is populated while "/increment" not in requests.get( "http://127.0.0.1:8000/-/routes").json(): time.sleep(0.2) - serve.create_backend("counter:v1", Counter, config={"num_replicas": 2}) - serve.set_traffic("counter", {"counter:v1": 1.0}) - counter_result = [] for _ in range(10): resp = requests.get("http://127.0.0.1:8000/increment").json() @@ -151,18 +156,17 @@ def test_batching(serve_instance): batch_size = serve.context.batch_size return [self.count] * batch_size - serve.create_endpoint("counter1", "/increment2") + # set the max batch size + serve.create_backend( + "counter:v11", BatchingExample, config={"max_batch_size": 5}) + serve.create_endpoint( + "counter1", backend="counter:v11", route="/increment2") # Keep checking the routing table until /increment is populated while "/increment2" not in requests.get( "http://127.0.0.1:8000/-/routes").json(): time.sleep(0.2) - # set the max batch size - serve.create_backend( - "counter:v11", BatchingExample, config={"max_batch_size": 5}) - serve.set_traffic("counter1", {"counter:v11": 1.0}) - future_list = [] handle = serve.get_handle("counter1") for _ in range(20): @@ -186,11 +190,11 @@ def test_batching_exception(serve_instance): batch_size = serve.context.batch_size return batch_size - serve.create_endpoint("exception-test", "/noListReturned") # set the max batch size serve.create_backend( "exception:v1", NoListReturned, config={"max_batch_size": 5}) - serve.set_traffic("exception-test", {"exception:v1": 1.0}) + serve.create_endpoint( + "exception-test", backend="exception:v1", route="/noListReturned") handle = serve.get_handle("exception-test") with pytest.raises(ray.exceptions.RayTaskError): @@ -207,7 +211,6 @@ def test_updating_config(serve_instance): batch_size = serve.context.batch_size return [1] * batch_size - serve.create_endpoint("bsimple", "/bsimple") serve.create_backend( "bsimple:v1", BatchSimple, @@ -215,6 +218,8 @@ def test_updating_config(serve_instance): "max_batch_size": 2, "num_replicas": 3 }) + serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple") + master_actor = serve.api._get_master_actor() old_replica_tag_list = ray.get( master_actor._list_replicas.remote("bsimple:v1")) @@ -234,13 +239,12 @@ def test_updating_config(serve_instance): def test_delete_backend(serve_instance): - serve.create_endpoint("delete_backend", "/delete-backend") - def function(): return "hello" serve.create_backend("delete:v1", function) - serve.set_traffic("delete_backend", {"delete:v1": 1.0}) + serve.create_endpoint( + "delete_backend", backend="delete:v1", route="/delete-backend") assert requests.get("http://127.0.0.1:8000/delete-backend").text == "hello" @@ -274,18 +278,18 @@ def test_delete_backend(serve_instance): @pytest.mark.parametrize("route", [None, "/delete-endpoint"]) def test_delete_endpoint(serve_instance, route): - endpoint_name = "delete_endpoint" + str(route) - serve.create_endpoint(endpoint_name, route=route) - serve.delete_endpoint(endpoint_name) - - # Check that we can reuse a deleted endpoint name and route. - serve.create_endpoint(endpoint_name, route=route) - def function(): return "hello" - serve.create_backend("delete-endpoint:v1", function) - serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0}) + backend_name = "delete-endpoint:v1" + serve.create_backend(backend_name, function) + + endpoint_name = "delete_endpoint" + str(route) + serve.create_endpoint(endpoint_name, backend=backend_name, route=route) + serve.delete_endpoint(endpoint_name) + + # Check that we can reuse a deleted endpoint name and route. + serve.create_endpoint(endpoint_name, backend=backend_name, route=route) if route is not None: assert requests.get( @@ -296,8 +300,7 @@ def test_delete_endpoint(serve_instance, route): # Check that deleting the endpoint doesn't delete the backend. serve.delete_endpoint(endpoint_name) - serve.create_endpoint(endpoint_name, route=route) - serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0}) + serve.create_endpoint(endpoint_name, backend=backend_name, route=route) if route is not None: assert requests.get( @@ -309,8 +312,6 @@ def test_delete_endpoint(serve_instance, route): @pytest.mark.parametrize("route", [None, "/shard"]) def test_shard_key(serve_instance, route): - serve.create_endpoint("endpoint", route=route) - # Create five backends that return different integers. num_backends = 5 traffic_dict = {} @@ -323,6 +324,8 @@ def test_shard_key(serve_instance, route): traffic_dict[backend_name] = 1.0 / num_backends serve.create_backend(backend_name, function) + serve.create_endpoint( + "endpoint", backend=list(traffic_dict.keys())[0], route=route) serve.set_traffic("endpoint", traffic_dict) def do_request(shard_key): @@ -355,26 +358,24 @@ def test_name(): endpoint = "endpoint" serve.init(name="cluster1", http_port=8001) - serve.create_endpoint(endpoint, route=route) def function(): return "hello1" serve.create_backend(backend, function) - serve.set_traffic(endpoint, {backend: 1.0}) + serve.create_endpoint(endpoint, backend=backend, route=route) assert requests.get("http://127.0.0.1:8001" + route).text == "hello1" # Create a second cluster on port 8002. Create an endpoint and backend with # the same names and check that they don't collide. serve.init(name="cluster2", http_port=8002) - serve.create_endpoint(endpoint, route=route) def function(): return "hello2" serve.create_backend(backend, function) - serve.set_traffic(endpoint, {backend: 1.0}) + serve.create_endpoint(endpoint, backend=backend, route=route) assert requests.get("http://127.0.0.1:8001" + route).text == "hello1" assert requests.get("http://127.0.0.1:8002" + route).text == "hello2" @@ -419,10 +420,9 @@ def test_parallel_start(serve_instance): def __call__(self, _): return "Ready" - serve.create_endpoint("test-parallel") serve.create_backend( "p:v0", LongStartingServable, config={"num_replicas": 2}) - serve.set_traffic("test-parallel", {"p:v0": 1}) + serve.create_endpoint("test-parallel", backend="p:v0") handle = serve.get_handle("test-parallel") ray.get(handle.remote(), timeout=10) @@ -434,17 +434,20 @@ def test_list_endpoints(serve_instance): def f(): pass - serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"]) - serve.create_endpoint("endpoint2", methods=["POST"]) serve.create_backend("backend", f) - serve.set_traffic("endpoint2", {"backend": 1.0}) + serve.create_backend("backend2", f) + serve.create_endpoint( + "endpoint", backend="backend", route="/api", methods=["GET", "POST"]) + serve.create_endpoint("endpoint2", backend="backend2", methods=["POST"]) endpoints = serve.list_endpoints() assert "endpoint" in endpoints assert endpoints["endpoint"] == { "route": "/api", "methods": ["GET", "POST"], - "traffic": {} + "traffic": { + "backend": 1.0 + } } assert "endpoint2" in endpoints @@ -452,7 +455,7 @@ def test_list_endpoints(serve_instance): "route": None, "methods": ["POST"], "traffic": { - "backend": 1.0 + "backend2": 1.0 } } @@ -488,3 +491,19 @@ def test_list_backends(serve_instance): serve.delete_backend("backend2") assert len(serve.list_backends()) == 0 + + +def test_endpoint_input_validation(serve_instance): + serve.init() + + def f(): + pass + + serve.create_backend("backend", f) + with pytest.raises(TypeError): + serve.create_endpoint("endpoint") + with pytest.raises(TypeError): + serve.create_endpoint("endpoint", route="/hello") + with pytest.raises(TypeError): + serve.create_endpoint("endpoint", backend=2) + serve.create_endpoint("endpoint", backend="backend") diff --git a/python/ray/serve/tests/test_failure.py b/python/ray/serve/tests/test_failure.py index 909d82832..21c42fb60 100644 --- a/python/ray/serve/tests/test_failure.py +++ b/python/ray/serve/tests/test_failure.py @@ -21,13 +21,13 @@ def request_with_retries(endpoint, timeout=30): def test_master_failure(serve_instance): serve.init() - serve.create_endpoint("master_failure", "/master_failure") def function(): return "hello1" serve.create_backend("master_failure:v1", function) - serve.set_traffic("master_failure", {"master_failure:v1": 1.0}) + serve.create_endpoint( + "master_failure", backend="master_failure:v1", route="/master_failure") assert request_with_retries("/master_failure", timeout=1).text == "hello1" @@ -56,12 +56,14 @@ def test_master_failure(serve_instance): def function(): return "hello3" - ray.kill(serve.api._get_master_actor(), no_restart=False) - serve.create_endpoint("master_failure_2", "/master_failure_2") ray.kill(serve.api._get_master_actor(), no_restart=False) serve.create_backend("master_failure_2", function) ray.kill(serve.api._get_master_actor(), no_restart=False) - serve.set_traffic("master_failure_2", {"master_failure_2": 1.0}) + serve.create_endpoint( + "master_failure_2", + backend="master_failure_2", + route="/master_failure_2") + ray.kill(serve.api._get_master_actor(), no_restart=False) for _ in range(10): response = request_with_retries("/master_failure", timeout=30) @@ -78,13 +80,13 @@ def _kill_http_proxy(): def test_http_proxy_failure(serve_instance): serve.init() - serve.create_endpoint("proxy_failure", "/proxy_failure") def function(): return "hello1" serve.create_backend("proxy_failure:v1", function) - serve.set_traffic("proxy_failure", {"proxy_failure:v1": 1.0}) + serve.create_endpoint( + "proxy_failure", backend="proxy_failure:v1", route="/proxy_failure") assert request_with_retries("/proxy_failure", timeout=1.0).text == "hello1" @@ -112,13 +114,13 @@ def _kill_router(): def test_router_failure(serve_instance): serve.init() - serve.create_endpoint("router_failure", "/router_failure") def function(): return "hello1" serve.create_backend("router_failure:v1", function) - serve.set_traffic("router_failure", {"router_failure:v1": 1.0}) + serve.create_endpoint( + "router_failure", backend="router_failure:v1", route="/router_failure") assert request_with_retries("/router_failure", timeout=5).text == "hello1" @@ -154,14 +156,14 @@ def _get_worker_handles(backend): # serving requests. def test_worker_restart(serve_instance): serve.init() - serve.create_endpoint("worker_failure", "/worker_failure") class Worker1: def __call__(self): return os.getpid() serve.create_backend("worker_failure:v1", Worker1) - serve.set_traffic("worker_failure", {"worker_failure:v1": 1.0}) + serve.create_endpoint( + "worker_failure", backend="worker_failure:v1", route="/worker_failure") # Get the PID of the worker. old_pid = request_with_retries("/worker_failure", timeout=1).text @@ -186,7 +188,6 @@ def test_worker_restart(serve_instance): def test_worker_replica_failure(serve_instance): serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0 serve.init() - serve.create_endpoint("replica_failure", "/replica_failure") class Worker: # Assumes that two replicas are started. Will hang forever in the @@ -216,7 +217,8 @@ def test_worker_replica_failure(serve_instance): temp_path = tempfile.gettempdir() + "/" + serve.utils.get_random_letters() serve.create_backend("replica_failure", Worker, temp_path) serve.update_backend_config("replica_failure", {"num_replicas": 2}) - serve.set_traffic("replica_failure", {"replica_failure": 1.0}) + serve.create_endpoint( + "replica_failure", backend="replica_failure", route="/replica_failure") # Wait until both replicas have been started. responses = set() diff --git a/python/ray/serve/tests/test_handle.py b/python/ray/serve/tests/test_handle.py index dae876153..1ad0d65e4 100644 --- a/python/ray/serve/tests/test_handle.py +++ b/python/ray/serve/tests/test_handle.py @@ -18,12 +18,18 @@ def test_handle_in_endpoint(serve_instance): def __call__(self): return ray.get(self.handle.remote()) - serve.create_endpoint("endpoint1", "/endpoint1", methods=["GET", "POST"]) serve.create_backend("endpoint1:v0", Endpoint1) - serve.set_traffic("endpoint1", {"endpoint1:v0": 1.0}) + serve.create_endpoint( + "endpoint1", + backend="endpoint1:v0", + route="/endpoint1", + methods=["GET", "POST"]) - serve.create_endpoint("endpoint2", "/endpoint2", methods=["GET", "POST"]) serve.create_backend("endpoint2:v0", Endpoint2) - serve.set_traffic("endpoint2", {"endpoint2:v0": 1.0}) + serve.create_endpoint( + "endpoint2", + backend="endpoint2:v0", + route="/endpoint2", + methods=["GET", "POST"]) assert requests.get("http://127.0.0.1:8000/endpoint2").text == "hello" diff --git a/python/ray/serve/tests/test_metric.py b/python/ray/serve/tests/test_metric.py index 0a04c9858..e5c89f2b7 100644 --- a/python/ray/serve/tests/test_metric.py +++ b/python/ray/serve/tests/test_metric.py @@ -144,8 +144,8 @@ async def test_system_metric_endpoints(serve_instance): def test_error_counter(flask_request): 1 / 0 - serve.create_endpoint("test_metrics", "/measure") serve.create_backend("m:v1", test_error_counter) + serve.create_endpoint("test_metrics", backend="m:v1", route="/measure") serve.set_traffic("test_metrics", {"m:v1": 1}) # Check metrics are exposed under http endpoint diff --git a/python/ray/serve/tests/test_nonblocking.py b/python/ray/serve/tests/test_nonblocking.py index 008695e00..747667af2 100644 --- a/python/ray/serve/tests/test_nonblocking.py +++ b/python/ray/serve/tests/test_nonblocking.py @@ -6,13 +6,13 @@ from ray import serve def test_nonblocking(): serve.init() - serve.create_endpoint("nonblocking", "/nonblocking") def function(flask_request): return {"method": flask_request.method} serve.create_backend("nonblocking:v1", function) - serve.set_traffic("nonblocking", {"nonblocking:v1": 1.0}) + serve.create_endpoint( + "nonblocking", backend="nonblocking:v1", route="/nonblocking") resp = requests.get("http://127.0.0.1:8000/nonblocking").json()["method"] assert resp == "GET" diff --git a/python/ray/serve/tests/test_persistence.py b/python/ray/serve/tests/test_persistence.py index 328a808be..3a873c9a3 100644 --- a/python/ray/serve/tests/test_persistence.py +++ b/python/ray/serve/tests/test_persistence.py @@ -17,9 +17,8 @@ serve.init() def driver(flask_request): return "OK!" -serve.create_endpoint("driver", "/driver") serve.create_backend("driver", driver) -serve.set_traffic("driver", {{"driver": 1.0}}) +serve.create_endpoint("driver", backend="driver", route="/driver") """.format(ray.worker._global_node._redis_address) with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: