From 5d124489a953e97344b7aca06a9cb33d518fbf1b Mon Sep 17 00:00:00 2001
From: Edward Oakes <ed.nmi.oakes@gmail.com>
Date: Sat, 6 Jun 2020 21:10:42 -0500
Subject: [PATCH] [serve] Require backend when creating endpoint (#8764)

---
 doc/source/serve/advanced.rst                 |  19 ++-
 doc/source/serve/deployment.rst               |  27 ++--
 doc/source/serve/key-concepts.rst             |  85 +++++------
 python/ray/serve/api.py                       |  43 +++++-
 python/ray/serve/examples/benchmark.py        |   3 +-
 .../serve/examples/doc/quickstart_class.py    |   3 +-
 .../serve/examples/doc/quickstart_function.py |   3 +-
 .../ray/serve/examples/doc/tutorial_batch.py  |   4 +-
 .../ray/serve/examples/doc/tutorial_deploy.py |   3 +-
 .../serve/examples/doc/tutorial_pytorch.py    |   7 +-
 .../serve/examples/doc/tutorial_sklearn.py    |   3 +-
 .../serve/examples/doc/tutorial_tensorflow.py |   3 +-
 python/ray/serve/examples/echo.py             |   3 +-
 python/ray/serve/examples/echo_actor.py       |   3 +-
 python/ray/serve/examples/echo_actor_batch.py |   3 +-
 python/ray/serve/examples/echo_batching.py    |   3 +-
 python/ray/serve/examples/echo_error.py       |   4 +-
 .../ray/serve/examples/echo_fixed_packing.py  |   8 +-
 python/ray/serve/examples/echo_full.py        |   9 +-
 python/ray/serve/examples/echo_pipeline.py    |  12 +-
 python/ray/serve/examples/echo_round_robin.py |   8 +-
 python/ray/serve/examples/echo_slo_reverse.py |   6 +-
 python/ray/serve/examples/echo_split.py       |   3 +-
 python/ray/serve/master.py                    |  77 +++++-----
 python/ray/serve/tests/test_api.py            | 135 ++++++++++--------
 python/ray/serve/tests/test_failure.py        |  28 ++--
 python/ray/serve/tests/test_handle.py         |  14 +-
 python/ray/serve/tests/test_metric.py         |   2 +-
 python/ray/serve/tests/test_nonblocking.py    |   4 +-
 python/ray/serve/tests/test_persistence.py    |   3 +-
 30 files changed, 272 insertions(+), 256 deletions(-)

diff --git a/doc/source/serve/advanced.rst b/doc/source/serve/advanced.rst
index 499add2e9..f8e4f21f3 100644
--- a/doc/source/serve/advanced.rst
+++ b/doc/source/serve/advanced.rst
@@ -89,11 +89,9 @@ You can also have Ray Serve batch requests for performance. In order to do use t
                   responses.append(request.json())
           return responses
 
-  serve.create_endpoint("counter1", "/increment")
-
   config = {"max_batch_size": 5}
   serve.create_backend("counter1", BatchingExample, config=config)
-  serve.set_traffic("counter1", {"counter1": 1.0})
+  serve.create_endpoint("counter1", backend="counter1", route="/increment")
 
 Please take a look at :ref:`Batching Tutorial<serve-batch-tutorial>` for a deep
 dive.
@@ -109,10 +107,11 @@ When calling ``set_traffic``, you provide a dictionary of backend name to a floa
 For example, here we split traffic 50/50 between two backends:
 
 .. code-block:: python
-
-  serve.create_endpoint("fifty-fifty", "/fifty")
+  
   serve.create_backend("backend1", MyClass1)
   serve.create_backend("backend2", MyClass2)
+
+  serve.create_endpoint("fifty-fifty", backend="backend1", route="/fifty")
   serve.set_traffic("fifty-fifty", {"backend1": 0.5, "backend2": 0.5})
 
 Each request is routed randomly between the backends in the traffic dictionary according to the provided weights.
@@ -125,11 +124,10 @@ A/B Testing
 
 .. code-block:: python
 
-  serve.create_endpoint("ab_endpoint", "/a-b-test")
   serve.create_backend("default_backend", MyClass)
-
+  
   # Initially, set all traffic to be served by the "default" backend.
-  serve.set_traffic("ab_endpoint", {"default_backend": 1.0})
+  serve.create_endpoint("ab_endpoint", backend="default_backend", route="/a-b-test")
 
   # Add a second backend and route 1% of the traffic to it.
   serve.create_backend("new_backend", MyNewClass)
@@ -151,11 +149,10 @@ In the example below, we do this repeatedly in one script, but in practice this
 
 .. code-block:: python
 
-  serve.create_endpoint("incremental_endpoint", "/incremental")
   serve.create_backend("existing_backend", MyClass)
-
+  
   # Initially, all traffic is served by the existing backend.
-  serve.set_traffic("incremental_endpoint", {"existing_backend": 1.0})
+  serve.create_endpoint("incremental_endpoint", backend="existing_backend", route="/incremental")
 
   # Then we can slowly increase the proportion of traffic served by the new backend.
   serve.create_backend("new_backend", MyNewClass)
diff --git a/doc/source/serve/deployment.rst b/doc/source/serve/deployment.rst
index 9a1e9ec55..0a263a7b8 100644
--- a/doc/source/serve/deployment.rst
+++ b/doc/source/serve/deployment.rst
@@ -235,8 +235,7 @@ With the cluster now running, we can run a simple script to start Ray Serve and
         return "hello world"
 
     serve.create_backend("hello_backend", hello)
-    serve.create_endpoint("hello_endpoint", route="/hello")
-    serve.set_traffic("hello_endpoint", {"hello_backend": 1.0})
+    serve.create_endpoint("hello_endpoint", backend="hello_backend", route="/hello")
 
 Save this script locally as ``deploy.py`` and run it on the head node using ``ray submit``:
 
@@ -283,19 +282,17 @@ You can run multiple serve instances on the same Ray cluster by providing a ``na
 
 .. code-block:: python
 
-  # Create a first instance whose HTTP server listens on 8000.
-  serve.init(name="instance1", http_port=8000)
-  serve.create_endpoint("counter1", "/increment")
+  # Create a first cluster whose HTTP server listens on 8000.
+  serve.init(name="cluster1", http_port=8000)
 
-  # Create a second instance whose HTTP server listens on 8001.
-  serve.init(name="instance2", http_port=8001)
-  serve.create_endpoint("counter1", "/increment")
+  # Create a second cluster whose HTTP server listens on 8001.
+  serve.init(name="cluster2", http_port=8001)
 
-  # Create a backend that will be served on the second instance.
-  serve.create_backend("counter1", function)
-  serve.set_traffic("counter1", {"counter1": 1.0})
+  # Create a backend that will be served on the second cluster.
+  serve.create_backend("backend2", function)
+  serve.create_endpoint("endpoint2", backend="backend2", route="/increment")
 
-  # Switch back the the first instance and create the same backend on it.
-  serve.init(name="instance1")
-  serve.create_backend("counter1", function)
-  serve.set_traffic("counter1", {"counter1": 1.0})
+  # Switch back the the first cluster and create the same backend on it.
+  serve.init(name="cluster1")
+  serve.create_backend("backend1", function)
+  serve.create_endpoint("endpoint1", backend="backend1", route="/increment")
diff --git a/doc/source/serve/key-concepts.rst b/doc/source/serve/key-concepts.rst
index 00bbb5878..d28ecabf9 100644
--- a/doc/source/serve/key-concepts.rst
+++ b/doc/source/serve/key-concepts.rst
@@ -2,7 +2,7 @@
 Key Concepts
 ============
 
-Ray Serve focuses on **simplicity** and only has two core concepts: endpoints and backends.
+Ray Serve focuses on **simplicity** and only has two core concepts: backends and endpoints.
 
 To follow along, you'll need to make the necessary imports.
 
@@ -11,51 +11,18 @@ To follow along, you'll need to make the necessary imports.
   from ray import serve
   serve.init() # Initializes Ray and Ray Serve.
 
-.. _serve-endpoint:
-
-Endpoints
-=========
-
-Endpoints allow you to name the "entity" that you'll be exposing, 
-the HTTP path that your application will expose. 
-Endpoints are "logical" and decoupled from the business logic or 
-model that you'll be serving. To create one, we'll simply specify the name, route, and methods.
-
-.. code-block:: python
-
-  serve.create_endpoint("simple_endpoint", "/simple", methods=["GET"])
-
-To view all of the existing endpoints that have created, use `serve.list_endpoints`.
-
-.. code-block:: python
-
-  >>> serve.list_endpoints()
-  {'simple_endpoint': {'route': '/simple', 'methods': ['GET'], 'traffic': {}}}
-
-You can also delete an endpoint using ``serve.delete_endpoint``.
-Endpoints and backends are independent, so deleting an endpoint will not delete its backends.
-However, an endpoint must be deleted in order to delete the backends that serve its traffic.
-
-.. code-block:: python
-
-  serve.delete_endpoint("simple_endpoint")
-
-.. _serve-backend:
 
 Backends
 ========
 
-Backends are the logical structures for your business logic or models and 
-how you specify what should happen when an endpoint is queried.
+Backends define the implementation of your business logic or models that will handle requests when queries come in to :ref:`serve-endpoint`.
 To define a backend, first you must define the "handler" or the business logic you'd like to respond with. 
-The input to this request will be a `Flask Request object <https://flask.palletsprojects.com/en/1.1.x/api/?highlight=request#flask.Request>`_.
-Use a function when your response is stateless and a class when you
-might need to maintain some state (like a model). 
-For both functions and classes (that take as input Flask Requests), you'll need to 
-define them as backends to Ray Serve.
-You can specify arguments to be passed to class constructors in ``serve.create_backend``, shown below.
+The handler should take as input a `Flask Request object <https://flask.palletsprojects.com/en/1.1.x/api/?highlight=request#flask.Request>`_ and return any JSON-serializable object as output.
+A backend is defined using ``serve.create_backend``, and the implementation can be defined as either a function or a class.
+Use a function when your response is stateless and a class when you might need to maintain some state (like a model). 
+When using a class, you can specify arguments to be passed to the constructor in ``serve.create_backend``, shown below.
 
-It's important to note that Ray Serve places these backends in individual worker processes, which are replicas of the model.
+A backend consists of a number of *replicas*, which are individual copies of the function or class that are started in separate worker processes.
 
 .. code-block:: python
   
@@ -91,22 +58,40 @@ Note that a backend cannot be deleted while it is in use by an endpoint because
       'simple_backend_class': {'accepts_batches': False, 'num_replicas': 1, 'max_batch_size': None},
   }
 
-Setting Traffic
-===============
+.. _`serve-endpoint`:
 
-Lastly, we need to route traffic the particular backend to the server endpoint. 
-To do that we'll use the ``set_traffic`` capability.
-A link is essentially a load-balancer and allow you to define queuing policies 
-for how you would like backends to be served via an endpoint.
-For instance, you can route 50% of traffic to Model A and 50% of traffic to Model B.
+Endpoints
+=========
+
+While backends define the implementation of your request handling logic, endpoints allow you to expose them via HTTP.
+Endpoints are "logical" and can have one or multiple backends that serve requests to them
+To create an endpoint, we simply need to specify a name for the endpoint, the name of a backend to handle requests to the endpoint, and the route and methods where it will be accesible.
+By default endpoints are serviced only by the backend provided to ``serve.create_endpoint``, but in some cases you may want to specify multiple backends for an endpoint, e.g., for A/B testing or incremental rollout.
+For information on how to do this, please see :ref:`serve-split-traffic`.
 
 .. code-block:: python
 
-  serve.set_traffic("simple_backend", {"simple_endpoint": 1.0})
+  serve.create_endpoint("simple_endpoint", backend="simple_backend", route="/simple", methods=["GET"])
 
-Once we've done that, we can now query our endpoint via HTTP (we use `requests` to make HTTP calls here).
+After creating the endpoint, it is now exposed by the HTTP server and handles requests using the specified backend.
+We can query the model to verify that it's working.
 
 .. code-block:: python
   
   import requests
-  print(requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).text)
+  print(requests.get("http://127.0.0.1:8000/simple").text)
+
+To view all of the existing endpoints that have created, use `serve.list_endpoints`.
+
+.. code-block:: python
+
+  >>> serve.list_endpoints()
+  {'simple_endpoint': {'route': '/simple', 'methods': ['GET'], 'traffic': {}}}
+
+You can also delete an endpoint using ``serve.delete_endpoint``.
+Endpoints and backends are independent, so deleting an endpoint will not delete its backends.
+However, an endpoint must be deleted in order to delete the backends that serve its traffic.
+
+.. code-block:: python
+
+  serve.delete_endpoint("simple_endpoint")
diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py
index 358837edc..b59388363 100644
--- a/python/ray/serve/api.py
+++ b/python/ray/serve/api.py
@@ -119,18 +119,49 @@ def init(name=None,
 
 
 @_ensure_connected
-def create_endpoint(endpoint_name, route=None, methods=["GET"]):
+def create_endpoint(endpoint_name,
+                    *,
+                    backend=None,
+                    route=None,
+                    methods=["GET"]):
     """Create a service endpoint given route_expression.
 
     Args:
-        endpoint_name (str): A name to associate to the endpoint. It will be
-            used as key to set traffic policy.
-        route (str): A string begin with "/". HTTP server will use
+        endpoint_name (str): A name to associate to with the endpoint.
+        backend (str, required): The backend that will serve requests to
+            this endpoint. To change this or split traffic among backends, use
+            `serve.set_traffic`.
+        route (str, optional): A string begin with "/". HTTP server will use
             the string to match the path.
+        methods(List[str], optional): The HTTP methods that are valid for this
+            endpoint.
     """
+    if backend is None:
+        raise TypeError("backend must be specified when creating "
+                        "an endpoint.")
+    elif not isinstance(backend, str):
+        raise TypeError("backend must be a string, got {}.".format(
+            type(backend)))
+
+    if route is not None:
+        if not isinstance(route, str) or not route.startswith("/"):
+            raise TypeError("route must be a string starting with '/'.")
+
+    if not isinstance(methods, list):
+        raise TypeError(
+            "methods must be a list of strings, but got type {}".format(
+                type(methods)))
+
+    upper_methods = []
+    for method in methods:
+        if not isinstance(method, str):
+            raise TypeError("methods must be a list of strings, but contained "
+                            "an element of type {}".format(type(method)))
+        upper_methods.append(method.upper())
+
     ray.get(
-        master_actor.create_endpoint.remote(route, endpoint_name,
-                                            [m.upper() for m in methods]))
+        master_actor.create_endpoint.remote(endpoint_name, {backend: 1.0},
+                                            route, upper_methods))
 
 
 @_ensure_connected
diff --git a/python/ray/serve/examples/benchmark.py b/python/ray/serve/examples/benchmark.py
index cf34c4d9d..3c70cef10 100644
--- a/python/ray/serve/examples/benchmark.py
+++ b/python/ray/serve/examples/benchmark.py
@@ -12,9 +12,8 @@ def noop(_):
     return ""
 
 
-serve.create_endpoint("noop", "/noop")
 serve.create_backend("noop", noop)
-serve.set_traffic("noop", {"noop": 1.0})
+serve.create_endpoint("noop", backend="noop", route="/noop")
 
 url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)
 while requests.get(url).status_code == 404:
diff --git a/python/ray/serve/examples/doc/quickstart_class.py b/python/ray/serve/examples/doc/quickstart_class.py
index 5160ade01..5983bba13 100644
--- a/python/ray/serve/examples/doc/quickstart_class.py
+++ b/python/ray/serve/examples/doc/quickstart_class.py
@@ -12,9 +12,8 @@ class Counter:
         return {"current_counter": self.count}
 
 
-serve.create_endpoint("counter", "/counter")
 serve.create_backend("counter", Counter)
-serve.set_traffic("counter", {"counter": 1.0})
+serve.create_endpoint("counter", backend="counter", route="/counter")
 
 requests.get("http://127.0.0.1:8000/counter").json()
 # > {"current_counter": self.count}
diff --git a/python/ray/serve/examples/doc/quickstart_function.py b/python/ray/serve/examples/doc/quickstart_function.py
index d2f81d632..d1732f682 100644
--- a/python/ray/serve/examples/doc/quickstart_function.py
+++ b/python/ray/serve/examples/doc/quickstart_function.py
@@ -8,9 +8,8 @@ def echo(flask_request):
     return "hello " + flask_request.args.get("name", "serve!")
 
 
-serve.create_endpoint("hello", "/hello")
 serve.create_backend("hello", echo)
-serve.set_traffic("hello", {"hello": 1.0})
+serve.create_endpoint("hello", backend="hello", route="/hello")
 
 requests.get("http://127.0.0.1:8000/hello").text
 # > "hello serve!"
diff --git a/python/ray/serve/examples/doc/tutorial_batch.py b/python/ray/serve/examples/doc/tutorial_batch.py
index 1bfaa6e64..c703c8011 100644
--- a/python/ray/serve/examples/doc/tutorial_batch.py
+++ b/python/ray/serve/examples/doc/tutorial_batch.py
@@ -30,9 +30,9 @@ def batch_adder_v0(flask_requests: List):
 
 # __doc_deploy_begin__
 serve.init()
-serve.create_endpoint("adder", "/adder", methods=["GET"])
 serve.create_backend("adder:v0", batch_adder_v0, config={"max_batch_size": 4})
-serve.set_traffic("adder", {"adder:v0": 1})
+serve.create_endpoint(
+    "adder", backend="adder:v0", route="/adder", methods=["GET"])
 # __doc_deploy_end__
 
 
diff --git a/python/ray/serve/examples/doc/tutorial_deploy.py b/python/ray/serve/examples/doc/tutorial_deploy.py
index 06960ed4d..6ba46f40a 100644
--- a/python/ray/serve/examples/doc/tutorial_deploy.py
+++ b/python/ray/serve/examples/doc/tutorial_deploy.py
@@ -70,9 +70,8 @@ ray.init(address="auto")
 
 # listen on 0.0.0.0 to make the HTTP server accessible from other machines.
 serve.init(http_host="0.0.0.0")
-serve.create_endpoint("iris_classifier", "/regressor")
 serve.create_backend("lr:v1", BoostingModel)
-serve.set_traffic("iris_classifier", {"lr:v1": 1, "version": "v1"})
+serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor")
 # __doc_create_deploy_end__
 
 # __doc_query_begin__
diff --git a/python/ray/serve/examples/doc/tutorial_pytorch.py b/python/ray/serve/examples/doc/tutorial_pytorch.py
index a55bc92ae..106eaf0ed 100644
--- a/python/ray/serve/examples/doc/tutorial_pytorch.py
+++ b/python/ray/serve/examples/doc/tutorial_pytorch.py
@@ -45,9 +45,12 @@ class ImageModel:
 
 # __doc_deploy_begin__
 serve.init()
-serve.create_endpoint("predictor", "/image_predict", methods=["POST"])
 serve.create_backend("resnet18:v0", ImageModel)
-serve.set_traffic("predictor", {"resnet18:v0": 1})
+serve.create_endpoint(
+    "predictor",
+    backend="resnet18:v0",
+    route="/image_predict",
+    methods=["POST"])
 # __doc_deploy_end__
 
 # __doc_query_begin__
diff --git a/python/ray/serve/examples/doc/tutorial_sklearn.py b/python/ray/serve/examples/doc/tutorial_sklearn.py
index 57c812cdd..a0c401586 100644
--- a/python/ray/serve/examples/doc/tutorial_sklearn.py
+++ b/python/ray/serve/examples/doc/tutorial_sklearn.py
@@ -66,9 +66,8 @@ class BoostingModel:
 
 # __doc_deploy_begin__
 serve.init()
-serve.create_endpoint("iris_classifier", "/regressor")
 serve.create_backend("lr:v1", BoostingModel)
-serve.set_traffic("iris_classifier", {"lr:v1": 1})
+serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor")
 # __doc_deploy_end__
 
 # __doc_query_begin__
diff --git a/python/ray/serve/examples/doc/tutorial_tensorflow.py b/python/ray/serve/examples/doc/tutorial_tensorflow.py
index 1dd800b7b..7c20aa9fd 100644
--- a/python/ray/serve/examples/doc/tutorial_tensorflow.py
+++ b/python/ray/serve/examples/doc/tutorial_tensorflow.py
@@ -69,9 +69,8 @@ class TFMnistModel:
 
 # __doc_deploy_begin__
 serve.init()
-serve.create_endpoint(endpoint_name="tf_classifier", route="/mnist")
 serve.create_backend("tf:v1", TFMnistModel, "/tmp/mnist_model.h5")
-serve.set_traffic("tf_classifier", {"tf:v1": 1})
+serve.create_endpoint("tf_classifier", backend="tf:v1", route="/mnist")
 # __doc_deploy_end__
 
 # __doc_query_begin__
diff --git a/python/ray/serve/examples/echo.py b/python/ray/serve/examples/echo.py
index 73c326099..f722b63dd 100644
--- a/python/ray/serve/examples/echo.py
+++ b/python/ray/serve/examples/echo.py
@@ -16,9 +16,8 @@ def echo(flask_request):
 
 serve.init()
 
-serve.create_endpoint("my_endpoint", "/echo")
 serve.create_backend("echo:v1", echo)
-serve.set_traffic("my_endpoint", {"echo:v1": 1.0})
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 
 while True:
     resp = requests.get("http://127.0.0.1:8000/echo").json()
diff --git a/python/ray/serve/examples/echo_actor.py b/python/ray/serve/examples/echo_actor.py
index dadf8628c..a919d962e 100644
--- a/python/ray/serve/examples/echo_actor.py
+++ b/python/ray/serve/examples/echo_actor.py
@@ -25,9 +25,8 @@ class MagicCounter:
 
 
 serve.init()
-serve.create_endpoint("magic_counter", "/counter")
 serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
-serve.set_traffic("magic_counter", {"counter:v1": 1.0})
+serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 
 print("Sending ten queries via HTTP")
 for i in range(10):
diff --git a/python/ray/serve/examples/echo_actor_batch.py b/python/ray/serve/examples/echo_actor_batch.py
index cad989eb1..5173a7c84 100644
--- a/python/ray/serve/examples/echo_actor_batch.py
+++ b/python/ray/serve/examples/echo_actor_batch.py
@@ -36,11 +36,10 @@ class MagicCounter:
 
 
 serve.init()
-serve.create_endpoint("magic_counter", "/counter")
 serve.create_backend(
     "counter:v1", MagicCounter, 42,
     config={"max_batch_size": 5})  # increment=42
-serve.set_traffic("magic_counter", {"counter:v1": 1.0})
+serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 
 print("Sending ten queries via HTTP")
 for i in range(10):
diff --git a/python/ray/serve/examples/echo_batching.py b/python/ray/serve/examples/echo_batching.py
index a42fb0fb5..9220a93d7 100644
--- a/python/ray/serve/examples/echo_batching.py
+++ b/python/ray/serve/examples/echo_batching.py
@@ -27,14 +27,13 @@ class MagicCounter:
 
 
 serve.init()
-serve.create_endpoint("magic_counter", "/counter")
 # specify max_batch_size in BackendConfig
 backend_config = {"max_batch_size": 5}
 serve.create_backend(
     "counter:v1", MagicCounter, 42, config=backend_config)  # increment=42
 print("Backend Config for backend: 'counter:v1'")
 print(backend_config)
-serve.set_traffic("magic_counter", {"counter:v1": 1.0})
+serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 
 handle = serve.get_handle("magic_counter")
 future_list = []
diff --git a/python/ray/serve/examples/echo_error.py b/python/ray/serve/examples/echo_error.py
index dd4671685..670a0fb99 100644
--- a/python/ray/serve/examples/echo_error.py
+++ b/python/ray/serve/examples/echo_error.py
@@ -28,9 +28,7 @@ def echo(_):
 
 serve.init()
 
-serve.create_endpoint("my_endpoint", "/echo")
-serve.create_backend("echo:v1", echo)
-serve.set_traffic("my_endpoint", {"echo:v1": 1.0})
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 
 for _ in range(2):
     resp = requests.get("http://127.0.0.1:8000/echo").json()
diff --git a/python/ray/serve/examples/echo_fixed_packing.py b/python/ray/serve/examples/echo_fixed_packing.py
index d2e96d54f..7c294c8c7 100644
--- a/python/ray/serve/examples/echo_fixed_packing.py
+++ b/python/ray/serve/examples/echo_fixed_packing.py
@@ -27,16 +27,16 @@ serve.init(
     queueing_policy=serve.RoutePolicy.FixedPacking,
     policy_kwargs={"packing_num": 5})
 
-# create a service
-serve.create_endpoint("my_endpoint", "/echo")
-
 # create first backend
 serve.create_backend("echo:v1", echo_v1)
 
+# create service backed by the first backend
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
+
 # create second backend
 serve.create_backend("echo:v2", echo_v2)
 
-# link and split the service to two backends
+# split the service between the two backends
 serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
 
 while True:
diff --git a/python/ray/serve/examples/echo_full.py b/python/ray/serve/examples/echo_full.py
index 36f5fb11c..84d6cf60d 100644
--- a/python/ray/serve/examples/echo_full.py
+++ b/python/ray/serve/examples/echo_full.py
@@ -13,9 +13,6 @@ from ray.serve.utils import pformat_color_json
 # initialize ray serve system.
 serve.init()
 
-# an endpoint is associated with an http URL.
-serve.create_endpoint("my_endpoint", "/echo")
-
 
 # a backend can be a function or class.
 # it can be made to be invoked from web as well as python.
@@ -27,9 +24,9 @@ def echo_v1(flask_request, response="hello from python!"):
 
 serve.create_backend("echo:v1", echo_v1)
 
-# We can link an endpoint to a backend, the means all the traffic
-# goes to my_endpoint will now goes to echo:v1 backend.
-serve.set_traffic("my_endpoint", {"echo:v1": 1.0})
+# An endpoint is associated with an HTTP path and traffic to the endpoint
+# will be serviced by the echo:v1 backend.
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 
 print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)
 # The service will be reachable from http
diff --git a/python/ray/serve/examples/echo_pipeline.py b/python/ray/serve/examples/echo_pipeline.py
index 40582ae9a..24f2970fa 100644
--- a/python/ray/serve/examples/echo_pipeline.py
+++ b/python/ray/serve/examples/echo_pipeline.py
@@ -15,36 +15,32 @@ def echo_v1(_, response="hello from python!"):
     return f"echo_v1({response})"
 
 
-serve.create_endpoint("echo_v1", "/echo_v1")
 serve.create_backend("echo_v1", echo_v1)
-serve.set_traffic("echo_v1", {"echo_v1": 1.0})
+serve.create_endpoint("echo_v1", backend="echo_v1", route="/echo_v1")
 
 
 def echo_v2(_, relay=""):
     return f"echo_v2({relay})"
 
 
-serve.create_endpoint("echo_v2", "/echo_v2")
 serve.create_backend("echo_v2", echo_v2)
-serve.set_traffic("echo_v2", {"echo_v2": 1.0})
+serve.create_endpoint("echo_v2", backend="echo_v2", route="/echo_v2")
 
 
 def echo_v3(_, relay=""):
     return f"echo_v3({relay})"
 
 
-serve.create_endpoint("echo_v3", "/echo_v3")
 serve.create_backend("echo_v3", echo_v3)
-serve.set_traffic("echo_v3", {"echo_v3": 1.0})
+serve.create_endpoint("echo_v3", backend="echo_v3", route="/echo_v3")
 
 
 def echo_v4(_, relay1="", relay2=""):
     return f"echo_v4({relay1} , {relay2})"
 
 
-serve.create_endpoint("echo_v4", "/echo_v4")
 serve.create_backend("echo_v4", echo_v4)
-serve.set_traffic("echo_v4", {"echo_v4": 1.0})
+serve.create_endpoint("echo_v4", backend="echo_v4", route="/echo_v4")
 """
 The pipeline created is as follows -
             "my_endpoint1"
diff --git a/python/ray/serve/examples/echo_round_robin.py b/python/ray/serve/examples/echo_round_robin.py
index 1dabeb4e1..4dc376ae4 100644
--- a/python/ray/serve/examples/echo_round_robin.py
+++ b/python/ray/serve/examples/echo_round_robin.py
@@ -21,16 +21,16 @@ def echo_v2(_):
 # specify the router policy as RoundRobin
 serve.init(queueing_policy=serve.RoutePolicy.RoundRobin)
 
-# create a service
-serve.create_endpoint("my_endpoint", "/echo")
-
 # create first backend
 serve.create_backend("echo:v1", echo_v1)
 
+# create a service backend by the first backend
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
+
 # create second backend
 serve.create_backend("echo:v2", echo_v2)
 
-# link and split the service to two backends
+# split the service between the two backends
 serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
 
 while True:
diff --git a/python/ray/serve/examples/echo_slo_reverse.py b/python/ray/serve/examples/echo_slo_reverse.py
index f0bf33ab7..098ac5ddd 100644
--- a/python/ray/serve/examples/echo_slo_reverse.py
+++ b/python/ray/serve/examples/echo_slo_reverse.py
@@ -12,9 +12,6 @@ import ray.serve as serve
 # initialize ray serve system.
 serve.init()
 
-# an endpoint is associated with an http URL.
-serve.create_endpoint("my_endpoint", "/echo")
-
 
 # a backend can be a function or class.
 # it can be made to be invoked from web as well as python.
@@ -25,7 +22,8 @@ def echo_v1(flask_request, response="hello from python!"):
 
 
 serve.create_backend("echo:v1", echo_v1)
-serve.set_traffic("my_endpoint", {"echo:v1": 1.0})
+
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 
 # wait for routing table to get populated
 time.sleep(2)
diff --git a/python/ray/serve/examples/echo_split.py b/python/ray/serve/examples/echo_split.py
index 66beb8211..8de676525 100644
--- a/python/ray/serve/examples/echo_split.py
+++ b/python/ray/serve/examples/echo_split.py
@@ -20,9 +20,8 @@ def echo_v2(_):
 
 serve.init()
 
-serve.create_endpoint("my_endpoint", "/echo")
 serve.create_backend("echo:v1", echo_v1)
-serve.set_traffic("my_endpoint", {"echo:v1": 1.0})
+serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 
 for _ in range(3):
     resp = requests.get("http://127.0.0.1:8000/echo").json()
diff --git a/python/ray/serve/master.py b/python/ray/serve/master.py
index 63598be05..1ded64026 100644
--- a/python/ray/serve/master.py
+++ b/python/ray/serve/master.py
@@ -465,44 +465,45 @@ class ServeMaster:
             }
         return endpoints
 
-    async def set_traffic(self, endpoint_name, traffic_policy_dictionary):
+    async def _set_traffic(self, endpoint_name, traffic_dict):
+        if endpoint_name not in self.get_all_endpoints():
+            raise ValueError("Attempted to assign traffic for an endpoint '{}'"
+                             " that is not registered.".format(endpoint_name))
+
+        assert isinstance(traffic_dict,
+                          dict), "Traffic policy must be dictionary"
+        prob = 0
+        for backend, weight in traffic_dict.items():
+            if weight < 0:
+                raise ValueError(
+                    "Attempted to assign a weight of {} to backend '{}'. "
+                    "Weights cannot be negative.".format(weight, backend))
+            prob += weight
+            if backend not in self.backends:
+                raise ValueError(
+                    "Attempted to assign traffic to a backend '{}' that "
+                    "is not registered.".format(backend))
+
+        # These weights will later be plugged into np.random.choice, which
+        # uses a tolerance of 1e-8.
+        assert np.isclose(
+            prob, 1, atol=1e-8
+        ), "weights must sum to 1, currently they sum to {}".format(prob)
+
+        self.traffic_policies[endpoint_name] = traffic_dict
+
+        # NOTE(edoakes): we must write a checkpoint before pushing the
+        # update to avoid inconsistent state if we crash after pushing the
+        # update.
+        self._checkpoint()
+        await self.router.set_traffic.remote(endpoint_name, traffic_dict)
+
+    async def set_traffic(self, endpoint_name, traffic_dict):
         """Sets the traffic policy for the specified endpoint."""
         async with self.write_lock:
-            if endpoint_name not in self.get_all_endpoints():
-                raise ValueError(
-                    "Attempted to assign traffic for an endpoint '{}'"
-                    " that is not registered.".format(endpoint_name))
+            await self._set_traffic(endpoint_name, traffic_dict)
 
-            assert isinstance(traffic_policy_dictionary,
-                              dict), "Traffic policy must be dictionary"
-            prob = 0
-            for backend, weight in traffic_policy_dictionary.items():
-                if weight < 0:
-                    raise ValueError(
-                        "Attempted to assign a weight of {} to backend '{}'. "
-                        "Weights cannot be negative.".format(weight, backend))
-                prob += weight
-                if backend not in self.backends:
-                    raise ValueError(
-                        "Attempted to assign traffic to a backend '{}' that "
-                        "is not registered.".format(backend))
-
-            # These weights will later be plugged into np.random.choice, which
-            # uses a tolerance of 1e-8.
-            assert np.isclose(
-                prob, 1, atol=1e-8
-            ), "weights must sum to 1, currently they sum to {}".format(prob)
-
-            self.traffic_policies[endpoint_name] = traffic_policy_dictionary
-
-            # NOTE(edoakes): we must write a checkpoint before pushing the
-            # update to avoid inconsistent state if we crash after pushing the
-            # update.
-            self._checkpoint()
-            await self.router.set_traffic.remote(endpoint_name,
-                                                 traffic_policy_dictionary)
-
-    async def create_endpoint(self, route, endpoint, methods):
+    async def create_endpoint(self, endpoint, traffic_dict, route, methods):
         """Create a new endpoint with the specified route and methods.
 
         If the route is None, this is a "headless" endpoint that will not
@@ -537,10 +538,8 @@ class ServeMaster:
 
             self.routes[route] = (endpoint, methods)
 
-            # NOTE(edoakes): we must write a checkpoint before pushing the
-            # update to avoid inconsistent state if we crash after pushing the
-            # update.
-            self._checkpoint()
+            # NOTE(edoakes): checkpoint is written in self._set_traffic.
+            await self._set_traffic(endpoint, traffic_dict)
             await self.http_proxy.set_route_table.remote(self.routes)
 
     async def delete_endpoint(self, endpoint):
diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py
index 7ab86a2b3..5f524022b 100644
--- a/python/ray/serve/tests/test_api.py
+++ b/python/ray/serve/tests/test_api.py
@@ -11,7 +11,13 @@ from ray.serve.utils import get_random_letters
 
 def test_e2e(serve_instance):
     serve.init()
-    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
+
+    def function(flask_request):
+        return {"method": flask_request.method}
+
+    serve.create_backend("echo:v1", function)
+    serve.create_endpoint(
+        "endpoint", backend="echo:v1", route="/api", methods=["GET", "POST"])
 
     retry_count = 5
     timeout_sleep = 0.5
@@ -29,12 +35,6 @@ def test_e2e(serve_instance):
                 assert False, ("Route table hasn't been updated after 3 tries."
                                "The latest error was {}").format(e)
 
-    def function(flask_request):
-        return {"method": flask_request.method}
-
-    serve.create_backend("echo:v1", function)
-    serve.set_traffic("endpoint", {"echo:v1": 1.0})
-
     resp = requests.get("http://127.0.0.1:8000/api").json()["method"]
     assert resp == "GET"
 
@@ -43,14 +43,12 @@ def test_e2e(serve_instance):
 
 
 def test_call_method(serve_instance):
-    serve.create_endpoint("endpoint", "/api")
-
     class CallMethod:
         def method(self, request):
             return "hello"
 
     serve.create_backend("backend", CallMethod)
-    serve.set_traffic("endpoint", {"backend": 1.0})
+    serve.create_endpoint("endpoint", backend="backend", route="/api")
 
     # Test HTTP path.
     resp = requests.get(
@@ -65,37 +63,46 @@ def test_call_method(serve_instance):
 
 
 def test_no_route(serve_instance):
-    serve.create_endpoint("noroute-endpoint")
-
     def func(_, i=1):
         return 1
 
     serve.create_backend("backend:1", func)
-    serve.set_traffic("noroute-endpoint", {"backend:1": 1.0})
+    serve.create_endpoint("noroute-endpoint", backend="backend:1")
     service_handle = serve.get_handle("noroute-endpoint")
     result = ray.get(service_handle.remote(i=1))
     assert result == 1
 
 
 def test_reject_duplicate_route(serve_instance):
+    def f():
+        pass
+
+    serve.create_backend("backend", f)
+
     route = "/foo"
-    serve.create_endpoint("bar", route=route)
+    serve.create_endpoint("bar", backend="backend", route=route)
     with pytest.raises(ValueError):
-        serve.create_endpoint("foo", route=route)
+        serve.create_endpoint("foo", backend="backend", route=route)
 
 
 def test_reject_duplicate_endpoint(serve_instance):
+    def f():
+        pass
+
+    serve.create_backend("backend", f)
+
     endpoint_name = "foo"
-    serve.create_endpoint(endpoint_name, route="/ok")
+    serve.create_endpoint(endpoint_name, backend="backend", route="/ok")
     with pytest.raises(ValueError):
-        serve.create_endpoint(endpoint_name, route="/different")
+        serve.create_endpoint(
+            endpoint_name, backend="backend", route="/different")
 
 
 def test_set_traffic_missing_data(serve_instance):
     endpoint_name = "foobar"
     backend_name = "foo_backend"
-    serve.create_endpoint(endpoint_name)
     serve.create_backend(backend_name, lambda: 5)
+    serve.create_endpoint(endpoint_name, backend=backend_name)
     with pytest.raises(ValueError):
         serve.set_traffic(endpoint_name, {"nonexistent_backend": 1.0})
     with pytest.raises(ValueError):
@@ -111,16 +118,14 @@ def test_scaling_replicas(serve_instance):
             self.count += 1
             return self.count
 
-    serve.create_endpoint("counter", "/increment")
+    serve.create_backend("counter:v1", Counter, config={"num_replicas": 2})
+    serve.create_endpoint("counter", backend="counter:v1", route="/increment")
 
     # Keep checking the routing table until /increment is populated
     while "/increment" not in requests.get(
             "http://127.0.0.1:8000/-/routes").json():
         time.sleep(0.2)
 
-    serve.create_backend("counter:v1", Counter, config={"num_replicas": 2})
-    serve.set_traffic("counter", {"counter:v1": 1.0})
-
     counter_result = []
     for _ in range(10):
         resp = requests.get("http://127.0.0.1:8000/increment").json()
@@ -151,18 +156,17 @@ def test_batching(serve_instance):
             batch_size = serve.context.batch_size
             return [self.count] * batch_size
 
-    serve.create_endpoint("counter1", "/increment2")
+    # set the max batch size
+    serve.create_backend(
+        "counter:v11", BatchingExample, config={"max_batch_size": 5})
+    serve.create_endpoint(
+        "counter1", backend="counter:v11", route="/increment2")
 
     # Keep checking the routing table until /increment is populated
     while "/increment2" not in requests.get(
             "http://127.0.0.1:8000/-/routes").json():
         time.sleep(0.2)
 
-    # set the max batch size
-    serve.create_backend(
-        "counter:v11", BatchingExample, config={"max_batch_size": 5})
-    serve.set_traffic("counter1", {"counter:v11": 1.0})
-
     future_list = []
     handle = serve.get_handle("counter1")
     for _ in range(20):
@@ -186,11 +190,11 @@ def test_batching_exception(serve_instance):
             batch_size = serve.context.batch_size
             return batch_size
 
-    serve.create_endpoint("exception-test", "/noListReturned")
     # set the max batch size
     serve.create_backend(
         "exception:v1", NoListReturned, config={"max_batch_size": 5})
-    serve.set_traffic("exception-test", {"exception:v1": 1.0})
+    serve.create_endpoint(
+        "exception-test", backend="exception:v1", route="/noListReturned")
 
     handle = serve.get_handle("exception-test")
     with pytest.raises(ray.exceptions.RayTaskError):
@@ -207,7 +211,6 @@ def test_updating_config(serve_instance):
             batch_size = serve.context.batch_size
             return [1] * batch_size
 
-    serve.create_endpoint("bsimple", "/bsimple")
     serve.create_backend(
         "bsimple:v1",
         BatchSimple,
@@ -215,6 +218,8 @@ def test_updating_config(serve_instance):
             "max_batch_size": 2,
             "num_replicas": 3
         })
+    serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")
+
     master_actor = serve.api._get_master_actor()
     old_replica_tag_list = ray.get(
         master_actor._list_replicas.remote("bsimple:v1"))
@@ -234,13 +239,12 @@ def test_updating_config(serve_instance):
 
 
 def test_delete_backend(serve_instance):
-    serve.create_endpoint("delete_backend", "/delete-backend")
-
     def function():
         return "hello"
 
     serve.create_backend("delete:v1", function)
-    serve.set_traffic("delete_backend", {"delete:v1": 1.0})
+    serve.create_endpoint(
+        "delete_backend", backend="delete:v1", route="/delete-backend")
 
     assert requests.get("http://127.0.0.1:8000/delete-backend").text == "hello"
 
@@ -274,18 +278,18 @@ def test_delete_backend(serve_instance):
 
 @pytest.mark.parametrize("route", [None, "/delete-endpoint"])
 def test_delete_endpoint(serve_instance, route):
-    endpoint_name = "delete_endpoint" + str(route)
-    serve.create_endpoint(endpoint_name, route=route)
-    serve.delete_endpoint(endpoint_name)
-
-    # Check that we can reuse a deleted endpoint name and route.
-    serve.create_endpoint(endpoint_name, route=route)
-
     def function():
         return "hello"
 
-    serve.create_backend("delete-endpoint:v1", function)
-    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})
+    backend_name = "delete-endpoint:v1"
+    serve.create_backend(backend_name, function)
+
+    endpoint_name = "delete_endpoint" + str(route)
+    serve.create_endpoint(endpoint_name, backend=backend_name, route=route)
+    serve.delete_endpoint(endpoint_name)
+
+    # Check that we can reuse a deleted endpoint name and route.
+    serve.create_endpoint(endpoint_name, backend=backend_name, route=route)
 
     if route is not None:
         assert requests.get(
@@ -296,8 +300,7 @@ def test_delete_endpoint(serve_instance, route):
 
     # Check that deleting the endpoint doesn't delete the backend.
     serve.delete_endpoint(endpoint_name)
-    serve.create_endpoint(endpoint_name, route=route)
-    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})
+    serve.create_endpoint(endpoint_name, backend=backend_name, route=route)
 
     if route is not None:
         assert requests.get(
@@ -309,8 +312,6 @@ def test_delete_endpoint(serve_instance, route):
 
 @pytest.mark.parametrize("route", [None, "/shard"])
 def test_shard_key(serve_instance, route):
-    serve.create_endpoint("endpoint", route=route)
-
     # Create five backends that return different integers.
     num_backends = 5
     traffic_dict = {}
@@ -323,6 +324,8 @@ def test_shard_key(serve_instance, route):
         traffic_dict[backend_name] = 1.0 / num_backends
         serve.create_backend(backend_name, function)
 
+    serve.create_endpoint(
+        "endpoint", backend=list(traffic_dict.keys())[0], route=route)
     serve.set_traffic("endpoint", traffic_dict)
 
     def do_request(shard_key):
@@ -355,26 +358,24 @@ def test_name():
     endpoint = "endpoint"
 
     serve.init(name="cluster1", http_port=8001)
-    serve.create_endpoint(endpoint, route=route)
 
     def function():
         return "hello1"
 
     serve.create_backend(backend, function)
-    serve.set_traffic(endpoint, {backend: 1.0})
+    serve.create_endpoint(endpoint, backend=backend, route=route)
 
     assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"
 
     # Create a second cluster on port 8002. Create an endpoint and backend with
     # the same names and check that they don't collide.
     serve.init(name="cluster2", http_port=8002)
-    serve.create_endpoint(endpoint, route=route)
 
     def function():
         return "hello2"
 
     serve.create_backend(backend, function)
-    serve.set_traffic(endpoint, {backend: 1.0})
+    serve.create_endpoint(endpoint, backend=backend, route=route)
 
     assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"
     assert requests.get("http://127.0.0.1:8002" + route).text == "hello2"
@@ -419,10 +420,9 @@ def test_parallel_start(serve_instance):
         def __call__(self, _):
             return "Ready"
 
-    serve.create_endpoint("test-parallel")
     serve.create_backend(
         "p:v0", LongStartingServable, config={"num_replicas": 2})
-    serve.set_traffic("test-parallel", {"p:v0": 1})
+    serve.create_endpoint("test-parallel", backend="p:v0")
     handle = serve.get_handle("test-parallel")
 
     ray.get(handle.remote(), timeout=10)
@@ -434,17 +434,20 @@ def test_list_endpoints(serve_instance):
     def f():
         pass
 
-    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
-    serve.create_endpoint("endpoint2", methods=["POST"])
     serve.create_backend("backend", f)
-    serve.set_traffic("endpoint2", {"backend": 1.0})
+    serve.create_backend("backend2", f)
+    serve.create_endpoint(
+        "endpoint", backend="backend", route="/api", methods=["GET", "POST"])
+    serve.create_endpoint("endpoint2", backend="backend2", methods=["POST"])
 
     endpoints = serve.list_endpoints()
     assert "endpoint" in endpoints
     assert endpoints["endpoint"] == {
         "route": "/api",
         "methods": ["GET", "POST"],
-        "traffic": {}
+        "traffic": {
+            "backend": 1.0
+        }
     }
 
     assert "endpoint2" in endpoints
@@ -452,7 +455,7 @@ def test_list_endpoints(serve_instance):
         "route": None,
         "methods": ["POST"],
         "traffic": {
-            "backend": 1.0
+            "backend2": 1.0
         }
     }
 
@@ -488,3 +491,19 @@ def test_list_backends(serve_instance):
 
     serve.delete_backend("backend2")
     assert len(serve.list_backends()) == 0
+
+
+def test_endpoint_input_validation(serve_instance):
+    serve.init()
+
+    def f():
+        pass
+
+    serve.create_backend("backend", f)
+    with pytest.raises(TypeError):
+        serve.create_endpoint("endpoint")
+    with pytest.raises(TypeError):
+        serve.create_endpoint("endpoint", route="/hello")
+    with pytest.raises(TypeError):
+        serve.create_endpoint("endpoint", backend=2)
+    serve.create_endpoint("endpoint", backend="backend")
diff --git a/python/ray/serve/tests/test_failure.py b/python/ray/serve/tests/test_failure.py
index 909d82832..21c42fb60 100644
--- a/python/ray/serve/tests/test_failure.py
+++ b/python/ray/serve/tests/test_failure.py
@@ -21,13 +21,13 @@ def request_with_retries(endpoint, timeout=30):
 
 def test_master_failure(serve_instance):
     serve.init()
-    serve.create_endpoint("master_failure", "/master_failure")
 
     def function():
         return "hello1"
 
     serve.create_backend("master_failure:v1", function)
-    serve.set_traffic("master_failure", {"master_failure:v1": 1.0})
+    serve.create_endpoint(
+        "master_failure", backend="master_failure:v1", route="/master_failure")
 
     assert request_with_retries("/master_failure", timeout=1).text == "hello1"
 
@@ -56,12 +56,14 @@ def test_master_failure(serve_instance):
     def function():
         return "hello3"
 
-    ray.kill(serve.api._get_master_actor(), no_restart=False)
-    serve.create_endpoint("master_failure_2", "/master_failure_2")
     ray.kill(serve.api._get_master_actor(), no_restart=False)
     serve.create_backend("master_failure_2", function)
     ray.kill(serve.api._get_master_actor(), no_restart=False)
-    serve.set_traffic("master_failure_2", {"master_failure_2": 1.0})
+    serve.create_endpoint(
+        "master_failure_2",
+        backend="master_failure_2",
+        route="/master_failure_2")
+    ray.kill(serve.api._get_master_actor(), no_restart=False)
 
     for _ in range(10):
         response = request_with_retries("/master_failure", timeout=30)
@@ -78,13 +80,13 @@ def _kill_http_proxy():
 
 def test_http_proxy_failure(serve_instance):
     serve.init()
-    serve.create_endpoint("proxy_failure", "/proxy_failure")
 
     def function():
         return "hello1"
 
     serve.create_backend("proxy_failure:v1", function)
-    serve.set_traffic("proxy_failure", {"proxy_failure:v1": 1.0})
+    serve.create_endpoint(
+        "proxy_failure", backend="proxy_failure:v1", route="/proxy_failure")
 
     assert request_with_retries("/proxy_failure", timeout=1.0).text == "hello1"
 
@@ -112,13 +114,13 @@ def _kill_router():
 
 def test_router_failure(serve_instance):
     serve.init()
-    serve.create_endpoint("router_failure", "/router_failure")
 
     def function():
         return "hello1"
 
     serve.create_backend("router_failure:v1", function)
-    serve.set_traffic("router_failure", {"router_failure:v1": 1.0})
+    serve.create_endpoint(
+        "router_failure", backend="router_failure:v1", route="/router_failure")
 
     assert request_with_retries("/router_failure", timeout=5).text == "hello1"
 
@@ -154,14 +156,14 @@ def _get_worker_handles(backend):
 # serving requests.
 def test_worker_restart(serve_instance):
     serve.init()
-    serve.create_endpoint("worker_failure", "/worker_failure")
 
     class Worker1:
         def __call__(self):
             return os.getpid()
 
     serve.create_backend("worker_failure:v1", Worker1)
-    serve.set_traffic("worker_failure", {"worker_failure:v1": 1.0})
+    serve.create_endpoint(
+        "worker_failure", backend="worker_failure:v1", route="/worker_failure")
 
     # Get the PID of the worker.
     old_pid = request_with_retries("/worker_failure", timeout=1).text
@@ -186,7 +188,6 @@ def test_worker_restart(serve_instance):
 def test_worker_replica_failure(serve_instance):
     serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0
     serve.init()
-    serve.create_endpoint("replica_failure", "/replica_failure")
 
     class Worker:
         # Assumes that two replicas are started. Will hang forever in the
@@ -216,7 +217,8 @@ def test_worker_replica_failure(serve_instance):
     temp_path = tempfile.gettempdir() + "/" + serve.utils.get_random_letters()
     serve.create_backend("replica_failure", Worker, temp_path)
     serve.update_backend_config("replica_failure", {"num_replicas": 2})
-    serve.set_traffic("replica_failure", {"replica_failure": 1.0})
+    serve.create_endpoint(
+        "replica_failure", backend="replica_failure", route="/replica_failure")
 
     # Wait until both replicas have been started.
     responses = set()
diff --git a/python/ray/serve/tests/test_handle.py b/python/ray/serve/tests/test_handle.py
index dae876153..1ad0d65e4 100644
--- a/python/ray/serve/tests/test_handle.py
+++ b/python/ray/serve/tests/test_handle.py
@@ -18,12 +18,18 @@ def test_handle_in_endpoint(serve_instance):
         def __call__(self):
             return ray.get(self.handle.remote())
 
-    serve.create_endpoint("endpoint1", "/endpoint1", methods=["GET", "POST"])
     serve.create_backend("endpoint1:v0", Endpoint1)
-    serve.set_traffic("endpoint1", {"endpoint1:v0": 1.0})
+    serve.create_endpoint(
+        "endpoint1",
+        backend="endpoint1:v0",
+        route="/endpoint1",
+        methods=["GET", "POST"])
 
-    serve.create_endpoint("endpoint2", "/endpoint2", methods=["GET", "POST"])
     serve.create_backend("endpoint2:v0", Endpoint2)
-    serve.set_traffic("endpoint2", {"endpoint2:v0": 1.0})
+    serve.create_endpoint(
+        "endpoint2",
+        backend="endpoint2:v0",
+        route="/endpoint2",
+        methods=["GET", "POST"])
 
     assert requests.get("http://127.0.0.1:8000/endpoint2").text == "hello"
diff --git a/python/ray/serve/tests/test_metric.py b/python/ray/serve/tests/test_metric.py
index 0a04c9858..e5c89f2b7 100644
--- a/python/ray/serve/tests/test_metric.py
+++ b/python/ray/serve/tests/test_metric.py
@@ -144,8 +144,8 @@ async def test_system_metric_endpoints(serve_instance):
     def test_error_counter(flask_request):
         1 / 0
 
-    serve.create_endpoint("test_metrics", "/measure")
     serve.create_backend("m:v1", test_error_counter)
+    serve.create_endpoint("test_metrics", backend="m:v1", route="/measure")
     serve.set_traffic("test_metrics", {"m:v1": 1})
 
     # Check metrics are exposed under http endpoint
diff --git a/python/ray/serve/tests/test_nonblocking.py b/python/ray/serve/tests/test_nonblocking.py
index 008695e00..747667af2 100644
--- a/python/ray/serve/tests/test_nonblocking.py
+++ b/python/ray/serve/tests/test_nonblocking.py
@@ -6,13 +6,13 @@ from ray import serve
 
 def test_nonblocking():
     serve.init()
-    serve.create_endpoint("nonblocking", "/nonblocking")
 
     def function(flask_request):
         return {"method": flask_request.method}
 
     serve.create_backend("nonblocking:v1", function)
-    serve.set_traffic("nonblocking", {"nonblocking:v1": 1.0})
+    serve.create_endpoint(
+        "nonblocking", backend="nonblocking:v1", route="/nonblocking")
 
     resp = requests.get("http://127.0.0.1:8000/nonblocking").json()["method"]
     assert resp == "GET"
diff --git a/python/ray/serve/tests/test_persistence.py b/python/ray/serve/tests/test_persistence.py
index 328a808be..3a873c9a3 100644
--- a/python/ray/serve/tests/test_persistence.py
+++ b/python/ray/serve/tests/test_persistence.py
@@ -17,9 +17,8 @@ serve.init()
 def driver(flask_request):
     return "OK!"
 
-serve.create_endpoint("driver", "/driver")
 serve.create_backend("driver", driver)
-serve.set_traffic("driver", {{"driver": 1.0}})
+serve.create_endpoint("driver", backend="driver", route="/driver")
 """.format(ray.worker._global_node._redis_address)
 
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: