[Serve] First iteration of the serve doc (#5834)

* Address comments * Lint * Add py3 warning
2026-06-30 20:18:33 +08:00 · 2019-10-03 15:14:09 -07:00
parent 0dee225ce1
commit fa1214c44a
4 changed files with 76 additions and 1 deletions
@@ -172,10 +172,13 @@ script:
  # `cluster_tests.py` runs on Jenkins, not Travis.
  - if [ $RAY_CI_TUNE_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 --ignore=python/ray/tune/tests/test_cluster.py --ignore=python/ray/tune/tests/test_tune_restore.py --ignore=python/ray/tune/tests/test_actor_reuse.py python/ray/tune/tests; fi

+  # ray serve tests (Python3.5+ only: the version check exits non-zero on 3.5+, so `||` then runs the command; older Pythons skip it)
+  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
+  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || ./ci/suppress_output python python/ray/experimental/serve/examples/echo_full.py; fi
+
  # ray tests
  # Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
  - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
-  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
  - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 python/ray/tests --ignore=python/ray/tests/perf_integration_tests; fi

 deploy:
@@ -252,6 +252,7 @@ Getting Involved
   projects.rst
   signals.rst
   async_api.rst
+   serve.rst

 .. toctree::
   :maxdepth: -1
@@ -0,0 +1,20 @@
+Ray Serve (Experimental)
+========================
+
+Ray Serve is a serving library that exposes Python functions/classes over HTTP.
+It has built-in support for flexible traffic policies. This means you can easily
+split incoming traffic across multiple implementations.
+
+With Ray Serve, you can deploy your services at any scale.
+
+.. warning::
+  Ray Serve is Python 3 only.
+
+Quickstart
+----------
+.. literalinclude:: ../../python/ray/experimental/serve/examples/echo_full.py
+
+API
+---
+.. automodule:: ray.experimental.serve
+    :members:
@@ -0,0 +1,51 @@
+"""
+Full example of ray.serve module
+"""
+
+import ray
+import ray.experimental.serve as serve
+import requests
+import time
+
+# initialize ray serve system.
+# blocking=True will wait for HTTP server to be ready to serve request.
+serve.init(blocking=True)
+
+# an endpoint is associated with an http URL.
+serve.create_endpoint("my_endpoint", "/echo")
+
+
+# a backend can be a function or class.
+def echo_v1(request):
+    return request
+
+
+serve.create_backend(echo_v1, "echo:v1")
+
+# We can link an endpoint to a backend. This means all the traffic
+# going to my_endpoint will now go to the echo:v1 backend.
+serve.link("my_endpoint", "echo:v1")
+
+print(requests.get("http://127.0.0.1:8000/echo").json())
+# The service will be reachable over HTTP
+
+print(ray.get(serve.get_handle("my_endpoint").remote("hello")))
+
+# as well as within the ray system.
+
+
+# We can also add a new backend and split the traffic.
+def echo_v2(request):
+    # magic
+    return "something new"
+
+
+serve.create_backend(echo_v2, "echo:v2")
+
+# The two backends will now split the traffic 50%-50%.
+serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
+
+# Observe that requests are now split between the two backends.
+for _ in range(10):
+    print(requests.get("http://127.0.0.1:8000/echo").json())
+    time.sleep(0.5)