From fa1214c44a4d73dba9d3f2c5a033e98addfeacc2 Mon Sep 17 00:00:00 2001
From: Simon Mo <xmo@berkeley.edu>
Date: Thu, 3 Oct 2019 15:14:09 -0700
Subject: [PATCH] [Serve] First iteration of the serve doc (#5834)

* Address comments

* Lint

* Add py3 warning
---
 .travis.yml                                   |  5 +-
 doc/source/index.rst                          |  1 +
 doc/source/serve.rst                          | 20 ++++++++
 .../experimental/serve/examples/echo_full.py  | 51 +++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 doc/source/serve.rst
 create mode 100644 python/ray/experimental/serve/examples/echo_full.py

diff --git a/.travis.yml b/.travis.yml
index 0da63f9e7..d4f6d339f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -172,10 +172,13 @@ script:
   # `cluster_tests.py` runs on Jenkins, not Travis.
   - if [ $RAY_CI_TUNE_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 --ignore=python/ray/tune/tests/test_cluster.py --ignore=python/ray/tune/tests/test_tune_restore.py --ignore=python/ray/tune/tests/test_actor_reuse.py python/ray/tune/tests; fi
 
+  # ray serve tests (Python 3.5+ only: the version check exits non-zero there, so the command after || runs)
+  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
+  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || ./ci/suppress_output python python/ray/experimental/serve/examples/echo_full.py; fi
+
   # ray tests
   # Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
   - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
-  - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
   - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 python/ray/tests --ignore=python/ray/tests/perf_integration_tests; fi
 
 deploy:
diff --git a/doc/source/index.rst b/doc/source/index.rst
index b12659afc..21d0b695e 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -252,6 +252,7 @@ Getting Involved
    projects.rst
    signals.rst
    async_api.rst
+   serve.rst
 
 .. toctree::
    :maxdepth: -1
diff --git a/doc/source/serve.rst b/doc/source/serve.rst
new file mode 100644
index 000000000..06781b40a
--- /dev/null
+++ b/doc/source/serve.rst
@@ -0,0 +1,20 @@
+Ray Serve (Experimental)
+========================
+
+Ray Serve is a serving library that exposes Python functions and classes over
+HTTP. It has built-in support for flexible traffic policies, which means you
+can easily split incoming traffic across multiple implementations.
+
+With Ray Serve, you can deploy your services at any scale.
+
+.. warning::
+  Ray Serve is Python 3 only.
+
+Quickstart
+----------
+.. literalinclude:: ../../python/ray/experimental/serve/examples/echo_full.py
+
+API
+---
+.. automodule:: ray.experimental.serve
+    :members:
diff --git a/python/ray/experimental/serve/examples/echo_full.py b/python/ray/experimental/serve/examples/echo_full.py
new file mode 100644
index 000000000..86801d4f1
--- /dev/null
+++ b/python/ray/experimental/serve/examples/echo_full.py
@@ -0,0 +1,51 @@
+"""
+Full example of ray.serve module
+"""
+
+import ray
+import ray.experimental.serve as serve
+import requests
+import time
+
+# initialize ray serve system.
+# blocking=True will wait for HTTP server to be ready to serve request.
+serve.init(blocking=True)
+
+# an endpoint is associated with an http URL.
+serve.create_endpoint("my_endpoint", "/echo")
+
+
+# A backend can be a function or a class; this one echoes the request back.
+def echo_v1(request):
+    return request
+
+
+serve.create_backend(echo_v1, "echo:v1")
+
+# We can link an endpoint to a backend. This means all the traffic that
+# goes to my_endpoint will now go to the echo:v1 backend.
+serve.link("my_endpoint", "echo:v1")
+
+print(requests.get("http://127.0.0.1:8000/echo").json())
+# As shown above, the service is reachable over HTTP.
+
+print(ray.get(serve.get_handle("my_endpoint").remote("hello")))
+
+# As shown above, the service is also reachable from within the Ray system.
+
+
+# We can also add a new backend and split the traffic.
+def echo_v2(request):
+    # Ignore the request and return a fixed reply to tell v2 apart from v1.
+    return "something new"
+
+
+serve.create_backend(echo_v2, "echo:v2")
+
+# The two backends will now split the traffic 50%-50%.
+serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
+
+# Observe that requests are now split between the two backends.
+for _ in range(10):
+    print(requests.get("http://127.0.0.1:8000/echo").json())
+    time.sleep(0.5)