mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 20:18:33 +08:00
[Serve] First iteration of the serve doc (#5834)
* Address comments * Lint * Add py3 warning
This commit is contained in:
+4
-1
@@ -172,10 +172,13 @@ script:
|
||||
# `cluster_tests.py` runs on Jenkins, not Travis.
|
||||
- if [ $RAY_CI_TUNE_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 --ignore=python/ray/tune/tests/test_cluster.py --ignore=python/ray/tune/tests/test_tune_restore.py --ignore=python/ray/tune/tests/test_actor_reuse.py python/ray/tune/tests; fi
|
||||
|
||||
# ray serve tests
|
||||
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
|
||||
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || ./ci/suppress_output python python/ray/experimental/serve/examples/echo_full.py; fi
|
||||
|
||||
# ray tests
|
||||
# Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
|
||||
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
|
||||
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
|
||||
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -m pytest -v --durations=10 --timeout=300 python/ray/tests --ignore=python/ray/tests/perf_integration_tests; fi
|
||||
|
||||
deploy:
|
||||
|
||||
@@ -252,6 +252,7 @@ Getting Involved
|
||||
projects.rst
|
||||
signals.rst
|
||||
async_api.rst
|
||||
serve.rst
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: -1
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
Ray Serve (Experimental)
|
||||
========================
|
||||
|
||||
Ray Serve is a serving library that exposes Python functions and classes over HTTP.
|
||||
It has built-in support for flexible traffic policy. This means you can easily
|
||||
split incoming traffic to multiple implementations.
|
||||
|
||||
With Ray Serve, you can deploy your services at any scale.
|
||||
|
||||
.. warning::
|
||||
Ray Serve is Python 3 only.
|
||||
|
||||
Quickstart
|
||||
----------
|
||||
.. literalinclude:: ../../python/ray/experimental/serve/examples/echo_full.py
|
||||
|
||||
API
|
||||
---
|
||||
.. automodule:: ray.experimental.serve
|
||||
:members:
|
||||
@@ -0,0 +1,51 @@
|
||||
"""
|
||||
Full example of ray.serve module
|
||||
"""
|
||||
|
||||
import ray
|
||||
import ray.experimental.serve as serve
|
||||
import requests
|
||||
import time
|
||||
|
||||
# initialize ray serve system.
|
||||
# blocking=True will wait for the HTTP server to be ready to serve requests.
|
||||
serve.init(blocking=True)
|
||||
|
||||
# an endpoint is associated with an http URL.
|
||||
serve.create_endpoint("my_endpoint", "/echo")
|
||||
|
||||
|
||||
# a backend can be a function or class.
|
||||
def echo_v1(request):
|
||||
return request
|
||||
|
||||
|
||||
serve.create_backend(echo_v1, "echo:v1")
|
||||
|
||||
# We can link an endpoint to a backend. This means all the traffic
|
||||
# going to my_endpoint will now go to the echo:v1 backend.
|
||||
serve.link("my_endpoint", "echo:v1")
|
||||
|
||||
print(requests.get("http://127.0.0.1:8000/echo").json())
|
||||
# The service will be reachable from http
|
||||
|
||||
print(ray.get(serve.get_handle("my_endpoint").remote("hello")))
|
||||
|
||||
# as well as within the ray system.
|
||||
|
||||
|
||||
# We can also add a new backend and split the traffic.
|
||||
def echo_v2(request):
|
||||
# magic
|
||||
return "something new"
|
||||
|
||||
|
||||
serve.create_backend(echo_v2, "echo:v2")
|
||||
|
||||
# The two backends will now split the traffic 50%-50%.
|
||||
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
|
||||
|
||||
# Observe requests are now split between two backends.
|
||||
for _ in range(10):
|
||||
print(requests.get("http://127.0.0.1:8000/echo").json())
|
||||
time.sleep(0.5)
|
||||
Reference in New Issue
Block a user