Files
ray/python/ray/serve/api.py
T
2020-05-17 00:14:42 -05:00

307 lines
10 KiB
Python

from functools import wraps
from multiprocessing import cpu_count
import ray
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
SERVE_MASTER_NAME)
from ray.serve.master import ServeMaster
from ray.serve.handle import RayServeHandle
from ray.serve.utils import (block_until_http_ready, format_actor_name,
retry_actor_failures)
from ray.serve.exceptions import RayServeException
from ray.serve.config import BackendConfig, ReplicaConfig
from ray.serve.router import Query
from ray.serve.request_params import RequestMetadata
from ray.serve.metric import InMemoryExporter
# Module-level handle to the serve master actor. It stays None until
# `init()` assigns it (either a newly created or an existing actor).
master_actor = None
def _get_master_actor():
    """Return the module-level master actor handle.

    Internal helper: reading the handle through this function always sees
    the current module global rather than a stale reference to the initial
    ``None`` object.

    Raises:
        RayServeException: If no master actor handle has been set yet.
    """
    if master_actor is not None:
        return master_actor
    raise RayServeException("Please run serve.init to initialize or "
                            "connect to existing ray serve cluster.")
def _ensure_connected(f):
    """Decorate ``f`` so it fails fast when serve is not connected.

    The wrapper first calls ``_get_master_actor()``, which raises if no
    master actor handle is set, and then delegates to ``f`` unchanged.
    """

    @wraps(f)
    def connected_call(*args, **kwargs):
        # Invoked only for its raise-if-unset effect; the handle is unused.
        _get_master_actor()
        result = f(*args, **kwargs)
        return result

    return connected_call
def accept_batch(f):
    """Mark a serving function as one that accepts batched input.

    A function carrying this marker expects every argument to be delivered
    as a list. The marker is the ``_serve_accept_batch`` attribute set on
    the function itself; the function is returned unwrapped.

    Example:
        >>> @serve.accept_batch
            def serving_func(flask_request):
                assert isinstance(flask_request, list)
                ...

        >>> class ServingActor:
                @serve.accept_batch
                def __call__(self, *, python_arg=None):
                    assert isinstance(python_arg, list)
    """
    setattr(f, "_serve_accept_batch", True)
    return f
def init(cluster_name=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs=None,
         metric_exporter=InMemoryExporter):
    """Initialize a serve cluster, or connect to an existing one.

    If a master actor for ``cluster_name`` already exists, this function
    only stores a handle to it and returns immediately; in that case no new
    actor is created and ``blocking`` has no effect.

    Calling `ray.init` before `serve.init` is optional. When ray is not
    initialized yet, serve calls `ray.init(**ray_init_kwargs)`.

    Args:
        cluster_name (str): A unique name for this serve cluster. This allows
            multiple serve clusters to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
        blocking (bool): If true and ``start_server`` is true, wait for the
            HTTP server's "/-/routes" URL to become ready before returning.
        start_server (bool): If true, `serve.init` starts http server.
            (Default: True)
        http_host (str): Host for HTTP server. Defaults to
            DEFAULT_HTTP_HOST.
        http_port (int): Port for HTTP server. Defaults to
            DEFAULT_HTTP_PORT.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            ray connection. When None (the default), uses
            {"object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)}.
        metric_exporter(ExporterInterface): The class aggregates metrics from
            all RayServe actors and optionally export them to external
            services. RayServe has two options built in: InMemoryExporter and
            PrometheusExporter

    Raises:
        TypeError: If ``cluster_name`` is neither None nor a string.
    """
    if cluster_name is not None and not isinstance(cluster_name, str):
        raise TypeError("cluster_name must be a string.")

    # Build the default per call instead of sharing one mutable dict object
    # (created at import time) across every invocation.
    if ray_init_kwargs is None:
        ray_init_kwargs = {
            "object_store_memory": int(1e8),
            "num_cpus": max(cpu_count(), 8),
        }

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get serve master actor if it exists
    global master_actor
    master_actor_name = format_actor_name(SERVE_MASTER_NAME, cluster_name)
    try:
        master_actor = ray.util.get_actor(master_actor_name)
        return
    except ValueError:
        # No actor registered under this name yet: fall through and create it.
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    # TODO(edoakes): for now, always start the HTTP proxy on the node that
    # serve.init() was run on. We should consider making this configurable
    # in the future.
    http_node_id = ray.state.current_node_id()
    master_actor = ServeMaster.options(
        detached=True,
        name=master_actor_name,
        max_restarts=-1,
    ).remote(cluster_name, start_server, http_node_id, http_host, http_port,
             metric_exporter)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}/-/routes".format(
            http_host, http_port))
@_ensure_connected
def create_endpoint(endpoint_name, route=None, methods=("GET", )):
    """Create a service endpoint given route_expression.

    Args:
        endpoint_name (str): A name to associate to the endpoint. It will be
            used as key to set traffic policy.
        route (str): A string begin with "/". HTTP server will use
            the string to match the path.
        methods (str or iterable of str): HTTP method name(s) for this
            endpoint. Names are upper-cased before being sent to the master
            actor. A single string such as "post" is treated as one method.
            Defaults to ("GET", ).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(methods, str):
        methods = [methods]
    upper_methods = [m.upper() for m in methods]
    # Note the argument order expected by the master actor: route first.
    retry_actor_failures(master_actor.create_endpoint, route, endpoint_name,
                         upper_methods)
@_ensure_connected
def delete_endpoint(endpoint):
    """Remove the endpoint named ``endpoint``.

    Backends associated with the endpoint are not deleted.
    """
    remove_endpoint = master_actor.delete_endpoint
    retry_actor_failures(remove_endpoint, endpoint)
@_ensure_connected
def update_backend_config(backend_tag, config_options):
    """Update the configuration of an existing backend.

    Options that are not present in ``config_options`` are left unchanged.

    Args:
        backend_tag(str): A registered backend.
        config_options(dict): Backend config options to update.

    Raises:
        ValueError: If ``config_options`` is not a dictionary.
    """
    if isinstance(config_options, dict):
        retry_actor_failures(master_actor.update_backend_config, backend_tag,
                             config_options)
        return
    raise ValueError("config_options must be a dictionary.")
@_ensure_connected
def get_backend_config(backend_tag):
    """Fetch the configuration of a backend from the master actor.

    Args:
        backend_tag(str): A registered backend.

    Returns:
        Whatever the master actor's ``get_backend_config`` returns for
        ``backend_tag``.
    """
    fetch_config = master_actor.get_backend_config
    backend_config = retry_actor_failures(fetch_config, backend_tag)
    return backend_config
@_ensure_connected
def create_backend(backend_tag,
                   func_or_class,
                   *actor_init_args,
                   ray_actor_options=None,
                   config=None):
    """Register a new backend under ``backend_tag``.

    Requests routed to the backend are served by ``func_or_class``.

    Args:
        backend_tag (str): a unique tag assign to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config: (optional) configuration options for this backend. Treated
            as an empty dict when None.

    Raises:
        TypeError: If ``config`` is neither None nor a dictionary.
    """
    backend_options = {} if config is None else config
    if not isinstance(backend_options, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    # Whether the backend accepts batches is derived from the replica config.
    accepts_batches = replica_config.accepts_batches
    backend_config = BackendConfig(backend_options, accepts_batches)

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_config, replica_config)
@_ensure_connected
def delete_backend(backend_tag):
    """Remove the backend registered under ``backend_tag``.

    The backend must not currently be used by any endpoints.
    """
    remove_backend = master_actor.delete_backend
    retry_actor_failures(remove_backend, backend_tag)
@_ensure_connected
def set_traffic(endpoint_name, traffic_policy_dictionary):
    """Assign a traffic policy to a service endpoint.

    Example:
        >>> serve.set_traffic("service-name", {
                "backend:v1": 0.5,
                "backend:v2": 0.5
            })

    Args:
        endpoint_name (str): A registered service endpoint.
        traffic_policy_dictionary (dict): a dictionary maps backend names
            to their traffic weights. The weights must sum to 1.
    """
    apply_policy = master_actor.set_traffic
    retry_actor_failures(apply_policy, endpoint_name,
                         traffic_policy_dictionary)
@_ensure_connected
def get_handle(endpoint_name,
               relative_slo_ms=None,
               absolute_slo_ms=None,
               missing_ok=False):
    """Retrieve RayServeHandle for service endpoint to invoke it from Python.

    Args:
        endpoint_name (str): A registered service endpoint.
        relative_slo_ms(float): Specify relative deadline in milliseconds for
            queries fired using this handle. (Default: None)
        absolute_slo_ms(float): Specify absolute deadline in milliseconds for
            queries fired using this handle. (Default: None)
        missing_ok (bool): If true, skip the check for the endpoint existence.
            It can be useful when the endpoint has not been registered.

    Returns:
        RayServeHandle

    Raises:
        AssertionError: If ``missing_ok`` is false and ``endpoint_name`` is
            not among the endpoints reported by the master actor.
    """
    if not missing_ok:
        registered = retry_actor_failures(master_actor.get_all_endpoints)
        if endpoint_name not in registered:
            # Raised explicitly (rather than via `assert`) so the check is
            # not stripped under `python -O`. The exception type is kept as
            # AssertionError for callers that already catch it.
            raise AssertionError(
                "Endpoint '{}' is not registered.".format(endpoint_name))

    # The master's get_router result is indexable; the first item is the
    # router handle passed to the RayServeHandle.
    router = retry_actor_failures(master_actor.get_router)[0]
    return RayServeHandle(
        router,
        endpoint_name,
        relative_slo_ms,
        absolute_slo_ms,
    )
@_ensure_connected
def stat():
    """Retrieve metric statistics about ray serve system.

    Returns:
        metric_stats(Any): Metric information returned by the metric exporter.
            This can vary by exporter. For the default InMemoryExporter, it
            returns a list of the following format:

            .. code-block::python

              [
                  {"info": {
                      "name": ...,
                      "type": COUNTER|MEASURE,
                      "label_key": label_value,
                      "label_key": label_value,
                      ...
                  }, "value": float}
              ]

            For PrometheusExporter, it returns the metrics in prometheus format
            in plain text.
    """
    # Go through retry_actor_failures like every other master-actor call in
    # this module, instead of a bare ray.get on the master actor.
    [metric_exporter] = retry_actor_failures(
        master_actor.get_metric_exporter)
    # The exporter is a separate actor; query it directly.
    return ray.get(metric_exporter.inspect_metrics.remote())