mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:18:45 +08:00
307 lines
10 KiB
Python
307 lines
10 KiB
Python
from functools import wraps
|
|
|
|
import ray
|
|
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
|
|
SERVE_MASTER_NAME, HTTP_PROXY_TIMEOUT)
|
|
from ray.serve.master import ServeMaster
|
|
from ray.serve.handle import RayServeHandle
|
|
from ray.serve.utils import (block_until_http_ready, format_actor_name,
|
|
retry_actor_failures)
|
|
from ray.serve.exceptions import RayServeException
|
|
from ray.serve.config import BackendConfig, ReplicaConfig
|
|
from ray.serve.router import Query
|
|
from ray.serve.request_params import RequestMetadata
|
|
from ray.serve.metric import InMemoryExporter
|
|
|
|
master_actor = None
|
|
|
|
|
|
def _get_master_actor():
|
|
"""Used for internal purpose because using just import serve.global_state
|
|
will always reference the original None object.
|
|
"""
|
|
global master_actor
|
|
if master_actor is None:
|
|
raise RayServeException("Please run serve.init to initialize or "
|
|
"connect to existing ray serve cluster.")
|
|
return master_actor
|
|
|
|
|
|
def _ensure_connected(f):
|
|
@wraps(f)
|
|
def check(*args, **kwargs):
|
|
_get_master_actor()
|
|
return f(*args, **kwargs)
|
|
|
|
return check
|
|
|
|
|
|
def accept_batch(f):
|
|
"""Annotation to mark a serving function that batch is accepted.
|
|
|
|
This annotation need to be used to mark a function expect all arguments
|
|
to be passed into a list.
|
|
|
|
Example:
|
|
|
|
>>> @serve.accept_batch
|
|
def serving_func(flask_request):
|
|
assert isinstance(flask_request, list)
|
|
...
|
|
|
|
>>> class ServingActor:
|
|
@serve.accept_batch
|
|
def __call__(self, *, python_arg=None):
|
|
assert isinstance(python_arg, list)
|
|
"""
|
|
f._serve_accept_batch = True
|
|
return f
|
|
|
|
|
|
def init(name=None,
|
|
http_host=DEFAULT_HTTP_HOST,
|
|
http_port=DEFAULT_HTTP_PORT,
|
|
metric_exporter=InMemoryExporter):
|
|
"""Initialize or connect to a serve cluster.
|
|
|
|
If serve cluster is already initialized, this function will just return.
|
|
|
|
If `ray.init` has not been called in this process, it will be called with
|
|
no arguments. To specify kwargs to `ray.init`, it should be called
|
|
separately before calling `serve.init`.
|
|
|
|
Args:
|
|
name (str): A unique name for this serve instance. This allows
|
|
multiple serve instances to run on the same ray cluster. Must be
|
|
specified in all subsequent serve.init() calls.
|
|
http_host (str): Host for HTTP server. Default to "0.0.0.0".
|
|
http_port (int): Port for HTTP server. Default to 8000.
|
|
metric_exporter(ExporterInterface): The class aggregates metrics from
|
|
all RayServe actors and optionally export them to external
|
|
services. RayServe has two options built in: InMemoryExporter and
|
|
PrometheusExporter
|
|
"""
|
|
if name is not None and not isinstance(name, str):
|
|
raise TypeError("name must be a string.")
|
|
|
|
# Initialize ray if needed.
|
|
if not ray.is_initialized():
|
|
ray.init()
|
|
|
|
# Try to get serve master actor if it exists
|
|
global master_actor
|
|
master_actor_name = format_actor_name(SERVE_MASTER_NAME, name)
|
|
try:
|
|
master_actor = ray.get_actor(master_actor_name)
|
|
return
|
|
except ValueError:
|
|
pass
|
|
|
|
# Register serialization context once
|
|
ray.register_custom_serializer(Query, Query.ray_serialize,
|
|
Query.ray_deserialize)
|
|
ray.register_custom_serializer(RequestMetadata,
|
|
RequestMetadata.ray_serialize,
|
|
RequestMetadata.ray_deserialize)
|
|
|
|
# TODO(edoakes): for now, always start the HTTP proxy on the node that
|
|
# serve.init() was run on. We should consider making this configurable
|
|
# in the future.
|
|
http_node_id = ray.state.current_node_id()
|
|
master_actor = ServeMaster.options(
|
|
name=master_actor_name,
|
|
max_restarts=-1,
|
|
).remote(name, http_node_id, http_host, http_port, metric_exporter)
|
|
|
|
block_until_http_ready(
|
|
"http://{}:{}/-/routes".format(http_host, http_port),
|
|
timeout=HTTP_PROXY_TIMEOUT)
|
|
|
|
|
|
@_ensure_connected
|
|
def create_endpoint(endpoint_name, route=None, methods=["GET"]):
|
|
"""Create a service endpoint given route_expression.
|
|
|
|
Args:
|
|
endpoint_name (str): A name to associate to the endpoint. It will be
|
|
used as key to set traffic policy.
|
|
route (str): A string begin with "/". HTTP server will use
|
|
the string to match the path.
|
|
"""
|
|
retry_actor_failures(master_actor.create_endpoint, route, endpoint_name,
|
|
[m.upper() for m in methods])
|
|
|
|
|
|
@_ensure_connected
|
|
def delete_endpoint(endpoint):
|
|
"""Delete the given endpoint.
|
|
|
|
Does not delete any associated backends.
|
|
"""
|
|
retry_actor_failures(master_actor.delete_endpoint, endpoint)
|
|
|
|
|
|
@_ensure_connected
|
|
def list_endpoints():
|
|
"""Returns a dictionary of all registered endpoints.
|
|
|
|
The dictionary keys are endpoint names and values are dictionaries
|
|
of the form: {"methods": List[str], "traffic": Dict[str, float]}.
|
|
"""
|
|
return retry_actor_failures(master_actor.get_all_endpoints)
|
|
|
|
|
|
@_ensure_connected
|
|
def update_backend_config(backend_tag, config_options):
|
|
"""Update a backend configuration for a backend tag.
|
|
|
|
Keys not specified in the passed will be left unchanged.
|
|
|
|
Args:
|
|
backend_tag(str): A registered backend.
|
|
config_options(dict): Backend config options to update.
|
|
"""
|
|
if not isinstance(config_options, dict):
|
|
raise ValueError("config_options must be a dictionary.")
|
|
retry_actor_failures(master_actor.update_backend_config, backend_tag,
|
|
config_options)
|
|
|
|
|
|
@_ensure_connected
|
|
def get_backend_config(backend_tag):
|
|
"""Get the backend configuration for a backend tag.
|
|
|
|
Args:
|
|
backend_tag(str): A registered backend.
|
|
"""
|
|
return retry_actor_failures(master_actor.get_backend_config, backend_tag)
|
|
|
|
|
|
@_ensure_connected
|
|
def create_backend(backend_tag,
|
|
func_or_class,
|
|
*actor_init_args,
|
|
ray_actor_options=None,
|
|
config=None):
|
|
"""Create a backend with the provided tag.
|
|
|
|
The backend will serve requests with func_or_class.
|
|
|
|
Args:
|
|
backend_tag (str): a unique tag assign to identify this backend.
|
|
func_or_class (callable, class): a function or a class implementing
|
|
__call__.
|
|
actor_init_args (optional): the arguments to pass to the class.
|
|
initialization method.
|
|
ray_actor_options (optional): options to be passed into the
|
|
@ray.remote decorator for the backend actor.
|
|
config: (optional) configuration options for this backend.
|
|
"""
|
|
if config is None:
|
|
config = {}
|
|
if not isinstance(config, dict):
|
|
raise TypeError("config must be a dictionary.")
|
|
|
|
replica_config = ReplicaConfig(
|
|
func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
|
|
backend_config = BackendConfig(config, replica_config.accepts_batches)
|
|
|
|
retry_actor_failures(master_actor.create_backend, backend_tag,
|
|
backend_config, replica_config)
|
|
|
|
|
|
@_ensure_connected
|
|
def list_backends():
|
|
"""Returns a dictionary of all registered backends.
|
|
|
|
Dictionary maps backend tags to backend configs.
|
|
"""
|
|
return retry_actor_failures(master_actor.get_all_backends)
|
|
|
|
|
|
@_ensure_connected
|
|
def delete_backend(backend_tag):
|
|
"""Delete the given backend.
|
|
|
|
The backend must not currently be used by any endpoints.
|
|
"""
|
|
retry_actor_failures(master_actor.delete_backend, backend_tag)
|
|
|
|
|
|
@_ensure_connected
|
|
def set_traffic(endpoint_name, traffic_policy_dictionary):
|
|
"""Associate a service endpoint with traffic policy.
|
|
|
|
Example:
|
|
|
|
>>> serve.set_traffic("service-name", {
|
|
"backend:v1": 0.5,
|
|
"backend:v2": 0.5
|
|
})
|
|
|
|
Args:
|
|
endpoint_name (str): A registered service endpoint.
|
|
traffic_policy_dictionary (dict): a dictionary maps backend names
|
|
to their traffic weights. The weights must sum to 1.
|
|
"""
|
|
retry_actor_failures(master_actor.set_traffic, endpoint_name,
|
|
traffic_policy_dictionary)
|
|
|
|
|
|
@_ensure_connected
|
|
def get_handle(endpoint_name,
|
|
relative_slo_ms=None,
|
|
absolute_slo_ms=None,
|
|
missing_ok=False):
|
|
"""Retrieve RayServeHandle for service endpoint to invoke it from Python.
|
|
|
|
Args:
|
|
endpoint_name (str): A registered service endpoint.
|
|
relative_slo_ms(float): Specify relative deadline in milliseconds for
|
|
queries fired using this handle. (Default: None)
|
|
absolute_slo_ms(float): Specify absolute deadline in milliseconds for
|
|
queries fired using this handle. (Default: None)
|
|
missing_ok (bool): If true, skip the check for the endpoint existence.
|
|
It can be useful when the endpoint has not been registered.
|
|
|
|
Returns:
|
|
RayServeHandle
|
|
"""
|
|
if not missing_ok:
|
|
assert endpoint_name in retry_actor_failures(
|
|
master_actor.get_all_endpoints)
|
|
|
|
return RayServeHandle(
|
|
retry_actor_failures(master_actor.get_router)[0],
|
|
endpoint_name,
|
|
relative_slo_ms,
|
|
absolute_slo_ms,
|
|
)
|
|
|
|
|
|
@_ensure_connected
|
|
def stat():
|
|
"""Retrieve metric statistics about ray serve system.
|
|
|
|
Returns:
|
|
metric_stats(Any): Metric information returned by the metric exporter.
|
|
This can vary by exporter. For the default InMemoryExporter, it
|
|
returns a list of the following format:
|
|
|
|
.. code-block::python
|
|
[
|
|
{"info": {
|
|
"name": ...,
|
|
"type": COUNTER|MEASURE,
|
|
"label_key": label_value,
|
|
"label_key": label_value,
|
|
...
|
|
}, "value": float}
|
|
]
|
|
|
|
For PrometheusExporter, it returns the metrics in prometheus format
|
|
in plain text.
|
|
"""
|
|
[metric_exporter] = ray.get(master_actor.get_metric_exporter.remote())
|
|
return ray.get(metric_exporter.inspect_metrics.remote())
|