mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:50:55 +08:00
307 lines
10 KiB
Python
307 lines
10 KiB
Python
from functools import wraps
|
|
|
|
from multiprocessing import cpu_count
|
|
|
|
import ray
|
|
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
|
|
SERVE_MASTER_NAME)
|
|
from ray.serve.master import ServeMaster
|
|
from ray.serve.handle import RayServeHandle
|
|
from ray.serve.utils import (block_until_http_ready, format_actor_name,
|
|
retry_actor_failures)
|
|
from ray.serve.exceptions import RayServeException
|
|
from ray.serve.config import BackendConfig, ReplicaConfig
|
|
from ray.serve.router import Query
|
|
from ray.serve.request_params import RequestMetadata
|
|
from ray.serve.metric import InMemoryExporter
|
|
|
|
# Module-level handle to the serve master actor. It stays None until init()
# assigns it; _get_master_actor() raises RayServeException while it is None.
master_actor = None
|
|
|
|
|
|
def _get_master_actor():
    """Return the current module-level master actor handle.

    Internal helper: looking the handle up through this function reads the
    module global at call time, whereas a name imported from this module
    would keep pointing at the original ``None`` object.

    Raises:
        RayServeException: if no master actor handle has been set yet.
    """
    if master_actor is not None:
        return master_actor
    raise RayServeException(
        "Please run serve.init to initialize or "
        "connect to existing ray serve cluster.")
|
|
|
|
|
|
def _ensure_connected(f):
    """Decorate ``f`` so it fails fast when no master actor is available.

    The returned wrapper first calls ``_get_master_actor()`` (which raises
    if the master actor handle is unset) and then forwards all positional
    and keyword arguments to ``f``, returning its result unchanged.
    """

    @wraps(f)
    def connected_call(*call_args, **call_kwargs):
        # Called only for its raising side effect; the handle is not used.
        _get_master_actor()
        outcome = f(*call_args, **call_kwargs)
        return outcome

    return connected_call
|
|
|
|
|
|
def accept_batch(f):
    """Mark a serving function as one that accepts batched input.

    Use this annotation on a function that expects every argument to be
    passed in as a list.

    Example::

        @serve.accept_batch
        def serving_func(flask_request):
            assert isinstance(flask_request, list)
            ...

        class ServingActor:
            @serve.accept_batch
            def __call__(self, *, python_arg=None):
                assert isinstance(python_arg, list)

    Returns:
        The same object ``f``, with its ``_serve_accept_batch`` attribute
        set to ``True``.
    """
    setattr(f, "_serve_accept_batch", True)
    return f
|
|
|
|
|
|
def init(cluster_name=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs=None,
         metric_exporter=InMemoryExporter):
    """Initialize a serve cluster, or connect to an existing one.

    If a serve master actor already exists for ``cluster_name``, this
    function stores a handle to it and returns immediately; the HTTP,
    ``blocking`` and ``metric_exporter`` arguments are not used in that case.

    Calling `ray.init` before `serve.init` is optional. When ray is not
    initialized yet, serve calls `ray.init(**ray_init_kwargs)` itself.

    Args:
        cluster_name (str): A unique name for this serve cluster. This allows
            multiple serve clusters to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
        blocking (bool): If true (and ``start_server`` is true), wait until
            the HTTP server answers on its ``/-/routes`` URL before
            returning.
        start_server (bool): If true, `serve.init` starts http server.
            (Default: True)
        http_host (str): Host for HTTP server. Defaults to
            ``DEFAULT_HTTP_HOST`` from ``ray.serve.constants``.
        http_port (int): Port for HTTP server. Defaults to
            ``DEFAULT_HTTP_PORT`` from ``ray.serve.constants``.
        ray_init_kwargs (dict): Keyword arguments passed to ray.init if
            there is no ray connection. When omitted (or None), defaults to
            ``{"object_store_memory": int(1e8),
            "num_cpus": max(cpu_count(), 8)}``.
        metric_exporter(ExporterInterface): The class aggregates metrics from
            all RayServe actors and optionally export them to external
            services. RayServe has two options built in: InMemoryExporter and
            PrometheusExporter

    Raises:
        TypeError: if ``cluster_name`` is neither None nor a string.
    """
    if cluster_name is not None and not isinstance(cluster_name, str):
        raise TypeError("cluster_name must be a string.")

    # Build the defaults per call instead of sharing one dict object created
    # at function-definition time across every invocation.
    if ray_init_kwargs is None:
        ray_init_kwargs = {
            "object_store_memory": int(1e8),
            "num_cpus": max(cpu_count(), 8),
        }

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get serve master actor if it exists
    global master_actor
    master_actor_name = format_actor_name(SERVE_MASTER_NAME, cluster_name)
    try:
        master_actor = ray.util.get_actor(master_actor_name)
    except ValueError:
        # Lookup failed: treated as "no existing master", so create one below.
        pass
    else:
        return

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    # TODO(edoakes): for now, always start the HTTP proxy on the node that
    # serve.init() was run on. We should consider making this configurable
    # in the future.
    http_node_id = ray.state.current_node_id()
    master_actor = ServeMaster.options(
        detached=True,
        name=master_actor_name,
        max_restarts=-1,
    ).remote(cluster_name, start_server, http_node_id, http_host, http_port,
             metric_exporter)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}/-/routes".format(
            http_host, http_port))
|
|
|
|
|
|
@_ensure_connected
def create_endpoint(endpoint_name, route=None, methods=("GET", )):
    """Create a service endpoint given route_expression.

    Args:
        endpoint_name (str): A name to associate to the endpoint. It will be
            used as key to set traffic policy.
        route (str): A string begin with "/". HTTP server will use
            the string to match the path. (Default: None)
        methods (str or iterable of str): Method names for this endpoint.
            Each name is upper-cased before being sent to the master actor.
            A single string is treated as one method name rather than being
            iterated character by character. (Default: ("GET", ))
    """
    # Guard against "GET" being expanded into ["G", "E", "T"].
    if isinstance(methods, str):
        methods = [methods]
    retry_actor_failures(master_actor.create_endpoint, route, endpoint_name,
                         [m.upper() for m in methods])
|
|
|
|
|
|
@_ensure_connected
def delete_endpoint(endpoint):
    """Remove ``endpoint`` via the master actor.

    Backends associated with the endpoint are not deleted.
    """
    remove_on_master = master_actor.delete_endpoint
    retry_actor_failures(remove_on_master, endpoint)
|
|
|
|
|
|
@_ensure_connected
def update_backend_config(backend_tag, config_options):
    """Update selected configuration options of a backend.

    Keys that are not present in ``config_options`` are left unchanged.

    Args:
        backend_tag(str): A registered backend.
        config_options(dict): Backend config options to update.

    Raises:
        ValueError: if ``config_options`` is not a dictionary.
    """
    if isinstance(config_options, dict):
        update_on_master = master_actor.update_backend_config
        retry_actor_failures(update_on_master, backend_tag, config_options)
    else:
        raise ValueError("config_options must be a dictionary.")
|
|
|
|
|
|
@_ensure_connected
def get_backend_config(backend_tag):
    """Fetch the configuration of a backend from the master actor.

    Args:
        backend_tag(str): A registered backend.

    Returns:
        Whatever the master actor's ``get_backend_config`` yields for
        ``backend_tag``.
    """
    fetch_from_master = master_actor.get_backend_config
    backend_config = retry_actor_failures(fetch_from_master, backend_tag)
    return backend_config
|
|
|
|
|
|
@_ensure_connected
def create_backend(backend_tag,
                   func_or_class,
                   *actor_init_args,
                   ray_actor_options=None,
                   config=None):
    """Register a new backend under ``backend_tag``.

    Requests routed to the backend are served by ``func_or_class``.

    Args:
        backend_tag (str): a unique tag assigned to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config: (optional) configuration options for this backend; an empty
            dictionary is used when omitted.

    Raises:
        TypeError: if ``config`` is given and is not a dictionary.
    """
    config = {} if config is None else config
    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary.")

    replica_cfg = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    # Whether the backend takes batches is derived from the replica config.
    takes_batches = replica_cfg.accepts_batches
    backend_cfg = BackendConfig(config, takes_batches)

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_cfg, replica_cfg)
|
|
|
|
|
|
@_ensure_connected
def delete_backend(backend_tag):
    """Remove the backend registered as ``backend_tag`` via the master actor.

    The backend must not currently be used by any endpoints.
    """
    remove_on_master = master_actor.delete_backend
    retry_actor_failures(remove_on_master, backend_tag)
|
|
|
|
|
|
@_ensure_connected
def set_traffic(endpoint_name, traffic_policy_dictionary):
    """Attach a traffic policy to a service endpoint.

    Example::

        serve.set_traffic("service-name", {
            "backend:v1": 0.5,
            "backend:v2": 0.5
        })

    Args:
        endpoint_name (str): A registered service endpoint.
        traffic_policy_dictionary (dict): a dictionary mapping backend names
            to their traffic weights. The weights must sum to 1.
    """
    apply_on_master = master_actor.set_traffic
    retry_actor_failures(apply_on_master, endpoint_name,
                         traffic_policy_dictionary)
|
|
|
|
|
|
@_ensure_connected
def get_handle(endpoint_name,
               relative_slo_ms=None,
               absolute_slo_ms=None,
               missing_ok=False):
    """Retrieve RayServeHandle for service endpoint to invoke it from Python.

    Args:
        endpoint_name (str): A registered service endpoint.
        relative_slo_ms(float): Specify relative deadline in milliseconds for
            queries fired using this handle. (Default: None)
        absolute_slo_ms(float): Specify absolute deadline in milliseconds for
            queries fired using this handle. (Default: None)
        missing_ok (bool): If true, skip the check for the endpoint existence.
            It can be useful when the endpoint has not been registered.

    Returns:
        RayServeHandle

    Raises:
        AssertionError: if ``missing_ok`` is false and ``endpoint_name`` is
            not among the endpoints reported by the master actor.
    """
    if not missing_ok:
        known_endpoints = retry_actor_failures(master_actor.get_all_endpoints)
        if endpoint_name not in known_endpoints:
            # Raised explicitly instead of via an `assert` statement so the
            # validation still runs under `python -O`. The exception type is
            # kept as AssertionError for backward compatibility.
            raise AssertionError(
                "Endpoint {!r} is not registered.".format(endpoint_name))

    # The handle is built from the first element of the get_router result.
    router = retry_actor_failures(master_actor.get_router)[0]
    return RayServeHandle(
        router,
        endpoint_name,
        relative_slo_ms,
        absolute_slo_ms,
    )
|
|
|
|
|
|
@_ensure_connected
def stat():
    """Retrieve metric statistics about ray serve system.

    Returns:
        metric_stats(Any): Metric information returned by the metric exporter.
            This can vary by exporter. For the default InMemoryExporter, it
            returns a list of the following format:

            .. code-block:: python

                [
                    {"info": {
                        "name": ...,
                        "type": COUNTER|MEASURE,
                        "label_key": label_value,
                        "label_key": label_value,
                        ...
                    }, "value": float}
                ]

            For PrometheusExporter, it returns the metrics in prometheus
            format in plain text.
    """
    exporter_ref = master_actor.get_metric_exporter.remote()
    exporters = ray.get(exporter_ref)
    # Exactly one exporter is expected; the unpacking raises otherwise.
    [metric_exporter] = exporters
    metrics_ref = metric_exporter.inspect_metrics.remote()
    return ray.get(metrics_ref)
|