ray/python/ray/serve/api.py

from functools import wraps

from multiprocessing import cpu_count

import ray
from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT,
                                 SERVE_MASTER_NAME)
from ray.serve.master import ServeMaster
from ray.serve.handle import RayServeHandle
from ray.serve.utils import (block_until_http_ready, format_actor_name,
                             retry_actor_failures)
from ray.serve.exceptions import RayServeException
from ray.serve.config import BackendConfig, ReplicaConfig
from ray.serve.router import Query
from ray.serve.request_params import RequestMetadata
from ray.serve.metric import InMemoryExporter

master_actor = None


def _get_master_actor():
    """Used for internal purpose because using just import serve.global_state
    will always reference the original None object.
    """
    global master_actor
    if master_actor is None:
        raise RayServeException("Please run serve.init to initialize or "
                                "connect to existing ray serve cluster.")
    return master_actor


def _ensure_connected(f):
    @wraps(f)
    def check(*args, **kwargs):
        _get_master_actor()
        return f(*args, **kwargs)

    return check


def accept_batch(f):
    """Annotation to mark a serving function that batch is accepted.

    This annotation need to be used to mark a function expect all arguments
    to be passed into a list.

    Example:

    >>> @serve.accept_batch
        def serving_func(flask_request):
            assert isinstance(flask_request, list)
            ...

    >>> class ServingActor:
            @serve.accept_batch
            def __call__(self, *, python_arg=None):
                assert isinstance(python_arg, list)
    """
    f._serve_accept_batch = True
    return f


def init(cluster_name=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
             "object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)
         },
         metric_exporter=InMemoryExporter):
    """Initialize a serve cluster.

    If serve cluster has already initialized, this function will just return.

    Calling `ray.init` before `serve.init` is optional. When there is not a ray
    cluster initialized, serve will call `ray.init` with `object_store_memory`
    requirement.

    Args:
        cluster_name (str): A unique name for this serve cluster. This allows
            multiple serve clusters to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
        blocking (bool): If true, the function will wait for the HTTP server to
            be healthy, and other components to be ready before returns.
        start_server (bool): If true, `serve.init` starts http server.
            (Default: True)
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        ray_init_kwargs (dict): Argument passed to ray.init, if there is no ray
            connection. Default to {"object_store_memory": int(1e8)} for
            performance stability reason
        metric_exporter(ExporterInterface): The class aggregates metrics from
            all RayServe actors and optionally export them to external
            services. RayServe has two options built in: InMemoryExporter and
            PrometheusExporter
    """
    if cluster_name is not None and not isinstance(cluster_name, str):
        raise TypeError("cluster_name must be a string.")

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get serve master actor if it exists
    global master_actor
    master_actor_name = format_actor_name(SERVE_MASTER_NAME, cluster_name)
    try:
        master_actor = ray.util.get_actor(master_actor_name)
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    # TODO(edoakes): for now, always start the HTTP proxy on the node that
    # serve.init() was run on. We should consider making this configurable
    # in the future.
    http_node_id = ray.state.current_node_id()
    master_actor = ServeMaster.options(
        detached=True,
        name=master_actor_name,
        max_restarts=-1,
    ).remote(cluster_name, start_server, http_node_id, http_host, http_port,
             metric_exporter)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}/-/routes".format(
            http_host, http_port))


@_ensure_connected
def create_endpoint(endpoint_name, route=None, methods=["GET"]):
    """Create a service endpoint given route_expression.

    Args:
        endpoint_name (str): A name to associate to the endpoint. It will be
            used as key to set traffic policy.
        route (str): A string begin with "/". HTTP server will use
            the string to match the path.
        blocking (bool): If true, the function will wait for service to be
            registered before returning
    """
    retry_actor_failures(master_actor.create_endpoint, route, endpoint_name,
                         [m.upper() for m in methods])


@_ensure_connected
def delete_endpoint(endpoint):
    """Delete the given endpoint.

    Does not delete any associated backends.
    """
    retry_actor_failures(master_actor.delete_endpoint, endpoint)


@_ensure_connected
def update_backend_config(backend_tag, config_options):
    """Update a backend configuration for a backend tag.

    Keys not specified in the passed will be left unchanged.

    Args:
        backend_tag(str): A registered backend.
        config_options(dict): Backend config options to update.
    """
    if not isinstance(config_options, dict):
        raise ValueError("config_options must be a dictionary.")
    retry_actor_failures(master_actor.update_backend_config, backend_tag,
                         config_options)


@_ensure_connected
def get_backend_config(backend_tag):
    """Get the backend configuration for a backend tag.

    Args:
        backend_tag(str): A registered backend.
    """
    return retry_actor_failures(master_actor.get_backend_config, backend_tag)


@_ensure_connected
def create_backend(backend_tag,
                   func_or_class,
                   *actor_init_args,
                   ray_actor_options=None,
                   config=None):
    """Create a backend with the provided tag.

    The backend will serve requests with func_or_class.

    Args:
        backend_tag (str): a unique tag assign to identify this backend.
        func_or_class (callable, class): a function or a class implementing
            __call__.
        actor_init_args (optional): the arguments to pass to the class.
            initialization method.
        ray_actor_options (optional): options to be passed into the
            @ray.remote decorator for the backend actor.
        config: (optional) configuration options for this backend.
    """
    if config is None:
        config = {}
    if not isinstance(config, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    backend_config = BackendConfig(config, replica_config.accepts_batches)

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_config, replica_config)


@_ensure_connected
def delete_backend(backend_tag):
    """Delete the given backend.

    The backend must not currently be used by any endpoints.
    """
    retry_actor_failures(master_actor.delete_backend, backend_tag)


@_ensure_connected
def set_traffic(endpoint_name, traffic_policy_dictionary):
    """Associate a service endpoint with traffic policy.

    Example:

    >>> serve.set_traffic("service-name", {
        "backend:v1": 0.5,
        "backend:v2": 0.5
    })

    Args:
        endpoint_name (str): A registered service endpoint.
        traffic_policy_dictionary (dict): a dictionary maps backend names
            to their traffic weights. The weights must sum to 1.
    """
    retry_actor_failures(master_actor.set_traffic, endpoint_name,
                         traffic_policy_dictionary)


@_ensure_connected
def get_handle(endpoint_name,
               relative_slo_ms=None,
               absolute_slo_ms=None,
               missing_ok=False):
    """Retrieve RayServeHandle for service endpoint to invoke it from Python.

    Args:
        endpoint_name (str): A registered service endpoint.
        relative_slo_ms(float): Specify relative deadline in milliseconds for
            queries fired using this handle. (Default: None)
        absolute_slo_ms(float): Specify absolute deadline in milliseconds for
            queries fired using this handle. (Default: None)
        missing_ok (bool): If true, skip the check for the endpoint existence.
            It can be useful when the endpoint has not been registered.

    Returns:
        RayServeHandle
    """
    if not missing_ok:
        assert endpoint_name in retry_actor_failures(
            master_actor.get_all_endpoints)

    return RayServeHandle(
        retry_actor_failures(master_actor.get_router)[0],
        endpoint_name,
        relative_slo_ms,
        absolute_slo_ms,
    )


@_ensure_connected
def stat():
    """Retrieve metric statistics about ray serve system.

    Returns:
        metric_stats(Any): Metric information returned by the metric exporter.
            This can vary by exporter. For the default InMemoryExporter, it
            returns a list of the following format:

            .. code-block::python
              [
                  {"info": {
                      "name": ...,
                      "type": COUNTER|MEASURE,
                      "label_key": label_value,
                      "label_key": label_value,
                      ...
                  }, "value": float}
              ]

            For PrometheusExporter, it returns the metrics in prometheus format
            in plain text.
    """
    [metric_exporter] = ray.get(master_actor.get_metric_exporter.remote())
    return ray.get(metric_exporter.inspect_metrics.remote())