Files
ray/python/ray/experimental/serve/queues.py
T
Simon Mo 9bb3633cd9 [Serve] Implement metric interface (#5852)
* Implement metric interface

* Address comment: made actor_handles a dict

* Fix iteration

* Lint

* Mark lightweight actors as num_cpus=0 to prevent resource starvation

* Be more explicit about the readiness condition

* Make task_runner non-blocking

* Lint
2019-10-07 09:29:26 -07:00

178 lines
6.1 KiB
Python

from collections import defaultdict, deque
import numpy as np
import ray
from ray.experimental.serve.utils import get_custom_object_id, logger
class Query:
    """A queued request together with the ID reserved for its result.

    Args:
        request_args: Positional arguments of the request, stored as given.
        request_kwargs: Keyword arguments of the request, stored as given.
        request_context: Context of the request, stored as given.
        result_object_id: ID to use for the result. When None, a fresh one
            is obtained from `get_custom_object_id()`.
    """

    def __init__(self,
                 request_args,
                 request_kwargs,
                 request_context,
                 result_object_id=None):
        # Only generate a new ID when the caller did not supply one.
        self.result_object_id = (get_custom_object_id()
                                 if result_object_id is None else
                                 result_object_id)
        self.request_context = request_context
        self.request_kwargs = request_kwargs
        self.request_args = request_args
class WorkIntent:
    """A worker's declaration that it is ready to take a request.

    Args:
        work_object_id: ID to use for the work payload. When None, a fresh
            one is obtained from `get_custom_object_id()`.
    """

    def __init__(self, work_object_id=None):
        # Only generate a new ID when the caller did not supply one.
        self.work_object_id = (get_custom_object_id()
                               if work_object_id is None else work_object_id)
class CentralizedQueues:
    """A router that routes requests to available workers.

    The router accepts each request from the `enqueue_request` method and
    enqueues it. It also accepts worker requests to work (called
    work_intention in code) from workers via the `dequeue_request` method.
    The traffic policy is used to match requests with their corresponding
    workers.

    Behavior:
        >>> # pseudo-code
        >>> queue = CentralizedQueues()
        >>> queue.enqueue_request(
            "service-name", request_args, request_kwargs, request_context)
        # nothing happens, request is queued.
        # returns the binary form of the result ObjectID, which will
        # contain the final result
        >>> queue.dequeue_request('backend-1')
        # nothing happens, work intention is queued.
        # returns the binary form of the work ObjectID, which will
        # contain the future request payload
        >>> queue.link('service-name', 'backend-1')
        # here the enqueued request is matched with the worker, request
        # data is put into work ObjectID, and the worker processes the
        # request and stores the result into result ObjectID

    Traffic policy splits the traffic among different workers
    probabilistically:

    1. When all backends are ready to receive traffic, we will randomly
       choose a backend based on the weights assigned by the traffic policy
       dictionary.

    2. When more than 1 but not all backends are ready, we will normalize the
       weights of the ready backends to 1 and choose a backend via sampling.

    3. When there is only 1 backend ready, we will only use that backend.
    """

    def __init__(self):
        # service_name -> request queue
        self.queues = defaultdict(deque)

        # service_name -> traffic_policy (backend_name -> weight)
        self.traffic = defaultdict(dict)

        # backend_name -> worker queue
        self.workers = defaultdict(deque)

    def is_ready(self):
        """Report readiness; this implementation is always ready."""
        return True

    def _serve_metric(self):
        """Return the current length of every request queue.

        Returns:
            A dict mapping "service_<service_name>_queue_size" to a dict
            with the queue length under "value" and "counter" under "type".
        """
        return {
            "service_{}_queue_size".format(service_name): {
                "value": len(queue),
                "type": "counter",
            }
            for service_name, queue in self.queues.items()
        }

    def enqueue_request(self, service, request_args, request_kwargs,
                        request_context):
        """Queue a request for `service` and try to dispatch it.

        Returns:
            The result of `.binary()` on the new query's result object ID.
        """
        query = Query(request_args, request_kwargs, request_context)
        self.queues[service].append(query)
        self.flush()
        return query.result_object_id.binary()

    def dequeue_request(self, backend):
        """Queue a work intention for `backend` and try to dispatch.

        Returns:
            The result of `.binary()` on the new intention's work object ID.
        """
        intention = WorkIntent()
        self.workers[backend].append(intention)
        self.flush()
        return intention.work_object_id.binary()

    def link(self, service, backend):
        """Add `backend` to the policy of `service` with weight 1.0."""
        logger.debug("Link %s with %s", service, backend)
        self.traffic[service][backend] = 1.0
        self.flush()

    def set_traffic(self, service, traffic_dict):
        """Replace the traffic policy of `service` with `traffic_dict`.

        Args:
            service: Name of the service whose policy is replaced.
            traffic_dict: Mapping of backend name to numeric weight.
        """
        logger.debug("Setting traffic for service %s to %s", service,
                     traffic_dict)
        self.traffic[service] = traffic_dict
        self.flush()

    def flush(self):
        """In the default case, flush calls ._flush.

        When this class is a Ray actor, .flush can be scheduled as a remote
        method invocation.
        """
        self._flush()

    def _get_available_backends(self, service):
        """List the backends in the policy of `service` that have waiting
        work intentions."""
        backends_in_policy = set(self.traffic[service].keys())
        available_workers = {
            backend
            for backend, queues in self.workers.items() if len(queues) > 0
        }
        return list(backends_in_policy.intersection(available_workers))

    def _choose_backend(self, service, ready_backends):
        """Pick one backend name out of the non-empty `ready_backends`.

        With a single candidate that candidate is returned directly.
        Otherwise the policy weights of the candidates are normalized to
        sum to 1 and one candidate is sampled with those probabilities.
        """
        # Fast path, only one backend available.
        if len(ready_backends) == 1:
            return ready_backends[0]

        # Force a float array: the in-place division below raises a casting
        # TypeError on an integer array, which is what NumPy infers when
        # every weight in the policy is an int.
        backend_weights = np.array(
            [
                self.traffic[service][backend_name]
                for backend_name in ready_backends
            ],
            dtype=float)
        # Normalize the weights to 1.
        backend_weights /= backend_weights.sum()
        # Sample an index rather than the names themselves so the caller
        # gets back the original str, not a NumPy string scalar.
        chosen_index = np.random.choice(len(ready_backends), p=backend_weights)
        return ready_backends[chosen_index]

    def _flush(self):
        """Match queued requests with queued work intentions.

        For every service, while it has both a pending request and at least
        one ready backend, pop the oldest request and the oldest work
        intention of the chosen backend and put the request into the
        intention's work object ID.
        """
        for service, queue in self.queues.items():
            ready_backends = self._get_available_backends(service)

            while len(queue) and len(ready_backends):
                backend = self._choose_backend(service, ready_backends)
                request = queue.popleft()
                work = self.workers[backend].popleft()
                ray.worker.global_worker.put_object(work.work_object_id,
                                                    request)

                # The chosen backend may have run out of work intentions.
                ready_backends = self._get_available_backends(service)
@ray.remote
class CentralizedQueuesActor(CentralizedQueues):
    """Ray actor variant of CentralizedQueues.

    Once a handle to this actor has been registered, `flush` submits
    `_flush` through that handle as a remote call instead of running it
    inline.
    """

    # Handle to this actor; stays None until register_self_handle is called.
    self_handle = None

    def register_self_handle(self, handle_to_this_actor):
        """Remember the handle that `flush` uses to schedule `_flush`."""
        self.self_handle = handle_to_this_actor

    def flush(self):
        """Schedule `_flush` remotely when a handle is registered; otherwise
        run it inline."""
        if not self.self_handle:
            self._flush()
            return
        self.self_handle._flush.remote()