from collections import defaultdict, deque

import numpy as np
import ray
from ray.experimental.serve.utils import get_custom_object_id, logger


class Query:
    """A queued request plus the object ID reserved for its result.

    Args:
        request_args: Positional arguments of the request.
        request_kwargs: Keyword arguments of the request.
        request_context: Context object attached to the request.
        result_object_id: Object ID under which the result is expected.
            When omitted, a new one is obtained from
            ``get_custom_object_id()``.
    """

    def __init__(self,
                 request_args,
                 request_kwargs,
                 request_context,
                 result_object_id=None):
        self.request_args = request_args
        self.request_kwargs = request_kwargs
        self.request_context = request_context

        if result_object_id is None:
            self.result_object_id = get_custom_object_id()
        else:
            self.result_object_id = result_object_id


class WorkIntent:
    """A worker's declaration that it is ready to receive one request.

    Args:
        work_object_id: Object ID into which the matched request is put.
            When omitted, a new one is obtained from
            ``get_custom_object_id()``.
    """

    def __init__(self, work_object_id=None):
        if work_object_id is None:
            self.work_object_id = get_custom_object_id()
        else:
            self.work_object_id = work_object_id


class CentralizedQueues:
    """A router that routes requests to available workers.

    The router accepts each request from the `enqueue_request` method and
    enqueues it. It also accepts worker requests to work (called
    work_intention in code) from workers via the `dequeue_request` method.
    The traffic policy is used to match requests with their corresponding
    workers.

    Behavior:
        >>> # pseudo-code
        >>> queue = CentralizedQueues()
        >>> queue.enqueue_request(
            "service-name", request_args, request_kwargs, request_context)
        # nothing happens, request is queued.
        # returns result ObjectID, which will contain the final result
        >>> queue.dequeue_request('backend-1')
        # nothing happens, work intention is queued.
        # returns work ObjectID, which will contain the future request
        # payload
        >>> queue.link('service-name', 'backend-1')
        # here the enqueued request is matched with a worker, request
        # data is put into work ObjectID, and the worker processes the
        # request and stores the result into result ObjectID

    Traffic policy splits the traffic among different workers
    probabilistically:

    1. When all backends are ready to receive traffic, we will randomly
       choose a backend based on the weights assigned by the traffic policy
       dictionary.

    2. When more than 1 but not all backends are ready, we will normalize
       the weights of the ready backends to 1 and choose a backend via
       sampling.

    3. When there is only 1 backend ready, we will only use that backend.
    """

    def __init__(self):
        # service_name -> request queue
        self.queues = defaultdict(deque)

        # service_name -> traffic_policy
        self.traffic = defaultdict(dict)

        # backend_name -> worker queue
        self.workers = defaultdict(deque)

    def is_ready(self):
        """Always return True."""
        return True

    def _serve_metric(self):
        """Return one queue-size entry per service currently in `queues`.

        Each key is ``"service_<name>_queue_size"`` and each value is a dict
        holding the queue length under ``"value"`` and the string
        ``"counter"`` under ``"type"``.
        """
        return {
            "service_{}_queue_size".format(service_name): {
                "value": len(queue),
                "type": "counter",
            }
            for service_name, queue in self.queues.items()
        }

    def enqueue_request(self, service, request_args, request_kwargs,
                        request_context):
        """Queue a request for `service` and try to dispatch pending work.

        Returns:
            The value of ``.binary()`` on the new query's result object ID.
        """
        query = Query(request_args, request_kwargs, request_context)
        self.queues[service].append(query)
        self.flush()
        return query.result_object_id.binary()

    def dequeue_request(self, backend):
        """Queue a work intention for `backend` and try to dispatch.

        Returns:
            The value of ``.binary()`` on the new intention's work object ID.
        """
        intention = WorkIntent()
        self.workers[backend].append(intention)
        self.flush()
        return intention.work_object_id.binary()

    def link(self, service, backend):
        """Add `backend` to the policy of `service` with weight 1.0."""
        logger.debug("Link %s with %s", service, backend)
        self.traffic[service][backend] = 1.0
        self.flush()

    def set_traffic(self, service, traffic_dict):
        """Replace the traffic policy of `service` with `traffic_dict`.

        Args:
            service: Name of the service whose policy is replaced.
            traffic_dict: Mapping of backend name to numeric weight.
        """
        logger.debug("Setting traffic for service %s to %s", service,
                     traffic_dict)
        self.traffic[service] = traffic_dict
        self.flush()

    def flush(self):
        """In the default case, flush calls ._flush.

        When this class is a Ray actor, .flush can be scheduled as a remote
        method invocation.
        """
        self._flush()

    def _get_available_backends(self, service):
        """List backends in the policy of `service` that have queued workers.

        The result is built from a set intersection, so its order is not
        meaningful.
        """
        backends_in_policy = set(self.traffic[service].keys())
        available_workers = {
            backend
            for backend, queues in self.workers.items() if len(queues) > 0
        }
        return list(backends_in_policy.intersection(available_workers))

    def _choose_backend(self, service, ready_backends):
        """Pick one backend out of a non-empty `ready_backends` list.

        With a single candidate it is returned directly, regardless of its
        weight. Otherwise the policy weights of the candidates are normalized
        to sum to 1 and one candidate is sampled accordingly.
        """
        # Fast path, only one backend available.
        if len(ready_backends) == 1:
            return ready_backends[0]

        # We have more than one backend available.
        # We will roll a dice among the multiple backends.
        # dtype=float is required: with all-integer weights the array would
        # be integral and the in-place division below would raise.
        backend_weights = np.array(
            [
                self.traffic[service][backend_name]
                for backend_name in ready_backends
            ],
            dtype=float)

        # Normalize the weights to 1.
        # NOTE: if every ready backend has weight 0 this produces NaN and
        # np.random.choice raises ValueError; nothing has been dequeued at
        # that point.
        backend_weights /= backend_weights.sum()

        # Sample an index rather than the name itself so the returned key is
        # the original object and not a NumPy scalar copy of it.
        chosen_index = np.random.choice(
            len(ready_backends), p=backend_weights)
        return ready_backends[chosen_index]

    def _flush(self):
        """Match queued requests with queued work intentions.

        For every service, while it has both pending requests and at least
        one ready backend, pop the oldest request and the oldest work
        intention of the chosen backend and put the request into the
        intention's work object ID.
        """
        for service, queue in self.queues.items():
            ready_backends = self._get_available_backends(service)

            while len(queue) and len(ready_backends):
                backend = self._choose_backend(service, ready_backends)
                request, work = (
                    queue.popleft(),
                    self.workers[backend].popleft(),
                )
                ray.worker.global_worker.put_object(work.work_object_id,
                                                    request)

                # The chosen backend may have run out of workers.
                ready_backends = self._get_available_backends(service)


@ray.remote
class CentralizedQueuesActor(CentralizedQueues):
    """Ray actor version of CentralizedQueues.

    Once a handle to the actor itself is registered, `flush` schedules
    `_flush` through that handle instead of running it inline.
    """

    # Handle to this actor; stays None until register_self_handle is called.
    self_handle = None

    def register_self_handle(self, handle_to_this_actor):
        """Store the handle used by `flush` to schedule `_flush` remotely."""
        self.self_handle = handle_to_this_actor

    def flush(self):
        """Schedule `_flush` via the self handle, or run it inline if unset."""
        if self.self_handle:
            self.self_handle._flush.remote()
        else:
            self._flush()