mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:10:40 +08:00
9bb3633cd9
* Implement metric interface * Address comment: made actor_handles a dict * Fix iteration * Lint * Mark lightweight actors as num_cpus=0 to prevent resource starvation * Be more explicit about the readiness condition * Make task_runner non-blocking * Lint
178 lines
6.1 KiB
Python
178 lines
6.1 KiB
Python
from collections import defaultdict, deque
|
|
|
|
import numpy as np
|
|
|
|
import ray
|
|
from ray.experimental.serve.utils import get_custom_object_id, logger
|
|
|
|
|
|
class Query:
|
|
def __init__(self,
|
|
request_args,
|
|
request_kwargs,
|
|
request_context,
|
|
result_object_id=None):
|
|
self.request_args = request_args
|
|
self.request_kwargs = request_kwargs
|
|
self.request_context = request_context
|
|
|
|
if result_object_id is None:
|
|
self.result_object_id = get_custom_object_id()
|
|
else:
|
|
self.result_object_id = result_object_id
|
|
|
|
|
|
class WorkIntent:
|
|
def __init__(self, work_object_id=None):
|
|
if work_object_id is None:
|
|
self.work_object_id = get_custom_object_id()
|
|
else:
|
|
self.work_object_id = work_object_id
|
|
|
|
|
|
class CentralizedQueues:
|
|
"""A router that routes request to available workers.
|
|
|
|
Router aceepts each request from the `enqueue_request` method and enqueues
|
|
it. It also accepts worker request to work (called work_intention in code)
|
|
from workers via the `dequeue_request` method. The traffic policy is used
|
|
to match requests with their corresponding workers.
|
|
|
|
Behavior:
|
|
>>> # psuedo-code
|
|
>>> queue = CentralizedQueues()
|
|
>>> queue.enqueue_request(
|
|
"service-name", request_args, request_kwargs, request_context)
|
|
# nothing happens, request is queued.
|
|
# returns result ObjectID, which will contains the final result
|
|
>>> queue.dequeue_request('backend-1')
|
|
# nothing happens, work intention is queued.
|
|
# return work ObjectID, which will contains the future request payload
|
|
>>> queue.link('service-name', 'backend-1')
|
|
# here the enqueue_requester is matched with worker, request
|
|
# data is put into work ObjectID, and the worker processes the request
|
|
# and store the result into result ObjectID
|
|
|
|
Traffic policy splits the traffic among different workers
|
|
probabilistically:
|
|
|
|
1. When all backends are ready to receive traffic, we will randomly
|
|
choose a backend based on the weights assigned by the traffic policy
|
|
dictionary.
|
|
|
|
2. When more than 1 but not all backends are ready, we will normalize the
|
|
weights of the ready backends to 1 and choose a backend via sampling.
|
|
|
|
3. When there is only 1 backend ready, we will only use that backend.
|
|
"""
|
|
|
|
def __init__(self):
|
|
# service_name -> request queue
|
|
self.queues = defaultdict(deque)
|
|
|
|
# service_name -> traffic_policy
|
|
self.traffic = defaultdict(dict)
|
|
|
|
# backend_name -> worker queue
|
|
self.workers = defaultdict(deque)
|
|
|
|
def is_ready(self):
|
|
return True
|
|
|
|
def _serve_metric(self):
|
|
return {
|
|
"service_{}_queue_size".format(service_name): {
|
|
"value": len(queue),
|
|
"type": "counter",
|
|
}
|
|
for service_name, queue in self.queues.items()
|
|
}
|
|
|
|
def enqueue_request(self, service, request_args, request_kwargs,
|
|
request_context):
|
|
query = Query(request_args, request_kwargs, request_context)
|
|
self.queues[service].append(query)
|
|
self.flush()
|
|
return query.result_object_id.binary()
|
|
|
|
def dequeue_request(self, backend):
|
|
intention = WorkIntent()
|
|
self.workers[backend].append(intention)
|
|
self.flush()
|
|
return intention.work_object_id.binary()
|
|
|
|
def link(self, service, backend):
|
|
logger.debug("Link %s with %s", service, backend)
|
|
self.traffic[service][backend] = 1.0
|
|
self.flush()
|
|
|
|
def set_traffic(self, service, traffic_dict):
|
|
logger.debug("Setting traffic for service %s to %s", service,
|
|
traffic_dict)
|
|
self.traffic[service] = traffic_dict
|
|
self.flush()
|
|
|
|
def flush(self):
|
|
"""In the default case, flush calls ._flush.
|
|
|
|
When this class is a Ray actor, .flush can be scheduled as a remote
|
|
method invocation.
|
|
"""
|
|
self._flush()
|
|
|
|
def _get_available_backends(self, service):
|
|
backends_in_policy = set(self.traffic[service].keys())
|
|
available_workers = {
|
|
backend
|
|
for backend, queues in self.workers.items() if len(queues) > 0
|
|
}
|
|
return list(backends_in_policy.intersection(available_workers))
|
|
|
|
def _flush(self):
|
|
for service, queue in self.queues.items():
|
|
ready_backends = self._get_available_backends(service)
|
|
|
|
while len(queue) and len(ready_backends):
|
|
# Fast path, only one backend available.
|
|
if len(ready_backends) == 1:
|
|
backend = ready_backends[0]
|
|
request, work = (queue.popleft(),
|
|
self.workers[backend].popleft())
|
|
ray.worker.global_worker.put_object(
|
|
work.work_object_id, request)
|
|
|
|
# We have more than one backend available.
|
|
# We will roll a dice among the multiple backends.
|
|
else:
|
|
backend_weights = np.array([
|
|
self.traffic[service][backend_name]
|
|
for backend_name in ready_backends
|
|
])
|
|
# Normalize the weights to 1.
|
|
backend_weights /= backend_weights.sum()
|
|
chosen_backend = np.random.choice(
|
|
ready_backends, p=backend_weights).squeeze()
|
|
|
|
request, work = (
|
|
queue.popleft(),
|
|
self.workers[chosen_backend].popleft(),
|
|
)
|
|
ray.worker.global_worker.put_object(
|
|
work.work_object_id, request)
|
|
|
|
ready_backends = self._get_available_backends(service)
|
|
|
|
|
|
@ray.remote
|
|
class CentralizedQueuesActor(CentralizedQueues):
|
|
self_handle = None
|
|
|
|
def register_self_handle(self, handle_to_this_actor):
|
|
self.self_handle = handle_to_this_actor
|
|
|
|
def flush(self):
|
|
if self.self_handle:
|
|
self.self_handle._flush.remote()
|
|
else:
|
|
self._flush()
|