mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 23:25:54 +08:00
51 lines
1.5 KiB
Python
51 lines
1.5 KiB
Python
import time
|
|
from ray.serve.constants import DEFAULT_LATENCY_SLO_MS
|
|
import ray.cloudpickle as pickle
|
|
|
|
|
|
class RequestMetadata:
    """
    Request arguments required for enqueuing a request to the endpoint queue.

    Args:
        endpoint(str): A registered endpoint.
        request_context(TaskContext): Context of a request.
        relative_slo_ms(float): Latency objective in milliseconds, measured
            from the moment `adjust_relative_slo_ms` is called. When None,
            `adjust_relative_slo_ms` falls back to `DEFAULT_LATENCY_SLO_MS`.
        absolute_slo_ms(float): Stored as given; no method of this class
            reads it. Presumably an absolute deadline in milliseconds --
            TODO confirm against the callers.
        call_method(str): Stored as given, defaults to "__call__".
            Presumably the name of the method to invoke on the backend --
            TODO confirm against the callers.
        shard_key: Stored as given; not interpreted by this class.
    """

    def __init__(self,
                 endpoint,
                 request_context,
                 relative_slo_ms=None,
                 absolute_slo_ms=None,
                 call_method="__call__",
                 shard_key=None):
        # NOTE: the attribute names must stay identical to the keyword
        # argument names above, because `ray_deserialize` rebuilds the
        # object by passing the pickled `__dict__` back in as `**kwargs`.
        self.endpoint = endpoint
        self.request_context = request_context
        self.relative_slo_ms = relative_slo_ms
        self.absolute_slo_ms = absolute_slo_ms
        self.call_method = call_method
        self.shard_key = shard_key

    def adjust_relative_slo_ms(self) -> float:
        """Normalize the relative latency objective to an absolute timestamp.

        Only `relative_slo_ms` is consulted; `absolute_slo_ms` is ignored
        here. The object itself is not modified.

        Returns:
            float: The current wall-clock time in milliseconds since the
                epoch plus `relative_slo_ms`, or plus
                `DEFAULT_LATENCY_SLO_MS` when `relative_slo_ms` is None.
        """
        slo_ms = self.relative_slo_ms
        # Compare against None explicitly so that a relative SLO of 0 is
        # honoured instead of being replaced by the default.
        if slo_ms is None:
            slo_ms = DEFAULT_LATENCY_SLO_MS
        # time.time() is in seconds; the SLO values are in milliseconds.
        current_time_ms = time.time() * 1000
        return current_time_ms + slo_ms

    def ray_serialize(self) -> bytes:
        """Pickle this object's attribute dict for `ray_deserialize`."""
        return pickle.dumps(self.__dict__)

    @staticmethod
    def ray_deserialize(value) -> "RequestMetadata":
        """Rebuild a `RequestMetadata` from the output of `ray_serialize`.

        Args:
            value(bytes): A pickled dict whose keys match the keyword
                arguments of `__init__`.

        Returns:
            RequestMetadata: A new instance built from the unpickled dict.
        """
        # NOTE(review): unpickling executes arbitrary code embedded in the
        # payload. Only call this on bytes produced by `ray_serialize`
        # from a trusted source.
        kwargs = pickle.loads(value)
        return RequestMetadata(**kwargs)