Files
ray/python/ray/serve/request_params.py
T

51 lines
1.5 KiB
Python

import time
from ray.serve.constants import DEFAULT_LATENCY_SLO_MS
import ray.cloudpickle as pickle
class RequestMetadata:
    """Request arguments required for enqueuing a request to the endpoint queue.

    Args:
        endpoint(str): A registered endpoint.
        request_context(TaskContext): Context of a request.
        relative_slo_ms(float): Latency objective in milliseconds, relative
            to the moment `adjust_relative_slo_ms` is called. When None,
            `DEFAULT_LATENCY_SLO_MS` is used instead.
        absolute_slo_ms(float): Stored as given; no method of this class
            reads it. Presumably an absolute deadline in milliseconds that
            callers use in place of the relative objective -- confirm
            against the router.
        call_method(str): Defaults to "__call__". Presumably the name of
            the method to invoke on the serving backend -- confirm against
            the caller.
        shard_key: Stored as given. Presumably used to pick a replica when
            routing -- confirm against the router.
    """

    def __init__(self,
                 endpoint,
                 request_context,
                 relative_slo_ms=None,
                 absolute_slo_ms=None,
                 call_method="__call__",
                 shard_key=None):
        # Attribute names deliberately mirror the constructor keywords:
        # `ray_deserialize` rebuilds an instance by passing the pickled
        # attribute dict straight back as keyword arguments.
        self.endpoint = endpoint
        self.request_context = request_context
        self.relative_slo_ms = relative_slo_ms
        self.absolute_slo_ms = absolute_slo_ms
        self.call_method = call_method
        self.shard_key = shard_key

    def adjust_relative_slo_ms(self) -> float:
        """Normalize the relative latency objective to an absolute timestamp.

        Returns:
            The current wall-clock time in milliseconds since the epoch
            plus `relative_slo_ms` (or `DEFAULT_LATENCY_SLO_MS` when no
            relative objective was given).
        """
        slo_ms = self.relative_slo_ms
        if slo_ms is None:
            slo_ms = DEFAULT_LATENCY_SLO_MS
        # time.time() is in seconds; scale to milliseconds before adding.
        current_time_ms = time.time() * 1000
        return current_time_ms + slo_ms

    def ray_serialize(self):
        """Return the pickled instance attribute dict."""
        return pickle.dumps(self.__dict__)

    @staticmethod
    def ray_deserialize(value):
        """Rebuild a `RequestMetadata` from the output of `ray_serialize`.

        Args:
            value: Pickled attribute dict as produced by `ray_serialize`.

        Returns:
            A new `RequestMetadata` constructed from the unpickled
            keyword arguments.
        """
        # NOTE(review): unpickling executes arbitrary code; this assumes
        # `value` only ever comes from `ray_serialize` on a trusted peer.
        kwargs = pickle.loads(value)
        return RequestMetadata(**kwargs)