[Metric] custom metrics refinement (#10861)

* In progress

* In Progress.

* Addressed code review.

* Add unit tests.

* Add a simple doc.

* Fixed test failure.

* Fix all test failures from serve.

* Addressed code review.
This commit is contained in:
SangBin Cho
2020-09-25 09:10:28 -07:00
committed by GitHub
parent 609c1b8acd
commit 109481afd9
9 changed files with 421 additions and 124 deletions
+72 -60
View File
@@ -12,7 +12,7 @@ from ray.async_compat import sync_to_async
from ray.serve.utils import (parse_request_item, _get_logger, chain_future,
unpack_future)
from ray.serve.exceptions import RayServeException
from ray.experimental import metrics
from ray.util import metrics
from ray.serve.config import BackendConfig
from ray.serve.router import Query
from ray.serve.constants import DEFAULT_LATENCY_BUCKET_MS
@@ -159,43 +159,72 @@ class RayServeWorker:
self.num_ongoing_requests = 0
self.request_counter = metrics.Count(
"backend_request_counter", ("Number of queries that have been "
"processed in this replica"),
"requests", ["backend"])
self.error_counter = metrics.Count("backend_error_counter",
("Number of exceptions that have "
"occurred in the backend"),
"errors", ["backend"])
"backend_request_counter",
description=("Number of queries that have been "
"processed in this replica"),
tag_keys=("backend", ))
self.request_counter.set_default_tags({"backend": self.backend_tag})
self.error_counter = metrics.Count(
"backend_error_counter",
description=("Number of exceptions that have "
"occurred in the backend"),
tag_keys=("backend", ))
self.error_counter.set_default_tags({"backend": self.backend_tag})
self.restart_counter = metrics.Count(
"backend_worker_starts",
("The number of time this replica workers "
"has been restarted due to failure."), "restarts",
["backend", "replica_tag"])
self.queuing_latency_tracker = metrics.Histogram(
"backend_queuing_latency_ms",
("The latency for queries waiting in the replica's queue "
"waiting to be processed or batched."), "ms",
DEFAULT_LATENCY_BUCKET_MS, ["backend", "replica_tag"])
self.processing_latency_tracker = metrics.Histogram(
"backend_processing_latency_ms",
"The latency for queries to be processed", "ms",
DEFAULT_LATENCY_BUCKET_MS,
["backend", "replica_tag", "batch_size"])
self.num_queued_items = metrics.Gauge(
"replica_queued_queries",
"Current number of queries queued in the the backend replicas",
"requests", ["backend", "replica_tag"])
self.num_processing_items = metrics.Gauge(
"replica_processing_queries",
"Current number of queries being processed", "requests",
["backend", "replica_tag"])
self.restart_counter.record(1, {
description=("The number of time this replica workers "
"has been restarted due to failure."),
tag_keys=("backend", "replica_tag"))
self.restart_counter.set_default_tags({
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.queuing_latency_tracker = metrics.Histogram(
"backend_queuing_latency_ms",
description=(
"The latency for queries waiting in the replica's queue "
"waiting to be processed or batched."),
boundaries=DEFAULT_LATENCY_BUCKET_MS,
tag_keys=("backend", "replica_tag"))
self.queuing_latency_tracker.set_default_tags({
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.processing_latency_tracker = metrics.Histogram(
"backend_processing_latency_ms",
description="The latency for queries to be processed",
boundaries=DEFAULT_LATENCY_BUCKET_MS,
tag_keys=("backend", "replica_tag", "batch_size"))
self.processing_latency_tracker.set_default_tags({
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.num_queued_items = metrics.Gauge(
"replica_queued_queries",
description=("Current number of queries queued in the "
"the backend replicas"),
tag_keys=("backend", "replica_tag"))
self.num_queued_items.set_default_tags({
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.num_processing_items = metrics.Gauge(
"replica_processing_queries",
description="Current number of queries being processed",
tag_keys=("backend", "replica_tag"))
self.num_processing_items.set_default_tags({
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.restart_counter.record(1)
asyncio.get_event_loop().create_task(self.main_loop())
def get_runner_method(self, request_item: Query) -> Callable:
@@ -216,17 +245,13 @@ class RayServeWorker:
start = time.time()
try:
result = await method_to_call(arg)
self.request_counter.record(1, {"backend": self.backend_tag})
self.request_counter.record(1)
except Exception as e:
result = wrap_to_ray_error(e)
self.error_counter.record(1, {"backend": self.backend_tag})
self.error_counter.record(1)
self.processing_latency_tracker.record(
(time.time() - start) * 1000, {
"backend": self.backend_tag,
"replica": self.replica_tag,
"batch_size": "1"
})
(time.time() - start) * 1000, tags={"batch_size": "1"})
return result
@@ -248,8 +273,7 @@ class RayServeWorker:
"Please only send the same type of requests in batching "
"mode.")
self.request_counter.record(batch_size,
{"backend": self.backend_tag})
self.request_counter.record(batch_size)
call_method = ensure_async(call_methods.pop())
result_list = await call_method(args)
@@ -274,15 +298,12 @@ class RayServeWorker:
raise RayServeException(error_message)
except Exception as e:
wrapped_exception = wrap_to_ray_error(e)
self.error_counter.record(1, {"backend": self.backend_tag})
self.error_counter.record(1)
result_list = [wrapped_exception for _ in range(batch_size)]
self.processing_latency_tracker.record(
(time.time() - timing_start) * 1000, {
"backend": self.backend_tag,
"replica_tag": self.replica_tag,
"batch_size": str(batch_size)
})
(time.time() - timing_start) * 1000,
tags={"batch_size": str(batch_size)})
return result_list
@@ -294,21 +315,12 @@ class RayServeWorker:
batch = await self.batch_queue.wait_for_batch()
# Record metrics
self.num_queued_items.record(self.batch_queue.qsize(), {
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.num_processing_items.record(
self.num_ongoing_requests - self.batch_queue.qsize(), {
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.num_queued_items.record(self.batch_queue.qsize())
self.num_processing_items.record(self.num_ongoing_requests -
self.batch_queue.qsize())
for query in batch:
queuing_time = (time.time() - query.tick_enter_replica) * 1000
self.queuing_latency_tracker.record(queuing_time, {
"backend": self.backend_tag,
"replica_tag": self.replica_tag
})
self.queuing_latency_tracker.record(queuing_time)
all_evaluated_futures = []
+5 -4
View File
@@ -7,7 +7,7 @@ import uvicorn
import ray
from ray.exceptions import RayTaskError
from ray.serve.context import TaskContext
from ray.experimental import metrics
from ray.util import metrics
from ray.serve.http_util import Response
from ray.serve.router import Router, RequestMetadata
@@ -32,8 +32,9 @@ class HTTPProxy:
self.route_table = await controller.get_router_config.remote()
self.request_counter = metrics.Count(
"num_http_requests", "The number of HTTP requests processed",
"requests", ["route"])
"num_http_requests",
description="The number of HTTP requests processed",
tag_keys=("route", ))
self.router = Router()
await self.router.setup(name, controller_name)
@@ -80,7 +81,7 @@ class HTTPProxy:
assert scope["type"] == "http"
current_path = scope["path"]
self.request_counter.record(1, {"route": current_path})
self.request_counter.record(1, tags={"route": current_path})
if current_path.startswith("/-/"):
await self._handle_system_request(scope, receive, send)
+20 -13
View File
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
from ray.exceptions import RayTaskError
import ray
from ray.experimental import metrics
from ray.util import metrics
from ray.serve.context import TaskContext
from ray.serve.endpoint_policy import RandomEndpointPolicy
from ray.serve.utils import logger, chain_future
@@ -139,21 +139,25 @@ class Router:
# -- Metrics Registration -- #
self.num_router_requests = metrics.Count(
"num_router_requests",
"Number of requests processed by the router.", "requests",
["endpoint"])
description="Number of requests processed by the router.",
tag_keys=("endpoint", ))
self.num_error_endpoint_requests = metrics.Count(
"num_error_endpoint_requests",
("Number of requests that errored when getting results "
"for the endpoint."), "requests", ["endpoint"])
description=(
"Number of requests that errored when getting results "
"for the endpoint."),
tag_keys=("endpoint", ))
self.num_error_backend_requests = metrics.Count(
"num_error_backend_requests",
("Number of requests that errored when getting result "
"from the backend."), "requests", ["backend"])
description=("Number of requests that errored when getting result "
"from the backend."),
tag_keys=("backend", ))
self.backend_queue_size = metrics.Gauge(
"backend_queued_queries",
"Current number of queries queued in the router for a backend",
"requests", ["backend"])
description=("Current number of queries queued "
"in the router for a backend"),
tag_keys=("backend", ))
asyncio.get_event_loop().create_task(self.report_queue_lengths())
@@ -161,7 +165,7 @@ class Router:
**request_kwargs):
endpoint = request_meta.endpoint
logger.debug("Received a request for endpoint {}".format(endpoint))
self.num_router_requests.record(1, {"endpoint": endpoint})
self.num_router_requests.record(1, tags={"endpoint": endpoint})
request_context = request_meta.request_context
query = Query(
@@ -177,7 +181,8 @@ class Router:
try:
result = await query.async_future
except RayTaskError as e:
self.num_error_endpoint_requests.record(1, {"endpoint": endpoint})
self.num_error_endpoint_requests.record(
1, tags={"endpoint": endpoint})
result = e
return result
@@ -301,7 +306,8 @@ class Router:
else:
result = await object_ref
except RayTaskError as error:
self.num_error_backend_requests.record(1, {"backend": backend})
self.num_error_backend_requests.record(
1, tags={"backend": backend})
result = error
self.queries_counter[backend][backend_replica_tag] -= 1
await self.mark_worker_idle(backend, backend_replica_tag)
@@ -358,6 +364,7 @@ class Router:
self.name, queue_lengths)
for backend, length in queue_lengths.items():
self.backend_queue_size.record(length, {"backend": backend})
self.backend_queue_size.record(
length, tags={"backend": backend})
await asyncio.sleep(REPORT_QUEUE_LENGTH_PERIOD_S)