diff --git a/doc/source/serve/advanced.rst b/doc/source/serve/advanced.rst
index 2c61bb945..a21a6cd41 100644
--- a/doc/source/serve/advanced.rst
+++ b/doc/source/serve/advanced.rst
@@ -239,6 +239,69 @@ That's it. Let's take a look at an example:
.. literalinclude:: ../../../python/ray/serve/examples/doc/snippet_model_composition.py
+Monitoring
+==========
+
+Ray Serve exposes system metrics, such as the number of requests, through the
+Python API ``serve.stat`` and the HTTP ``/-/metrics`` endpoint. By default, it
+uses a custom structured format for easy parsing and debugging.
+
+Via Python:
+
+.. code-block:: python
+
+ serve.stat()
+ """
+ [..., {
+ "info": {
+ "name": "num_http_requests",
+ "route": "/-/routes",
+ "type": "MetricType.COUNTER"
+ },
+ "value": 1
+ },
+ {
+ "info": {
+ "name": "num_http_requests",
+ "route": "/echo",
+ "type": "MetricType.COUNTER"
+ },
+ "value": 10
+ }, ...]
+ """
+
+Via HTTP:
+
+.. code-block::
+
+ curl http://localhost:8000/-/metrics
+ # Returns the same output as above in JSON format.
+
+You can also access the result in `Prometheus <https://prometheus.io/>`_ format,
+by setting the ``metric_exporter`` option in :mod:`serve.init <ray.serve.init>`.
+
+.. code-block:: python
+
+ from ray.serve.metric import PrometheusExporter
+ serve.init(metric_exporter=PrometheusExporter)
+
+.. code-block::
+
+ curl http://localhost:8000/-/metrics
+
+ # HELP backend_request_counter_total Number of queries that have been processed in this replica
+ # TYPE backend_request_counter_total counter
+ backend_request_counter_total{backend="echo:v1"} 5.0
+ backend_request_counter_total{backend="echo:v2"} 5.0
+ ...
+
+The metric exporter is extensible and you can customize it for your own metric
+infrastructure. We are gathering feedback and welcome contributions! Feel free
+to submit a GitHub issue or chat with us in the #serve channel in `community slack <https://forms.gle/9TSdDYUgxYs8SA9e8>`_.
+
+Here's a simple example of a dummy exporter that writes metrics to a file:
+
+.. literalinclude:: ../../../python/ray/serve/examples/doc/snippet_metric_export.py
.. _serve-faq:
@@ -269,3 +332,4 @@ Once a endpoint is deleted, its tag can be reused.
.. code-block:: python
serve.delete_endpoint("simple_endpoint")
+
diff --git a/python/ray/serve/BUILD b/python/ray/serve/BUILD
index de97c45be..56e25b7b6 100644
--- a/python/ray/serve/BUILD
+++ b/python/ray/serve/BUILD
@@ -193,6 +193,14 @@ py_test(
deps = [":serve_lib"]
)
+# Test target for the metric-export doc snippet. ``srcs`` globs every .py
+# file under examples/doc.
+# NOTE(review): the entry point is presumably snippet_metric_export.py,
+# resolved from the target name since no ``main`` is given -- confirm.
+py_test(
+ name = "snippet_metric_export",
+ size = "small",
+ srcs = glob(["examples/doc/*.py"]),
+ tags = ["exclusive"],
+ deps = [":serve_lib"]
+)
+
# Disable the deployment tutorial test because it requires
# ray start --head in the background.
# py_test(
diff --git a/python/ray/serve/examples/doc/snippet_metric_export.py b/python/ray/serve/examples/doc/snippet_metric_export.py
new file mode 100644
index 000000000..059301955
--- /dev/null
+++ b/python/ray/serve/examples/doc/snippet_metric_export.py
@@ -0,0 +1,55 @@
+import json
+import time
+
+import requests
+
+from ray import serve
+from ray.serve.metric.exporter import ExporterInterface
+
+
+class FileExporter(ExporterInterface):
+ def __init__(self):
+ self.file = open("/tmp/serve_metrics.log", "w")
+
+ def export(self, metric_metadata, metric_batch):
+ for metric_item in metric_batch:
+ data = metric_metadata[metric_item.key].__dict__
+ data["labels"] = metric_item.labels
+ data["values"] = metric_item.value
+ self.file.write(json.dumps(data))
+ self.file.write("\n")
+ self.file.flush()
+
+ def inspect_metrics(self):
+ return "Metric is located at /tmp/serve_metrics.log"
+
+
+serve.init(metric_exporter=FileExporter)
+
+
+def echo(flask_request):
+ return "hello " + flask_request.args.get("name", "serve!")
+
+
+serve.create_backend("hello", echo)
+serve.create_endpoint("hello", backend="hello", route="/hello")
+
+for _ in range(5):
+ requests.get("http://127.0.0.1:8000/hello").text
+ time.sleep(0.2)
+
+print("Retrieving metrics from file...")
+with open("/tmp/serve_metrics.log") as metric_log:
+ for line in metric_log:
+ print(line)
+
+# Retrieving metrics from file...
+# {"name": "backend_worker_starts",
+# "type": 1,
+# "description": "The number of time this replica workers ...",
+# "label_names": ["replica_tag"],
+# "default_labels": {"backend": "hello"},
+# "labels": {"replica_tag": "hello#XwzPQn"},
+# "values": 1
+# }
+# ...
diff --git a/python/ray/serve/examples/echo_full.py b/python/ray/serve/examples/echo_full.py
index 0caf11d9b..4fa99b862 100644
--- a/python/ray/serve/examples/echo_full.py
+++ b/python/ray/serve/examples/echo_full.py
@@ -1,30 +1,13 @@
-"""
-Full example of ray.serve module
-"""
-
-import json
import time
-from pygments import formatters, highlight, lexers
-
import requests
import ray
import ray.serve as serve
-
-
-def pformat_color_json(d):
- """Use pygments to pretty format and colorize dictionary"""
- formatted_json = json.dumps(d, sort_keys=True, indent=4)
-
- colorful_json = highlight(formatted_json, lexers.JsonLexer(),
- formatters.TerminalFormatter())
-
- return colorful_json
-
+from ray.serve.metric import PrometheusExporter
# initialize ray serve system.
-serve.init()
+serve.init(metric_exporter=PrometheusExporter)
# a backend can be a function or class.
@@ -70,4 +53,4 @@ serve.update_backend_config("echo:v1", {"num_replicas": 2})
serve.update_backend_config("echo:v2", {"num_replicas": 2})
# As well as retrieving relevant system metrics
-print(pformat_color_json(serve.stat()))
+print(serve.stat().decode())