diff --git a/doc/source/serve/advanced.rst b/doc/source/serve/advanced.rst
index 2c61bb945..a21a6cd41 100644
--- a/doc/source/serve/advanced.rst
+++ b/doc/source/serve/advanced.rst
@@ -239,6 +239,69 @@ That's it. Let's take a look at an example:
 
 .. literalinclude:: ../../../python/ray/serve/examples/doc/snippet_model_composition.py
 
+Monitoring
+==========
+
+Ray Serve exposes system metrics, such as the number of requests, through the
+Python API ``serve.stat`` and the HTTP ``/-/metrics`` API. By default, it uses
+a custom structured format for easy parsing and debugging.
+
+Via Python:
+
+.. code-block:: python
+
+  serve.stat()
+  """
+    [..., {
+          "info": {
+              "name": "num_http_requests",
+              "route": "/-/routes",
+              "type": "MetricType.COUNTER"
+          },
+          "value": 1
+      },
+      {
+          "info": {
+              "name": "num_http_requests",
+              "route": "/echo",
+              "type": "MetricType.COUNTER"
+          },
+          "value": 10
+      }, ...]
+  """
+
+Via HTTP:
+
+.. code-block::
+
+  curl http://localhost:8000/-/metrics
+  # Returns the same output as above in JSON format.
+
+You can also access the result in `Prometheus <https://prometheus.io/>`_ format,
+by setting the ``metric_exporter`` option in :mod:`serve.init <ray.serve.init>`.
+
+.. code-block:: python
+
+  from ray.serve.metric import PrometheusExporter
+  serve.init(metric_exporter=PrometheusExporter)
+
+.. code-block::
+
+  curl http://localhost:8000/-/metrics
+
+  # HELP backend_request_counter_total Number of queries that have been processed in this replica
+  # TYPE backend_request_counter_total counter
+  backend_request_counter_total{backend="echo:v1"} 5.0
+  backend_request_counter_total{backend="echo:v2"} 5.0
+  ...
+
+The metric exporter is extensible and you can customize it for your own metric
+infrastructure. We are gathering feedback and welcome contributions! Feel free
+to submit a GitHub issue or chat with us in the #serve channel of our `community Slack <https://forms.gle/9TSdDYUgxYs8SA9e8>`_.
+
+Here's a simple example of a dummy exporter that writes metrics to a file:
+
+.. literalinclude:: ../../../python/ray/serve/examples/doc/snippet_metric_export.py
 
 .. _serve-faq:
 
@@ -269,3 +332,4 @@ Once a endpoint is deleted, its tag can be reused.
 .. code-block:: python
 
   serve.delete_endpoint("simple_endpoint")
+
diff --git a/python/ray/serve/BUILD b/python/ray/serve/BUILD
index de97c45be..56e25b7b6 100644
--- a/python/ray/serve/BUILD
+++ b/python/ray/serve/BUILD
@@ -193,6 +193,14 @@ py_test(
     deps = [":serve_lib"]
 )
 
+py_test(
+    name = "snippet_metric_export",
+    size = "small",
+    srcs = glob(["examples/doc/*.py"]),
+    tags = ["exclusive"],
+    deps = [":serve_lib"]
+)
+
 # Disable the deployment tutorial test because it requires
 # ray start --head in the background.
 # py_test(
diff --git a/python/ray/serve/examples/doc/snippet_metric_export.py b/python/ray/serve/examples/doc/snippet_metric_export.py
new file mode 100644
index 000000000..059301955
--- /dev/null
+++ b/python/ray/serve/examples/doc/snippet_metric_export.py
@@ -0,0 +1,63 @@
+import json
+import time
+
+import requests
+
+from ray import serve
+from ray.serve.metric.exporter import ExporterInterface
+
+
+class FileExporter(ExporterInterface):
+    """Dummy exporter that writes each metric as one JSON line to a file."""
+
+    def __init__(self):
+        self.file = open("/tmp/serve_metrics.log", "w")
+
+    def export(self, metric_metadata, metric_batch):
+        """Write every item of ``metric_batch`` to the log, one per line."""
+        for metric_item in metric_batch:
+            # Copy the attributes so that adding the per-item fields below
+            # does not mutate the shared metadata object itself.
+            data = dict(metric_metadata[metric_item.key].__dict__)
+            data["labels"] = metric_item.labels
+            data["values"] = metric_item.value
+            self.file.write(json.dumps(data))
+            self.file.write("\n")
+        # Flush so the records are readable from the file right away.
+        self.file.flush()
+
+    def inspect_metrics(self):
+        """Return a message pointing at the file holding the metrics."""
+        return "Metric is located at /tmp/serve_metrics.log"
+
+
+serve.init(metric_exporter=FileExporter)
+
+
+def echo(flask_request):
+    return "hello " + flask_request.args.get("name", "serve!")
+
+
+serve.create_backend("hello", echo)
+serve.create_endpoint("hello", backend="hello", route="/hello")
+
+# Send a few requests to the endpoint, pausing briefly between them.
+for _ in range(5):
+    requests.get("http://127.0.0.1:8000/hello").text
+    time.sleep(0.2)
+
+print("Retrieving metrics from file...")
+with open("/tmp/serve_metrics.log") as metric_log:
+    for line in metric_log:
+        print(line)
+
+# Retrieving metrics from file...
+# {"name": "backend_worker_starts",
+#  "type": 1,
+#  "description": "The number of time this replica workers ...",
+#  "label_names": ["replica_tag"],
+#  "default_labels": {"backend": "hello"},
+#  "labels": {"replica_tag": "hello#XwzPQn"},
+#  "values": 1
+# }
+# ...
diff --git a/python/ray/serve/examples/echo_full.py b/python/ray/serve/examples/echo_full.py
index 0caf11d9b..4fa99b862 100644
--- a/python/ray/serve/examples/echo_full.py
+++ b/python/ray/serve/examples/echo_full.py
@@ -1,30 +1,13 @@
-"""
-Full example of ray.serve module
-"""
-
-import json
 import time
 
-from pygments import formatters, highlight, lexers
-
 import requests
 
 import ray
 import ray.serve as serve
-
-
-def pformat_color_json(d):
-    """Use pygments to pretty format and colorize dictionary"""
-    formatted_json = json.dumps(d, sort_keys=True, indent=4)
-
-    colorful_json = highlight(formatted_json, lexers.JsonLexer(),
-                              formatters.TerminalFormatter())
-
-    return colorful_json
-
+from ray.serve.metric import PrometheusExporter
 
 # initialize ray serve system.
-serve.init()
+serve.init(metric_exporter=PrometheusExporter)
 
 
 # a backend can be a function or class.
@@ -70,4 +53,4 @@ serve.update_backend_config("echo:v1", {"num_replicas": 2})
 serve.update_backend_config("echo:v2", {"num_replicas": 2})
 
 # As well as retrieving relevant system metrics
-print(pformat_color_json(serve.stat()))
+print(serve.stat().decode())