From 6676b30eeffc4f93445f7f6ef0f7c556eebe8ad9 Mon Sep 17 00:00:00 2001 From: architkulkarni Date: Wed, 7 Oct 2020 10:57:40 -0700 Subject: [PATCH] [Serve] Serve multi node tests (#10980) --- ci/long_running_tests/workloads/serve.py | 14 +- .../baselines/2020-10-07/handle.txt | 9 + .../baselines/2020-10-07/microbenchmark.txt | 37 ++++ .../baselines/2020-10-07/scalability.txt | 209 ++++++++++++++++++ python/ray/serve/benchmarks/cluster.yaml | 45 ++++ python/ray/serve/benchmarks/handle.py | 86 +++++++ python/ray/serve/benchmarks/microbenchmark.py | 67 ++++-- python/ray/serve/benchmarks/scalability.py | 119 ++++++++++ 8 files changed, 553 insertions(+), 33 deletions(-) create mode 100644 python/ray/serve/benchmarks/baselines/2020-10-07/handle.txt create mode 100644 python/ray/serve/benchmarks/baselines/2020-10-07/microbenchmark.txt create mode 100644 python/ray/serve/benchmarks/baselines/2020-10-07/scalability.txt create mode 100644 python/ray/serve/benchmarks/cluster.yaml create mode 100644 python/ray/serve/benchmarks/handle.py create mode 100644 python/ray/serve/benchmarks/scalability.py diff --git a/ci/long_running_tests/workloads/serve.py b/ci/long_running_tests/workloads/serve.py index 58f405c11..6d404ac51 100644 --- a/ci/long_running_tests/workloads/serve.py +++ b/ci/long_running_tests/workloads/serve.py @@ -24,13 +24,6 @@ for i in range(num_nodes): redis_max_memory=redis_max_memory, dashboard_host="0.0.0.0") -print("Downloading load testing tool") -subprocess.call([ - "bash", "-c", "rm hey_linux_amd64 || true;" - "wget https://storage.googleapis.com/hey-release/hey_linux_amd64;" - "chmod +x hey_linux_amd64" -]) - ray.init(address=cluster.address, dashboard_host="0.0.0.0") client = serve.start() @@ -54,12 +47,15 @@ for _ in range(5): time.sleep(0.5) connections = int(config["num_replicas"] * config["max_batch_size"] * 0.75) +num_threads = 2 +time_to_run = "60m" while True: proc = subprocess.Popen( [ - "./hey_linux_amd64", "-c", - str(connections), "-z", "60m", "http://127.0.0.1:8000/echo" + "wrk", "-c", + str(connections), "-t", + str(num_threads), "-s", time_to_run, "http://127.0.0.1:8000/echo" ], stdout=PIPE, stderr=PIPE) diff --git a/python/ray/serve/benchmarks/baselines/2020-10-07/handle.txt b/python/ray/serve/benchmarks/baselines/2020-10-07/handle.txt new file mode 100644 index 000000000..ca4f94a28 --- /dev/null +++ b/python/ray/serve/benchmarks/baselines/2020-10-07/handle.txt @@ -0,0 +1,9 @@ +0 forwarders and 1 worker replicas: 1282 requests/s +0 forwarders and 5 worker replicas: 1375 requests/s +0 forwarders and 10 worker replicas: 1362 requests/s +1 forwarders and 1 worker replicas: 608 requests/s +1 forwarders and 5 worker replicas: 626 requests/s +1 forwarders and 10 worker replicas: 627 requests/s +2 forwarders and 1 worker replicas: 609 requests/s +2 forwarders and 5 worker replicas: 620 requests/s +2 forwarders and 10 worker replicas: 609 requests/s \ No newline at end of file diff --git a/python/ray/serve/benchmarks/baselines/2020-10-07/microbenchmark.txt b/python/ray/serve/benchmarks/baselines/2020-10-07/microbenchmark.txt new file mode 100644 index 000000000..e7d07e5bd --- /dev/null +++ b/python/ray/serve/benchmarks/baselines/2020-10-07/microbenchmark.txt @@ -0,0 +1,37 @@ +2020-10-07 09:20:47,110 INFO worker.py:634 -- Connecting to existing Ray cluster at address: 172.31.146.145:6379 +{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=False: + single client small data 94.94 +- 0.41 requests/s + 8 clients small data 480.75 +- 28.58 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False: + single client small data 94.14 +- 1.89 requests/s + 8 clients small data 457.8 +- 8.53 requests/s +{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False: + single client small data 95.34 +- 0.81 requests/s + 8 clients small data 455.35 +- 2.0 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=False: + single client small data 77.61 +- 1.22 requests/s + 8 clients small data 623.41 +- 5.7 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False: + single client small data 77.38 +- 0.73 requests/s + 8 clients small data 642.45 +- 9.81 requests/s +{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False: + single client small data 77.99 +- 2.44 requests/s + 8 clients small data 636.86 +- 10.21 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=True: + single client small data 52.91 +- 0.18 requests/s + 8 clients small data 311.4 +- 1.87 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True: + single client small data 52.68 +- 0.45 requests/s + 8 clients small data 311.67 +- 2.45 requests/s +{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True: + single client small data 52.79 +- 0.5 requests/s + 8 clients small data 309.75 +- 3.03 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=True: + single client small data 49.92 +- 0.52 requests/s + 8 clients small data 295.44 +- 2.01 requests/s +{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True: + single client small data 49.74 +- 0.74 requests/s + 8 clients small data 296.69 +- 1.84 requests/s +{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True: + single client small data 49.57 +- 0.36 requests/s + 8 clients small data 293.47 +- 1.88 requests/s \ No newline at end of file diff --git a/python/ray/serve/benchmarks/baselines/2020-10-07/scalability.txt b/python/ray/serve/benchmarks/baselines/2020-10-07/scalability.txt new file mode 100644 index 000000000..978a0b076 --- /dev/null +++ b/python/ray/serve/benchmarks/baselines/2020-10-07/scalability.txt @@ -0,0 +1,209 @@ +2020-10-07 09:15:57,387 INFO :111 -- Results for node 1 of 21: +2020-10-07 09:15:57,388 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 263.96ms 96.29ms 506.39ms 69.14% + Req/Sec 115.63 79.29 650.00 75.40% + 2307 requests in 10.00s, 315.66KB read +Requests/sec: 230.61 +Transfer/sec: 31.55KB + +2020-10-07 09:15:57,388 INFO :111 -- Results for node 2 of 21: +2020-10-07 09:15:57,389 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 282.79ms 75.00ms 500.26ms 63.32% + Req/Sec 108.20 60.17 240.00 58.92% + 2159 requests in 10.02s, 295.42KB read +Requests/sec: 215.47 +Transfer/sec: 29.48KB + +2020-10-07 09:15:57,389 INFO :111 -- Results for node 3 of 21: +2020-10-07 09:15:57,390 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 284.60ms 75.70ms 505.43ms 63.33% + Req/Sec 108.37 59.15 240.00 63.39% + 2149 requests in 10.00s, 294.43KB read +Requests/sec: 214.80 +Transfer/sec: 29.43KB + +2020-10-07 09:15:57,390 INFO :111 -- Results for node 4 of 21: +2020-10-07 09:15:57,391 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 282.22ms 77.38ms 508.22ms 65.28% + Req/Sec 108.98 61.47 250.00 62.98% + 2163 requests in 10.03s, 295.97KB read +Requests/sec: 215.60 +Transfer/sec: 29.50KB + +2020-10-07 09:15:57,392 INFO :111 -- Results for node 5 of 21: +2020-10-07 09:15:57,392 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 278.77ms 79.19ms 509.73ms 64.19% + Req/Sec 109.77 62.22 303.00 67.03% + 2195 requests in 10.01s, 300.35KB read +Requests/sec: 219.32 +Transfer/sec: 30.01KB + +2020-10-07 09:15:57,395 INFO :111 -- Results for node 6 of 21: +2020-10-07 09:15:57,396 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 267.82ms 93.51ms 500.75ms 67.95% + Req/Sec 114.75 72.64 480.00 69.57% + 2281 requests in 10.01s, 312.35KB read +Requests/sec: 227.96 +Transfer/sec: 31.22KB + +2020-10-07 09:15:57,396 INFO :111 -- Results for node 7 of 21: +2020-10-07 09:15:57,397 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 285.04ms 75.40ms 492.30ms 63.57% + Req/Sec 108.51 61.33 242.00 58.06% + 2144 requests in 10.00s, 293.50KB read +Requests/sec: 214.32 +Transfer/sec: 29.34KB + +2020-10-07 09:15:57,397 INFO :111 -- Results for node 8 of 21: +2020-10-07 09:15:57,398 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 271.42ms 89.56ms 508.15ms 68.90% + Req/Sec 113.23 66.06 340.00 65.95% + 2254 requests in 10.01s, 308.16KB read +Requests/sec: 225.13 +Transfer/sec: 30.78KB + +2020-10-07 09:15:57,399 INFO :111 -- Results for node 9 of 21: +2020-10-07 09:15:57,400 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 269.81ms 91.28ms 500.31ms 68.51% + Req/Sec 113.60 72.20 410.00 64.17% + 2255 requests in 10.01s, 308.67KB read +Requests/sec: 225.38 +Transfer/sec: 30.85KB + +2020-10-07 09:15:57,400 INFO :111 -- Results for node 10 of 21: +2020-10-07 09:15:57,401 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 283.93ms 74.67ms 507.84ms 64.11% + Req/Sec 108.93 55.89 242.00 62.09% + 2154 requests in 10.01s, 294.86KB read +Requests/sec: 215.24 +Transfer/sec: 29.46KB + +2020-10-07 09:15:57,401 INFO :111 -- Results for node 11 of 21: +2020-10-07 09:15:57,402 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 282.35ms 75.86ms 495.90ms 63.86% + Req/Sec 108.15 59.10 222.00 60.22% + 2158 requests in 10.00s, 295.41KB read +Requests/sec: 215.72 +Transfer/sec: 29.53KB + +2020-10-07 09:15:57,402 INFO :111 -- Results for node 12 of 21: +2020-10-07 09:15:57,403 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 283.37ms 75.04ms 499.01ms 63.79% + Req/Sec 108.77 61.15 262.00 60.00% + 2157 requests in 10.01s, 295.03KB read +Requests/sec: 215.55 +Transfer/sec: 29.48KB + +2020-10-07 09:15:57,406 INFO :111 -- Results for node 13 of 21: +2020-10-07 09:15:57,406 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 273.55ms 86.08ms 504.93ms 66.70% + Req/Sec 112.26 64.36 272.00 60.33% + 2234 requests in 10.01s, 305.93KB read +Requests/sec: 223.25 +Transfer/sec: 30.57KB + +2020-10-07 09:15:57,407 INFO :111 -- Results for node 14 of 21: +2020-10-07 09:15:57,407 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 279.76ms 78.28ms 506.39ms 65.57% + Req/Sec 109.14 62.56 272.00 60.75% + 2181 requests in 10.00s, 298.93KB read +Requests/sec: 218.03 +Transfer/sec: 29.88KB + +2020-10-07 09:15:57,408 INFO :111 -- Results for node 15 of 21: +2020-10-07 09:15:57,408 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 285.56ms 75.73ms 493.67ms 63.94% + Req/Sec 108.23 58.97 240.00 61.41% + 2149 requests in 10.04s, 294.43KB read +Requests/sec: 214.12 +Transfer/sec: 29.34KB + +2020-10-07 09:15:57,409 INFO :111 -- Results for node 16 of 21: +2020-10-07 09:15:57,410 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 275.84ms 83.78ms 504.36ms 66.76% + Req/Sec 110.52 61.73 290.00 63.64% + 2208 requests in 10.00s, 302.00KB read +Requests/sec: 220.72 +Transfer/sec: 30.19KB + +2020-10-07 09:15:57,410 INFO :111 -- Results for node 17 of 21: +2020-10-07 09:15:57,411 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 277.22ms 82.84ms 501.03ms 66.64% + Req/Sec 110.92 59.32 240.00 63.78% + 2206 requests in 10.04s, 301.60KB read +Requests/sec: 219.78 +Transfer/sec: 30.05KB + +2020-10-07 09:15:57,411 INFO :111 -- Results for node 18 of 21: +2020-10-07 09:15:57,412 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 282.49ms 76.79ms 495.28ms 64.60% + Req/Sec 108.16 61.20 250.00 61.62% + 2161 requests in 10.04s, 296.44KB read +Requests/sec: 215.19 +Transfer/sec: 29.52KB + +2020-10-07 09:15:57,414 INFO :111 -- Results for node 19 of 21: +2020-10-07 09:15:57,414 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 281.33ms 77.56ms 504.81ms 65.10% + Req/Sec 108.46 61.28 252.00 60.44% + 2166 requests in 10.01s, 296.50KB read +Requests/sec: 216.46 +Transfer/sec: 29.63KB + +2020-10-07 09:15:57,415 INFO :111 -- Results for node 20 of 21: +2020-10-07 09:15:57,416 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 284.18ms 74.78ms 506.23ms 62.89% + Req/Sec 109.16 60.05 240.00 59.46% + 2153 requests in 10.01s, 294.85KB read +Requests/sec: 215.16 +Transfer/sec: 29.47KB + +2020-10-07 09:15:57,417 INFO :111 -- Results for node 21 of 21: +2020-10-07 09:15:57,417 INFO :112 -- Running 10s test @ http://127.0.0.1:8000/hey + 2 threads and 63 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 281.58ms 78.06ms 499.69ms 65.93% + Req/Sec 108.51 60.56 260.00 64.29% + 2163 requests in 10.01s, 296.09KB read +Requests/sec: 216.12 +Transfer/sec: 29.59KB diff --git a/python/ray/serve/benchmarks/cluster.yaml b/python/ray/serve/benchmarks/cluster.yaml new file mode 100644 index 000000000..70e6d9df4 --- /dev/null +++ b/python/ray/serve/benchmarks/cluster.yaml @@ -0,0 +1,45 @@ +cluster_name: default +min_workers: 22 +max_workers: 22 +initial_workers: 22 +autoscaling_mode: default +docker: + image: 'anyscale/ray-ml:latest' + container_name: ray_container + pull_before_run: true +target_utilization_fraction: 0.8 +idle_timeout_minutes: 5 +provider: + type: aws + region: us-west-2 + availability_zone: us-west-2a +auth: + ssh_user: ubuntu +head_node: + InstanceType: m5.xlarge + BlockDeviceMappings: + - + DeviceName: /dev/sda1 + Ebs: + VolumeSize: 100 +worker_nodes: + InstanceType: m5.xlarge +file_mounts: {} +cluster_synced_files: + - /tmp/ray_tmp_mount/~/blank-serve-test +file_mounts_sync_continuously: true +initialization_commands: [] +setup_commands: + - apt-get install build-essential libssl-dev git -y + - 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make && cp wrk /usr/local/bin' +head_setup_commands: [] +worker_setup_commands: [] +head_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml +worker_start_ray_commands: + - ray stop + - 'ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076' +metadata: + anyscale: + working_dir: ~/blank-serve-test diff --git a/python/ray/serve/benchmarks/handle.py b/python/ray/serve/benchmarks/handle.py new file mode 100644 index 000000000..b8d243e88 --- /dev/null +++ b/python/ray/serve/benchmarks/handle.py @@ -0,0 +1,86 @@ +# A test that stresses the serve handles. We spin up a backend with a bunch +# (0-2) of replicas that just forward requests to another backend. +# +# By comparing using the forward replicas with just calling the worker +# replicas, we measure the (latency) overhead in the handle. This answers +# the question: How much of a latency/throughput hit is there when I +# compose models? +# +# By comparing the qps as we fix the number of forwarder replicas and vary the +# number of worker replicas, we measure the limit of a single async actor. This +# answers the question: How many "ForwardActor"s or these kinds of high-level +# pipeline workers do I need to provision for my workload? every 1k qps, +# 2k qps? +# +# Sample output: +# 0 forwarders and 1 worker replicas: 1282 requests/s +# 0 forwarders and 5 worker replicas: 1375 requests/s +# 0 forwarders and 10 worker replicas: 1362 requests/s +# 1 forwarders and 1 worker replicas: 608 requests/s +# 1 forwarders and 5 worker replicas: 626 requests/s +# 1 forwarders and 10 worker replicas: 627 requests/s +# 2 forwarders and 1 worker replicas: 609 requests/s +# 2 forwarders and 5 worker replicas: 620 requests/s +# 2 forwarders and 10 worker replicas: 609 requests/s + +import ray +from ray import serve +from ray.serve import BackendConfig +from ray.serve.utils import logger +import time + +num_queries = 2000 + +ray.init(address="auto") + +client = serve.start() + + +def hello_world(_): + return b"Hello World" + + +class ForwardActor: + def __init__(self): + client = serve.connect() + self.handle = client.get_handle("hello_world") + + async def __call__(self, _): + await self.handle.remote() + + +client.create_backend("hello_world", hello_world) +client.create_endpoint("hello_world", backend="hello_world") + +client.create_backend("ForwardActor", ForwardActor) +client.create_endpoint("ForwardActor", backend="ForwardActor") + + +def run_test(num_replicas, num_forwarders): + replicas_config = BackendConfig(num_replicas=num_replicas) + client.update_backend_config("hello_world", replicas_config) + + if (num_forwarders == 0): + handle = client.get_handle("hello_world") + else: + forwarders_config = BackendConfig(num_replicas=num_forwarders) + client.update_backend_config("ForwardActor", forwarders_config) + handle = client.get_handle("ForwardActor") + + # warmup - helpful to wait for gc.collect() and actors to start + start = time.time() + while time.time() - start < 1: + ray.get(handle.remote()) + + # real test + start = time.time() + ray.get([handle.remote() for _ in range(num_queries)]) + qps = num_queries / (time.time() - start) + + logger.info("{} forwarders and {} worker replicas: {} requests/s".format( + num_forwarders, num_replicas, int(qps))) + + +for num_forwarders in [0, 1, 2]: + for num_replicas in [1, 5, 10]: + run_test(num_replicas, num_forwarders) diff --git a/python/ray/serve/benchmarks/microbenchmark.py b/python/ray/serve/benchmarks/microbenchmark.py index de63581c9..e4f058e0f 100644 --- a/python/ray/serve/benchmarks/microbenchmark.py +++ b/python/ray/serve/benchmarks/microbenchmark.py @@ -1,3 +1,7 @@ +# Runs several scenarios with varying max batch size, max concurrent queries, +# number of replicas, and with intermediate serve handles (to simulate ensemble +# models) either on or off. + import aiohttp import asyncio import time @@ -79,33 +83,48 @@ async def trial(actors, session, data_size): async def main(): ray.init(log_to_driver=False) client = serve.start() - client.create_backend("backend", backend) client.create_endpoint("endpoint", backend="backend", route="/api") + for intermediate_handles in [False, True]: + if (intermediate_handles): - actors = [Client.remote() for _ in range(NUM_CLIENTS)] - for num_replicas in [1, 8]: - for backend_config in [ - { - "max_batch_size": 1, - "max_concurrent_queries": 1 - }, - { - "max_batch_size": 1, - "max_concurrent_queries": 10000 - }, - { - "max_batch_size": 10000, - "max_concurrent_queries": 10000 - }, - ]: - backend_config["num_replicas"] = num_replicas - client.update_backend_config("backend", backend_config) - print(repr(backend_config) + ":") - async with aiohttp.ClientSession() as session: - # TODO(edoakes): large data causes broken pipe errors. - for data_size in ["small"]: - await trial(actors, session, data_size) + client.create_endpoint( + "backend", backend="backend", route="/backend") + + class forwardActor: + def __init__(self): + self.handle = client.get_handle("backend") + + def __call__(self, _): + return ray.get(self.handle.remote()) + + client.create_backend("forwardActor", forwardActor) + client.set_traffic("endpoint", {"backend": 0, "forwardActor": 1}) + actors = [Client.remote() for _ in range(NUM_CLIENTS)] + for num_replicas in [1, 8]: + for backend_config in [ + { + "max_batch_size": 1, + "max_concurrent_queries": 1 + }, + { + "max_batch_size": 1, + "max_concurrent_queries": 10000 + }, + { + "max_batch_size": 10000, + "max_concurrent_queries": 10000 + }, + ]: + backend_config["num_replicas"] = num_replicas + client.update_backend_config("backend", backend_config) + print( + repr(backend_config) + ", intermediate_handles=" + + str(intermediate_handles) + ":") + async with aiohttp.ClientSession() as session: + # TODO(edoakes): large data causes broken pipe errors. + for data_size in ["small"]: + await trial(actors, session, data_size) if __name__ == "__main__": diff --git a/python/ray/serve/benchmarks/scalability.py b/python/ray/serve/benchmarks/scalability.py new file mode 100644 index 000000000..d11564567 --- /dev/null +++ b/python/ray/serve/benchmarks/scalability.py @@ -0,0 +1,119 @@ +# A multi-node scalability test. We put an http proxy on the head node and spin +# up 20 nodes and put as many replicas as possible on the cluster, and run a +# stress test. +# +# Test will measure latency and throughput under a high load using `wrk` +# running on each node. +# +# Results for node 1 of 21: +# Running 10s test @ http://127.0.0.1:8000/hey +# 2 threads and 63 connections +# Thread Stats Avg Stdev Max +/- Stdev +# Latency 263.96ms 96.29ms 506.39ms 69.14% +# Req/Sec 115.63 79.29 650.00 75.40% +# 2307 requests in 10.00s, 315.66KB read +# Requests/sec: 230.61 +# Transfer/sec: 31.55KB +# +# Results for node 2 of 21: +# Running 10s test @ http://127.0.0.1:8000/hey +# 2 threads and 63 connections +# Thread Stats Avg Stdev Max +/- Stdev +# Latency 282.79ms 75.00ms 500.26ms 63.32% +# Req/Sec 108.20 60.17 240.00 58.92% +# 2159 requests in 10.02s, 295.42KB read +# Requests/sec: 215.47 +# Transfer/sec: 29.48KB +# +# [...] similar results for remaining nodes + +import time +import subprocess +import requests + +import ray +from ray import serve +from ray.serve import BackendConfig +from ray.serve.utils import logger + +from ray.util.placement_group import (placement_group, remove_placement_group) + +ray.shutdown() +ray.init(address="auto") +client = serve.start() + +# These numbers need to correspond with the autoscaler config file. +# The number of remote nodes in the autoscaler should upper bound +# these because sometimes nodes fail to update. +num_workers = 20 +expected_num_nodes = num_workers + 1 +cpus_per_node = 4 +num_remote_cpus = expected_num_nodes * cpus_per_node + +# Wait until the expected number of nodes have joined the cluster. +while True: + num_nodes = len(ray.nodes()) + logger.info("Waiting for nodes {}/{}".format(num_nodes, + expected_num_nodes)) + if num_nodes >= expected_num_nodes: + break + time.sleep(5) +logger.info("Nodes have all joined. There are %s resources.", + ray.cluster_resources()) + + +def hey(_): + time.sleep(0.01) # Sleep for 10ms + return b"hey" + + +num_connections = int(num_remote_cpus * 0.75) +num_threads = 2 +time_to_run = "10s" + +pg = placement_group( + [{ + "CPU": 1 + } for _ in range(expected_num_nodes)], strategy="STRICT_SPREAD") +ray.get(pg.ready()) + +# The number of replicas is the number of cores remaining after accounting +# for the one HTTP proxy actor on each node, the "hey" requester task on each +# node, and the serve controller. +# num_replicas = expected_num_nodes * (cpus_per_node - 2) - 1 +num_replicas = ray.available_resources()["CPU"] +logger.info("Starting %i replicas", num_replicas) +client.create_backend( + "hey", hey, config=BackendConfig(num_replicas=num_replicas)) +client.create_endpoint("hey", backend="hey", route="/hey") + + +@ray.remote +def run_wrk(): + logger.info("Warming up for ~3 seconds") + for _ in range(5): + resp = requests.get("http://127.0.0.1:8000/hey").text + logger.info("Received response \'" + resp + "\'") + time.sleep(0.5) + + result = subprocess.run( + [ + "wrk", "-c", + str(num_connections), "-t", + str(num_threads), "-d", time_to_run, "http://127.0.0.1:8000/hey" + ], + stdout=subprocess.PIPE) + return result.stdout.decode() + + +results = ray.get([ + run_wrk.options(placement_group=pg, + placement_group_bundle_index=i).remote() + for i in range(expected_num_nodes) +]) + +for i in range(expected_num_nodes): + logger.info("Results for node %i of %i:", i + 1, expected_num_nodes) + logger.info(results[i]) + +remove_placement_group(pg)