[Serve] Serve multi node tests (#10980)

This commit is contained in:
architkulkarni
2020-10-07 10:57:40 -07:00
committed by GitHub
parent 68106425db
commit 6676b30eef
8 changed files with 553 additions and 33 deletions
@@ -0,0 +1,9 @@
0 forwarders and 1 worker replicas: 1282 requests/s
0 forwarders and 5 worker replicas: 1375 requests/s
0 forwarders and 10 worker replicas: 1362 requests/s
1 forwarders and 1 worker replicas: 608 requests/s
1 forwarders and 5 worker replicas: 626 requests/s
1 forwarders and 10 worker replicas: 627 requests/s
2 forwarders and 1 worker replicas: 609 requests/s
2 forwarders and 5 worker replicas: 620 requests/s
2 forwarders and 10 worker replicas: 609 requests/s
@@ -0,0 +1,37 @@
2020-10-07 09:20:47,110 INFO worker.py:634 -- Connecting to existing Ray cluster at address: 172.31.146.145:6379
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=False:
single client small data 94.94 +- 0.41 requests/s
8 clients small data 480.75 +- 28.58 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False:
single client small data 94.14 +- 1.89 requests/s
8 clients small data 457.8 +- 8.53 requests/s
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False:
single client small data 95.34 +- 0.81 requests/s
8 clients small data 455.35 +- 2.0 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=False:
single client small data 77.61 +- 1.22 requests/s
8 clients small data 623.41 +- 5.7 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False:
single client small data 77.38 +- 0.73 requests/s
8 clients small data 642.45 +- 9.81 requests/s
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False:
single client small data 77.99 +- 2.44 requests/s
8 clients small data 636.86 +- 10.21 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=True:
single client small data 52.91 +- 0.18 requests/s
8 clients small data 311.4 +- 1.87 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True:
single client small data 52.68 +- 0.45 requests/s
8 clients small data 311.67 +- 2.45 requests/s
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True:
single client small data 52.79 +- 0.5 requests/s
8 clients small data 309.75 +- 3.03 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=True:
single client small data 49.92 +- 0.52 requests/s
8 clients small data 295.44 +- 2.01 requests/s
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True:
single client small data 49.74 +- 0.74 requests/s
8 clients small data 296.69 +- 1.84 requests/s
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True:
single client small data 49.57 +- 0.36 requests/s
8 clients small data 293.47 +- 1.88 requests/s
@@ -0,0 +1,209 @@
2020-10-07 09:15:57,387 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 1 of 21:
2020-10-07 09:15:57,388 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 263.96ms 96.29ms 506.39ms 69.14%
Req/Sec 115.63 79.29 650.00 75.40%
2307 requests in 10.00s, 315.66KB read
Requests/sec: 230.61
Transfer/sec: 31.55KB
2020-10-07 09:15:57,388 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 2 of 21:
2020-10-07 09:15:57,389 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 282.79ms 75.00ms 500.26ms 63.32%
Req/Sec 108.20 60.17 240.00 58.92%
2159 requests in 10.02s, 295.42KB read
Requests/sec: 215.47
Transfer/sec: 29.48KB
2020-10-07 09:15:57,389 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 3 of 21:
2020-10-07 09:15:57,390 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 284.60ms 75.70ms 505.43ms 63.33%
Req/Sec 108.37 59.15 240.00 63.39%
2149 requests in 10.00s, 294.43KB read
Requests/sec: 214.80
Transfer/sec: 29.43KB
2020-10-07 09:15:57,390 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 4 of 21:
2020-10-07 09:15:57,391 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 282.22ms 77.38ms 508.22ms 65.28%
Req/Sec 108.98 61.47 250.00 62.98%
2163 requests in 10.03s, 295.97KB read
Requests/sec: 215.60
Transfer/sec: 29.50KB
2020-10-07 09:15:57,392 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 5 of 21:
2020-10-07 09:15:57,392 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 278.77ms 79.19ms 509.73ms 64.19%
Req/Sec 109.77 62.22 303.00 67.03%
2195 requests in 10.01s, 300.35KB read
Requests/sec: 219.32
Transfer/sec: 30.01KB
2020-10-07 09:15:57,395 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 6 of 21:
2020-10-07 09:15:57,396 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 267.82ms 93.51ms 500.75ms 67.95%
Req/Sec 114.75 72.64 480.00 69.57%
2281 requests in 10.01s, 312.35KB read
Requests/sec: 227.96
Transfer/sec: 31.22KB
2020-10-07 09:15:57,396 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 7 of 21:
2020-10-07 09:15:57,397 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 285.04ms 75.40ms 492.30ms 63.57%
Req/Sec 108.51 61.33 242.00 58.06%
2144 requests in 10.00s, 293.50KB read
Requests/sec: 214.32
Transfer/sec: 29.34KB
2020-10-07 09:15:57,397 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 8 of 21:
2020-10-07 09:15:57,398 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 271.42ms 89.56ms 508.15ms 68.90%
Req/Sec 113.23 66.06 340.00 65.95%
2254 requests in 10.01s, 308.16KB read
Requests/sec: 225.13
Transfer/sec: 30.78KB
2020-10-07 09:15:57,399 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 9 of 21:
2020-10-07 09:15:57,400 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 269.81ms 91.28ms 500.31ms 68.51%
Req/Sec 113.60 72.20 410.00 64.17%
2255 requests in 10.01s, 308.67KB read
Requests/sec: 225.38
Transfer/sec: 30.85KB
2020-10-07 09:15:57,400 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 10 of 21:
2020-10-07 09:15:57,401 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 283.93ms 74.67ms 507.84ms 64.11%
Req/Sec 108.93 55.89 242.00 62.09%
2154 requests in 10.01s, 294.86KB read
Requests/sec: 215.24
Transfer/sec: 29.46KB
2020-10-07 09:15:57,401 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 11 of 21:
2020-10-07 09:15:57,402 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 282.35ms 75.86ms 495.90ms 63.86%
Req/Sec 108.15 59.10 222.00 60.22%
2158 requests in 10.00s, 295.41KB read
Requests/sec: 215.72
Transfer/sec: 29.53KB
2020-10-07 09:15:57,402 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 12 of 21:
2020-10-07 09:15:57,403 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 283.37ms 75.04ms 499.01ms 63.79%
Req/Sec 108.77 61.15 262.00 60.00%
2157 requests in 10.01s, 295.03KB read
Requests/sec: 215.55
Transfer/sec: 29.48KB
2020-10-07 09:15:57,406 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 13 of 21:
2020-10-07 09:15:57,406 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 273.55ms 86.08ms 504.93ms 66.70%
Req/Sec 112.26 64.36 272.00 60.33%
2234 requests in 10.01s, 305.93KB read
Requests/sec: 223.25
Transfer/sec: 30.57KB
2020-10-07 09:15:57,407 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 14 of 21:
2020-10-07 09:15:57,407 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 279.76ms 78.28ms 506.39ms 65.57%
Req/Sec 109.14 62.56 272.00 60.75%
2181 requests in 10.00s, 298.93KB read
Requests/sec: 218.03
Transfer/sec: 29.88KB
2020-10-07 09:15:57,408 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 15 of 21:
2020-10-07 09:15:57,408 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 285.56ms 75.73ms 493.67ms 63.94%
Req/Sec 108.23 58.97 240.00 61.41%
2149 requests in 10.04s, 294.43KB read
Requests/sec: 214.12
Transfer/sec: 29.34KB
2020-10-07 09:15:57,409 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 16 of 21:
2020-10-07 09:15:57,410 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 275.84ms 83.78ms 504.36ms 66.76%
Req/Sec 110.52 61.73 290.00 63.64%
2208 requests in 10.00s, 302.00KB read
Requests/sec: 220.72
Transfer/sec: 30.19KB
2020-10-07 09:15:57,410 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 17 of 21:
2020-10-07 09:15:57,411 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 277.22ms 82.84ms 501.03ms 66.64%
Req/Sec 110.92 59.32 240.00 63.78%
2206 requests in 10.04s, 301.60KB read
Requests/sec: 219.78
Transfer/sec: 30.05KB
2020-10-07 09:15:57,411 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 18 of 21:
2020-10-07 09:15:57,412 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 282.49ms 76.79ms 495.28ms 64.60%
Req/Sec 108.16 61.20 250.00 61.62%
2161 requests in 10.04s, 296.44KB read
Requests/sec: 215.19
Transfer/sec: 29.52KB
2020-10-07 09:15:57,414 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 19 of 21:
2020-10-07 09:15:57,414 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 281.33ms 77.56ms 504.81ms 65.10%
Req/Sec 108.46 61.28 252.00 60.44%
2166 requests in 10.01s, 296.50KB read
Requests/sec: 216.46
Transfer/sec: 29.63KB
2020-10-07 09:15:57,415 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 20 of 21:
2020-10-07 09:15:57,416 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 284.18ms 74.78ms 506.23ms 62.89%
Req/Sec 109.16 60.05 240.00 59.46%
2153 requests in 10.01s, 294.85KB read
Requests/sec: 215.16
Transfer/sec: 29.47KB
2020-10-07 09:15:57,417 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 21 of 21:
2020-10-07 09:15:57,417 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
2 threads and 63 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 281.58ms 78.06ms 499.69ms 65.93%
Req/Sec 108.51 60.56 260.00 64.29%
2163 requests in 10.01s, 296.09KB read
Requests/sec: 216.12
Transfer/sec: 29.59KB
+45
View File
@@ -0,0 +1,45 @@
cluster_name: default
min_workers: 22
max_workers: 22
initial_workers: 22
autoscaling_mode: default
docker:
image: 'anyscale/ray-ml:latest'
container_name: ray_container
pull_before_run: true
target_utilization_fraction: 0.8
idle_timeout_minutes: 5
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
auth:
ssh_user: ubuntu
head_node:
InstanceType: m5.xlarge
BlockDeviceMappings:
-
DeviceName: /dev/sda1
Ebs:
VolumeSize: 100
worker_nodes:
InstanceType: m5.xlarge
file_mounts: {}
cluster_synced_files:
- /tmp/ray_tmp_mount/~/blank-serve-test
file_mounts_sync_continuously: true
initialization_commands: []
setup_commands:
- apt-get install build-essential libssl-dev git -y
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make && cp wrk /usr/local/bin'
head_setup_commands: []
worker_setup_commands: []
head_start_ray_commands:
- ray stop
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
worker_start_ray_commands:
- ray stop
- 'ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076'
metadata:
anyscale:
working_dir: ~/blank-serve-test
+86
View File
@@ -0,0 +1,86 @@
# A test that stresses the serve handles. We spin up a backend with a bunch
# (0-2) of replicas that just forward requests to another backend.
#
# By comparing using the forward replicas with just calling the worker
# replicas, we measure the (latency) overhead in the handle. This answers
# the question: How much of a latency/throughput hit is there when I
# compose models?
#
# By comparing the qps as we fix the number of forwarder replicas and vary the
# number of worker replicas, we measure the limit of a single async actor. This
# answers the question: How many "ForwardActor"s or these kinds of high-level
# pipeline workers do I need to provision for my workload? every 1k qps,
# 2k qps?
#
# Sample output:
# 0 forwarders and 1 worker replicas: 1282 requests/s
# 0 forwarders and 5 worker replicas: 1375 requests/s
# 0 forwarders and 10 worker replicas: 1362 requests/s
# 1 forwarders and 1 worker replicas: 608 requests/s
# 1 forwarders and 5 worker replicas: 626 requests/s
# 1 forwarders and 10 worker replicas: 627 requests/s
# 2 forwarders and 1 worker replicas: 609 requests/s
# 2 forwarders and 5 worker replicas: 620 requests/s
# 2 forwarders and 10 worker replicas: 609 requests/s
import ray
from ray import serve
from ray.serve import BackendConfig
from ray.serve.utils import logger
import time
num_queries = 2000
ray.init(address="auto")
client = serve.start()
def hello_world(_):
return b"Hello World"
class ForwardActor:
def __init__(self):
client = serve.connect()
self.handle = client.get_handle("hello_world")
async def __call__(self, _):
await self.handle.remote()
client.create_backend("hello_world", hello_world)
client.create_endpoint("hello_world", backend="hello_world")
client.create_backend("ForwardActor", ForwardActor)
client.create_endpoint("ForwardActor", backend="ForwardActor")
def run_test(num_replicas, num_forwarders):
replicas_config = BackendConfig(num_replicas=num_replicas)
client.update_backend_config("hello_world", replicas_config)
if (num_forwarders == 0):
handle = client.get_handle("hello_world")
else:
forwarders_config = BackendConfig(num_replicas=num_forwarders)
client.update_backend_config("ForwardActor", forwarders_config)
handle = client.get_handle("ForwardActor")
# warmup - helpful to wait for gc.collect() and actors to start
start = time.time()
while time.time() - start < 1:
ray.get(handle.remote())
# real test
start = time.time()
ray.get([handle.remote() for _ in range(num_queries)])
qps = num_queries / (time.time() - start)
logger.info("{} forwarders and {} worker replicas: {} requests/s".format(
num_forwarders, num_replicas, int(qps)))
for num_forwarders in [0, 1, 2]:
for num_replicas in [1, 5, 10]:
run_test(num_replicas, num_forwarders)
+43 -24
View File
@@ -1,3 +1,7 @@
# Runs several scenarios with varying max batch size, max concurrent queries,
# number of replicas, and with intermediate serve handles (to simulate ensemble
# models) either on or off.
import aiohttp
import asyncio
import time
@@ -79,33 +83,48 @@ async def trial(actors, session, data_size):
async def main():
ray.init(log_to_driver=False)
client = serve.start()
client.create_backend("backend", backend)
client.create_endpoint("endpoint", backend="backend", route="/api")
for intermediate_handles in [False, True]:
if (intermediate_handles):
actors = [Client.remote() for _ in range(NUM_CLIENTS)]
for num_replicas in [1, 8]:
for backend_config in [
{
"max_batch_size": 1,
"max_concurrent_queries": 1
},
{
"max_batch_size": 1,
"max_concurrent_queries": 10000
},
{
"max_batch_size": 10000,
"max_concurrent_queries": 10000
},
]:
backend_config["num_replicas"] = num_replicas
client.update_backend_config("backend", backend_config)
print(repr(backend_config) + ":")
async with aiohttp.ClientSession() as session:
# TODO(edoakes): large data causes broken pipe errors.
for data_size in ["small"]:
await trial(actors, session, data_size)
client.create_endpoint(
"backend", backend="backend", route="/backend")
class forwardActor:
def __init__(self):
self.handle = client.get_handle("backend")
def __call__(self, _):
return ray.get(self.handle.remote())
client.create_backend("forwardActor", forwardActor)
client.set_traffic("endpoint", {"backend": 0, "forwardActor": 1})
actors = [Client.remote() for _ in range(NUM_CLIENTS)]
for num_replicas in [1, 8]:
for backend_config in [
{
"max_batch_size": 1,
"max_concurrent_queries": 1
},
{
"max_batch_size": 1,
"max_concurrent_queries": 10000
},
{
"max_batch_size": 10000,
"max_concurrent_queries": 10000
},
]:
backend_config["num_replicas"] = num_replicas
client.update_backend_config("backend", backend_config)
print(
repr(backend_config) + ", intermediate_handles=" +
str(intermediate_handles) + ":")
async with aiohttp.ClientSession() as session:
# TODO(edoakes): large data causes broken pipe errors.
for data_size in ["small"]:
await trial(actors, session, data_size)
if __name__ == "__main__":
+119
View File
@@ -0,0 +1,119 @@
# A multi-node scalability test. We put an http proxy on the head node and spin
# up 20 nodes and put as many replicas as possible on the cluster, and run a
# stress test.
#
# Test will measure latency and throughput under a high load using `wrk`
# running on each node.
#
# Results for node 1 of 21:
# Running 10s test @ http://127.0.0.1:8000/hey
# 2 threads and 63 connections
# Thread Stats Avg Stdev Max +/- Stdev
# Latency 263.96ms 96.29ms 506.39ms 69.14%
# Req/Sec 115.63 79.29 650.00 75.40%
# 2307 requests in 10.00s, 315.66KB read
# Requests/sec: 230.61
# Transfer/sec: 31.55KB
#
# Results for node 2 of 21:
# Running 10s test @ http://127.0.0.1:8000/hey
# 2 threads and 63 connections
# Thread Stats Avg Stdev Max +/- Stdev
# Latency 282.79ms 75.00ms 500.26ms 63.32%
# Req/Sec 108.20 60.17 240.00 58.92%
# 2159 requests in 10.02s, 295.42KB read
# Requests/sec: 215.47
# Transfer/sec: 29.48KB
#
# [...] similar results for remaining nodes
import time
import subprocess
import requests
import ray
from ray import serve
from ray.serve import BackendConfig
from ray.serve.utils import logger
from ray.util.placement_group import (placement_group, remove_placement_group)
ray.shutdown()
ray.init(address="auto")
client = serve.start()
# These numbers need to correspond with the autoscaler config file.
# The number of remote nodes in the autoscaler should upper bound
# these because sometimes nodes fail to update.
num_workers = 20
expected_num_nodes = num_workers + 1
cpus_per_node = 4
num_remote_cpus = expected_num_nodes * cpus_per_node
# Wait until the expected number of nodes have joined the cluster.
while True:
num_nodes = len(ray.nodes())
logger.info("Waiting for nodes {}/{}".format(num_nodes,
expected_num_nodes))
if num_nodes >= expected_num_nodes:
break
time.sleep(5)
logger.info("Nodes have all joined. There are %s resources.",
ray.cluster_resources())
def hey(_):
time.sleep(0.01) # Sleep for 10ms
return b"hey"
num_connections = int(num_remote_cpus * 0.75)
num_threads = 2
time_to_run = "10s"
pg = placement_group(
[{
"CPU": 1
} for _ in range(expected_num_nodes)], strategy="STRICT_SPREAD")
ray.get(pg.ready())
# The number of replicas is the number of cores remaining after accounting
# for the one HTTP proxy actor on each node, the "hey" requester task on each
# node, and the serve controller.
# num_replicas = expected_num_nodes * (cpus_per_node - 2) - 1
num_replicas = ray.available_resources()["CPU"]
logger.info("Starting %i replicas", num_replicas)
client.create_backend(
"hey", hey, config=BackendConfig(num_replicas=num_replicas))
client.create_endpoint("hey", backend="hey", route="/hey")
@ray.remote
def run_wrk():
logger.info("Warming up for ~3 seconds")
for _ in range(5):
resp = requests.get("http://127.0.0.1:8000/hey").text
logger.info("Received response \'" + resp + "\'")
time.sleep(0.5)
result = subprocess.run(
[
"wrk", "-c",
str(num_connections), "-t",
str(num_threads), "-d", time_to_run, "http://127.0.0.1:8000/hey"
],
stdout=subprocess.PIPE)
return result.stdout.decode()
results = ray.get([
run_wrk.options(placement_group=pg,
placement_group_bundle_index=i).remote()
for i in range(expected_num_nodes)
])
for i in range(expected_num_nodes):
logger.info("Results for node %i of %i:", i + 1, expected_num_nodes)
logger.info(results[i])
remove_placement_group(pg)