mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 20:07:41 +08:00
[Serve] Serve multi node tests (#10980)
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
0 forwarders and 1 worker replicas: 1282 requests/s
|
||||
0 forwarders and 5 worker replicas: 1375 requests/s
|
||||
0 forwarders and 10 worker replicas: 1362 requests/s
|
||||
1 forwarders and 1 worker replicas: 608 requests/s
|
||||
1 forwarders and 5 worker replicas: 626 requests/s
|
||||
1 forwarders and 10 worker replicas: 627 requests/s
|
||||
2 forwarders and 1 worker replicas: 609 requests/s
|
||||
2 forwarders and 5 worker replicas: 620 requests/s
|
||||
2 forwarders and 10 worker replicas: 609 requests/s
|
||||
@@ -0,0 +1,37 @@
|
||||
2020-10-07 09:20:47,110 INFO worker.py:634 -- Connecting to existing Ray cluster at address: 172.31.146.145:6379
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=False:
|
||||
single client small data 94.94 +- 0.41 requests/s
|
||||
8 clients small data 480.75 +- 28.58 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False:
|
||||
single client small data 94.14 +- 1.89 requests/s
|
||||
8 clients small data 457.8 +- 8.53 requests/s
|
||||
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=False:
|
||||
single client small data 95.34 +- 0.81 requests/s
|
||||
8 clients small data 455.35 +- 2.0 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=False:
|
||||
single client small data 77.61 +- 1.22 requests/s
|
||||
8 clients small data 623.41 +- 5.7 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False:
|
||||
single client small data 77.38 +- 0.73 requests/s
|
||||
8 clients small data 642.45 +- 9.81 requests/s
|
||||
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=False:
|
||||
single client small data 77.99 +- 2.44 requests/s
|
||||
8 clients small data 636.86 +- 10.21 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 1}, intermediate_handles=True:
|
||||
single client small data 52.91 +- 0.18 requests/s
|
||||
8 clients small data 311.4 +- 1.87 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True:
|
||||
single client small data 52.68 +- 0.45 requests/s
|
||||
8 clients small data 311.67 +- 2.45 requests/s
|
||||
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 1}, intermediate_handles=True:
|
||||
single client small data 52.79 +- 0.5 requests/s
|
||||
8 clients small data 309.75 +- 3.03 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 1, 'num_replicas': 8}, intermediate_handles=True:
|
||||
single client small data 49.92 +- 0.52 requests/s
|
||||
8 clients small data 295.44 +- 2.01 requests/s
|
||||
{'max_batch_size': 1, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True:
|
||||
single client small data 49.74 +- 0.74 requests/s
|
||||
8 clients small data 296.69 +- 1.84 requests/s
|
||||
{'max_batch_size': 10000, 'max_concurrent_queries': 10000, 'num_replicas': 8}, intermediate_handles=True:
|
||||
single client small data 49.57 +- 0.36 requests/s
|
||||
8 clients small data 293.47 +- 1.88 requests/s
|
||||
@@ -0,0 +1,209 @@
|
||||
2020-10-07 09:15:57,387 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 1 of 21:
|
||||
2020-10-07 09:15:57,388 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 263.96ms 96.29ms 506.39ms 69.14%
|
||||
Req/Sec 115.63 79.29 650.00 75.40%
|
||||
2307 requests in 10.00s, 315.66KB read
|
||||
Requests/sec: 230.61
|
||||
Transfer/sec: 31.55KB
|
||||
|
||||
2020-10-07 09:15:57,388 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 2 of 21:
|
||||
2020-10-07 09:15:57,389 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 282.79ms 75.00ms 500.26ms 63.32%
|
||||
Req/Sec 108.20 60.17 240.00 58.92%
|
||||
2159 requests in 10.02s, 295.42KB read
|
||||
Requests/sec: 215.47
|
||||
Transfer/sec: 29.48KB
|
||||
|
||||
2020-10-07 09:15:57,389 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 3 of 21:
|
||||
2020-10-07 09:15:57,390 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 284.60ms 75.70ms 505.43ms 63.33%
|
||||
Req/Sec 108.37 59.15 240.00 63.39%
|
||||
2149 requests in 10.00s, 294.43KB read
|
||||
Requests/sec: 214.80
|
||||
Transfer/sec: 29.43KB
|
||||
|
||||
2020-10-07 09:15:57,390 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 4 of 21:
|
||||
2020-10-07 09:15:57,391 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 282.22ms 77.38ms 508.22ms 65.28%
|
||||
Req/Sec 108.98 61.47 250.00 62.98%
|
||||
2163 requests in 10.03s, 295.97KB read
|
||||
Requests/sec: 215.60
|
||||
Transfer/sec: 29.50KB
|
||||
|
||||
2020-10-07 09:15:57,392 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 5 of 21:
|
||||
2020-10-07 09:15:57,392 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 278.77ms 79.19ms 509.73ms 64.19%
|
||||
Req/Sec 109.77 62.22 303.00 67.03%
|
||||
2195 requests in 10.01s, 300.35KB read
|
||||
Requests/sec: 219.32
|
||||
Transfer/sec: 30.01KB
|
||||
|
||||
2020-10-07 09:15:57,395 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 6 of 21:
|
||||
2020-10-07 09:15:57,396 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 267.82ms 93.51ms 500.75ms 67.95%
|
||||
Req/Sec 114.75 72.64 480.00 69.57%
|
||||
2281 requests in 10.01s, 312.35KB read
|
||||
Requests/sec: 227.96
|
||||
Transfer/sec: 31.22KB
|
||||
|
||||
2020-10-07 09:15:57,396 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 7 of 21:
|
||||
2020-10-07 09:15:57,397 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 285.04ms 75.40ms 492.30ms 63.57%
|
||||
Req/Sec 108.51 61.33 242.00 58.06%
|
||||
2144 requests in 10.00s, 293.50KB read
|
||||
Requests/sec: 214.32
|
||||
Transfer/sec: 29.34KB
|
||||
|
||||
2020-10-07 09:15:57,397 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 8 of 21:
|
||||
2020-10-07 09:15:57,398 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 271.42ms 89.56ms 508.15ms 68.90%
|
||||
Req/Sec 113.23 66.06 340.00 65.95%
|
||||
2254 requests in 10.01s, 308.16KB read
|
||||
Requests/sec: 225.13
|
||||
Transfer/sec: 30.78KB
|
||||
|
||||
2020-10-07 09:15:57,399 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 9 of 21:
|
||||
2020-10-07 09:15:57,400 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 269.81ms 91.28ms 500.31ms 68.51%
|
||||
Req/Sec 113.60 72.20 410.00 64.17%
|
||||
2255 requests in 10.01s, 308.67KB read
|
||||
Requests/sec: 225.38
|
||||
Transfer/sec: 30.85KB
|
||||
|
||||
2020-10-07 09:15:57,400 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 10 of 21:
|
||||
2020-10-07 09:15:57,401 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 283.93ms 74.67ms 507.84ms 64.11%
|
||||
Req/Sec 108.93 55.89 242.00 62.09%
|
||||
2154 requests in 10.01s, 294.86KB read
|
||||
Requests/sec: 215.24
|
||||
Transfer/sec: 29.46KB
|
||||
|
||||
2020-10-07 09:15:57,401 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 11 of 21:
|
||||
2020-10-07 09:15:57,402 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 282.35ms 75.86ms 495.90ms 63.86%
|
||||
Req/Sec 108.15 59.10 222.00 60.22%
|
||||
2158 requests in 10.00s, 295.41KB read
|
||||
Requests/sec: 215.72
|
||||
Transfer/sec: 29.53KB
|
||||
|
||||
2020-10-07 09:15:57,402 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 12 of 21:
|
||||
2020-10-07 09:15:57,403 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 283.37ms 75.04ms 499.01ms 63.79%
|
||||
Req/Sec 108.77 61.15 262.00 60.00%
|
||||
2157 requests in 10.01s, 295.03KB read
|
||||
Requests/sec: 215.55
|
||||
Transfer/sec: 29.48KB
|
||||
|
||||
2020-10-07 09:15:57,406 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 13 of 21:
|
||||
2020-10-07 09:15:57,406 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 273.55ms 86.08ms 504.93ms 66.70%
|
||||
Req/Sec 112.26 64.36 272.00 60.33%
|
||||
2234 requests in 10.01s, 305.93KB read
|
||||
Requests/sec: 223.25
|
||||
Transfer/sec: 30.57KB
|
||||
|
||||
2020-10-07 09:15:57,407 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 14 of 21:
|
||||
2020-10-07 09:15:57,407 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 279.76ms 78.28ms 506.39ms 65.57%
|
||||
Req/Sec 109.14 62.56 272.00 60.75%
|
||||
2181 requests in 10.00s, 298.93KB read
|
||||
Requests/sec: 218.03
|
||||
Transfer/sec: 29.88KB
|
||||
|
||||
2020-10-07 09:15:57,408 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 15 of 21:
|
||||
2020-10-07 09:15:57,408 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 285.56ms 75.73ms 493.67ms 63.94%
|
||||
Req/Sec 108.23 58.97 240.00 61.41%
|
||||
2149 requests in 10.04s, 294.43KB read
|
||||
Requests/sec: 214.12
|
||||
Transfer/sec: 29.34KB
|
||||
|
||||
2020-10-07 09:15:57,409 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 16 of 21:
|
||||
2020-10-07 09:15:57,410 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 275.84ms 83.78ms 504.36ms 66.76%
|
||||
Req/Sec 110.52 61.73 290.00 63.64%
|
||||
2208 requests in 10.00s, 302.00KB read
|
||||
Requests/sec: 220.72
|
||||
Transfer/sec: 30.19KB
|
||||
|
||||
2020-10-07 09:15:57,410 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 17 of 21:
|
||||
2020-10-07 09:15:57,411 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 277.22ms 82.84ms 501.03ms 66.64%
|
||||
Req/Sec 110.92 59.32 240.00 63.78%
|
||||
2206 requests in 10.04s, 301.60KB read
|
||||
Requests/sec: 219.78
|
||||
Transfer/sec: 30.05KB
|
||||
|
||||
2020-10-07 09:15:57,411 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 18 of 21:
|
||||
2020-10-07 09:15:57,412 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 282.49ms 76.79ms 495.28ms 64.60%
|
||||
Req/Sec 108.16 61.20 250.00 61.62%
|
||||
2161 requests in 10.04s, 296.44KB read
|
||||
Requests/sec: 215.19
|
||||
Transfer/sec: 29.52KB
|
||||
|
||||
2020-10-07 09:15:57,414 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 19 of 21:
|
||||
2020-10-07 09:15:57,414 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 281.33ms 77.56ms 504.81ms 65.10%
|
||||
Req/Sec 108.46 61.28 252.00 60.44%
|
||||
2166 requests in 10.01s, 296.50KB read
|
||||
Requests/sec: 216.46
|
||||
Transfer/sec: 29.63KB
|
||||
|
||||
2020-10-07 09:15:57,415 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 20 of 21:
|
||||
2020-10-07 09:15:57,416 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 284.18ms 74.78ms 506.23ms 62.89%
|
||||
Req/Sec 109.16 60.05 240.00 59.46%
|
||||
2153 requests in 10.01s, 294.85KB read
|
||||
Requests/sec: 215.16
|
||||
Transfer/sec: 29.47KB
|
||||
|
||||
2020-10-07 09:15:57,417 INFO <ipython-input-1-1b7ee15d465b>:111 -- Results for node 21 of 21:
|
||||
2020-10-07 09:15:57,417 INFO <ipython-input-1-1b7ee15d465b>:112 -- Running 10s test @ http://127.0.0.1:8000/hey
|
||||
2 threads and 63 connections
|
||||
Thread Stats Avg Stdev Max +/- Stdev
|
||||
Latency 281.58ms 78.06ms 499.69ms 65.93%
|
||||
Req/Sec 108.51 60.56 260.00 64.29%
|
||||
2163 requests in 10.01s, 296.09KB read
|
||||
Requests/sec: 216.12
|
||||
Transfer/sec: 29.59KB
|
||||
@@ -0,0 +1,45 @@
|
||||
cluster_name: default
|
||||
min_workers: 22
|
||||
max_workers: 22
|
||||
initial_workers: 22
|
||||
autoscaling_mode: default
|
||||
docker:
|
||||
image: 'anyscale/ray-ml:latest'
|
||||
container_name: ray_container
|
||||
pull_before_run: true
|
||||
target_utilization_fraction: 0.8
|
||||
idle_timeout_minutes: 5
|
||||
provider:
|
||||
type: aws
|
||||
region: us-west-2
|
||||
availability_zone: us-west-2a
|
||||
auth:
|
||||
ssh_user: ubuntu
|
||||
head_node:
|
||||
InstanceType: m5.xlarge
|
||||
BlockDeviceMappings:
|
||||
-
|
||||
DeviceName: /dev/sda1
|
||||
Ebs:
|
||||
VolumeSize: 100
|
||||
worker_nodes:
|
||||
InstanceType: m5.xlarge
|
||||
file_mounts: {}
|
||||
cluster_synced_files:
|
||||
- /tmp/ray_tmp_mount/~/blank-serve-test
|
||||
file_mounts_sync_continuously: true
|
||||
initialization_commands: []
|
||||
setup_commands:
|
||||
- apt-get install build-essential libssl-dev git -y
|
||||
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make && cp wrk /usr/local/bin'
|
||||
head_setup_commands: []
|
||||
worker_setup_commands: []
|
||||
head_start_ray_commands:
|
||||
- ray stop
|
||||
- ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
|
||||
worker_start_ray_commands:
|
||||
- ray stop
|
||||
- 'ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076'
|
||||
metadata:
|
||||
anyscale:
|
||||
working_dir: ~/blank-serve-test
|
||||
@@ -0,0 +1,86 @@
|
||||
# A test that stresses the serve handles. We spin up a backend with a bunch
|
||||
# (0-2) of replicas that just forward requests to another backend.
|
||||
#
|
||||
# By comparing using the forward replicas with just calling the worker
|
||||
# replicas, we measure the (latency) overhead in the handle. This answers
|
||||
# the question: How much of a latency/throughput hit is there when I
|
||||
# compose models?
|
||||
#
|
||||
# By comparing the qps as we fix the number of forwarder replicas and vary the
|
||||
# number of worker replicas, we measure the limit of a single async actor. This
|
||||
# answers the question: How many "ForwardActor"s or these kinds of high-level
|
||||
# pipeline workers do I need to provision for my workload? every 1k qps,
|
||||
# 2k qps?
|
||||
#
|
||||
# Sample output:
|
||||
# 0 forwarders and 1 worker replicas: 1282 requests/s
|
||||
# 0 forwarders and 5 worker replicas: 1375 requests/s
|
||||
# 0 forwarders and 10 worker replicas: 1362 requests/s
|
||||
# 1 forwarders and 1 worker replicas: 608 requests/s
|
||||
# 1 forwarders and 5 worker replicas: 626 requests/s
|
||||
# 1 forwarders and 10 worker replicas: 627 requests/s
|
||||
# 2 forwarders and 1 worker replicas: 609 requests/s
|
||||
# 2 forwarders and 5 worker replicas: 620 requests/s
|
||||
# 2 forwarders and 10 worker replicas: 609 requests/s
|
||||
|
||||
import ray
|
||||
from ray import serve
|
||||
from ray.serve import BackendConfig
|
||||
from ray.serve.utils import logger
|
||||
import time
|
||||
|
||||
num_queries = 2000
|
||||
|
||||
ray.init(address="auto")
|
||||
|
||||
client = serve.start()
|
||||
|
||||
|
||||
def hello_world(_):
|
||||
return b"Hello World"
|
||||
|
||||
|
||||
class ForwardActor:
|
||||
def __init__(self):
|
||||
client = serve.connect()
|
||||
self.handle = client.get_handle("hello_world")
|
||||
|
||||
async def __call__(self, _):
|
||||
await self.handle.remote()
|
||||
|
||||
|
||||
client.create_backend("hello_world", hello_world)
|
||||
client.create_endpoint("hello_world", backend="hello_world")
|
||||
|
||||
client.create_backend("ForwardActor", ForwardActor)
|
||||
client.create_endpoint("ForwardActor", backend="ForwardActor")
|
||||
|
||||
|
||||
def run_test(num_replicas, num_forwarders):
|
||||
replicas_config = BackendConfig(num_replicas=num_replicas)
|
||||
client.update_backend_config("hello_world", replicas_config)
|
||||
|
||||
if (num_forwarders == 0):
|
||||
handle = client.get_handle("hello_world")
|
||||
else:
|
||||
forwarders_config = BackendConfig(num_replicas=num_forwarders)
|
||||
client.update_backend_config("ForwardActor", forwarders_config)
|
||||
handle = client.get_handle("ForwardActor")
|
||||
|
||||
# warmup - helpful to wait for gc.collect() and actors to start
|
||||
start = time.time()
|
||||
while time.time() - start < 1:
|
||||
ray.get(handle.remote())
|
||||
|
||||
# real test
|
||||
start = time.time()
|
||||
ray.get([handle.remote() for _ in range(num_queries)])
|
||||
qps = num_queries / (time.time() - start)
|
||||
|
||||
logger.info("{} forwarders and {} worker replicas: {} requests/s".format(
|
||||
num_forwarders, num_replicas, int(qps)))
|
||||
|
||||
|
||||
for num_forwarders in [0, 1, 2]:
|
||||
for num_replicas in [1, 5, 10]:
|
||||
run_test(num_replicas, num_forwarders)
|
||||
@@ -1,3 +1,7 @@
|
||||
# Runs several scenarios with varying max batch size, max concurrent queries,
|
||||
# number of replicas, and with intermediate serve handles (to simulate ensemble
|
||||
# models) either on or off.
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import time
|
||||
@@ -79,33 +83,48 @@ async def trial(actors, session, data_size):
|
||||
async def main():
|
||||
ray.init(log_to_driver=False)
|
||||
client = serve.start()
|
||||
|
||||
client.create_backend("backend", backend)
|
||||
client.create_endpoint("endpoint", backend="backend", route="/api")
|
||||
for intermediate_handles in [False, True]:
|
||||
if (intermediate_handles):
|
||||
|
||||
actors = [Client.remote() for _ in range(NUM_CLIENTS)]
|
||||
for num_replicas in [1, 8]:
|
||||
for backend_config in [
|
||||
{
|
||||
"max_batch_size": 1,
|
||||
"max_concurrent_queries": 1
|
||||
},
|
||||
{
|
||||
"max_batch_size": 1,
|
||||
"max_concurrent_queries": 10000
|
||||
},
|
||||
{
|
||||
"max_batch_size": 10000,
|
||||
"max_concurrent_queries": 10000
|
||||
},
|
||||
]:
|
||||
backend_config["num_replicas"] = num_replicas
|
||||
client.update_backend_config("backend", backend_config)
|
||||
print(repr(backend_config) + ":")
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# TODO(edoakes): large data causes broken pipe errors.
|
||||
for data_size in ["small"]:
|
||||
await trial(actors, session, data_size)
|
||||
client.create_endpoint(
|
||||
"backend", backend="backend", route="/backend")
|
||||
|
||||
class forwardActor:
|
||||
def __init__(self):
|
||||
self.handle = client.get_handle("backend")
|
||||
|
||||
def __call__(self, _):
|
||||
return ray.get(self.handle.remote())
|
||||
|
||||
client.create_backend("forwardActor", forwardActor)
|
||||
client.set_traffic("endpoint", {"backend": 0, "forwardActor": 1})
|
||||
actors = [Client.remote() for _ in range(NUM_CLIENTS)]
|
||||
for num_replicas in [1, 8]:
|
||||
for backend_config in [
|
||||
{
|
||||
"max_batch_size": 1,
|
||||
"max_concurrent_queries": 1
|
||||
},
|
||||
{
|
||||
"max_batch_size": 1,
|
||||
"max_concurrent_queries": 10000
|
||||
},
|
||||
{
|
||||
"max_batch_size": 10000,
|
||||
"max_concurrent_queries": 10000
|
||||
},
|
||||
]:
|
||||
backend_config["num_replicas"] = num_replicas
|
||||
client.update_backend_config("backend", backend_config)
|
||||
print(
|
||||
repr(backend_config) + ", intermediate_handles=" +
|
||||
str(intermediate_handles) + ":")
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# TODO(edoakes): large data causes broken pipe errors.
|
||||
for data_size in ["small"]:
|
||||
await trial(actors, session, data_size)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
# A multi-node scalability test. We put an http proxy on the head node and spin
|
||||
# up 20 nodes and put as many replicas as possible on the cluster, and run a
|
||||
# stress test.
|
||||
#
|
||||
# Test will measure latency and throughput under a high load using `wrk`
|
||||
# running on each node.
|
||||
#
|
||||
# Results for node 1 of 21:
|
||||
# Running 10s test @ http://127.0.0.1:8000/hey
|
||||
# 2 threads and 63 connections
|
||||
# Thread Stats Avg Stdev Max +/- Stdev
|
||||
# Latency 263.96ms 96.29ms 506.39ms 69.14%
|
||||
# Req/Sec 115.63 79.29 650.00 75.40%
|
||||
# 2307 requests in 10.00s, 315.66KB read
|
||||
# Requests/sec: 230.61
|
||||
# Transfer/sec: 31.55KB
|
||||
#
|
||||
# Results for node 2 of 21:
|
||||
# Running 10s test @ http://127.0.0.1:8000/hey
|
||||
# 2 threads and 63 connections
|
||||
# Thread Stats Avg Stdev Max +/- Stdev
|
||||
# Latency 282.79ms 75.00ms 500.26ms 63.32%
|
||||
# Req/Sec 108.20 60.17 240.00 58.92%
|
||||
# 2159 requests in 10.02s, 295.42KB read
|
||||
# Requests/sec: 215.47
|
||||
# Transfer/sec: 29.48KB
|
||||
#
|
||||
# [...] similar results for remaining nodes
|
||||
|
||||
import time
|
||||
import subprocess
|
||||
import requests
|
||||
|
||||
import ray
|
||||
from ray import serve
|
||||
from ray.serve import BackendConfig
|
||||
from ray.serve.utils import logger
|
||||
|
||||
from ray.util.placement_group import (placement_group, remove_placement_group)
|
||||
|
||||
ray.shutdown()
|
||||
ray.init(address="auto")
|
||||
client = serve.start()
|
||||
|
||||
# These numbers need to correspond with the autoscaler config file.
|
||||
# The number of remote nodes in the autoscaler should upper bound
|
||||
# these because sometimes nodes fail to update.
|
||||
num_workers = 20
|
||||
expected_num_nodes = num_workers + 1
|
||||
cpus_per_node = 4
|
||||
num_remote_cpus = expected_num_nodes * cpus_per_node
|
||||
|
||||
# Wait until the expected number of nodes have joined the cluster.
|
||||
while True:
|
||||
num_nodes = len(ray.nodes())
|
||||
logger.info("Waiting for nodes {}/{}".format(num_nodes,
|
||||
expected_num_nodes))
|
||||
if num_nodes >= expected_num_nodes:
|
||||
break
|
||||
time.sleep(5)
|
||||
logger.info("Nodes have all joined. There are %s resources.",
|
||||
ray.cluster_resources())
|
||||
|
||||
|
||||
def hey(_):
|
||||
time.sleep(0.01) # Sleep for 10ms
|
||||
return b"hey"
|
||||
|
||||
|
||||
num_connections = int(num_remote_cpus * 0.75)
|
||||
num_threads = 2
|
||||
time_to_run = "10s"
|
||||
|
||||
pg = placement_group(
|
||||
[{
|
||||
"CPU": 1
|
||||
} for _ in range(expected_num_nodes)], strategy="STRICT_SPREAD")
|
||||
ray.get(pg.ready())
|
||||
|
||||
# The number of replicas is the number of cores remaining after accounting
|
||||
# for the one HTTP proxy actor on each node, the "hey" requester task on each
|
||||
# node, and the serve controller.
|
||||
# num_replicas = expected_num_nodes * (cpus_per_node - 2) - 1
|
||||
num_replicas = ray.available_resources()["CPU"]
|
||||
logger.info("Starting %i replicas", num_replicas)
|
||||
client.create_backend(
|
||||
"hey", hey, config=BackendConfig(num_replicas=num_replicas))
|
||||
client.create_endpoint("hey", backend="hey", route="/hey")
|
||||
|
||||
|
||||
@ray.remote
|
||||
def run_wrk():
|
||||
logger.info("Warming up for ~3 seconds")
|
||||
for _ in range(5):
|
||||
resp = requests.get("http://127.0.0.1:8000/hey").text
|
||||
logger.info("Received response \'" + resp + "\'")
|
||||
time.sleep(0.5)
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
"wrk", "-c",
|
||||
str(num_connections), "-t",
|
||||
str(num_threads), "-d", time_to_run, "http://127.0.0.1:8000/hey"
|
||||
],
|
||||
stdout=subprocess.PIPE)
|
||||
return result.stdout.decode()
|
||||
|
||||
|
||||
results = ray.get([
|
||||
run_wrk.options(placement_group=pg,
|
||||
placement_group_bundle_index=i).remote()
|
||||
for i in range(expected_num_nodes)
|
||||
])
|
||||
|
||||
for i in range(expected_num_nodes):
|
||||
logger.info("Results for node %i of %i:", i + 1, expected_num_nodes)
|
||||
logger.info(results[i])
|
||||
|
||||
remove_placement_group(pg)
|
||||
Reference in New Issue
Block a user