[Serve] Fix Serve Release Tests (#12777)

This commit is contained in:
Simon Mo
2020-12-11 11:53:47 -08:00
committed by GitHub
parent 4ad4463be6
commit 3d8c1cbae6
8 changed files with 80 additions and 99 deletions
+4 -3
View File
@@ -1,7 +1,7 @@
cluster_name: default
min_workers: 22
max_workers: 22
initial_workers: 22
min_workers: 5
max_workers: 5
initial_workers: 5
autoscaling_mode: default
docker:
image: 'anyscale/ray-ml:latest'
@@ -28,6 +28,7 @@ initialization_commands: []
setup_commands:
- apt-get install build-essential libssl-dev git -y
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && cp wrk /usr/local/bin'
- ray install-nightly
head_setup_commands: []
worker_setup_commands: []
head_start_ray_commands:
@@ -86,13 +86,14 @@ async def main():
client.create_backend("backend", backend)
client.create_endpoint("endpoint", backend="backend", route="/api")
for intermediate_handles in [False, True]:
if (intermediate_handles):
if intermediate_handles:
client.create_endpoint(
"backend", backend="backend", route="/backend")
class forwardActor:
def __init__(self):
client = serve.connect()
self.handle = client.get_handle("backend")
def __call__(self, _):
+32 -29
View File
@@ -36,73 +36,76 @@ from ray import serve
from ray.serve import BackendConfig
from ray.serve.utils import logger
from ray.util.placement_group import (placement_group, remove_placement_group)
from ray.util.placement_group import placement_group, remove_placement_group
ray.shutdown()
ray.init(address="auto")
client = serve.start()
# These numbers need to correspond with the autoscaler config file.
# The number of remote nodes in the autoscaler should upper bound
# these because sometimes nodes fail to update.
num_workers = 20
expected_num_nodes = num_workers + 1
cpus_per_node = 4
num_remote_cpus = expected_num_nodes * cpus_per_node
# We ask for more worker but only need to run on smaller subset.
# This should account for worker nodes failed to launch.
expected_num_nodes = 6
num_replicas = 11
# wrk HTTP load testing config
num_connections = 20
num_threads = 2
time_to_run = "20s"
# Wait until the expected number of nodes have joined the cluster.
while True:
num_nodes = len(ray.nodes())
num_nodes = len(list(filter(lambda node: node["Alive"], ray.nodes())))
logger.info("Waiting for nodes {}/{}".format(num_nodes,
expected_num_nodes))
if num_nodes >= expected_num_nodes:
break
time.sleep(5)
logger.info("Nodes have all joined. There are %s resources.",
ray.cluster_resources())
client = serve.start()
def hey(_):
time.sleep(0.01) # Sleep for 10ms
return b"hey"
num_connections = int(num_remote_cpus * 0.75)
num_threads = 2
time_to_run = "10s"
pg = placement_group(
[{
"CPU": 1
} for _ in range(expected_num_nodes)], strategy="STRICT_SPREAD")
ray.get(pg.ready())
# The number of replicas is the number of cores remaining after accounting
# for the one HTTP proxy actor on each node, the "hey" requester task on each
# node, and the serve controller.
# num_replicas = expected_num_nodes * (cpus_per_node - 2) - 1
num_replicas = ray.available_resources()["CPU"]
logger.info("Starting %i replicas", num_replicas)
client.create_backend(
"hey", hey, config=BackendConfig(num_replicas=num_replicas))
client.create_endpoint("hey", backend="hey", route="/hey")
@ray.remote
@ray.remote(num_cpus=0)
def run_wrk():
logger.info("Warming up for ~3 seconds")
for _ in range(5):
resp = requests.get("http://127.0.0.1:8000/hey").text
logger.info("Received response \'" + resp + "\'")
time.sleep(0.5)
logger.info("Warming up")
for _ in range(10):
try:
resp = requests.get("http://127.0.0.1:8000/hey").text
logger.info("Received response '" + resp + "'")
time.sleep(0.5)
except Exception as e:
logger.info(f"Got exception {e}")
result = subprocess.run(
[
"wrk", "-c",
str(num_connections), "-t",
str(num_threads), "-d", time_to_run, "http://127.0.0.1:8000/hey"
"wrk",
"-c",
str(num_connections),
"-t",
str(num_threads),
"-d",
time_to_run,
"http://127.0.0.1:8000/hey",
],
stdout=subprocess.PIPE)
stdout=subprocess.PIPE,
)
return result.stdout.decode()
+1
View File
@@ -23,6 +23,7 @@ initialization_commands: []
setup_commands:
- apt-get install build-essential libssl-dev git -y
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && cp wrk /usr/local/bin'
- ray install-nightly
head_setup_commands: []
worker_setup_commands: []
head_start_ray_commands: