From 761b5848147f258b9afd46fd718fcf417c34855f Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 16 Jul 2020 15:34:08 -0700 Subject: [PATCH] [Serve] Improve buffering for simple cases (#9485) --- python/ray/serve/config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ray/serve/config.py b/python/ray/serve/config.py index d05eed013..7ad12b0b4 100644 --- a/python/ray/serve/config.py +++ b/python/ray/serve/config.py @@ -36,7 +36,10 @@ class BackendConfig: # timeout is default zero seconds, then we keep the existing # behavior to allow at most max batch size queries. if self.is_blocking and self.batch_wait_timeout == 0: - self.max_concurrent_queries = self.max_batch_size or 1 + if self.max_batch_size: + self.max_concurrent_queries = 2 * self.max_batch_size + else: + self.max_concurrent_queries = 8 # Pipeline/async mode: if the servable is not blocking, # router should just keep pushing queries to the worker