diff --git a/python/ray/includes/ray_config.pxd b/python/ray/includes/ray_config.pxd index 1837cd1de..ff81b531c 100644 --- a/python/ray/includes/ray_config.pxd +++ b/python/ray/includes/ray_config.pxd @@ -47,6 +47,8 @@ cdef extern from "ray/common/ray_config.h" nogil: int64_t kill_worker_timeout_milliseconds() const + int64_t worker_register_timeout_seconds() const + int64_t max_time_for_handler_milliseconds() const int64_t max_time_for_loop() const diff --git a/python/ray/includes/ray_config.pxi b/python/ray/includes/ray_config.pxi index fecfb1481..8014052d4 100644 --- a/python/ray/includes/ray_config.pxi +++ b/python/ray/includes/ray_config.pxi @@ -56,7 +56,8 @@ cdef class Config: @staticmethod def raylet_client_connect_timeout_milliseconds(): - return RayConfig.instance().raylet_client_connect_timeout_milliseconds() + return (RayConfig.instance() + .raylet_client_connect_timeout_milliseconds()) @staticmethod def raylet_fetch_timeout_milliseconds(): @@ -80,6 +81,10 @@ cdef class Config: def kill_worker_timeout_milliseconds(): return RayConfig.instance().kill_worker_timeout_milliseconds() + @staticmethod + def worker_register_timeout_seconds(): + return RayConfig.instance().worker_register_timeout_seconds() + @staticmethod def max_time_for_handler_milliseconds(): return RayConfig.instance().max_time_for_handler_milliseconds() diff --git a/src/ray/common/ray_config_def.h b/src/ray/common/ray_config_def.h index 0721f481f..df443a999 100644 --- a/src/ray/common/ray_config_def.h +++ b/src/ray/common/ray_config_def.h @@ -192,6 +192,10 @@ RAY_CONFIG(size_t, raylet_max_active_object_ids, 0) /// the worker SIGKILL. RAY_CONFIG(int64_t, kill_worker_timeout_milliseconds, 100) +/// The duration that we wait after the worker is launched before the +/// starting_worker_timeout_callback() is called. 
+RAY_CONFIG(int64_t, worker_register_timeout_seconds, 30) + /// This is a timeout used to cause failures in the plasma manager and raylet /// when certain event loop handlers take too long. RAY_CONFIG(int64_t, max_time_for_handler_milliseconds, 1000) diff --git a/src/ray/raylet/worker_pool.cc b/src/ray/raylet/worker_pool.cc index bbe370dd3..19693114d 100644 --- a/src/ray/raylet/worker_pool.cc +++ b/src/ray/raylet/worker_pool.cc @@ -250,9 +250,9 @@ Process WorkerPool::StartWorkerProcess(const Language &language, void WorkerPool::MonitorStartingWorkerProcess(const Process &proc, const Language &language) { - constexpr static size_t worker_register_timeout_seconds = 30; auto timer = std::make_shared( - *io_service_, boost::posix_time::seconds(worker_register_timeout_seconds)); + *io_service_, boost::posix_time::seconds( + RayConfig::instance().worker_register_timeout_seconds())); // Capture timer in lambda to copy it once, so that it can avoid destructing timer. timer->async_wait( [timer, language, proc, this](const boost::system::error_code e) -> void {