diff --git a/src/ray/ray_config.h b/src/ray/ray_config.h index 4887026d0..a528c3718 100644 --- a/src/ray/ray_config.h +++ b/src/ray/ray_config.h @@ -107,6 +107,8 @@ class RayConfig { int num_workers_per_process() const { return num_workers_per_process_; } + int64_t max_task_lease_timeout_ms() const { return max_task_lease_timeout_ms_; } + void initialize(const std::unordered_map &config_map) { RAY_CHECK(!initialized_); for (auto const &pair : config_map) { @@ -180,6 +182,8 @@ class RayConfig { object_manager_default_chunk_size_ = pair.second; } else if (pair.first == "object_manager_repeated_push_delay_ms") { object_manager_repeated_push_delay_ms_ = pair.second; + } else if (pair.first == "max_task_lease_timeout_ms") { + max_task_lease_timeout_ms_ = pair.second; } else { RAY_LOG(FATAL) << "Received unexpected config parameter " << pair.first; } @@ -224,6 +228,7 @@ class RayConfig { object_manager_repeated_push_delay_ms_(60000), object_manager_default_chunk_size_(1000000), num_workers_per_process_(1), + max_task_lease_timeout_ms_(60 * 1000), initialized_(false) {} ~RayConfig() {} @@ -352,6 +357,9 @@ class RayConfig { /// Number of workers per process int num_workers_per_process_; + /// Maximum timeout in milliseconds within which a task lease must be renewed. + int64_t max_task_lease_timeout_ms_; + /// Whether the initialization of the instance has been called before. /// The RayConfig instance can only (and must) be initialized once. 
bool initialized_; diff --git a/src/ray/raylet/task_dependency_manager.cc b/src/ray/raylet/task_dependency_manager.cc index 16e90d3e8..1d491b75f 100644 --- a/src/ray/raylet/task_dependency_manager.cc +++ b/src/ray/raylet/task_dependency_manager.cc @@ -279,7 +279,8 @@ void TaskDependencyManager::AcquireTaskLease(const TaskID &task_id) { }); it->second.expires_at = now_ms + it->second.lease_period; - it->second.lease_period *= 2; + it->second.lease_period = std::min(it->second.lease_period * 2, + RayConfig::instance().max_task_lease_timeout_ms()); } void TaskDependencyManager::TaskCanceled(const TaskID &task_id) {