From 29663d89f1fc5bac7af8237a095a85a80be0c2d4 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Fri, 25 Sep 2020 20:53:42 -0700 Subject: [PATCH] [Placement Group] Remove warning msg for placement groups. (#11034) * Done. * Addressed code review. * Fixed typo. * Addressed code review. --- python/ray/tests/test_placement_group.py | 15 ++++++++++++++- python/ray/util/placement_group.py | 6 +++++- src/ray/raylet/node_manager.cc | 19 ++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/python/ray/tests/test_placement_group.py b/python/ray/tests/test_placement_group.py index 31ee9bd74..0fbe98f2a 100644 --- a/python/ray/tests/test_placement_group.py +++ b/python/ray/tests/test_placement_group.py @@ -8,7 +8,8 @@ except ImportError: pytest_timeout = None import ray -from ray.test_utils import get_other_nodes, wait_for_condition +from ray.test_utils import (get_other_nodes, wait_for_condition, + get_error_message) import ray.cluster_utils from ray._raylet import PlacementGroupID from ray.util.placement_group import (PlacementGroup, @@ -846,5 +847,17 @@ def test_capture_child_tasks(ray_start_cluster): assert len(node_id_set) == 1 +def test_ready_warning_suppressed(ray_start_regular, error_pubsub): + p = error_pubsub + # Create an infeasible pg. + pg = ray.util.placement_group([{"CPU": 2}] * 2, strategy="STRICT_PACK") + with pytest.raises(ray.exceptions.GetTimeoutError): + ray.get(pg.ready(), timeout=0.5) + + errors = get_error_message( + p, 1, ray.ray_constants.INFEASIBLE_TASK_ERROR, timeout=0.1) + assert len(errors) == 0 + + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__])) diff --git a/python/ray/util/placement_group.py b/python/ray/util/placement_group.py index 03b7f96e7..a108e4f22 100644 --- a/python/ray/util/placement_group.py +++ b/python/ray/util/placement_group.py @@ -78,9 +78,13 @@ class PlacementGroup: return len(self.bundle_cache) def _get_none_zero_resource(self, bundle: List[Dict]): + # This number shouldn't be changed. + # When it is specified, node manager won't warn about infeasible + # tasks. + INFEASIBLE_TASK_SUPPRESS_MAGIC_NUMBER = 0.0101 for key, value in bundle.items(): if value > 0: - value = min(value, 0.001) + value = INFEASIBLE_TASK_SUPPRESS_MAGIC_NUMBER return key, value assert False, "This code should be unreachable." diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index b394702b2..099ff4b48 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -2159,8 +2159,25 @@ void NodeManager::ScheduleTasks( for (const auto &task : local_queues_.GetTasks(TaskState::PLACEABLE)) { task_dependency_manager_.TaskPending(task); move_task_set.insert(task.GetTaskSpecification().TaskId()); + + // This block is used to suppress infeasible task warning. + bool suppress_warning = false; + const auto &required_resources = task.GetTaskSpecification().GetRequiredResources(); + const auto &resources_map = required_resources.GetResourceMap(); + const auto &it = resources_map.begin(); + // It is a hack to suppress infeasible task warning. + // If the first resource of a task requires this magic number, infeasible warning is + // suppressed. It is currently only used by placement group ready API. We don't want + // to have this in ray_config_def.h because the use case is very narrow, and we don't + // want to expose this anywhere. + double INFEASIBLE_TASK_SUPPRESS_MAGIC_NUMBER = 0.0101; + if (it != resources_map.end() && + it->second == INFEASIBLE_TASK_SUPPRESS_MAGIC_NUMBER) { + suppress_warning = true; + } + // Push a warning to the task's driver that this task is currently infeasible. - { + if (!suppress_warning) { // TODO(rkn): Define this constant somewhere else. std::string type = "infeasible_task"; std::ostringstream error_message;