[Placement Group]Add strict spread strategy (#10174)

* support STRICT_SPREAD strategy

* fix review comments

* rebase master

* fix lint error

* fix lint error

Co-authored-by: 灵洵 <fengbin.ffb@antfin.com>
This commit is contained in:
fangfengbin
2020-08-21 01:18:58 +08:00
committed by GitHub
parent 224933b5e4
commit a462ae2747
11 changed files with 278 additions and 63 deletions
+5 -2
View File
@@ -70,6 +70,7 @@ from ray.includes.common cimport (
PLACEMENT_STRATEGY_PACK,
PLACEMENT_STRATEGY_SPREAD,
PLACEMENT_STRATEGY_STRICT_PACK,
PLACEMENT_STRATEGY_STRICT_SPREAD,
)
from ray.includes.unique_ids cimport (
CActorID,
@@ -1064,9 +1065,11 @@ cdef class CoreWorker:
c_strategy = PLACEMENT_STRATEGY_PACK
elif strategy == b"SPREAD":
c_strategy = PLACEMENT_STRATEGY_SPREAD
elif strategy == b"STRICT_PACK":
c_strategy = PLACEMENT_STRATEGY_STRICT_PACK
else:
if strategy == b"STRICT_PACK":
c_strategy = PLACEMENT_STRATEGY_STRICT_PACK
if strategy == b"STRICT_SPREAD":
c_strategy = PLACEMENT_STRATEGY_STRICT_SPREAD
else:
raise TypeError(strategy)
@@ -19,6 +19,7 @@ def placement_group(bundles: List[Dict[str, float]],
PACK: Packs Bundles into as few nodes as possible.
SPREAD: Places Bundles across distinct nodes as even as possible.
STRICT_PACK: Packs Bundles into one node.
The group is not allowed to span multiple nodes.
STRICT_SPREAD: Places Bundles across distinct nodes.
name: The name of the placement group.
"""
+2
View File
@@ -176,6 +176,8 @@ cdef extern from "src/ray/protobuf/common.pb.h" nogil:
"ray::PlacementStrategy::SPREAD"
cdef CPlacementStrategy PLACEMENT_STRATEGY_STRICT_PACK \
"ray::PlacementStrategy::STRICT_PACK"
cdef CPlacementStrategy PLACEMENT_STRATEGY_STRICT_SPREAD \
"ray::PlacementStrategy::STRICT_SPREAD"
cdef extern from "ray/common/task/scheduling_resources.h" nogil:
cdef cppclass ResourceSet "ray::ResourceSet":
+58 -1
View File
@@ -137,7 +137,7 @@ def test_placement_group_spread(ray_start_cluster):
# Get all actors.
actor_infos = ray.actors()
# Make sure all actors in counter_list are collocated in one node.
# Make sure all actors in counter_list are located in separate nodes.
actor_info_1 = actor_infos.get(actor_1._actor_id.hex())
actor_info_2 = actor_infos.get(actor_2._actor_id.hex())
@@ -148,6 +148,63 @@ def test_placement_group_spread(ray_start_cluster):
assert node_of_actor_1 != node_of_actor_2
def test_placement_group_strict_spread(ray_start_cluster):
    """Check that STRICT_SPREAD places each bundle's actor on its own node.

    Adds three 4-CPU nodes to the cluster fixture, creates a placement
    group of three 2-CPU bundles with the STRICT_SPREAD strategy, pins one
    2-CPU actor to each bundle, and asserts that the three actors report
    pairwise different node IDs.
    """

    @ray.remote(num_cpus=2)
    class Actor(object):
        def __init__(self):
            self.n = 0

        def value(self):
            return self.n

    cluster = ray_start_cluster
    num_nodes = 3
    for _ in range(num_nodes):
        cluster.add_node(num_cpus=4)
    ray.init(address=cluster.address)

    # One 2-CPU bundle per node.
    bundles = [{"CPU": 2} for _ in range(num_nodes)]
    placement_group_id = ray.experimental.placement_group(
        name="name", strategy="STRICT_SPREAD", bundles=bundles)

    # Submit all actors first (one per bundle index), then query them.
    actors = [
        Actor.options(
            placement_group_id=placement_group_id,
            placement_group_bundle_index=bundle_index).remote()
        for bundle_index in range(num_nodes)
    ]

    # Call each actor once and print what it returns.
    for actor in actors:
        print(ray.get(actor.value.remote()))

    # Look up every actor's record in the global actor table.
    actor_table = ray.actors()
    actor_records = [
        actor_table.get(actor._actor_id.hex()) for actor in actors
    ]
    assert all(actor_records)

    # Every pair of actors must live on different nodes.
    node_ids = [record["Address"]["NodeID"] for record in actor_records]
    for position, node_id in enumerate(node_ids):
        for other_node_id in node_ids[position + 1:]:
            assert node_id != other_node_id
def test_placement_group_actor_resource_ids(ray_start_cluster):
@ray.remote(num_cpus=1)
class F: