[GCS] Use the new interface for getting all available resources instead of pub-sub … (#10914)

* Use the new interface for getting all available resources instead of pub-sub in state.py

* add missing server handler and test cases, fix comments

* add fine-grained test assertions

* per comments

* use the newly added function _available_resources_per_node

* change ClientID to NodeID

* fix compile

* fix client id and lint

* make test checks more robust

* make tests more robust
This commit is contained in:
Tao Wang
2020-09-30 00:41:10 +08:00
committed by GitHub
parent 47eb6613b5
commit 1db83764bf
20 changed files with 192 additions and 41 deletions
+10 -4
View File
@@ -695,12 +695,16 @@ def test_accelerator_type_api(shutdown_only):
@ray.remote(accelerator_type=v100)
def decorated_func(quantity):
return ray.available_resources()[resource_name] < quantity
wait_for_condition(
lambda: ray.available_resources()[resource_name] < quantity)
return True
assert ray.get(decorated_func.remote(quantity))
def via_options_func(quantity):
return ray.available_resources()[resource_name] < quantity
wait_for_condition(
lambda: ray.available_resources()[resource_name] < quantity)
return True
assert ray.get(
ray.remote(via_options_func).options(
@@ -725,13 +729,15 @@ def test_accelerator_type_api(shutdown_only):
# Avoid a race condition where the actor hasn't been initialized and
# claimed the resources yet.
ray.get(decorated_actor.initialized.remote())
assert ray.available_resources()[resource_name] < quantity
wait_for_condition(
lambda: ray.available_resources()[resource_name] < quantity)
quantity = ray.available_resources()[resource_name]
with_options = ray.remote(ActorWithOptions).options(
accelerator_type=v100).remote()
ray.get(with_options.initialized.remote())
assert ray.available_resources()[resource_name] < quantity
wait_for_condition(
lambda: ray.available_resources()[resource_name] < quantity)
def test_detect_docker_cpus():
+8 -1
View File
@@ -219,8 +219,12 @@ def test_many_fractional_resources(shutdown_only):
stop_time = time.time() + 10
correct_available_resources = False
while time.time() < stop_time:
if (ray.available_resources()["CPU"] == 2.0
available_resources = ray.available_resources()
if ("CPU" in available_resources
and ray.available_resources()["CPU"] == 2.0
and "GPU" in available_resources
and ray.available_resources()["GPU"] == 2.0
and "Custom" in available_resources
and ray.available_resources()["Custom"] == 2.0):
correct_available_resources = True
break
@@ -346,6 +350,9 @@ def test_ray_options(shutdown_only):
@ray.remote(
num_cpus=2, num_gpus=3, memory=150 * 2**20, resources={"custom1": 1})
def foo():
import time
# Sleep for a heartbeat period to ensure the resource changes are reported.
time.sleep(0.1)
return ray.available_resources()
ray.init(num_cpus=10, num_gpus=10, resources={"custom1": 2})
+3 -1
View File
@@ -647,7 +647,9 @@ def test_release_cpus_when_actor_creation_task_blocking(shutdown_only):
return False
def assert_available_resources():
return 1 == ray.available_resources()["CPU"]
available_resources = ray.available_resources()
return "CPU" in available_resources and 1 == ray.available_resources(
)["CPU"]
result = wait_until(assert_available_resources, 1000)
assert result is True