Files
ray/python/ray/tests/test_global_state.py
T
Stephanie Wang f75dfd60a3 [api] API deprecations and cleanups for 1.0 (internal_config and Checkpointable actor) (#10333)
* remove

* internal config updates, remove Checkpointable

* Lower object timeout default

* remove json

* Fix flaky test

* Fix unit test
2020-08-27 10:19:53 -07:00

221 lines
6.5 KiB
Python

import pytest
try:
import pytest_timeout
except ImportError:
pytest_timeout = None
import time
import ray
import ray.ray_constants
import ray.test_utils
# TODO(rliaw): The proper way to do this is to have the pytest config setup.
@pytest.mark.skipif(
pytest_timeout is None,
reason="Timeout package not installed; skipping test that may hang.")
@pytest.mark.timeout(10)
def test_replenish_resources(ray_start_regular):
cluster_resources = ray.cluster_resources()
available_resources = ray.available_resources()
assert cluster_resources == available_resources
@ray.remote
def cpu_task():
pass
ray.get(cpu_task.remote())
resources_reset = False
while not resources_reset:
available_resources = ray.available_resources()
resources_reset = (cluster_resources == available_resources)
assert resources_reset
@pytest.mark.skipif(
pytest_timeout is None,
reason="Timeout package not installed; skipping test that may hang.")
@pytest.mark.timeout(10)
def test_uses_resources(ray_start_regular):
cluster_resources = ray.cluster_resources()
@ray.remote
def cpu_task():
time.sleep(1)
cpu_task.remote()
resource_used = False
while not resource_used:
available_resources = ray.available_resources()
resource_used = available_resources.get(
"CPU", 0) == cluster_resources.get("CPU", 0) - 1
assert resource_used
@pytest.mark.skipif(
pytest_timeout is None,
reason="Timeout package not installed; skipping test that may hang.")
@pytest.mark.timeout(120)
def test_add_remove_cluster_resources(ray_start_cluster_head):
"""Tests that Global State API is consistent with actual cluster."""
cluster = ray_start_cluster_head
assert ray.cluster_resources()["CPU"] == 1
nodes = []
nodes += [cluster.add_node(num_cpus=1)]
cluster.wait_for_nodes()
assert ray.cluster_resources()["CPU"] == 2
cluster.remove_node(nodes.pop())
cluster.wait_for_nodes()
assert ray.cluster_resources()["CPU"] == 1
for i in range(5):
nodes += [cluster.add_node(num_cpus=1)]
cluster.wait_for_nodes()
assert ray.cluster_resources()["CPU"] == 6
def test_global_state_actor_table(ray_start_regular):
@ray.remote
class Actor:
def ready(self):
pass
# actor table should be empty at first
assert len(ray.actors()) == 0
# actor table should contain only one entry
a = Actor.remote()
ray.get(a.ready.remote())
assert len(ray.actors()) == 1
# actor table should contain only this entry
# even when the actor goes out of scope
del a
def get_state():
return list(ray.actors().values())[0]["State"]
dead_state = ray.gcs_utils.ActorTableData.DEAD
for _ in range(10):
if get_state() == dead_state:
break
else:
time.sleep(0.5)
assert get_state() == dead_state
def test_global_state_actor_entry(ray_start_regular):
@ray.remote
class Actor:
def ready(self):
pass
# actor table should be empty at first
assert len(ray.actors()) == 0
a = Actor.remote()
b = Actor.remote()
ray.get(a.ready.remote())
ray.get(b.ready.remote())
assert len(ray.actors()) == 2
a_actor_id = a._actor_id.hex()
b_actor_id = b._actor_id.hex()
assert ray.actors(actor_id=a_actor_id)["ActorID"] == a_actor_id
assert ray.actors(
actor_id=a_actor_id)["State"] == ray.gcs_utils.ActorTableData.ALIVE
assert ray.actors(actor_id=b_actor_id)["ActorID"] == b_actor_id
assert ray.actors(
actor_id=b_actor_id)["State"] == ray.gcs_utils.ActorTableData.ALIVE
@pytest.mark.parametrize("max_shapes", [0, 2, -1])
def test_load_report(shutdown_only, max_shapes):
resource1 = "A"
resource2 = "B"
cluster = ray.init(
num_cpus=1,
resources={resource1: 1},
_system_config={
"max_resource_shapes_per_load_report": max_shapes,
})
redis = ray.services.create_redis_client(
cluster["redis_address"],
password=ray.ray_constants.REDIS_DEFAULT_PASSWORD)
client = redis.pubsub(ignore_subscribe_messages=True)
client.psubscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_PATTERN)
@ray.remote
def sleep():
time.sleep(1000)
sleep.remote()
for _ in range(3):
sleep.remote()
sleep.options(resources={resource1: 1}).remote()
sleep.options(resources={resource2: 1}).remote()
class Checker:
def __init__(self):
self.report = None
def check_load_report(self):
try:
message = client.get_message()
except redis.exceptions.ConnectionError:
pass
if message is None:
return False
pattern = message["pattern"]
data = message["data"]
if pattern != ray.gcs_utils.XRAY_HEARTBEAT_BATCH_PATTERN:
return False
pub_message = ray.gcs_utils.PubSubMessage.FromString(data)
heartbeat_data = pub_message.data
heartbeat = ray.gcs_utils.HeartbeatBatchTableData.FromString(
heartbeat_data)
self.report = heartbeat.resource_load_by_shape.resource_demands
if max_shapes == 0:
return True
elif max_shapes == 2:
return len(self.report) >= 2
else:
return len(self.report) >= 3
# Wait for load information to arrive.
checker = Checker()
ray.test_utils.wait_for_condition(checker.check_load_report)
# Check that we respect the max shapes limit.
if max_shapes != -1:
assert len(checker.report) <= max_shapes
if max_shapes > 0:
# Check that we always include the 1-CPU resource shape.
one_cpu_shape = {"CPU": 1}
one_cpu_found = False
for demand in checker.report:
if demand.shape == one_cpu_shape:
one_cpu_found = True
assert one_cpu_found
# Check that we differentiate between infeasible and ready tasks.
for demand in checker.report:
if resource2 in demand.shape:
assert demand.num_infeasible_requests_queued > 0
assert demand.num_ready_requests_queued == 0
else:
assert demand.num_ready_requests_queued > 0
assert demand.num_infeasible_requests_queued == 0
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))