mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 01:46:10 +08:00
[Serve] Hotfix: Fix actor handle hashing in metric monitoring (#5886)
This commit is contained in:
@@ -98,6 +98,8 @@ class GlobalState:
|
||||
logger.debug((LOG_PREFIX + "Checking if HTTP server is ready."
|
||||
"{} retries left.").format(retries))
|
||||
time.sleep(backoff_time_s)
|
||||
# Exponential backoff
|
||||
backoff_time_s *= 2
|
||||
retries -= 1
|
||||
if retries == 0:
|
||||
raise Exception(
|
||||
|
||||
@@ -27,11 +27,11 @@ class MetricMonitor:
|
||||
return True
|
||||
|
||||
def add_target(self, target_handle):
|
||||
hex_id = target_handle._ray_actor_id.hex()
|
||||
hex_id = target_handle._ray_core_handle.actor_id().hex()
|
||||
self.actor_handles[hex_id] = target_handle
|
||||
|
||||
def remove_target(self, target_handle):
|
||||
hex_id = target_handle._ray_actor_id.hex()
|
||||
hex_id = target_handle._ray_core_handle.actor_id().hex()
|
||||
self.actor_handles.pop(hex_id)
|
||||
|
||||
def scrape(self):
|
||||
|
||||
@@ -7,19 +7,21 @@ from ray.experimental import serve
|
||||
|
||||
|
||||
def test_e2e(serve_instance):
|
||||
serve.create_endpoint("endpoint", "/api")
|
||||
serve.create_endpoint("endpoint", "/api", blocking=True)
|
||||
result = ray.get(
|
||||
serve.global_state.kv_store_actor_handle.list_service.remote())
|
||||
assert result == {"/api": "endpoint"}
|
||||
|
||||
retry_count = 3
|
||||
retry_count = 5
|
||||
timeout_sleep = 0.5
|
||||
while True:
|
||||
try:
|
||||
resp = requests.get("http://127.0.0.1:8000/").json()
|
||||
assert resp == result
|
||||
break
|
||||
except Exception:
|
||||
time.sleep(0.5)
|
||||
time.sleep(timeout_sleep)
|
||||
timeout_sleep *= 2
|
||||
retry_count -= 1
|
||||
if retry_count == 0:
|
||||
assert False, "Route table hasn't been updated after 3 tries."
|
||||
|
||||
@@ -38,11 +38,10 @@ def test_metric_gc(ray_instance, start_target_actor):
|
||||
target_actor = start_target_actor
|
||||
# this means when new scrapes are invoked, the
|
||||
metric_monitor = MetricMonitor.remote(gc_window_seconds=0)
|
||||
metric_monitor.add_target.remote(target_actor)
|
||||
ray.get(metric_monitor.add_target.remote(target_actor))
|
||||
|
||||
ray.get(metric_monitor.scrape.remote())
|
||||
df = ray.get(metric_monitor._get_dataframe.remote())
|
||||
print(df)
|
||||
assert len(df) == 102
|
||||
|
||||
# Old metric should be cleared. So only 1 counter + 101 list values left.
|
||||
@@ -56,10 +55,10 @@ def test_metric_system(ray_instance, start_target_actor):
|
||||
|
||||
metric_monitor = MetricMonitor.remote()
|
||||
|
||||
metric_monitor.add_target.remote(target_actor)
|
||||
ray.get(metric_monitor.add_target.remote(target_actor))
|
||||
|
||||
# Scrape once
|
||||
metric_monitor.scrape.remote()
|
||||
ray.get(metric_monitor.scrape.remote())
|
||||
|
||||
percentiles = [50, 90, 95]
|
||||
agg_windows_seconds = [60]
|
||||
|
||||
Reference in New Issue
Block a user