diff --git a/python/ray/experimental/serve/global_state.py b/python/ray/experimental/serve/global_state.py index f85da82d5..5fc733aff 100644 --- a/python/ray/experimental/serve/global_state.py +++ b/python/ray/experimental/serve/global_state.py @@ -98,6 +98,8 @@ class GlobalState: logger.debug((LOG_PREFIX + "Checking if HTTP server is ready." "{} retries left.").format(retries)) time.sleep(backoff_time_s) + # Exponential backoff + backoff_time_s *= 2 retries -= 1 if retries == 0: raise Exception( diff --git a/python/ray/experimental/serve/metric.py b/python/ray/experimental/serve/metric.py index b56bd9943..756b1ecc1 100644 --- a/python/ray/experimental/serve/metric.py +++ b/python/ray/experimental/serve/metric.py @@ -27,11 +27,11 @@ class MetricMonitor: return True def add_target(self, target_handle): - hex_id = target_handle._ray_actor_id.hex() + hex_id = target_handle._ray_core_handle.actor_id().hex() self.actor_handles[hex_id] = target_handle def remove_target(self, target_handle): - hex_id = target_handle._ray_actor_id.hex() + hex_id = target_handle._ray_core_handle.actor_id().hex() self.actor_handles.pop(hex_id) def scrape(self): diff --git a/python/ray/experimental/serve/tests/test_api.py b/python/ray/experimental/serve/tests/test_api.py index e11fa2bd3..648aa6e6b 100644 --- a/python/ray/experimental/serve/tests/test_api.py +++ b/python/ray/experimental/serve/tests/test_api.py @@ -7,19 +7,21 @@ from ray.experimental import serve def test_e2e(serve_instance): - serve.create_endpoint("endpoint", "/api") + serve.create_endpoint("endpoint", "/api", blocking=True) result = ray.get( serve.global_state.kv_store_actor_handle.list_service.remote()) assert result == {"/api": "endpoint"} - retry_count = 3 + retry_count = 5 + timeout_sleep = 0.5 while True: try: resp = requests.get("http://127.0.0.1:8000/").json() assert resp == result break except Exception: - time.sleep(0.5) + time.sleep(timeout_sleep) + timeout_sleep *= 2 retry_count -= 1 if retry_count == 0: assert False, "Route table hasn't been updated after 3 tries." diff --git a/python/ray/experimental/serve/tests/test_metric.py b/python/ray/experimental/serve/tests/test_metric.py index d60cb20f5..427a3c4a8 100644 --- a/python/ray/experimental/serve/tests/test_metric.py +++ b/python/ray/experimental/serve/tests/test_metric.py @@ -38,11 +38,10 @@ def test_metric_gc(ray_instance, start_target_actor): target_actor = start_target_actor # this means when new scrapes are invoked, the metric_monitor = MetricMonitor.remote(gc_window_seconds=0) - metric_monitor.add_target.remote(target_actor) + ray.get(metric_monitor.add_target.remote(target_actor)) ray.get(metric_monitor.scrape.remote()) df = ray.get(metric_monitor._get_dataframe.remote()) - print(df) assert len(df) == 102 # Old metric sould be cleared. So only 1 counter + 101 list values left. @@ -56,10 +55,10 @@ def test_metric_system(ray_instance, start_target_actor): metric_monitor = MetricMonitor.remote() - metric_monitor.add_target.remote(target_actor) + ray.get(metric_monitor.add_target.remote(target_actor)) # Scrape once - metric_monitor.scrape.remote() + ray.get(metric_monitor.scrape.remote()) percentiles = [50, 90, 95] agg_windows_seconds = [60]