[Autoscaler] New output log format (#12772)

This commit is contained in:
Alex Wu
2020-12-23 12:02:55 -08:00
committed by GitHub
parent d95c8b8a41
commit 8df94e33e0
11 changed files with 907 additions and 154 deletions
+19 -15
View File
@@ -15,11 +15,14 @@ from ray.autoscaler._private.constants import AUTOSCALER_UPDATE_INTERVAL_S
from ray.autoscaler._private.load_metrics import LoadMetrics
from ray.autoscaler._private.constants import \
AUTOSCALER_MAX_RESOURCE_DEMAND_VECTOR_SIZE
from ray.autoscaler._private.util import DEBUG_AUTOSCALING_STATUS
import ray.gcs_utils
import ray.utils
import ray.ray_constants as ray_constants
from ray.ray_logging import setup_component_logger
from ray._raylet import GlobalStateAccessor
from ray.experimental.internal_kv import _internal_kv_put, \
_internal_kv_initialized
import redis
@@ -65,11 +68,7 @@ def parse_resource_demands(resource_load_by_shape):
except Exception:
logger.exception("Failed to parse resource demands.")
# Bound the total number of bundles to 2xMAX_RESOURCE_DEMAND_VECTOR_SIZE.
# This guarantees the resource demand scheduler bin packing algorithm takes
# a reasonable amount of time to run.
return waiting_bundles[:AUTOSCALER_MAX_RESOURCE_DEMAND_VECTOR_SIZE], \
infeasible_bundles[:AUTOSCALER_MAX_RESOURCE_DEMAND_VECTOR_SIZE]
return waiting_bundles, infeasible_bundles
class Monitor:
@@ -184,14 +183,8 @@ class Monitor:
data: a resource request as JSON, e.g. {"CPU": 1}
"""
if not self.autoscaler:
return
try:
self.autoscaler.request_resources(json.loads(data))
except Exception:
# We don't want this to kill the monitor.
traceback.print_exc()
resource_request = json.loads(data)
self.load_metrics.set_resource_requests(resource_request)
def process_messages(self, max_messages=10000):
"""Process all messages ready in the subscription channels.
@@ -257,12 +250,23 @@ class Monitor:
# Handle messages from the subscription channels.
while True:
self.update_raylet_map()
self.update_load_metrics()
status = {
"load_metrics_report": self.load_metrics.summary()._asdict()
}
# Process autoscaling actions
if self.autoscaler:
# Only used to update the load metrics for the autoscaler.
self.update_raylet_map()
self.update_load_metrics()
self.autoscaler.update()
status[
"autoscaler_report"] = self.autoscaler.summary()._asdict()
as_json = json.dumps(status)
if _internal_kv_initialized():
_internal_kv_put(
DEBUG_AUTOSCALING_STATUS, as_json, overwrite=True)
# Process a round of messages.
self.process_messages()