[autoscaler] Add a 'request_cores' function for manual autoscaling (#4754)

This commit is contained in:
Daniel Edgecumbe
2019-07-27 01:14:45 +01:00
committed by Richard Liaw
parent d9e81da3b8
commit 06fec63c87
5 changed files with 137 additions and 11 deletions
+25
View File
@@ -7,6 +7,7 @@ import logging
import os
import time
import traceback
import json
import redis
@@ -212,6 +213,23 @@ class Monitor(object):
binary_to_hex(job_id)))
self._xray_clean_up_entries_for_job(job_id)
def autoscaler_resource_request_handler(self, _, data):
    """Handle a notification of a resource request for the autoscaler.

    Does nothing when no autoscaler is configured. Any error raised while
    decoding or forwarding the request is printed and swallowed, so a
    malformed message cannot take the monitor down.

    Args:
        _: The channel the message arrived on (unused).
        data: A resource request as JSON, e.g. {"CPU": 1}. May be text or
            UTF-8 encoded bytes.
    """
    if not self.autoscaler:
        return

    try:
        # json.loads() only accepts bytes from Python 3.6 onwards, so
        # decode byte payloads explicitly before parsing. This stays inside
        # the try block so a bad encoding is handled like bad JSON.
        if isinstance(data, (bytes, bytearray)):
            data = data.decode("utf-8")
        self.autoscaler.request_resources(json.loads(data))
    except Exception:
        # We don't want this to kill the monitor.
        traceback.print_exc()
def process_messages(self, max_messages=10000):
"""Process all messages ready in the subscription channels.
@@ -241,6 +259,9 @@ class Monitor(object):
elif channel == ray.gcs_utils.XRAY_JOB_CHANNEL:
# Handles driver death.
message_handler = self.xray_job_notification_handler
elif (channel ==
ray.ray_constants.AUTOSCALER_RESOURCE_REQUEST_CHANNEL):
message_handler = self.autoscaler_resource_request_handler
else:
raise Exception("This code should be unreachable.")
@@ -307,6 +328,10 @@ class Monitor(object):
self.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL)
self.subscribe(ray.gcs_utils.XRAY_JOB_CHANNEL)
if self.autoscaler:
self.subscribe(
ray.ray_constants.AUTOSCALER_RESOURCE_REQUEST_CHANNEL)
# TODO(rkn): If there were any dead clients at startup, we should clean
# up the associated state in the state tables.