[autoscaler] Use fixed timestamp to check against health timeouts (#3503)

This commit is contained in:
Eric Liang
2018-12-10 11:58:27 -08:00
committed by Robert Nishihara
parent abd781d607
commit 962f18756b
+9 -7
View File
@@ -362,12 +362,14 @@ class StandardAutoscaler(object):
raise e
def _update(self):
now = time.time()
# Throttle autoscaling updates to this interval to avoid exceeding
# rate limits on API calls.
if time.time() - self.last_update_time < self.update_interval_s:
if now - self.last_update_time < self.update_interval_s:
return
self.last_update_time = time.time()
self.last_update_time = now
num_pending = self.num_launches_pending.value
nodes = self.workers()
logger.info(self.info_string(nodes))
@@ -377,7 +379,7 @@ class StandardAutoscaler(object):
# Terminate any idle or out of date nodes
last_used = self.load_metrics.last_used_time_by_ip
horizon = time.time() - (60 * self.config["idle_timeout_minutes"])
horizon = now - (60 * self.config["idle_timeout_minutes"])
num_terminated = 0
for node_id in nodes:
node_ip = self.provider.internal_ip(node_id)
@@ -441,7 +443,7 @@ class StandardAutoscaler(object):
# Attempt to recover unhealthy nodes
for node_id in nodes:
self.recover_if_needed(node_id)
self.recover_if_needed(node_id, now)
def reload_config(self, errors_fatal=False):
try:
@@ -488,14 +490,14 @@ class StandardAutoscaler(object):
return False
return True
def recover_if_needed(self, node_id):
def recover_if_needed(self, node_id, now):
if not self.can_update(node_id):
return
key = self.provider.internal_ip(node_id)
if key not in self.load_metrics.last_heartbeat_time_by_ip:
self.load_metrics.last_heartbeat_time_by_ip[key] = time.time()
self.load_metrics.last_heartbeat_time_by_ip[key] = now
last_heartbeat_time = self.load_metrics.last_heartbeat_time_by_ip[key]
delta = time.time() - last_heartbeat_time
delta = now - last_heartbeat_time
if delta < AUTOSCALER_HEARTBEAT_TIMEOUT_S:
return
logger.warning("StandardAutoscaler: No heartbeat from node "