Implement Python global state API for xray. (#2125)

* Implement global state API for xray.

* Fix object table.

* Fixes for log structure.

* Implement cluster_resources.

* Add driver task to task table.

* Remove python flatbuffers code

* Get some global state API tests running.

* Python linting.

* Fix linting.

* Fix mock modules for doc

* Copy over flatbuffer bindings.

* Fix for tests.

* Linting

* Fix monitor crash.
This commit is contained in:
Robert Nishihara
2018-05-29 16:25:54 -07:00
committed by Philipp Moritz
parent 166000b089
commit 6172f94c04
9 changed files with 474 additions and 179 deletions
+23 -11
View File
@@ -65,6 +65,8 @@ class Monitor(object):
Attributes:
redis: A connection to the Redis server.
use_raylet: A bool indicating whether to use the raylet code path or
not.
subscribe_client: A pubsub client for the Redis server. This is used to
receive notifications about failed components.
subscribed: A dictionary mapping channel names (str) to whether or not
@@ -84,6 +86,7 @@ class Monitor(object):
# Initialize the Redis clients.
self.state = ray.experimental.state.GlobalState()
self.state._initialize_global_state(redis_address, redis_port)
self.use_raylet = self.state.use_raylet
self.redis = redis.StrictRedis(
host=redis_address, port=redis_port, db=0)
# TODO(swang): Update pubsub client to use ray.experimental.state once
@@ -207,6 +210,11 @@ class Monitor(object):
that we do not miss any notifications for deleted clients that occurred
before we subscribed.
"""
# Exit if we are using the raylet code path because client_table is
# implemented differently. TODO(rkn): Fix this.
if self.use_raylet:
return
clients = self.state.client_table()
for node_ip_address, node_clients in clients.items():
for client in node_clients:
@@ -514,18 +522,22 @@ class Monitor(object):
# Handle messages from the subscription channels.
while True:
# Update the mapping from local scheduler client ID to IP address.
# This is only used to update the load metrics for the autoscaler.
local_schedulers = self.state.local_schedulers()
self.local_scheduler_id_to_ip_map = {}
for local_scheduler_info in local_schedulers:
client_id = local_scheduler_info["DBClientID"]
ip_address = local_scheduler_info["AuxAddress"].split(":")[0]
self.local_scheduler_id_to_ip_map[client_id] = ip_address
# TODO(rkn): The autoscaler needs to be re-enabled for xray.
if not self.use_raylet:
# Update the mapping from local scheduler client ID to IP
# address. This is only used to update the load metrics for the
# autoscaler.
local_schedulers = self.state.local_schedulers()
self.local_scheduler_id_to_ip_map = {}
for local_scheduler_info in local_schedulers:
client_id = local_scheduler_info["DBClientID"]
ip_address = local_scheduler_info["AuxAddress"].split(":")[
0]
self.local_scheduler_id_to_ip_map[client_id] = ip_address
# Process autoscaling actions
if self.autoscaler:
self.autoscaler.update()
# Process autoscaling actions
if self.autoscaler:
self.autoscaler.update()
# Record how many dead local schedulers and plasma managers we had
# at the beginning of this round.
num_dead_local_schedulers = len(self.dead_local_schedulers)