Clean up when a driver disconnects. (#462)

* Clean up state when drivers exit.

* Remove unnecessary field in ActorMapEntry struct.

* Have monitor release GPU resources in Redis when driver exits.

* Enable multiple drivers in multi-node tests and test driver cleanup.

* Make redis GPU allocation a redis transaction and small cleanups.

* Fix multi-node test.

* Small cleanups.

* Make global scheduler take node_ip_address so it appears in the right place in the client table.

* Cleanups.

* Fix linting and cleanups in local scheduler.

* Fix removed_driver_test.

* Fix bug related to vector -> list.

* Fix linting.

* Cleanup.

* Fix multi node tests.

* Fix jenkins tests.

* Add another multi node test with many drivers.

* Fix linting.

* Make the actor creation notification a flatbuffer message.

* Revert "Make the actor creation notification a flatbuffer message."

This reverts commit af99099c8084dbf9177fb4e34c0c9b1a12c78f39.

* Add comment explaining flatbuffer problems.
This commit is contained in:
Robert Nishihara
2017-04-24 18:10:21 -07:00
committed by Philipp Moritz
parent 8194b71f32
commit 0ac125e9b2
31 changed files with 1119 additions and 168 deletions
+96 -28
View File
@@ -7,13 +7,14 @@ import inspect
import json
import numpy as np
import random
import redis
import traceback
import ray.local_scheduler
import ray.pickling as pickling
import ray.signature as signature
import ray.worker
import ray.experimental.state as state
from ray.utils import binary_to_hex, hex_to_binary
# This is a variable used by each actor to indicate the IDs of the GPUs that
# the worker is currently allowed to use.
@@ -105,6 +106,72 @@ def fetch_and_register_actor(key, worker):
# the actor.
def attempt_to_reserve_gpus(num_gpus, driver_id, local_scheduler, worker):
"""Attempt to acquire GPUs on a particular local scheduler for an actor.
Args:
num_gpus: The number of GPUs to acquire.
driver_id: The ID of the driver responsible for creating the actor.
local_scheduler: Information about the local scheduler.
Returns:
A list of the GPU IDs that were successfully acquired. This should have
length either equal to num_gpus or equal to 0.
"""
local_scheduler_id = local_scheduler["DBClientID"]
local_scheduler_total_gpus = int(local_scheduler["NumGPUs"])
gpus_to_acquire = []
# Attempt to acquire GPU IDs atomically.
with worker.redis_client.pipeline() as pipe:
while True:
try:
# If this key is changed before the transaction below (the multi/exec
# block), then the transaction will not take place.
pipe.watch(local_scheduler_id)
# Figure out which GPUs are currently in use.
result = worker.redis_client.hget(local_scheduler_id, "gpus_in_use")
gpus_in_use = dict() if result is None else json.loads(result)
all_gpu_ids_in_use = []
for key in gpus_in_use:
all_gpu_ids_in_use += gpus_in_use[key]
assert len(all_gpu_ids_in_use) <= local_scheduler_total_gpus
assert len(set(all_gpu_ids_in_use)) == len(all_gpu_ids_in_use)
pipe.multi()
if local_scheduler_total_gpus - len(all_gpu_ids_in_use) >= num_gpus:
# There are enough available GPUs, so try to reserve some.
all_gpu_ids = set(range(local_scheduler_total_gpus))
for gpu_id in all_gpu_ids_in_use:
all_gpu_ids.remove(gpu_id)
gpus_to_acquire = list(all_gpu_ids)[:num_gpus]
# Use the hex driver ID so that the dictionary is JSON serializable.
driver_id_hex = binary_to_hex(driver_id)
if driver_id_hex not in gpus_in_use:
gpus_in_use[driver_id_hex] = []
gpus_in_use[driver_id_hex] += gpus_to_acquire
# Stick the updated GPU IDs back in Redis
pipe.hset(local_scheduler_id, "gpus_in_use", json.dumps(gpus_in_use))
pipe.execute()
# If a WatchError is not raised, then the operations should have gone
# through atomically.
break
except redis.WatchError:
# Another client must have changed the watched key between the time we
# started WATCHing it and the pipeline's execution. We should just
# retry.
gpus_to_acquire = []
continue
return gpus_to_acquire
def select_local_scheduler(local_schedulers, num_gpus, worker):
"""Select a local scheduler to assign this actor to.
@@ -121,42 +188,33 @@ def select_local_scheduler(local_schedulers, num_gpus, worker):
Exception: An exception is raised if no local scheduler can be found with
sufficient resources.
"""
# TODO(rkn): We should change this method to have a list of GPU IDs that we
# pop from and push to. The current implementation is not compatible with
# actors releasing GPU resources.
driver_id = worker.task_driver_id.id()
if num_gpus == 0:
local_scheduler_id = random.choice(local_schedulers)[b"ray_client_id"]
gpu_ids = []
local_scheduler_id = hex_to_binary(
random.choice(local_schedulers)["DBClientID"])
gpus_aquired = []
else:
# All of this logic is for finding a local scheduler that has enough
# available GPUs.
local_scheduler_id = None
# Loop through all of the local schedulers.
for local_scheduler in local_schedulers:
# See if there are enough available GPUs on this local scheduler.
local_scheduler_total_gpus = int(float(
local_scheduler[b"num_gpus"].decode("ascii")))
gpus_in_use = worker.redis_client.hget(local_scheduler[b"ray_client_id"],
b"gpus_in_use")
gpus_in_use = 0 if gpus_in_use is None else int(gpus_in_use)
if gpus_in_use + num_gpus <= local_scheduler_total_gpus:
# Attempt to reserve some GPUs for this actor.
new_gpus_in_use = worker.redis_client.hincrby(
local_scheduler[b"ray_client_id"], b"gpus_in_use", num_gpus)
if new_gpus_in_use > local_scheduler_total_gpus:
# If we failed to reserve the GPUs, undo the increment.
worker.redis_client.hincrby(local_scheduler[b"ray_client_id"],
b"gpus_in_use", num_gpus)
else:
# We succeeded at reserving the GPUs, so we are done.
local_scheduler_id = local_scheduler[b"ray_client_id"]
gpu_ids = list(range(new_gpus_in_use - num_gpus, new_gpus_in_use))
break
# Try to reserve enough GPUs on this local scheduler.
gpus_aquired = attempt_to_reserve_gpus(num_gpus, driver_id,
local_scheduler, worker)
if len(gpus_aquired) == num_gpus:
local_scheduler_id = hex_to_binary(local_scheduler["DBClientID"])
break
else:
# We should have either acquired as many GPUs as we need or none.
assert len(gpus_aquired) == 0
if local_scheduler_id is None:
raise Exception("Could not find a node with enough GPUs to create this "
"actor. The local scheduler information is {}."
.format(local_schedulers))
return local_scheduler_id, gpu_ids
return local_scheduler_id, gpus_aquired
def export_actor(actor_id, Class, actor_method_names, num_cpus, num_gpus,
@@ -183,13 +241,23 @@ def export_actor(actor_id, Class, actor_method_names, num_cpus, num_gpus,
worker.function_properties[driver_id][function_id] = (1, num_cpus,
num_gpus)
# Get a list of the local schedulers from the client table.
client_table = ray.global_state.client_table()
local_schedulers = []
for ip_address, clients in client_table.items():
for client in clients:
if client["ClientType"] == "local_scheduler":
local_schedulers.append(client)
# Select a local scheduler for the actor.
local_schedulers = state.get_local_schedulers(worker)
local_scheduler_id, gpu_ids = select_local_scheduler(local_schedulers,
num_gpus, worker)
# Really we should encode this message as a flatbuffer object. However, we're
# having trouble getting that to work. It almost works, but in Python 2.7,
# builder.CreateString fails on byte strings that contain characters outside
# range(128).
worker.redis_client.publish("actor_notifications",
actor_id.id() + local_scheduler_id)
actor_id.id() + driver_id + local_scheduler_id)
d = {"driver_id": driver_id,
"actor_id": actor_id.id(),