[xray] Workers blocked in a ray.get release their resources (#1920)

* [xray] Throttle task dispatch by required resources
* Pass the number of initial workers to the raylet command
* Workers blocked in a ray.get release resources
This commit is contained in:
Stephanie Wang
2018-04-18 20:59:58 -07:00
committed by Alexey Tumanov
parent 1c965fcfeb
commit aa07f1ce4e
10 changed files with 131 additions and 18 deletions
+11 -2
View File
@@ -910,6 +910,7 @@ def start_raylet(redis_address,
plasma_store_name,
worker_path,
resources=None,
num_workers=0,
stdout_file=None,
stderr_file=None,
cleanup=True):
@@ -956,8 +957,15 @@ def start_raylet(redis_address,
plasma_store_name, raylet_name, redis_address))
command = [
RAYLET_EXECUTABLE, raylet_name, plasma_store_name, node_ip_address,
gcs_ip_address, gcs_port, start_worker_command, resource_argument
RAYLET_EXECUTABLE,
raylet_name,
plasma_store_name,
node_ip_address,
gcs_ip_address,
gcs_port,
str(num_workers),
start_worker_command,
resource_argument,
]
pid = subprocess.Popen(command, stdout=stdout_file, stderr=stderr_file)
@@ -1471,6 +1479,7 @@ def start_ray_processes(address_info=None,
object_store_addresses[i].name,
worker_path,
resources=resources[i],
num_workers=workers_per_local_scheduler[i],
stdout_file=raylet_stdout_file,
stderr_file=raylet_stderr_file,
cleanup=cleanup))
+1 -1
View File
@@ -504,7 +504,7 @@ class Worker(object):
# If there were objects that we weren't able to get locally, let the
# local scheduler know that we're now unblocked.
if was_blocked and not self.use_raylet:
if was_blocked:
self.local_scheduler_client.notify_unblocked()
assert len(final_results) == len(object_ids)