mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 23:23:15 +08:00
[xray] Workers blocked in a ray.get release their resources (#1920)
* [xray] Throttle task dispatch by required resources
* Pass in number of initial workers into raylet command
* Workers blocked in a ray.get release resources
This commit is contained in:
committed by
Alexey Tumanov
parent
1c965fcfeb
commit
aa07f1ce4e
+11
-2
@@ -910,6 +910,7 @@ def start_raylet(redis_address,
|
||||
plasma_store_name,
|
||||
worker_path,
|
||||
resources=None,
|
||||
num_workers=0,
|
||||
stdout_file=None,
|
||||
stderr_file=None,
|
||||
cleanup=True):
|
||||
@@ -956,8 +957,15 @@ def start_raylet(redis_address,
|
||||
plasma_store_name, raylet_name, redis_address))
|
||||
|
||||
command = [
|
||||
RAYLET_EXECUTABLE, raylet_name, plasma_store_name, node_ip_address,
|
||||
gcs_ip_address, gcs_port, start_worker_command, resource_argument
|
||||
RAYLET_EXECUTABLE,
|
||||
raylet_name,
|
||||
plasma_store_name,
|
||||
node_ip_address,
|
||||
gcs_ip_address,
|
||||
gcs_port,
|
||||
str(num_workers),
|
||||
start_worker_command,
|
||||
resource_argument,
|
||||
]
|
||||
pid = subprocess.Popen(command, stdout=stdout_file, stderr=stderr_file)
|
||||
|
||||
@@ -1471,6 +1479,7 @@ def start_ray_processes(address_info=None,
|
||||
object_store_addresses[i].name,
|
||||
worker_path,
|
||||
resources=resources[i],
|
||||
num_workers=workers_per_local_scheduler[i],
|
||||
stdout_file=raylet_stdout_file,
|
||||
stderr_file=raylet_stderr_file,
|
||||
cleanup=cleanup))
|
||||
|
||||
@@ -504,7 +504,7 @@ class Worker(object):
|
||||
|
||||
# If there were objects that we weren't able to get locally, let the
|
||||
# local scheduler know that we're now unblocked.
|
||||
if was_blocked and not self.use_raylet:
|
||||
if was_blocked:
|
||||
self.local_scheduler_client.notify_unblocked()
|
||||
|
||||
assert len(final_results) == len(object_ids)
|
||||
|
||||
Reference in New Issue
Block a user