[core] Introduce fetch_local to ray.wait (#12526)

This commit is contained in:
Yi Cheng
2020-12-16 23:44:28 -08:00
committed by GitHub
parent 12231ec2a6
commit 40032541dc
19 changed files with 122 additions and 69 deletions
+2 -2
View File
@@ -1004,7 +1004,7 @@ cdef class CoreWorker:
return c_object_id.Binary()
def wait(self, object_refs, int num_returns, int64_t timeout_ms,
TaskID current_task_id):
TaskID current_task_id, c_bool fetch_local):
cdef:
c_vector[CObjectID] wait_ids
c_vector[c_bool] results
@@ -1013,7 +1013,7 @@ cdef class CoreWorker:
wait_ids = ObjectRefsToVector(object_refs)
with nogil:
check_status(CCoreWorkerProcess.GetCoreWorker().Wait(
wait_ids, num_returns, timeout_ms, &results))
wait_ids, num_returns, timeout_ms, &results, fetch_local))
assert len(results) == len(object_refs)
+2 -1
View File
@@ -179,7 +179,8 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
c_bool plasma_objects_only)
CRayStatus Contains(const CObjectID &object_id, c_bool *has_object)
CRayStatus Wait(const c_vector[CObjectID] &object_ids, int num_objects,
int64_t timeout_ms, c_vector[c_bool] *results)
int64_t timeout_ms, c_vector[c_bool] *results,
c_bool fetch_local)
CRayStatus Delete(const c_vector[CObjectID] &object_ids,
c_bool local_only, c_bool delete_creating_tasks)
CRayStatus TriggerGlobalGC()
+36
View File
@@ -371,6 +371,42 @@ def test_ray_options(shutdown_only):
assert without_options != with_options
@pytest.mark.parametrize(
    "ray_start_cluster_head",
    [dict(num_cpus=0, object_store_memory=75 * 1024 * 1024)],
    indirect=True,
)
def test_fetch_local(ray_start_cluster_head):
    """Exercise the ``fetch_local`` flag of ``ray.wait``.

    The head node is started with zero CPUs and a 75 MB object store, and a
    second node with two CPUs and the same store size is added. The test
    then checks, step by step, what ``ray.wait`` reports for a ~40 MB task
    result with ``fetch_local=False`` and ``fetch_local=True``.
    """
    head = ray_start_cluster_head
    head.add_node(num_cpus=2, object_store_memory=75 * 1024 * 1024)
    gate = ray.test_utils.SignalActor.remote()

    @ray.remote
    def put():
        # Hold the task back until the driver calls ``send`` on the actor
        # (presumably ``wait`` only resolves after ``send`` -- confirm
        # against SignalActor).
        ray.wait([gate.wait.remote()])
        return np.random.rand(5 * 1024 * 1024)  # 40 MB data

    # A 40 MB object put by the driver; together with a second 40 MB object
    # it would exceed the 75 MB store configured for the head node.
    occupying_ref = ray.put(np.random.rand(5 * 1024 * 1024))
    task_ref = put.remote()

    # Step 1: the task is still gated, so the data is not ready on any node.
    ready, pending = ray.wait([task_ref], timeout=2, fetch_local=False)
    assert (len(ready), len(pending)) == (0, 1)

    # Release the gate and wait for the ``send`` call itself to finish.
    ray.wait([gate.send.remote()])

    # Step 2: the data is ready on some node, but not on the local node.
    ready, pending = ray.wait([task_ref], fetch_local=False)
    assert (len(ready), len(pending)) == (1, 0)

    # Step 3: with fetch_local=True the wait is expected to time out
    # (presumably because the local store cannot hold both 40 MB objects
    # -- confirm).
    ready, pending = ray.wait([task_ref], timeout=2, fetch_local=True)
    assert (len(ready), len(pending)) == (0, 1)

    # Step 4: after dropping the driver's own object, the same wait (now
    # without a timeout) is expected to report the result as ready.
    del occupying_ref
    ready, pending = ray.wait([task_ref], fetch_local=True)
    assert (len(ready), len(pending)) == (1, 0)
def test_nested_functions(ray_start_shared_local_modes):
# Make sure that remote functions can use other values that are defined
# after the remote function but before the first function invocation.
+7 -1
View File
@@ -1417,7 +1417,7 @@ def put(value):
blocking_wait_inside_async_warned = False
def wait(object_refs, *, num_returns=1, timeout=None):
def wait(object_refs, *, num_returns=1, timeout=None, fetch_local=True):
"""Return a list of IDs that are ready and a list of IDs that are not.
If timeout is set, the function returns either when the requested number of
@@ -1445,6 +1445,11 @@ def wait(object_refs, *, num_returns=1, timeout=None):
num_returns (int): The number of object refs that should be returned.
timeout (float): The maximum amount of time in seconds to wait before
returning.
fetch_local (bool): If True, wait for the object to be downloaded onto
the local node before returning it as ready. If False, ray.wait()
will not trigger fetching of objects to the local node and will
return immediately once the object is available anywhere in the
cluster.
Returns:
A list of object refs that are ready and a list of the remaining object
@@ -1507,6 +1512,7 @@ def wait(object_refs, *, num_returns=1, timeout=None):
num_returns,
timeout_milliseconds,
worker.current_task_id,
fetch_local,
)
return ready_ids, remaining_ids