mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 00:29:38 +08:00
Add entries to in-memory store on Put() (#7085)
This commit is contained in:
+8
-13
@@ -177,10 +177,9 @@ cdef c_vector[CObjectID] ObjectIDsToVector(object_ids):
|
||||
The output vector.
|
||||
"""
|
||||
cdef:
|
||||
ObjectID object_id
|
||||
c_vector[CObjectID] result
|
||||
for object_id in object_ids:
|
||||
result.push_back(object_id.native())
|
||||
result.push_back((<ObjectID>object_id).native())
|
||||
return result
|
||||
|
||||
|
||||
@@ -267,7 +266,6 @@ cdef void prepare_args(
|
||||
int64_t put_threshold
|
||||
shared_ptr[CBuffer] arg_data
|
||||
c_vector[CObjectID] inlined_ids
|
||||
ObjectID obj_id
|
||||
|
||||
worker = ray.worker.global_worker
|
||||
put_threshold = RayConfig.instance().max_direct_call_object_size()
|
||||
@@ -288,8 +286,8 @@ cdef void prepare_args(
|
||||
arg_data = dynamic_pointer_cast[CBuffer, LocalMemoryBuffer](
|
||||
make_shared[LocalMemoryBuffer](size))
|
||||
write_serialized_object(serialized_arg, arg_data)
|
||||
for obj_id in serialized_arg.contained_object_ids:
|
||||
inlined_ids.push_back(obj_id.native())
|
||||
for object_id in serialized_arg.contained_object_ids:
|
||||
inlined_ids.push_back((<ObjectID>object_id).native())
|
||||
args_vector.push_back(
|
||||
CTaskArg.PassByValue(make_shared[CRayObject](
|
||||
arg_data, string_to_buffer(serialized_arg.metadata),
|
||||
@@ -298,7 +296,7 @@ cdef void prepare_args(
|
||||
else:
|
||||
args_vector.push_back(
|
||||
CTaskArg.PassByReference((CObjectID.FromBinary(
|
||||
core_worker.put_serialized_cobject(serialized_arg)))))
|
||||
core_worker.put_serialized_object(serialized_arg)))))
|
||||
|
||||
cdef deserialize_args(
|
||||
const c_vector[shared_ptr[CRayObject]] &c_args,
|
||||
@@ -694,12 +692,6 @@ cdef class CoreWorker:
|
||||
def put_serialized_object(self, serialized_object,
|
||||
ObjectID object_id=None,
|
||||
c_bool pin_object=True):
|
||||
return ObjectID(self.put_serialized_cobject(
|
||||
serialized_object, object_id, pin_object))
|
||||
|
||||
def put_serialized_cobject(self, serialized_object,
|
||||
ObjectID object_id=None,
|
||||
c_bool pin_object=True):
|
||||
cdef:
|
||||
CObjectID c_object_id
|
||||
shared_ptr[CBuffer] data
|
||||
@@ -916,7 +908,9 @@ cdef class CoreWorker:
|
||||
extra_data)
|
||||
|
||||
def deserialize_and_register_actor_handle(self, const c_string &bytes):
|
||||
cdef CActorHandle* c_actor_handle
|
||||
cdef:
|
||||
CActorHandle* c_actor_handle
|
||||
|
||||
worker = ray.worker.get_global_worker()
|
||||
worker.check_connected()
|
||||
manager = worker.function_actor_manager
|
||||
@@ -996,6 +990,7 @@ cdef class CoreWorker:
|
||||
CObjectID c_outer_object_id = outer_object_id.native()
|
||||
CTaskID c_owner_id = CTaskID.FromBinary(owner_id_binary)
|
||||
CAddress c_owner_address = CAddress()
|
||||
|
||||
c_owner_address.ParseFromString(serialized_owner_address)
|
||||
self.core_worker.get().RegisterOwnershipInfoAndResolveFuture(
|
||||
c_object_id,
|
||||
|
||||
@@ -40,8 +40,9 @@ class PlasmaEventHandler:
|
||||
"""Process notifications."""
|
||||
for object_id, object_size, metadata_size in messages:
|
||||
if object_size > 0 and object_id in self._waiting_dict:
|
||||
# This must be asynchronous to allow objects to be locally
|
||||
# received
|
||||
# This must be asynchronous because it runs on the main IO
|
||||
# thread in the worker. If this is blocked, other messages
|
||||
# won't be received.
|
||||
self._loop.call_soon_threadsafe(_complete_future, self,
|
||||
object_id)
|
||||
|
||||
|
||||
@@ -152,6 +152,8 @@ cdef extern from "ray/common/id.h" namespace "ray" nogil:
|
||||
|
||||
CObjectID WithPlasmaTransportType()
|
||||
|
||||
CObjectID WithDirectTransportType()
|
||||
|
||||
int64_t ObjectIndex() const
|
||||
|
||||
CTaskID TaskId() const
|
||||
|
||||
@@ -185,7 +185,7 @@ cdef class ObjectID(BaseID):
|
||||
|
||||
@classmethod
|
||||
def from_random(cls):
|
||||
return cls(CObjectID.FromRandom().Binary())
|
||||
return cls(CObjectID.FromRandom().WithDirectTransportType().Binary())
|
||||
|
||||
def __await__(self):
|
||||
# Delayed import because this can only be imported in py3.
|
||||
|
||||
@@ -192,6 +192,10 @@ class SerializationContext:
|
||||
# UniqueIDs are serialized as
|
||||
# (class name, (unique bytes,)).
|
||||
outer_id = context.get_outer_object_id()
|
||||
# outer_id is None in the case that this ObjectID was closed
|
||||
# over in a function or pickled directly using pickle.dumps().
|
||||
if outer_id is None:
|
||||
outer_id = ray.ObjectID.nil()
|
||||
worker.core_worker.deserialize_and_register_object_id(
|
||||
obj_id[1][0], outer_id, owner_id[1][0], owner_address)
|
||||
return deserialized_object_id
|
||||
|
||||
@@ -235,3 +235,15 @@ class RemoteSignal:
|
||||
|
||||
def wait(self):
|
||||
ray.get(self.signal_actor.wait.remote())
|
||||
|
||||
|
||||
@ray.remote
|
||||
def _put(obj):
|
||||
return obj
|
||||
|
||||
|
||||
def put_object(obj, use_ray_put):
|
||||
if use_ray_put:
|
||||
return ray.put(obj)
|
||||
else:
|
||||
return _put.remote(obj)
|
||||
|
||||
@@ -527,8 +527,8 @@ def test_put_pins_object(ray_start_object_store_memory):
|
||||
del x_id
|
||||
for _ in range(10):
|
||||
ray.put(np.zeros(10 * 1024 * 1024))
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
ray.get(ray.ObjectID(x_binary))
|
||||
assert not ray.worker.global_worker.core_worker.object_exists(
|
||||
ray.ObjectID(x_binary))
|
||||
|
||||
# weakref put
|
||||
y_id = ray.put("HI", weakref=True)
|
||||
|
||||
@@ -163,19 +163,18 @@ def test_asyncio_actor_async_get(ray_start_regular_shared):
|
||||
def remote_task():
|
||||
return 1
|
||||
|
||||
plasma_object = ray.put(2)
|
||||
|
||||
@ray.remote
|
||||
class AsyncGetter:
|
||||
async def get(self):
|
||||
return await remote_task.remote()
|
||||
|
||||
async def plasma_get(self):
|
||||
return await plasma_object
|
||||
async def plasma_get(self, plasma_object):
|
||||
return await plasma_object[0]
|
||||
|
||||
getter = AsyncGetter.options().remote()
|
||||
plasma_object = ray.put(2)
|
||||
getter = AsyncGetter.remote()
|
||||
assert ray.get(getter.get.remote()) == 1
|
||||
assert ray.get(getter.plasma_get.remote()) == 2
|
||||
assert ray.get(getter.plasma_get.remote([plasma_object])) == 2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -841,10 +841,15 @@ def test_raylet_crash_when_get(ray_start_regular):
|
||||
time.sleep(2)
|
||||
ray.worker._global_node.kill_raylet()
|
||||
|
||||
object_id = ray.put(None)
|
||||
ray.internal.free(object_id)
|
||||
while ray.worker.global_worker.core_worker.object_exists(object_id):
|
||||
time.sleep(1)
|
||||
|
||||
thread = threading.Thread(target=sleep_to_kill_raylet)
|
||||
thread.start()
|
||||
with pytest.raises(ray.exceptions.UnreconstructableError):
|
||||
ray.get(ray.ObjectID.from_random())
|
||||
ray.get(object_id)
|
||||
thread.join()
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# coding: utf-8
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
@@ -12,7 +13,7 @@ import pytest
|
||||
|
||||
import ray
|
||||
import ray.cluster_utils
|
||||
from ray.test_utils import SignalActor, wait_for_condition
|
||||
from ray.test_utils import SignalActor, put_object, wait_for_condition
|
||||
from ray.internal.internal_api import global_gc
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -372,19 +373,14 @@ def test_feature_flag(shutdown_only):
|
||||
# Remote function takes serialized reference and doesn't hold onto it after
|
||||
# finishing. Referenced object shouldn't be evicted while the task is pending
|
||||
# and should be evicted after it returns.
|
||||
def test_basic_serialized_reference(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_basic_serialized_reference(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote
|
||||
def pending(ref, dep):
|
||||
ray.get(ref[0])
|
||||
|
||||
# TODO(edoakes): currently these tests don't work with ray.put() so we need
|
||||
# to return from a task like this instead. Once that is fixed, should have
|
||||
# tests run with both codepaths.
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
array_oid = put.remote()
|
||||
array_oid = put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
signal = SignalActor.remote()
|
||||
oid = pending.remote([array_oid], signal.wait.remote())
|
||||
|
||||
@@ -406,7 +402,19 @@ def test_basic_serialized_reference(one_worker_100MiB):
|
||||
# Call a recursive chain of tasks that pass a serialized reference to the end
|
||||
# of the chain. The reference should still exist while the final task in the
|
||||
# chain is running and should be removed once it finishes.
|
||||
def test_recursive_serialized_reference(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_recursive_serialized_reference(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote(num_cpus=0)
|
||||
class Signal:
|
||||
def __init__(self):
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
def send(self):
|
||||
self.ready_event.set()
|
||||
|
||||
async def wait(self):
|
||||
await self.ready_event.wait()
|
||||
|
||||
@ray.remote
|
||||
def recursive(ref, signal, max_depth, depth=0):
|
||||
ray.get(ref[0])
|
||||
@@ -415,14 +423,11 @@ def test_recursive_serialized_reference(one_worker_100MiB):
|
||||
else:
|
||||
return recursive.remote(ref, signal, max_depth, depth + 1)
|
||||
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
signal = SignalActor.remote()
|
||||
|
||||
max_depth = 5
|
||||
array_oid = put.remote()
|
||||
array_oid = put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
head_oid = recursive.remote([array_oid], signal, max_depth)
|
||||
|
||||
# Remove the local reference.
|
||||
@@ -447,7 +452,8 @@ def test_recursive_serialized_reference(one_worker_100MiB):
|
||||
# Test that a passed reference held by an actor after the method finishes
|
||||
# is kept until the reference is removed from the actor. Also tests giving
|
||||
# the actor a duplicate reference to the same object ID.
|
||||
def test_actor_holding_serialized_reference(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_actor_holding_serialized_reference(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote
|
||||
class GreedyActor(object):
|
||||
def __init__(self):
|
||||
@@ -465,12 +471,9 @@ def test_actor_holding_serialized_reference(one_worker_100MiB):
|
||||
def delete_ref2(self):
|
||||
self.ref2 = None
|
||||
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
# Test that the reference held by the actor isn't evicted.
|
||||
array_oid = put.remote()
|
||||
array_oid = put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
actor = GreedyActor.remote()
|
||||
actor.set_ref1.remote([array_oid])
|
||||
|
||||
@@ -496,7 +499,19 @@ def test_actor_holding_serialized_reference(one_worker_100MiB):
|
||||
# Test that a passed reference held by an actor after a task finishes
|
||||
# is kept until the reference is removed from the worker. Also tests giving
|
||||
# the worker a duplicate reference to the same object ID.
|
||||
def test_worker_holding_serialized_reference(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_worker_holding_serialized_reference(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote(num_cpus=0)
|
||||
class Signal:
|
||||
def __init__(self):
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
def send(self):
|
||||
self.ready_event.set()
|
||||
|
||||
async def wait(self):
|
||||
await self.ready_event.wait()
|
||||
|
||||
@ray.remote
|
||||
def child(dep1, dep2):
|
||||
return
|
||||
@@ -505,14 +520,11 @@ def test_worker_holding_serialized_reference(one_worker_100MiB):
|
||||
def launch_pending_task(ref, signal):
|
||||
return child.remote(ref[0], signal.wait.remote())
|
||||
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
signal = SignalActor.remote()
|
||||
|
||||
# Test that the reference held by the actor isn't evicted.
|
||||
array_oid = put.remote()
|
||||
array_oid = put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
child_return_id = ray.get(launch_pending_task.remote([array_oid], signal))
|
||||
|
||||
# Remove the local reference.
|
||||
@@ -548,7 +560,19 @@ def test_basic_nested_ids(one_worker_100MiB):
|
||||
|
||||
# Test that an object containing object IDs within it pins the inner IDs
|
||||
# recursively and for submitted tasks.
|
||||
def test_recursively_nest_ids(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_recursively_nest_ids(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote(num_cpus=0)
|
||||
class Signal:
|
||||
def __init__(self):
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
def send(self):
|
||||
self.ready_event.set()
|
||||
|
||||
async def wait(self):
|
||||
await self.ready_event.wait()
|
||||
|
||||
@ray.remote
|
||||
def recursive(ref, signal, max_depth, depth=0):
|
||||
unwrapped = ray.get(ref[0])
|
||||
@@ -557,14 +581,11 @@ def test_recursively_nest_ids(one_worker_100MiB):
|
||||
else:
|
||||
return recursive.remote(unwrapped, signal, max_depth, depth + 1)
|
||||
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
signal = SignalActor.remote()
|
||||
|
||||
max_depth = 5
|
||||
array_oid = put.remote()
|
||||
array_oid = put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
nested_oid = array_oid
|
||||
for _ in range(max_depth):
|
||||
nested_oid = ray.put([nested_oid])
|
||||
@@ -591,14 +612,14 @@ def test_recursively_nest_ids(one_worker_100MiB):
|
||||
|
||||
# Test that serialized objectIDs returned from remote tasks are pinned until
|
||||
# they go out of scope on the caller side.
|
||||
def test_return_object_id(one_worker_100MiB):
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_return_object_id(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote
|
||||
def return_an_id():
|
||||
return [put.remote()]
|
||||
return [
|
||||
put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
]
|
||||
|
||||
outer_oid = return_an_id.remote()
|
||||
inner_oid_binary = ray.get(outer_oid)[0].binary()
|
||||
@@ -619,14 +640,29 @@ def test_return_object_id(one_worker_100MiB):
|
||||
|
||||
# Test that serialized objectIDs returned from remote tasks are pinned if
|
||||
# passed into another remote task by the caller.
|
||||
def test_pass_returned_object_id(one_worker_100MiB):
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_pass_returned_object_id(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote(num_cpus=0)
|
||||
class Signal:
|
||||
def __init__(self):
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
def send(self):
|
||||
self.ready_event.set()
|
||||
|
||||
async def wait(self):
|
||||
await self.ready_event.wait()
|
||||
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
return
|
||||
|
||||
@ray.remote
|
||||
def return_an_id():
|
||||
return [put.remote()]
|
||||
return [
|
||||
put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
]
|
||||
|
||||
@ray.remote
|
||||
def pending(ref, signal):
|
||||
@@ -654,14 +690,25 @@ def test_pass_returned_object_id(one_worker_100MiB):
|
||||
# returned by another task to the end of the chain. The reference should still
|
||||
# exist while the final task in the chain is running and should be removed once
|
||||
# it finishes.
|
||||
def test_recursively_pass_returned_object_id(one_worker_100MiB):
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_recursively_pass_returned_object_id(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote(num_cpus=0)
|
||||
class Signal:
|
||||
def __init__(self):
|
||||
self.ready_event = asyncio.Event()
|
||||
|
||||
def send(self):
|
||||
self.ready_event.set()
|
||||
|
||||
async def wait(self):
|
||||
await self.ready_event.wait()
|
||||
|
||||
@ray.remote
|
||||
def return_an_id():
|
||||
return [put.remote()]
|
||||
return [
|
||||
put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
]
|
||||
|
||||
@ray.remote
|
||||
def recursive(ref, signal, max_depth, depth=0):
|
||||
@@ -700,15 +747,13 @@ def test_recursively_pass_returned_object_id(one_worker_100MiB):
|
||||
# returns the same ObjectID by calling ray.get() on its submitted task and
|
||||
# returning the result. The reference should still exist while the driver has a
|
||||
# reference to the final task's ObjectID.
|
||||
def test_recursively_return_borrowed_object_id(one_worker_100MiB):
|
||||
@ray.remote
|
||||
def put():
|
||||
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
|
||||
|
||||
@pytest.mark.parametrize("use_ray_put", [False, True])
|
||||
def test_recursively_return_borrowed_object_id(one_worker_100MiB, use_ray_put):
|
||||
@ray.remote
|
||||
def recursive(num_tasks_left):
|
||||
if num_tasks_left == 0:
|
||||
return put.remote()
|
||||
return put_object(
|
||||
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
|
||||
|
||||
final_id = ray.get(recursive.remote(num_tasks_left - 1))
|
||||
ray.get(final_id)
|
||||
|
||||
@@ -270,8 +270,15 @@ class Worker:
|
||||
"call 'put' on it (or return it).")
|
||||
|
||||
serialized_value = self.get_serialization_context().serialize(value)
|
||||
return self.core_worker.put_serialized_object(
|
||||
serialized_value, object_id=object_id, pin_object=pin_object)
|
||||
# This *must* be the first place that we construct this python
|
||||
# ObjectID because an entry with 0 local references is created when
|
||||
# the object is Put() in the core worker, expecting that this python
|
||||
# reference will be created. If another reference is created and
|
||||
# removed before this one, it will corrupt the state in the
|
||||
# reference counter.
|
||||
return ray.ObjectID(
|
||||
self.core_worker.put_serialized_object(
|
||||
serialized_value, object_id=object_id, pin_object=pin_object))
|
||||
|
||||
def deserialize_objects(self, data_metadata_pairs, object_ids):
|
||||
context = self.get_serialization_context()
|
||||
|
||||
Reference in New Issue
Block a user