Add entries to in-memory store on Put() (#7085)

This commit is contained in:
Edward Oakes
2020-03-04 10:17:27 -08:00
committed by GitHub
parent aa4861c2a0
commit 0abcca258f
19 changed files with 186 additions and 196 deletions
+8 -13
View File
@@ -177,10 +177,9 @@ cdef c_vector[CObjectID] ObjectIDsToVector(object_ids):
The output vector.
"""
cdef:
ObjectID object_id
c_vector[CObjectID] result
for object_id in object_ids:
result.push_back(object_id.native())
result.push_back((<ObjectID>object_id).native())
return result
@@ -267,7 +266,6 @@ cdef void prepare_args(
int64_t put_threshold
shared_ptr[CBuffer] arg_data
c_vector[CObjectID] inlined_ids
ObjectID obj_id
worker = ray.worker.global_worker
put_threshold = RayConfig.instance().max_direct_call_object_size()
@@ -288,8 +286,8 @@ cdef void prepare_args(
arg_data = dynamic_pointer_cast[CBuffer, LocalMemoryBuffer](
make_shared[LocalMemoryBuffer](size))
write_serialized_object(serialized_arg, arg_data)
for obj_id in serialized_arg.contained_object_ids:
inlined_ids.push_back(obj_id.native())
for object_id in serialized_arg.contained_object_ids:
inlined_ids.push_back((<ObjectID>object_id).native())
args_vector.push_back(
CTaskArg.PassByValue(make_shared[CRayObject](
arg_data, string_to_buffer(serialized_arg.metadata),
@@ -298,7 +296,7 @@ cdef void prepare_args(
else:
args_vector.push_back(
CTaskArg.PassByReference((CObjectID.FromBinary(
core_worker.put_serialized_cobject(serialized_arg)))))
core_worker.put_serialized_object(serialized_arg)))))
cdef deserialize_args(
const c_vector[shared_ptr[CRayObject]] &c_args,
@@ -694,12 +692,6 @@ cdef class CoreWorker:
def put_serialized_object(self, serialized_object,
ObjectID object_id=None,
c_bool pin_object=True):
return ObjectID(self.put_serialized_cobject(
serialized_object, object_id, pin_object))
def put_serialized_cobject(self, serialized_object,
ObjectID object_id=None,
c_bool pin_object=True):
cdef:
CObjectID c_object_id
shared_ptr[CBuffer] data
@@ -916,7 +908,9 @@ cdef class CoreWorker:
extra_data)
def deserialize_and_register_actor_handle(self, const c_string &bytes):
cdef CActorHandle* c_actor_handle
cdef:
CActorHandle* c_actor_handle
worker = ray.worker.get_global_worker()
worker.check_connected()
manager = worker.function_actor_manager
@@ -996,6 +990,7 @@ cdef class CoreWorker:
CObjectID c_outer_object_id = outer_object_id.native()
CTaskID c_owner_id = CTaskID.FromBinary(owner_id_binary)
CAddress c_owner_address = CAddress()
c_owner_address.ParseFromString(serialized_owner_address)
self.core_worker.get().RegisterOwnershipInfoAndResolveFuture(
c_object_id,
+3 -2
View File
@@ -40,8 +40,9 @@ class PlasmaEventHandler:
"""Process notifications."""
for object_id, object_size, metadata_size in messages:
if object_size > 0 and object_id in self._waiting_dict:
# This must be asynchronous to allow objects to be locally
# received
# This must be asynchronous because it runs on the main IO
# thread in the worker. If this is blocked, other messages
# won't be received.
self._loop.call_soon_threadsafe(_complete_future, self,
object_id)
+2
View File
@@ -152,6 +152,8 @@ cdef extern from "ray/common/id.h" namespace "ray" nogil:
CObjectID WithPlasmaTransportType()
CObjectID WithDirectTransportType()
int64_t ObjectIndex() const
CTaskID TaskId() const
+1 -1
View File
@@ -185,7 +185,7 @@ cdef class ObjectID(BaseID):
@classmethod
def from_random(cls):
return cls(CObjectID.FromRandom().Binary())
return cls(CObjectID.FromRandom().WithDirectTransportType().Binary())
def __await__(self):
# Delayed import because this can only be imported in py3.
+4
View File
@@ -192,6 +192,10 @@ class SerializationContext:
# UniqueIDs are serialized as
# (class name, (unique bytes,)).
outer_id = context.get_outer_object_id()
# outer_id is None in the case that this ObjectID was closed
# over in a function or pickled directly using pickle.dumps().
if outer_id is None:
outer_id = ray.ObjectID.nil()
worker.core_worker.deserialize_and_register_object_id(
obj_id[1][0], outer_id, owner_id[1][0], owner_address)
return deserialized_object_id
+12
View File
@@ -235,3 +235,15 @@ class RemoteSignal:
def wait(self):
    """Submit the signal actor's remote ``wait`` call and fetch its result.

    The call is issued with ``.remote()`` and resolved through ``ray.get``;
    the fetched value is discarded, so this method returns ``None``.
    """
    pending_wait = self.signal_actor.wait.remote()
    ray.get(pending_wait)
@ray.remote
def _put(obj):
    """Return ``obj`` unchanged; declared as a Ray remote function."""
    return obj


def put_object(obj, use_ray_put):
    """Store ``obj`` through one of two code paths and return the result.

    Args:
        obj: The value to store.
        use_ray_put: If truthy, store the value with ``ray.put``; otherwise
            submit the ``_put`` remote function with ``obj`` as its argument.

    Returns:
        Whatever ``ray.put(obj)`` or ``_put.remote(obj)`` returns.
    """
    # Guard clause: the task-return path is taken when ray.put is not wanted.
    if not use_ray_put:
        return _put.remote(obj)
    return ray.put(obj)
+2 -2
View File
@@ -527,8 +527,8 @@ def test_put_pins_object(ray_start_object_store_memory):
del x_id
for _ in range(10):
ray.put(np.zeros(10 * 1024 * 1024))
with pytest.raises(ray.exceptions.UnreconstructableError):
ray.get(ray.ObjectID(x_binary))
assert not ray.worker.global_worker.core_worker.object_exists(
ray.ObjectID(x_binary))
# weakref put
y_id = ray.put("HI", weakref=True)
+5 -6
View File
@@ -163,19 +163,18 @@ def test_asyncio_actor_async_get(ray_start_regular_shared):
def remote_task():
return 1
plasma_object = ray.put(2)
@ray.remote
class AsyncGetter:
async def get(self):
return await remote_task.remote()
async def plasma_get(self):
return await plasma_object
async def plasma_get(self, plasma_object):
return await plasma_object[0]
getter = AsyncGetter.options().remote()
plasma_object = ray.put(2)
getter = AsyncGetter.remote()
assert ray.get(getter.get.remote()) == 1
assert ray.get(getter.plasma_get.remote()) == 2
assert ray.get(getter.plasma_get.remote([plasma_object])) == 2
if __name__ == "__main__":
+6 -1
View File
@@ -841,10 +841,15 @@ def test_raylet_crash_when_get(ray_start_regular):
time.sleep(2)
ray.worker._global_node.kill_raylet()
object_id = ray.put(None)
ray.internal.free(object_id)
while ray.worker.global_worker.core_worker.object_exists(object_id):
time.sleep(1)
thread = threading.Thread(target=sleep_to_kill_raylet)
thread.start()
with pytest.raises(ray.exceptions.UnreconstructableError):
ray.get(ray.ObjectID.from_random())
ray.get(object_id)
thread.join()
+99 -54
View File
@@ -1,4 +1,5 @@
# coding: utf-8
import asyncio
import copy
import json
import logging
@@ -12,7 +13,7 @@ import pytest
import ray
import ray.cluster_utils
from ray.test_utils import SignalActor, wait_for_condition
from ray.test_utils import SignalActor, put_object, wait_for_condition
from ray.internal.internal_api import global_gc
logger = logging.getLogger(__name__)
@@ -372,19 +373,14 @@ def test_feature_flag(shutdown_only):
# Remote function takes serialized reference and doesn't hold onto it after
# finishing. Referenced object shouldn't be evicted while the task is pending
# and should be evicted after it returns.
def test_basic_serialized_reference(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_basic_serialized_reference(one_worker_100MiB, use_ray_put):
@ray.remote
def pending(ref, dep):
ray.get(ref[0])
# TODO(edoakes): currently these tests don't work with ray.put() so we need
# to return from a task like this instead. Once that is fixed, should have
# tests run with both codepaths.
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
array_oid = put.remote()
array_oid = put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
signal = SignalActor.remote()
oid = pending.remote([array_oid], signal.wait.remote())
@@ -406,7 +402,19 @@ def test_basic_serialized_reference(one_worker_100MiB):
# Call a recursive chain of tasks that pass a serialized reference to the end
# of the chain. The reference should still exist while the final task in the
# chain is running and should be removed once it finishes.
def test_recursive_serialized_reference(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_recursive_serialized_reference(one_worker_100MiB, use_ray_put):
@ray.remote(num_cpus=0)
class Signal:
def __init__(self):
self.ready_event = asyncio.Event()
def send(self):
self.ready_event.set()
async def wait(self):
await self.ready_event.wait()
@ray.remote
def recursive(ref, signal, max_depth, depth=0):
ray.get(ref[0])
@@ -415,14 +423,11 @@ def test_recursive_serialized_reference(one_worker_100MiB):
else:
return recursive.remote(ref, signal, max_depth, depth + 1)
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
signal = SignalActor.remote()
max_depth = 5
array_oid = put.remote()
array_oid = put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
head_oid = recursive.remote([array_oid], signal, max_depth)
# Remove the local reference.
@@ -447,7 +452,8 @@ def test_recursive_serialized_reference(one_worker_100MiB):
# Test that a passed reference held by an actor after the method finishes
# is kept until the reference is removed from the actor. Also tests giving
# the actor a duplicate reference to the same object ID.
def test_actor_holding_serialized_reference(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_actor_holding_serialized_reference(one_worker_100MiB, use_ray_put):
@ray.remote
class GreedyActor(object):
def __init__(self):
@@ -465,12 +471,9 @@ def test_actor_holding_serialized_reference(one_worker_100MiB):
def delete_ref2(self):
self.ref2 = None
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
# Test that the reference held by the actor isn't evicted.
array_oid = put.remote()
array_oid = put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
actor = GreedyActor.remote()
actor.set_ref1.remote([array_oid])
@@ -496,7 +499,19 @@ def test_actor_holding_serialized_reference(one_worker_100MiB):
# Test that a passed reference held by an actor after a task finishes
# is kept until the reference is removed from the worker. Also tests giving
# the worker a duplicate reference to the same object ID.
def test_worker_holding_serialized_reference(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_worker_holding_serialized_reference(one_worker_100MiB, use_ray_put):
@ray.remote(num_cpus=0)
class Signal:
def __init__(self):
self.ready_event = asyncio.Event()
def send(self):
self.ready_event.set()
async def wait(self):
await self.ready_event.wait()
@ray.remote
def child(dep1, dep2):
return
@@ -505,14 +520,11 @@ def test_worker_holding_serialized_reference(one_worker_100MiB):
def launch_pending_task(ref, signal):
return child.remote(ref[0], signal.wait.remote())
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
signal = SignalActor.remote()
# Test that the reference held by the actor isn't evicted.
array_oid = put.remote()
array_oid = put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
child_return_id = ray.get(launch_pending_task.remote([array_oid], signal))
# Remove the local reference.
@@ -548,7 +560,19 @@ def test_basic_nested_ids(one_worker_100MiB):
# Test that an object containing object IDs within it pins the inner IDs
# recursively and for submitted tasks.
def test_recursively_nest_ids(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_recursively_nest_ids(one_worker_100MiB, use_ray_put):
@ray.remote(num_cpus=0)
class Signal:
def __init__(self):
self.ready_event = asyncio.Event()
def send(self):
self.ready_event.set()
async def wait(self):
await self.ready_event.wait()
@ray.remote
def recursive(ref, signal, max_depth, depth=0):
unwrapped = ray.get(ref[0])
@@ -557,14 +581,11 @@ def test_recursively_nest_ids(one_worker_100MiB):
else:
return recursive.remote(unwrapped, signal, max_depth, depth + 1)
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
signal = SignalActor.remote()
max_depth = 5
array_oid = put.remote()
array_oid = put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
nested_oid = array_oid
for _ in range(max_depth):
nested_oid = ray.put([nested_oid])
@@ -591,14 +612,14 @@ def test_recursively_nest_ids(one_worker_100MiB):
# Test that serialized objectIDs returned from remote tasks are pinned until
# they go out of scope on the caller side.
def test_return_object_id(one_worker_100MiB):
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_return_object_id(one_worker_100MiB, use_ray_put):
@ray.remote
def return_an_id():
return [put.remote()]
return [
put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
]
outer_oid = return_an_id.remote()
inner_oid_binary = ray.get(outer_oid)[0].binary()
@@ -619,14 +640,29 @@ def test_return_object_id(one_worker_100MiB):
# Test that serialized objectIDs returned from remote tasks are pinned if
# passed into another remote task by the caller.
def test_pass_returned_object_id(one_worker_100MiB):
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_pass_returned_object_id(one_worker_100MiB, use_ray_put):
@ray.remote(num_cpus=0)
class Signal:
def __init__(self):
self.ready_event = asyncio.Event()
def send(self):
self.ready_event.set()
async def wait(self):
await self.ready_event.wait()
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
return
@ray.remote
def return_an_id():
return [put.remote()]
return [
put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
]
@ray.remote
def pending(ref, signal):
@@ -654,14 +690,25 @@ def test_pass_returned_object_id(one_worker_100MiB):
# returned by another task to the end of the chain. The reference should still
# exist while the final task in the chain is running and should be removed once
# it finishes.
def test_recursively_pass_returned_object_id(one_worker_100MiB):
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_recursively_pass_returned_object_id(one_worker_100MiB, use_ray_put):
@ray.remote(num_cpus=0)
class Signal:
def __init__(self):
self.ready_event = asyncio.Event()
def send(self):
self.ready_event.set()
async def wait(self):
await self.ready_event.wait()
@ray.remote
def return_an_id():
return [put.remote()]
return [
put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
]
@ray.remote
def recursive(ref, signal, max_depth, depth=0):
@@ -700,15 +747,13 @@ def test_recursively_pass_returned_object_id(one_worker_100MiB):
# returns the same ObjectID by calling ray.get() on its submitted task and
# returning the result. The reference should still exist while the driver has a
# reference to the final task's ObjectID.
def test_recursively_return_borrowed_object_id(one_worker_100MiB):
@ray.remote
def put():
return np.zeros(40 * 1024 * 1024, dtype=np.uint8)
@pytest.mark.parametrize("use_ray_put", [False, True])
def test_recursively_return_borrowed_object_id(one_worker_100MiB, use_ray_put):
@ray.remote
def recursive(num_tasks_left):
if num_tasks_left == 0:
return put.remote()
return put_object(
np.zeros(40 * 1024 * 1024, dtype=np.uint8), use_ray_put)
final_id = ray.get(recursive.remote(num_tasks_left - 1))
ray.get(final_id)
+9 -2
View File
@@ -270,8 +270,15 @@ class Worker:
"call 'put' on it (or return it).")
serialized_value = self.get_serialization_context().serialize(value)
return self.core_worker.put_serialized_object(
serialized_value, object_id=object_id, pin_object=pin_object)
# This *must* be the first place that we construct this python
# ObjectID because an entry with 0 local references is created when
# the object is Put() in the core worker, expecting that this python
# reference will be created. If another reference is created and
# removed before this one, it will corrupt the state in the
# reference counter.
return ray.ObjectID(
self.core_worker.put_serialized_object(
serialized_value, object_id=object_id, pin_object=pin_object))
def deserialize_objects(self, data_metadata_pairs, object_ids):
context = self.get_serialization_context()