[Core] Ownership-based Object Directory: Consolidate location table and reference table. (#13220)

* Added owned object reference before Plasma put on Create() + Seal() path.

* Consolidated location table and reference table in reference counter.

* Restore type in definition.

* Clean up owned reference on failed Seal().

* Added RemoveOwnedObject test for reference counter.

* Guard against ref going out of scope before location RPCs.

* Add 'owner must have ref in scope' precondition to documentation for object location methods.

* Move to separate Create() + Seal() methods for existing objects.

* Clearer distinction between Create() and Seal() methods.

* Make it clear that references will normally be cleaned up by reference counting.
This commit is contained in:
Clark Zinzow
2021-01-14 14:48:10 -07:00
committed by GitHub
parent d1e9887be2
commit 9a658b568f
12 changed files with 219 additions and 115 deletions
+18 -11
View File
@@ -915,13 +915,13 @@ cdef class CoreWorker:
CObjectID *c_object_id, shared_ptr[CBuffer] *data):
if object_ref is None:
with nogil:
check_status(CCoreWorkerProcess.GetCoreWorker().Create(
check_status(CCoreWorkerProcess.GetCoreWorker().CreateOwned(
metadata, data_size, contained_ids,
c_object_id, data))
else:
c_object_id[0] = object_ref.native()
with nogil:
check_status(CCoreWorkerProcess.GetCoreWorker().Create(
check_status(CCoreWorkerProcess.GetCoreWorker().CreateExisting(
metadata, data_size, c_object_id[0],
CCoreWorkerProcess.GetCoreWorker().GetRpcAddress(),
data))
@@ -933,7 +933,7 @@ cdef class CoreWorker:
return data.get() == NULL
def put_file_like_object(
self, metadata, data_size, file_like, ObjectRef object_ref=None):
self, metadata, data_size, file_like, ObjectRef object_ref):
"""Directly create a new Plasma Store object from a file like
object. This avoids extra memory copy.
@@ -971,8 +971,9 @@ cdef class CoreWorker:
# Using custom object refs is not supported because we
# can't track their lifecycle, so we don't pin the object
# in this case.
check_status(CCoreWorkerProcess.GetCoreWorker().Seal(
c_object_id, pin_object=False))
check_status(
CCoreWorkerProcess.GetCoreWorker().SealExisting(
c_object_id, pin_object=False))
def put_serialized_object(self, serialized_object,
ObjectRef object_ref=None,
@@ -1007,12 +1008,18 @@ cdef class CoreWorker:
c_object_id_vector, c_object_id))
else:
with nogil:
# Using custom object refs is not supported because we
# can't track their lifecycle, so we don't pin the object
# in this case.
check_status(CCoreWorkerProcess.GetCoreWorker().Seal(
c_object_id,
pin_object and object_ref is None))
if object_ref is None:
check_status(
CCoreWorkerProcess.GetCoreWorker().SealOwned(
c_object_id,
pin_object))
else:
# Using custom object refs is not supported because we
# can't track their lifecycle, so we don't pin the
# object in this case.
check_status(
CCoreWorkerProcess.GetCoreWorker().SealExisting(
c_object_id, pin_object=False))
return c_object_id.Binary()
+11 -10
View File
@@ -169,16 +169,17 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
CRayStatus Put(const CRayObject &object,
const c_vector[CObjectID] &contained_object_ids,
const CObjectID &object_id)
CRayStatus Create(const shared_ptr[CBuffer] &metadata,
const size_t data_size,
const c_vector[CObjectID] &contained_object_ids,
CObjectID *object_id, shared_ptr[CBuffer] *data)
CRayStatus Create(const shared_ptr[CBuffer] &metadata,
const size_t data_size,
const CObjectID &object_id,
const CAddress &owner_address,
shared_ptr[CBuffer] *data)
CRayStatus Seal(const CObjectID &object_id, c_bool pin_object)
CRayStatus CreateOwned(const shared_ptr[CBuffer] &metadata,
const size_t data_size,
const c_vector[CObjectID] &contained_object_ids,
CObjectID *object_id, shared_ptr[CBuffer] *data)
CRayStatus CreateExisting(const shared_ptr[CBuffer] &metadata,
const size_t data_size,
const CObjectID &object_id,
const CAddress &owner_address,
shared_ptr[CBuffer] *data)
CRayStatus SealOwned(const CObjectID &object_id, c_bool pin_object)
CRayStatus SealExisting(const CObjectID &object_id, c_bool pin_object)
CRayStatus Get(const c_vector[CObjectID] &ids, int64_t timeout_ms,
c_vector[shared_ptr[CRayObject]] *results,
c_bool plasma_objects_only)
+7
View File
@@ -370,6 +370,13 @@ def put_object(obj, use_ray_put):
return _put.remote(obj)
def put_unpinned_object(obj):
    """Serialize ``obj`` and store it via the core worker with pinning off.

    The value is serialized with the global worker's serialization context
    and handed to ``core_worker.put_serialized_object`` with
    ``pin_object=False``.

    Args:
        obj: The value to serialize and put.

    Returns:
        A ``ray.ObjectRef`` built from the ID returned by
        ``put_serialized_object``.
    """
    serialization_context = (
        ray.worker.global_worker.get_serialization_context())
    serialized = serialization_context.serialize(obj)
    # pin_object=False is the whole point of this helper: the regular
    # put path does not expose a way to skip pinning.
    object_id = ray.worker.global_worker.core_worker.put_serialized_object(
        serialized, pin_object=False)
    return ray.ObjectRef(object_id)
def wait_until_server_available(address,
timeout_ms=5000,
retry_interval_ms=100):
+3 -2
View File
@@ -2,6 +2,7 @@ import numpy as np
import unittest
import ray
from ray.test_utils import put_unpinned_object
MB = 1024 * 1024
@@ -49,7 +50,7 @@ class TestMemoryLimits(unittest.TestCase):
try:
ray.init(num_cpus=1, _driver_object_store_memory=100 * MB)
ray.worker.global_worker.put_object(
np.zeros(50 * MB, dtype=np.uint8), pin_object=False)
np.zeros(50 * MB, dtype=np.uint8))
self.assertRaises(
OBJECT_TOO_LARGE,
lambda: ray.put(np.zeros(200 * MB, dtype=np.uint8)))
@@ -64,7 +65,7 @@ class TestMemoryLimits(unittest.TestCase):
object_store_memory=300 * MB,
_driver_object_store_memory=driver_quota)
obj = np.ones(200 * 1024, dtype=np.uint8)
z = ray.worker.global_worker.put_object(obj, pin_object=False)
z = put_unpinned_object(obj)
a = LightActor._remote(object_store_memory=a_quota)
b = GreedyActor._remote(object_store_memory=b_quota)
for _ in range(5):
@@ -2,6 +2,7 @@ import numpy as np
import unittest
import ray
from ray.test_utils import put_unpinned_object
class TestObjectLostErrors(unittest.TestCase):
@@ -15,8 +16,7 @@ class TestObjectLostErrors(unittest.TestCase):
ray.shutdown()
def testDriverPutEvictedCannotReconstruct(self):
x_id = ray.worker.global_worker.put_object(
np.zeros(1 * 1024 * 1024), pin_object=False)
x_id = put_unpinned_object(np.zeros(1 * 1024 * 1024))
ray.get(x_id)
for _ in range(20):
ray.put(np.zeros(10 * 1024 * 1024))
+3 -5
View File
@@ -228,7 +228,7 @@ class Worker:
def set_load_code_from_local(self, load_code_from_local):
self._load_code_from_local = load_code_from_local
def put_object(self, value, object_ref=None, pin_object=True):
def put_object(self, value, object_ref=None):
"""Put value in the local object store with object reference `object_ref`.
This assumes that the value for `object_ref` has not yet been placed in
@@ -242,7 +242,6 @@ class Worker:
value: The value to put in the object store.
object_ref (ObjectRef): The object ref of the value to be
put. If None, one will be generated.
pin_object: If set, the object will be pinned at the raylet.
Returns:
ObjectRef: The object ref the object was put under.
@@ -274,8 +273,7 @@ class Worker:
# reference counter.
return ray.ObjectRef(
self.core_worker.put_serialized_object(
serialized_value, object_ref=object_ref,
pin_object=pin_object))
serialized_value, object_ref=object_ref))
def deserialize_objects(self, data_metadata_pairs, object_refs):
context = self.get_serialization_context()
@@ -1418,7 +1416,7 @@ def put(value):
worker.check_connected()
with profiling.profile("ray.put"):
try:
object_ref = worker.put_object(value, pin_object=True)
object_ref = worker.put_object(value)
except ObjectStoreFullError:
logger.info(
"Put failed since the value was either too large or the "