[Core] put small objects in memory store (#8972)

* remove the put in memory store * put small objects directly in memory store * cast data type * fix another place that uses Put to spill to plasma store * fix multiple tests related to memory limits * partially fix test_metrics * remove non-functioning code * fix core_worker_test * refactor put-to-plasma code * add a flag for the new feature * add flag to more places * do a warmup round for the plasma store * lint * lint again * fix warmup store * Update _raylet.pyx Co-authored-by: Eric Liang <ekhliang@gmail.com>
2026-06-27 21:23:10 +08:00 · 2020-07-09 15:39:40 -07:00
parent 34b85659d4
commit 8a76f4cbb5
18 changed files with 132 additions and 51 deletions
@@ -774,9 +774,14 @@ cdef class CoreWorker:
            CObjectID c_object_id
            shared_ptr[CBuffer] data
            shared_ptr[CBuffer] metadata
+            int64_t put_threshold
+            c_bool put_small_object_in_memory_store
            c_vector[CObjectID] c_object_id_vector

        metadata = string_to_buffer(serialized_object.metadata)
+        put_threshold = RayConfig.instance().max_direct_call_object_size()
+        put_small_object_in_memory_store = (
+            RayConfig.instance().put_small_object_in_memory_store())
        total_bytes = serialized_object.total_bytes
        object_already_exists = self._create_put_buffer(
            metadata, total_bytes, object_id,
@@ -787,7 +792,8 @@ cdef class CoreWorker:
            if total_bytes > 0:
                (<SerializedObject>serialized_object).write_to(
                    Buffer.make(data))
-            if self.is_local_mode:
+            if self.is_local_mode or (put_small_object_in_memory_store
+               and <int64_t>total_bytes < put_threshold):
                c_object_id_vector.push_back(c_object_id)
                check_status(CCoreWorkerProcess.GetCoreWorker().Put(
                        CRayObject(data, metadata, c_object_id_vector),
@@ -1103,7 +1109,8 @@ cdef class CoreWorker:
        cdef:
            CObjectID c_object_id = object_id.native()
            CAddress c_owner_address = CAddress()
-        CCoreWorkerProcess.GetCoreWorker().PromoteToPlasmaAndGetOwnershipInfo(
+        CCoreWorkerProcess.GetCoreWorker().PromoteObjectToPlasma(c_object_id)
+        CCoreWorkerProcess.GetCoreWorker().GetOwnershipInfo(
                c_object_id, &c_owner_address)
        return (object_id,
                c_owner_address.SerializeAsString())
@@ -127,11 +127,13 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
        const CActorHandle* GetNamedActorHandle(const c_string &name)
        void AddLocalReference(const CObjectID &object_id)
        void RemoveLocalReference(const CObjectID &object_id)
+        void PutObjectIntoPlasma(const CRayObject &object,
+                                 const CObjectID &object_id)
        const CAddress &GetRpcAddress() const
        CAddress GetOwnerAddress(const CObjectID &object_id) const
        void PromoteObjectToPlasma(const CObjectID &object_id)
-        void PromoteToPlasmaAndGetOwnershipInfo(const CObjectID &object_id,
-                                                CAddress *owner_address)
+        void GetOwnershipInfo(const CObjectID &object_id,
+                              CAddress *owner_address)
        void RegisterOwnershipInfoAndResolveFuture(
                const CObjectID &object_id,
                const CObjectID &outer_object_id,
@@ -88,3 +88,5 @@ cdef extern from "ray/common/ray_config.h" nogil:
        int64_t max_direct_call_object_size() const

        c_bool gcs_actor_service_enabled() const
+
+        c_bool put_small_object_in_memory_store() const
@@ -157,3 +157,7 @@ cdef class Config:
    @staticmethod
    def maximum_gcs_deletion_batch_size():
        return RayConfig.instance().maximum_gcs_deletion_batch_size()
+
+    @staticmethod
+    def put_small_object_in_memory_store():
+        return RayConfig.instance().put_small_object_in_memory_store()
@@ -484,15 +484,16 @@ def test_shutdown_disconnect_global_state():
@pytest.mark.parametrize(
    "ray_start_object_store_memory", [150 * 1024 * 1024], indirect=True)
 def test_put_pins_object(ray_start_object_store_memory):
-    x_id = ray.put("HI")
+    obj = np.ones(200 * 1024, dtype=np.uint8)
+    x_id = ray.put(obj)
    x_binary = x_id.binary()
-    assert ray.get(ray.ObjectID(x_binary)) == "HI"
+    assert (ray.get(ray.ObjectID(x_binary)) == obj).all()

    # x cannot be evicted since x_id pins it
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))
-    assert ray.get(x_id) == "HI"
-    assert ray.get(ray.ObjectID(x_binary)) == "HI"
+    assert (ray.get(x_id) == obj).all()
+    assert (ray.get(ray.ObjectID(x_binary)) == obj).all()

    # now it can be evicted: only x_id pinned it, and x_binary does not
    del x_id
@@ -502,7 +503,7 @@ def test_put_pins_object(ray_start_object_store_memory):
        ray.ObjectID(x_binary))

    # weakref put
-    y_id = ray.put("HI", weakref=True)
+    y_id = ray.put(obj, weakref=True)
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))
    with pytest.raises(ray.exceptions.UnreconstructableError):
@@ -2,6 +2,7 @@ import os
 import signal
 import sys
 import time
+import numpy as np

 import pytest

@@ -54,7 +55,8 @@ def test_dying_worker_get(ray_start_2_cpus):
    assert len(ready_ids) == 0
    # Seal the object so the store attempts to notify the worker that the
    # get has been fulfilled.
-    ray.worker.global_worker.put_object(1, x_id)
+    obj = np.ones(200 * 1024, dtype=np.uint8)
+    ray.worker.global_worker.put_object(obj, x_id)
    time.sleep(0.1)

    # Make sure that nothing has died.
@@ -97,7 +99,8 @@ ray.get(ray.ObjectID(ray.utils.hex_to_binary("{}")))
    assert len(ready_ids) == 0
    # Seal the object so the store attempts to notify the worker that the
    # get has been fulfilled.
-    ray.worker.global_worker.put_object(1, x_id)
+    obj = np.ones(200 * 1024, dtype=np.uint8)
+    ray.worker.global_worker.put_object(obj, x_id)
    time.sleep(0.1)

    # Make sure that nothing has died.
@@ -137,7 +140,8 @@ def test_dying_worker_wait(ray_start_2_cpus):
    time.sleep(0.1)

    # Create the object.
-    ray.worker.global_worker.put_object(1, x_id)
+    obj = np.ones(200 * 1024, dtype=np.uint8)
+    ray.worker.global_worker.put_object(obj, x_id)
    time.sleep(0.1)

    # Make sure that nothing has died.
@@ -180,7 +184,8 @@ ray.wait([ray.ObjectID(ray.utils.hex_to_binary("{}"))])
    assert len(ready_ids) == 0
    # Seal the object so the store attempts to notify the worker that the
    # wait can return.
-    ray.worker.global_worker.put_object(1, x_id)
+    obj = np.ones(200 * 1024, dtype=np.uint8)
+    ray.worker.global_worker.put_object(obj, x_id)
    time.sleep(0.1)

    # Make sure that nothing has died.
@@ -819,7 +819,7 @@ def test_raylet_crash_when_get(ray_start_regular):
        time.sleep(2)
        ray.worker._global_node.kill_raylet()

-    object_id = ray.put(None)
+    object_id = ray.put(np.zeros(200 * 1024, dtype=np.uint8))
    ray.internal.free(object_id)
    while ray.worker.global_worker.core_worker.object_exists(object_id):
        time.sleep(1)
@@ -62,7 +62,8 @@ class TestMemoryLimits(unittest.TestCase):
                num_cpus=1,
                object_store_memory=300 * MB,
                driver_object_store_memory=driver_quota)
-            z = ray.put("hi", weakref=True)
+            obj = np.ones(200 * 1024, dtype=np.uint8)
+            z = ray.put(obj, weakref=True)
            a = LightActor._remote(object_store_memory=a_quota)
            b = GreedyActor._remote(object_store_memory=b_quota)
            for _ in range(5):
@@ -4,6 +4,7 @@ import grpc
 import pytest
 import requests
 import time
+import numpy as np

 import ray
 from ray.core.generated import node_manager_pb2
@@ -180,7 +181,7 @@ def test_raylet_info_endpoint(shutdown_only):
            self.local_storage = [f.remote() for _ in range(10)]

        def remote_store(self):
-            self.remote_storage = ray.put("test")
+            self.remote_storage = ray.put(np.zeros(200 * 1024, dtype=np.uint8))

        def getpid(self):
            return os.getpid()
@@ -443,9 +444,8 @@ def test_memory_dashboard(shutdown_only):
        return True

    def test_object_pineed_in_memory():
-        import numpy as np

-        a = ray.put(np.zeros(1))
+        a = ray.put(np.zeros(200 * 1024, dtype=np.uint8))
        b = ray.get(a)  # Noqa F841
        del a

@@ -469,7 +469,7 @@ def test_memory_dashboard(shutdown_only):
        def f(arg):
            time.sleep(1)

-        a = ray.put(None)  # Noqa F841
+        a = ray.put(np.zeros(200 * 1024, dtype=np.uint8))  # Noqa F841
        b = f.remote(a)  # Noqa F841

        wait_for_condition(memory_table_ready)
@@ -174,8 +174,10 @@ def test_cleanup_on_driver_exit(call_ray_start):
    driver_script = """
 import time
 import ray
+import numpy as np
 ray.init(address="{}")
-object_ids = [ray.put(i) for i in range(1000)]
+object_ids = [ray.put(np.zeros(200 * 1024, dtype=np.uint8))
+              for i in range(1000)]
 start_time = time.time()
 while time.time() - start_time < 30:
    if len(ray.objects()) == 1000:
@@ -1286,14 +1286,6 @@ def connect(node,
        worker.core_worker.set_object_store_client_options(
            "ray_driver_{}".format(os.getpid()), driver_object_store_memory)

-    # Put something in the plasma store so that subsequent plasma store
-    # accesses will be faster. Currently the first access is always slow, and
-    # we don't want the user to experience this.
-    if mode != LOCAL_MODE:
-        temporary_object_id = ray.ObjectID.from_random()
-        worker.put_object(1, object_id=temporary_object_id)
-        ray.internal.free([temporary_object_id])
-
    # Start the import thread
    worker.import_thread = import_thread.ImportThread(worker, mode,
                                                      worker.threads_stopped)