mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 23:20:54 +08:00
[Object Spilling] Remove retries and use a timer instead. (#13175)
This commit is contained in:
@@ -210,7 +210,6 @@ class RayParams:
|
||||
raise Exception(
|
||||
"Object pinning cannot be enabled if using LRU eviction.")
|
||||
self._system_config["object_pinning_enabled"] = False
|
||||
self._system_config["object_store_full_max_retries"] = -1
|
||||
self._system_config["free_objects_period_milliseconds"] = 1000
|
||||
|
||||
# Set the internal config options for object reconstruction.
|
||||
|
||||
@@ -22,7 +22,6 @@ def get_default_fixure_system_config():
|
||||
system_config = {
|
||||
"object_timeout_milliseconds": 200,
|
||||
"num_heartbeats_timeout": 10,
|
||||
"object_store_full_max_retries": 3,
|
||||
"object_store_full_delay_ms": 100,
|
||||
}
|
||||
return system_config
|
||||
|
||||
@@ -1023,9 +1023,6 @@ def test_connect_with_disconnected_node(shutdown_only):
|
||||
"ray_start_cluster_head", [{
|
||||
"num_cpus": 5,
|
||||
"object_store_memory": 10**8,
|
||||
"_system_config": {
|
||||
"object_store_full_max_retries": 0
|
||||
}
|
||||
}],
|
||||
indirect=True)
|
||||
def test_parallel_actor_fill_plasma_retry(ray_start_cluster_head):
|
||||
@@ -1042,10 +1039,7 @@ def test_parallel_actor_fill_plasma_retry(ray_start_cluster_head):
|
||||
|
||||
|
||||
def test_fill_object_store_exception(shutdown_only):
|
||||
ray.init(
|
||||
num_cpus=2,
|
||||
object_store_memory=10**8,
|
||||
_system_config={"object_store_full_max_retries": 0})
|
||||
ray.init(num_cpus=2, object_store_memory=10**8)
|
||||
|
||||
@ray.remote
|
||||
def expensive_task():
|
||||
|
||||
@@ -84,7 +84,6 @@ def test_spilling_not_done_for_pinned_object(tmp_path, shutdown_only):
|
||||
_system_config={
|
||||
"max_io_workers": 4,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
@@ -117,7 +116,6 @@ def test_spilling_not_done_for_pinned_object(tmp_path, shutdown_only):
|
||||
"object_store_memory": 75 * 1024 * 1024,
|
||||
"_system_config": {
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"max_io_workers": 4,
|
||||
"object_spilling_config": json.dumps({
|
||||
@@ -170,7 +168,6 @@ def test_spill_objects_automatically(object_spilling_config, shutdown_only):
|
||||
_system_config={
|
||||
"max_io_workers": 4,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": object_spilling_config,
|
||||
"min_spilling_size": 0
|
||||
@@ -251,10 +248,6 @@ def test_spill_during_get(object_spilling_config, shutdown_only):
|
||||
_system_config={
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_delay_ms": 100,
|
||||
# NOTE(swang): Use infinite retries because the OOM timer can still
|
||||
# get accidentally triggered when objects are released too slowly
|
||||
# (see github.com/ray-project/ray/issues/12040).
|
||||
"object_store_full_max_retries": -1,
|
||||
"max_io_workers": 1,
|
||||
"object_spilling_config": object_spilling_config,
|
||||
"min_spilling_size": 0,
|
||||
@@ -286,7 +279,6 @@ def test_spill_deadlock(object_spilling_config, shutdown_only):
|
||||
_system_config={
|
||||
"max_io_workers": 1,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": object_spilling_config,
|
||||
"min_spilling_size": 0,
|
||||
@@ -320,7 +312,6 @@ def test_delete_objects(tmp_path, shutdown_only):
|
||||
"max_io_workers": 1,
|
||||
"min_spilling_size": 0,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
@@ -363,7 +354,6 @@ def test_delete_objects_delete_while_creating(tmp_path, shutdown_only):
|
||||
"max_io_workers": 4,
|
||||
"min_spilling_size": 0,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
@@ -413,7 +403,6 @@ def test_delete_objects_on_worker_failure(tmp_path, shutdown_only):
|
||||
_system_config={
|
||||
"max_io_workers": 4,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
@@ -489,7 +478,6 @@ def test_delete_objects_multi_node(tmp_path, ray_start_cluster):
|
||||
"max_io_workers": 2,
|
||||
"min_spilling_size": 20 * 1024 * 1024,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
@@ -563,7 +551,6 @@ def test_fusion_objects(tmp_path, shutdown_only):
|
||||
_system_config={
|
||||
"max_io_workers": 3,
|
||||
"automatic_object_spilling_enabled": True,
|
||||
"object_store_full_max_retries": 4,
|
||||
"object_store_full_delay_ms": 100,
|
||||
"object_spilling_config": json.dumps({
|
||||
"type": "filesystem",
|
||||
|
||||
@@ -20,7 +20,6 @@ logger = logging.getLogger(__name__)
|
||||
@pytest.fixture
|
||||
def one_worker_100MiB(request):
|
||||
config = {
|
||||
"object_store_full_max_retries": 2,
|
||||
"task_retry_delay_ms": 0,
|
||||
"object_timeout_milliseconds": 1000,
|
||||
}
|
||||
|
||||
@@ -238,9 +238,6 @@ RAY_CONFIG(uint32_t, maximum_gcs_dead_node_cached_count, 1000)
|
||||
/// The interval at which the gcs server will print debug info.
|
||||
RAY_CONFIG(int64_t, gcs_dump_debug_log_interval_minutes, 1)
|
||||
|
||||
/// Maximum number of times to retry putting an object when the plasma store is full.
|
||||
/// Can be set to -1 to enable unlimited retries.
|
||||
RAY_CONFIG(int32_t, object_store_full_max_retries, 1000)
|
||||
/// Duration to sleep after failing to put an object in plasma because it is full.
|
||||
RAY_CONFIG(uint32_t, object_store_full_delay_ms, 10)
|
||||
|
||||
@@ -361,6 +358,9 @@ RAY_CONFIG(int64_t, min_spilling_size, 100 * 1024 * 1024)
|
||||
/// TODO(sang): Fix it.
|
||||
RAY_CONFIG(bool, automatic_object_deletion_enabled, true)
|
||||
|
||||
/// Grace period until we throw the OOM error to the application in seconds.
|
||||
RAY_CONFIG(int64_t, oom_grace_period_s, 10)
|
||||
|
||||
/* Configuration parameters for locality-aware scheduling. */
|
||||
/// Whether to enable locality-aware leasing. If enabled, then Ray will consider task
|
||||
/// dependency locality when choosing a worker for leasing.
|
||||
|
||||
@@ -81,18 +81,16 @@ std::pair<PlasmaObject, PlasmaError> CreateRequestQueue::TryRequestImmediately(
|
||||
}
|
||||
|
||||
bool CreateRequestQueue::ProcessRequest(std::unique_ptr<CreateRequest> &request) {
|
||||
// Return an OOM error to the client if we have hit the maximum number of
|
||||
// retries.
|
||||
// TODO(sang): Delete this logic?
|
||||
// TODO(sang): Delete this logic when lru evict is removed.
|
||||
bool evict_if_full = evict_if_full_;
|
||||
if (max_retries_ == 0) {
|
||||
// If we cannot retry, then always evict on the first attempt.
|
||||
evict_if_full = true;
|
||||
} else if (num_retries_ > 0) {
|
||||
// Always try to evict after the first attempt.
|
||||
if (oom_start_time_ns_ != -1) {
|
||||
// If the first attempt fails, we set the evict_if_full true.
|
||||
// We need this logic because if lru_evict flag is on, this is false because we
|
||||
// shouldn't evict objects in the first attempt.
|
||||
evict_if_full = true;
|
||||
}
|
||||
request->error = request->create_callback(evict_if_full, &request->result);
|
||||
request->error =
|
||||
request->create_callback(/*evict_if_full=*/evict_if_full, &request->result);
|
||||
return request->error != PlasmaError::OutOfMemory;
|
||||
}
|
||||
|
||||
@@ -100,21 +98,25 @@ Status CreateRequestQueue::ProcessRequests() {
|
||||
while (!queue_.empty()) {
|
||||
auto request_it = queue_.begin();
|
||||
auto create_ok = ProcessRequest(*request_it);
|
||||
auto now = get_time_();
|
||||
if (create_ok) {
|
||||
FinishRequest(request_it);
|
||||
// Reset the oom start time since the creation succeeds.
|
||||
oom_start_time_ns_ = -1;
|
||||
} else {
|
||||
if (trigger_global_gc_) {
|
||||
trigger_global_gc_();
|
||||
}
|
||||
|
||||
if (oom_start_time_ns_ == -1) {
|
||||
oom_start_time_ns_ = now;
|
||||
}
|
||||
if (spill_objects_callback_()) {
|
||||
return Status::TransientObjectStoreFull("Waiting for spilling.");
|
||||
} else if (num_retries_ < max_retries_ || max_retries_ == -1) {
|
||||
} else if (now - oom_start_time_ns_ < oom_grace_period_ns_) {
|
||||
// We need a grace period since (1) global GC takes a bit of time to
|
||||
// kick in, and (2) there is a race between spilling finishing and space
|
||||
// actually freeing up in the object store.
|
||||
// If max_retries == -1, we retry infinitely.
|
||||
num_retries_ += 1;
|
||||
return Status::ObjectStoreFull("Waiting for grace period.");
|
||||
} else {
|
||||
// Raise OOM. In this case, the request will be marked as OOM.
|
||||
@@ -135,9 +137,6 @@ void CreateRequestQueue::FinishRequest(
|
||||
RAY_CHECK(it->second == nullptr);
|
||||
it->second = std::move(request);
|
||||
queue_.erase(request_it);
|
||||
|
||||
// Reset the number of retries since we are no longer trying this request.
|
||||
num_retries_ = 0;
|
||||
}
|
||||
|
||||
void CreateRequestQueue::RemoveDisconnectedClientRequests(
|
||||
|
||||
@@ -34,15 +34,18 @@ class CreateRequestQueue {
|
||||
using CreateObjectCallback =
|
||||
std::function<PlasmaError(bool evict_if_full, PlasmaObject *result)>;
|
||||
|
||||
CreateRequestQueue(int32_t max_retries, bool evict_if_full,
|
||||
CreateRequestQueue(bool evict_if_full, int64_t oom_grace_period_s,
|
||||
ray::SpillObjectsCallback spill_objects_callback,
|
||||
std::function<void()> trigger_global_gc)
|
||||
: max_retries_(max_retries),
|
||||
evict_if_full_(evict_if_full),
|
||||
std::function<void()> trigger_global_gc,
|
||||
std::function<int64_t()> get_time)
|
||||
: evict_if_full_(evict_if_full),
|
||||
oom_grace_period_ns_(oom_grace_period_s * 1e9),
|
||||
spill_objects_callback_(spill_objects_callback),
|
||||
trigger_global_gc_(trigger_global_gc) {
|
||||
RAY_LOG(DEBUG) << "Starting plasma::CreateRequestQueue with " << max_retries_
|
||||
<< " retries on OOM, evict if full? " << (evict_if_full_ ? 1 : 0);
|
||||
trigger_global_gc_(trigger_global_gc),
|
||||
get_time_(get_time) {
|
||||
RAY_LOG(DEBUG) << "Starting plasma::CreateRequestQueue with OOM grace period "
|
||||
<< oom_grace_period_ns_ << ", evict if full? "
|
||||
<< (evict_if_full_ ? 1 : 0);
|
||||
}
|
||||
|
||||
/// Add a request to the queue. The caller should use the returned request ID
|
||||
@@ -148,17 +151,15 @@ class CreateRequestQueue {
|
||||
/// a request by retrying. Start at 1 because 0 means "do not retry".
|
||||
uint64_t next_req_id_ = 1;
|
||||
|
||||
/// The maximum number of times to retry each request upon OOM.
|
||||
const int32_t max_retries_;
|
||||
|
||||
/// The number of times the request at the head of the queue has been tried.
|
||||
int32_t num_retries_ = 0;
|
||||
|
||||
/// On the first attempt to create an object, whether to evict from the
|
||||
/// object store to make space. If the first attempt fails, then we will
|
||||
/// always try to evict.
|
||||
const bool evict_if_full_;
|
||||
|
||||
/// Grace period until we throw the OOM error to the application.
|
||||
/// -1 means grace period is infinite.
|
||||
const int64_t oom_grace_period_ns_;
|
||||
|
||||
/// A callback to trigger object spilling. It tries to spill objects upto max
|
||||
/// throughput. It returns true if space is made by object spilling, and false if
|
||||
/// there's no more space to be made.
|
||||
@@ -168,6 +169,9 @@ class CreateRequestQueue {
|
||||
/// full.
|
||||
const std::function<void()> trigger_global_gc_;
|
||||
|
||||
/// A callback to return the current time.
|
||||
const std::function<int64_t()> get_time_;
|
||||
|
||||
/// Queue of object creation requests to respond to. Requests will be placed
|
||||
/// on this queue if the object store does not have enough room at the time
|
||||
/// that the client made the creation request, but space may be made through
|
||||
@@ -189,6 +193,9 @@ class CreateRequestQueue {
|
||||
/// Last time global gc was invoked in ms.
|
||||
uint64_t last_global_gc_ms_;
|
||||
|
||||
/// The time OOM timer first starts. It becomes -1 upon every creation success.
|
||||
int64_t oom_start_time_ns_ = -1;
|
||||
|
||||
friend class CreateRequestQueueTest;
|
||||
};
|
||||
|
||||
|
||||
@@ -125,9 +125,11 @@ PlasmaStore::PlasmaStore(boost::asio::io_service &main_service, std::string dire
|
||||
usage_log_interval_ns_(RayConfig::instance().object_store_usage_log_interval_s() *
|
||||
1e9),
|
||||
create_request_queue_(
|
||||
RayConfig::instance().object_store_full_max_retries(),
|
||||
/*evict_if_full=*/RayConfig::instance().object_pinning_enabled(),
|
||||
spill_objects_callback, object_store_full_callback) {
|
||||
/*oom_grace_period_s=*/RayConfig::instance().oom_grace_period_s(),
|
||||
spill_objects_callback, object_store_full_callback,
|
||||
/*get_time=*/
|
||||
[]() { return absl::GetCurrentTimeNanos(); }) {
|
||||
store_info_.directory = directory;
|
||||
store_info_.hugepages_enabled = hugepages_enabled;
|
||||
}
|
||||
|
||||
@@ -46,17 +46,24 @@ class MockClient : public ClientInterface {
|
||||
class CreateRequestQueueTest : public ::testing::Test {
|
||||
public:
|
||||
CreateRequestQueueTest()
|
||||
: queue_(
|
||||
/*max_retries=*/2,
|
||||
: oom_grace_period_s_(1),
|
||||
current_time_ns_(0),
|
||||
queue_(
|
||||
/*evict_if_full=*/true,
|
||||
/*oom_grace_period_s=*/oom_grace_period_s_,
|
||||
/*spill_object_callback=*/[&]() { return false; },
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; }) {}
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; },
|
||||
/*get_time=*/[&]() { return current_time_ns_; }) {}
|
||||
|
||||
void AssertNoLeaks() {
|
||||
ASSERT_TRUE(queue_.queue_.empty());
|
||||
ASSERT_TRUE(queue_.fulfilled_requests_.empty());
|
||||
}
|
||||
|
||||
void TearDown() { current_time_ns_ = 0; }
|
||||
|
||||
int64_t oom_grace_period_s_;
|
||||
int64_t current_time_ns_;
|
||||
CreateRequestQueue queue_;
|
||||
int num_global_gc_ = 0;
|
||||
};
|
||||
@@ -66,6 +73,8 @@ TEST_F(CreateRequestQueueTest, TestSimple) {
|
||||
result->data_size = 1234;
|
||||
return PlasmaError::OK;
|
||||
};
|
||||
// Advance the clock without processing objects. This shouldn't have an impact.
|
||||
current_time_ns_ += 10e9;
|
||||
auto client = std::make_shared<MockClient>();
|
||||
auto req_id = queue_.AddRequest(ObjectID::Nil(), client, request);
|
||||
ASSERT_REQUEST_UNFINISHED(queue_, req_id);
|
||||
@@ -115,8 +124,9 @@ TEST_F(CreateRequestQueueTest, TestOom) {
|
||||
ASSERT_REQUEST_UNFINISHED(queue_, req_id2);
|
||||
ASSERT_EQ(num_global_gc_, 2);
|
||||
|
||||
// Retries used up. The first request should reply with OOM and the second
|
||||
// Grace period is done. The first request should reply with OOM and the second
|
||||
// request should also be served.
|
||||
current_time_ns_ += oom_grace_period_s_ * 2e9;
|
||||
ASSERT_TRUE(queue_.ProcessRequests().ok());
|
||||
ASSERT_EQ(num_global_gc_, 3);
|
||||
|
||||
@@ -129,12 +139,14 @@ TEST_F(CreateRequestQueueTest, TestOom) {
|
||||
|
||||
TEST(CreateRequestQueueParameterTest, TestOomInfiniteRetry) {
|
||||
int num_global_gc_ = 0;
|
||||
int64_t current_time_ns;
|
||||
CreateRequestQueue queue(
|
||||
/*max_retries=*/-1,
|
||||
/*evict_if_full=*/true,
|
||||
/*oom_grace_period_s=*/100,
|
||||
// Spilling is failing.
|
||||
/*spill_object_callback=*/[&]() { return false; },
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; });
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; },
|
||||
/*get_time=*/[&]() { return current_time_ns; });
|
||||
|
||||
auto oom_request = [&](bool evict_if_full, PlasmaObject *result) {
|
||||
return PlasmaError::OutOfMemory;
|
||||
@@ -148,7 +160,9 @@ TEST(CreateRequestQueueParameterTest, TestOomInfiniteRetry) {
|
||||
auto req_id1 = queue.AddRequest(ObjectID::Nil(), client, oom_request);
|
||||
auto req_id2 = queue.AddRequest(ObjectID::Nil(), client, blocked_request);
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
// Advance 1 second.
|
||||
current_time_ns += 1e9;
|
||||
ASSERT_TRUE(queue.ProcessRequests().IsObjectStoreFull());
|
||||
ASSERT_EQ(num_global_gc_, i + 1);
|
||||
}
|
||||
@@ -160,10 +174,11 @@ TEST(CreateRequestQueueParameterTest, TestOomInfiniteRetry) {
|
||||
|
||||
TEST_F(CreateRequestQueueTest, TestTransientOom) {
|
||||
CreateRequestQueue queue(
|
||||
/*max_retries=*/2,
|
||||
/*evict_if_full=*/false,
|
||||
/*evict_if_full=*/true,
|
||||
/*oom_grace_period_s=*/oom_grace_period_s_,
|
||||
/*spill_object_callback=*/[&]() { return true; },
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; });
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; },
|
||||
/*get_time=*/[&]() { return current_time_ns_; });
|
||||
|
||||
auto return_status = PlasmaError::OutOfMemory;
|
||||
auto oom_request = [&](bool evict_if_full, PlasmaObject *result) {
|
||||
@@ -181,14 +196,18 @@ TEST_F(CreateRequestQueueTest, TestTransientOom) {
|
||||
auto req_id1 = queue.AddRequest(ObjectID::Nil(), client, oom_request);
|
||||
auto req_id2 = queue.AddRequest(ObjectID::Nil(), client, blocked_request);
|
||||
|
||||
// Transient OOM should not use up any retries.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
// Transient OOM should happen until the grace period.
|
||||
for (int i = 0; i < 9; i++) {
|
||||
// Advance 0.1 seconds. OOM grace period is 1 second, so it should return transient
|
||||
// error.
|
||||
current_time_ns_ += 1e8;
|
||||
ASSERT_TRUE(queue.ProcessRequests().IsTransientObjectStoreFull());
|
||||
ASSERT_REQUEST_UNFINISHED(queue, req_id1);
|
||||
ASSERT_REQUEST_UNFINISHED(queue, req_id2);
|
||||
ASSERT_EQ(num_global_gc_, i + 1);
|
||||
}
|
||||
|
||||
current_time_ns_ += oom_grace_period_s_ * 2e9;
|
||||
// Return OK for the first request. The second request should also be served.
|
||||
return_status = PlasmaError::OK;
|
||||
ASSERT_TRUE(queue.ProcessRequests().ok());
|
||||
@@ -201,10 +220,11 @@ TEST_F(CreateRequestQueueTest, TestTransientOom) {
|
||||
TEST_F(CreateRequestQueueTest, TestTransientOomThenOom) {
|
||||
bool is_spilling_possible = true;
|
||||
CreateRequestQueue queue(
|
||||
/*max_retries=*/2,
|
||||
/*evict_if_full=*/false,
|
||||
/*evict_if_full=*/true,
|
||||
/*oom_grace_period_s=*/oom_grace_period_s_,
|
||||
/*spill_object_callback=*/[&]() { return is_spilling_possible; },
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; });
|
||||
/*on_global_gc=*/[&]() { num_global_gc_++; },
|
||||
/*get_time=*/[&]() { return current_time_ns_; });
|
||||
|
||||
auto return_status = PlasmaError::OutOfMemory;
|
||||
auto oom_request = [&](bool evict_if_full, PlasmaObject *result) {
|
||||
@@ -222,8 +242,10 @@ TEST_F(CreateRequestQueueTest, TestTransientOomThenOom) {
|
||||
auto req_id1 = queue.AddRequest(ObjectID::Nil(), client, oom_request);
|
||||
auto req_id2 = queue.AddRequest(ObjectID::Nil(), client, blocked_request);
|
||||
|
||||
// Transient OOM should not use up any retries.
|
||||
// Transient OOM should not use up any until grace period is done.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
// Advance 0.1 seconds. OOM grace period is 1 second.
|
||||
current_time_ns_ += 1e8;
|
||||
ASSERT_TRUE(queue.ProcessRequests().IsTransientObjectStoreFull());
|
||||
ASSERT_REQUEST_UNFINISHED(queue, req_id1);
|
||||
ASSERT_REQUEST_UNFINISHED(queue, req_id2);
|
||||
@@ -238,8 +260,9 @@ TEST_F(CreateRequestQueueTest, TestTransientOomThenOom) {
|
||||
ASSERT_REQUEST_UNFINISHED(queue, req_id2);
|
||||
ASSERT_EQ(num_global_gc_, 5);
|
||||
|
||||
// Retries used up. The first request should reply with OOM and the second
|
||||
// Grace period is done. The first request should reply with OOM and the second
|
||||
// request should also be served.
|
||||
current_time_ns_ += oom_grace_period_s_ * 2e9;
|
||||
ASSERT_TRUE(queue.ProcessRequests().ok());
|
||||
ASSERT_REQUEST_FINISHED(queue, req_id1, PlasmaError::OutOfMemory);
|
||||
ASSERT_REQUEST_FINISHED(queue, req_id2, PlasmaError::OK);
|
||||
@@ -261,13 +284,16 @@ TEST_F(CreateRequestQueueTest, TestEvictIfFull) {
|
||||
}
|
||||
|
||||
TEST(CreateRequestQueueParameterTest, TestNoEvictIfFull) {
|
||||
int64_t current_time_ns = 0;
|
||||
CreateRequestQueue queue(
|
||||
/*max_retries=*/2,
|
||||
/*evict_if_full=*/false,
|
||||
/*oom_grace_period_s=*/1,
|
||||
/*spill_object_callback=*/[&]() { return false; },
|
||||
/*on_global_gc=*/[&]() {});
|
||||
/*on_global_gc=*/[&]() {},
|
||||
/*get_time=*/[&]() { return current_time_ns; });
|
||||
|
||||
bool first_try = true;
|
||||
|
||||
auto oom_request = [&](bool evict_if_full, PlasmaObject *result) {
|
||||
if (first_try) {
|
||||
RAY_CHECK(!evict_if_full);
|
||||
@@ -281,6 +307,7 @@ TEST(CreateRequestQueueParameterTest, TestNoEvictIfFull) {
|
||||
auto client = std::make_shared<MockClient>();
|
||||
static_cast<void>(queue.AddRequest(ObjectID::Nil(), client, oom_request));
|
||||
ASSERT_TRUE(queue.ProcessRequests().IsObjectStoreFull());
|
||||
current_time_ns += 1e8;
|
||||
ASSERT_TRUE(queue.ProcessRequests().IsObjectStoreFull());
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user