[Object Spilling] Delete spilled objects when their references go out of scope. (#12341)

This commit is contained in:
SangBin Cho
2020-12-01 13:10:39 -08:00
committed by GitHub
parent ef1b0c13c3
commit 0e892908f7
25 changed files with 968 additions and 11 deletions
+46
View File
@@ -54,6 +54,7 @@ from ray.includes.common cimport (
CTaskType,
CPlacementStrategy,
CRayFunction,
CWorkerType,
move,
LANGUAGE_CPP,
LANGUAGE_JAVA,
@@ -631,6 +632,46 @@ cdef void restore_spilled_objects_handler(
job_id=None)
# Callback that deletes spilled objects from the external storage.
#
# It is assigned to `options.delete_spilled_objects` when the CoreWorker
# options are built. The function is declared `nogil`, so it re-acquires the
# GIL before touching any Python object.
#
# Args:
#     object_urls: URLs of the spilled objects to delete.
#     worker_type: Type of the IO worker running the callback. Only the spill
#         and restore worker types are handled; anything else trips an
#         assertion.
#
# Errors raised while deleting (including the assertion above, which sits
# inside the `try`) are logged and pushed to the driver.
cdef void delete_spilled_objects_handler(
        const c_vector[c_string]& object_urls,
        CWorkerType worker_type) nogil:
    with gil:
        # Copy the C++ vector of URLs into a Python list.
        urls = []
        size = object_urls.size()
        for i in range(size):
            urls.append(object_urls[i])
        try:
            # Pick the process title to show while deleting and the idle
            # title to pair with it, depending on the IO worker type.
            if <int> worker_type == <int> WORKER_TYPE_SPILL_WORKER:
                original_proctitle = (
                    ray_constants.WORKER_PROCESS_TYPE_SPILL_WORKER_IDLE)
                proctitle = (
                    ray_constants.WORKER_PROCESS_TYPE_SPILL_WORKER_DELETE)
            elif <int> worker_type == <int> WORKER_TYPE_RESTORE_WORKER:
                original_proctitle = (
                    ray_constants.WORKER_PROCESS_TYPE_RESTORE_WORKER_IDLE)
                proctitle = (
                    ray_constants.WORKER_PROCESS_TYPE_RESTORE_WORKER_DELETE)
            else:
                assert False, ("This line shouldn't be reachable.")
            # Delete the objects while the process title is switched.
            # NOTE(review): presumably `_changeproctitle` restores
            # `original_proctitle` on exit -- confirm against ray.worker.
            with ray.worker._changeproctitle(
                    proctitle,
                    original_proctitle):
                external_storage.delete_spilled_objects(urls)
        except Exception:
            # Log the failure locally and report it to the driver together
            # with the current traceback.
            exception_str = (
                "An unexpected internal error occurred while the IO worker "
                "was deleting spilled objects.")
            logger.exception(exception_str)
            ray.utils.push_error_to_driver(
                ray.worker.global_worker,
                "delete_spilled_objects_error",
                traceback.format_exc() + exception_str,
                job_id=None)
# This function introduces ~2-7us of overhead per call (i.e., it can be called
# up to hundreds of thousands of times per second).
cdef void get_py_stack(c_string* stack_out) nogil:
@@ -739,6 +780,7 @@ cdef class CoreWorker:
options.gc_collect = gc_collect
options.spill_objects = spill_objects_handler
options.restore_spilled_objects = restore_spilled_objects_handler
options.delete_spilled_objects = delete_spilled_objects_handler
options.get_lang_stack = get_py_stack
options.ref_counting_enabled = True
options.is_local_mode = local_mode
@@ -1473,6 +1515,10 @@ cdef class CoreWorker:
def force_spill_objects(self, object_refs):
    """Ask the core worker to spill the given objects.

    Args:
        object_refs: Object refs of the objects to spill.

    Raises:
        AssertionError: If `automatic_object_deletion_enabled` is set in
            RayConfig, because this code path does not support it yet.
    """
    cdef c_vector[CObjectID] object_ids
    object_ids = ObjectRefsToVector(object_refs)
    # Adjacent string literals are concatenated verbatim, so each fragment
    # must end with an explicit space to keep the message readable.
    assert not RayConfig.instance().automatic_object_deletion_enabled(), (
        "Automatic object deletion is not supported for "
        "force_spill_objects yet. Please set "
        "automatic_object_deletion_enabled: False in Ray's system config.")
    # The blocking C++ call runs without the GIL.
    with nogil:
        check_status(CCoreWorkerProcess.GetCoreWorker()
                     .SpillObjects(object_ids))
+28
View File
@@ -165,6 +165,14 @@ class ExternalStorage(metaclass=abc.ABCMeta):
url_with_offset_list: List of url_with_offset.
"""
@abc.abstractmethod
def delete_spilled_objects(self, urls: List[str]):
    """Delete objects that are spilled to the external storage.

    This method is abstract; every concrete storage class must override it.

    NOTE(review): the filesystem implementation calls `.decode()` on each
    URL, so callers appear to pass bytes despite the `List[str]`
    annotation -- confirm the intended element type.

    Args:
        urls: URLs that store spilled object files.
    """
class NullStorage(ExternalStorage):
"""The class that represents an uninitialized external storage."""
@@ -175,6 +183,9 @@ class NullStorage(ExternalStorage):
def restore_spilled_objects(self, object_refs, url_with_offset_list):
    """Always fail, because this storage is uninitialized.

    Args:
        object_refs: Unused.
        url_with_offset_list: Unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("External storage is not initialized")
def delete_spilled_objects(self, urls: List[str]):
    """Always fail, because this storage is uninitialized.

    Args:
        urls: Unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("External storage is not initialized")
class FileSystemStorage(ExternalStorage):
"""The class for filesystem-like external storage.
@@ -221,6 +232,11 @@ class FileSystemStorage(ExternalStorage):
# read remaining data to our buffer
self._put_object_to_store(metadata, buf_len, f, object_ref)
def delete_spilled_objects(self, urls: List[str]):
    """Remove the spilled files referenced by the given URLs.

    Each URL is decoded and parsed with `parse_url_with_offset`; the
    resulting base URL is joined onto `self.directory_path` and that file
    is removed with `os.remove`.

    Args:
        urls: URLs (with offset) that store spilled object files.
    """
    for url_with_offset in urls:
        parsed = parse_url_with_offset(url_with_offset.decode())
        spilled_file = os.path.join(self.directory_path, parsed.base_url)
        os.remove(spilled_file)
class ExternalStorageSmartOpenImpl(ExternalStorage):
"""The external storage class implemented by smart_open.
@@ -303,6 +319,9 @@ class ExternalStorageSmartOpenImpl(ExternalStorage):
# read remaining data to our buffer
self._put_object_to_store(metadata, buf_len, f, object_ref)
def delete_spilled_objects(self, urls: List[str]):
    """Do nothing; this storage class does not delete spilled objects.

    NOTE(review): since the body is a no-op, objects spilled through this
    backend are presumably never removed -- confirm whether that is
    intended or still to be implemented.

    Args:
        urls: URLs that store spilled object files (ignored).
    """
    pass
# Storage backend that the module-level helper functions delegate to. It
# starts out as NullStorage, whose restore and delete methods raise
# NotImplementedError.
_external_storage = NullStorage()
@@ -350,3 +369,12 @@ def restore_spilled_objects(object_refs: List[ObjectRef],
"""
_external_storage.restore_spilled_objects(object_refs,
url_with_offset_list)
def delete_spilled_objects(urls: List[str]):
    """Delete objects that are spilled to the external storage.

    Delegates to the `delete_spilled_objects` method of the module-level
    `_external_storage` object.

    Args:
        urls: URLs that store spilled object files.
    """
    _external_storage.delete_spilled_objects(urls)
+3
View File
@@ -231,6 +231,9 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
(void(
const c_vector[CObjectID] &,
const c_vector[c_string] &) nogil) restore_spilled_objects
(void(
const c_vector[c_string]&,
CWorkerType) nogil) delete_spilled_objects
(void(c_string *stack_out) nogil) get_lang_stack
c_bool ref_counting_enabled
c_bool is_local_mode
+2
View File
@@ -66,3 +66,5 @@ cdef extern from "ray/common/ray_config.h" nogil:
uint64_t metrics_report_interval_ms() const
c_bool enable_timeline() const
c_bool automatic_object_deletion_enabled() const
+4
View File
@@ -115,3 +115,7 @@ cdef class Config:
@staticmethod
def enable_timeline():
    """Return the `enable_timeline` value from `RayConfig.instance()`."""
    return RayConfig.instance().enable_timeline()
@staticmethod
def automatic_object_deletion_enabled():
    """Return the `automatic_object_deletion_enabled` value from
    `RayConfig.instance()`."""
    return RayConfig.instance().automatic_object_deletion_enabled()
+4
View File
@@ -187,6 +187,10 @@ WORKER_PROCESS_TYPE_SPILL_WORKER = (
f"ray::SPILL_{WORKER_PROCESS_TYPE_SPILL_WORKER_NAME}")
WORKER_PROCESS_TYPE_RESTORE_WORKER = (
f"ray::RESTORE_{WORKER_PROCESS_TYPE_RESTORE_WORKER_NAME}")
# Process titles passed to `_changeproctitle` by the delete handler while an
# IO worker is deleting spilled objects; one per IO worker type.
WORKER_PROCESS_TYPE_SPILL_WORKER_DELETE = (
    f"ray::DELETE_{WORKER_PROCESS_TYPE_SPILL_WORKER_NAME}")
WORKER_PROCESS_TYPE_RESTORE_WORKER_DELETE = (
    f"ray::DELETE_{WORKER_PROCESS_TYPE_RESTORE_WORKER_NAME}")
LOG_MONITOR_MAX_OPEN_FILES = 200
+246
View File
@@ -1,5 +1,6 @@
import copy
import json
import os
import random
import platform
import sys
@@ -11,6 +12,7 @@ import psutil
import ray
from ray.external_storage import (create_url_with_offset,
parse_url_with_offset)
from ray.test_utils import wait_for_condition
bucket_name = "object-spilling-test"
spill_local_path = "/tmp/spill"
@@ -57,6 +59,7 @@ def test_sample_benchmark(object_spilling_config, shutdown_only):
"object_store_full_max_retries": 0,
"max_io_workers": max_io_workers,
"object_spilling_config": object_spilling_config,
"automatic_object_deletion_enabled": False,
})
arr = np.random.rand(object_size)
replay_buffer = []
@@ -134,6 +137,7 @@ def test_spill_objects_manually(object_spilling_config, shutdown_only):
"max_io_workers": 4,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0,
"automatic_object_deletion_enabled": False,
})
arr = np.random.rand(1024 * 1024) # 8 MB data
replay_buffer = []
@@ -195,6 +199,7 @@ def test_spill_objects_manually_from_workers(object_spilling_config,
"max_io_workers": 4,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0,
"automatic_object_deletion_enabled": False,
})
@ray.remote
@@ -226,6 +231,7 @@ def test_spill_objects_manually_with_workers(object_spilling_config,
"max_io_workers": 4,
"object_spilling_config": object_spilling_config,
"min_spilling_size": 0,
"automatic_object_deletion_enabled": False,
})
arrays = [np.random.rand(100 * 1024) for _ in range(50)]
objects = [ray.put(arr) for arr in arrays]
@@ -396,6 +402,246 @@ def test_spill_deadlock(object_spilling_config, shutdown_only):
@pytest.mark.skipif(
    platform.system() == "Windows", reason="Failing on Windows.")
def test_delete_objects(tmp_path, shutdown_only):
    """The spill directory empties once every object reference is dropped."""
    spill_dir = tmp_path / "spill"
    spill_dir.mkdir()
    spilling_config = json.dumps({
        "type": "filesystem",
        "params": {
            "directory_path": str(spill_dir)
        }
    })
    # Cap the object store at 75 MiB so that the puts below overflow it.
    ray.init(
        object_store_memory=75 * 1024 * 1024,
        _system_config={
            "max_io_workers": 4,
            "automatic_object_spilling_enabled": True,
            "object_store_full_max_retries": 4,
            "object_store_full_initial_delay_ms": 100,
            "object_spilling_config": spilling_config,
        })
    payload = np.random.rand(1024 * 1024)  # 8 MB data
    held_refs = []

    for _ in range(80):
        ref = None
        while ref is None:
            ref = ray.put(payload)
            held_refs.append(ref)

    print("-----------------------------------")

    def is_dir_empty():
        # True when the spill directory contains no entries at all.
        return not any(spill_dir.iterdir())

    # Drop every reference, then wait for the spilled files to disappear.
    del held_refs
    del ref
    wait_for_condition(is_dir_empty)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="Failing on Windows.")
def test_delete_objects_delete_while_creating(tmp_path, shutdown_only):
    """References are dropped while new objects are still being created;
    the spill directory must end up empty once all references are gone."""
    spill_dir = tmp_path / "spill"
    spill_dir.mkdir()
    spilling_config = json.dumps({
        "type": "filesystem",
        "params": {
            "directory_path": str(spill_dir)
        }
    })
    # Cap the object store at 75 MiB so that the puts below overflow it.
    ray.init(
        object_store_memory=75 * 1024 * 1024,
        _system_config={
            "max_io_workers": 4,
            "automatic_object_spilling_enabled": True,
            "object_store_full_max_retries": 4,
            "object_store_full_initial_delay_ms": 100,
            "object_spilling_config": spilling_config,
        })
    payload = np.random.rand(1024 * 1024)  # 8 MB data
    held_refs = []

    for _ in range(80):
        ref = None
        while ref is None:
            ref = ray.put(payload)
            held_refs.append(ref)
        # Drop the newest reference again with 60% probability.
        if random.randint(0, 9) < 6:
            held_refs.pop()

    # Randomly sample surviving objects and check their contents.
    for _ in range(200):
        ref = random.choice(held_refs)
        fetched = ray.get(ref, timeout=0)
        assert np.array_equal(fetched, payload)

    def is_dir_empty():
        # True when the spill directory contains no entries at all.
        return not any(spill_dir.iterdir())

    # Dropping the last references must eventually remove every spilled
    # file, without racing against the earlier deletions.
    del held_refs
    del ref
    wait_for_condition(is_dir_empty, timeout=1000)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="Failing on Windows.")
def test_delete_objects_on_worker_failure(tmp_path, shutdown_only):
    """Spilled files are removed after the actor that owns the references
    is killed."""
    spill_dir = tmp_path / "spill"
    spill_dir.mkdir()
    spilling_config = json.dumps({
        "type": "filesystem",
        "params": {
            "directory_path": str(spill_dir)
        }
    })
    # Cap the object store at 75 MiB so that the puts below overflow it.
    ray.init(
        object_store_memory=75 * 1024 * 1024,
        _system_config={
            "max_io_workers": 4,
            "automatic_object_spilling_enabled": True,
            "object_store_full_max_retries": 4,
            "object_store_full_initial_delay_ms": 100,
            "object_spilling_config": spilling_config,
            "min_spilling_size": 0,
        })

    arr = np.random.rand(1024 * 1024)  # 8 MB data

    @ray.remote
    class Actor:
        def __init__(self):
            self.replay_buffer = []

        def get_pid(self):
            return os.getpid()

        def create_objects(self):
            for _ in range(80):
                ref = None
                while ref is None:
                    ref = ray.put(arr)
                    self.replay_buffer.append(ref)
                # Drop the newest reference again with 60% probability.
                if random.randint(0, 9) < 6:
                    self.replay_buffer.pop()

            # Randomly sample surviving objects and check their contents.
            for _ in range(200):
                ref = random.choice(self.replay_buffer)
                fetched = ray.get(ref, timeout=0)
                assert np.array_equal(fetched, arr)

    a = Actor.remote()
    actor_pid = ray.get(a.get_pid.remote())
    ray.get(a.create_objects.remote())
    # Kill the actor process with signal 9.
    os.kill(actor_pid, 9)

    def wait_until_actor_dead():
        # The actor counts as dead once a call on it raises RayActorError.
        try:
            ray.get(a.get_pid.remote())
        except ray.exceptions.RayActorError:
            return True
        else:
            return False

    wait_for_condition(wait_until_actor_dead)

    def is_dir_empty():
        # True when the spill directory contains no entries at all.
        return not any(spill_dir.iterdir())

    # Every spilled file must be deleted after the worker failure.
    wait_for_condition(is_dir_empty, timeout=1000)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="Failing on Windows.")
def test_delete_objects_multi_node(tmp_path, ray_start_cluster):
    """Spilled files are removed on a three-node cluster once the actors
    holding the references are killed."""
    spill_dir = tmp_path / "spill"
    spill_dir.mkdir()
    spilling_config = json.dumps({
        "type": "filesystem",
        "params": {
            "directory_path": str(spill_dir)
        }
    })
    store_bytes = 75 * 1024 * 1024  # 75 MiB object store per node.
    cluster = ray_start_cluster
    # Head node; it is the only one given the system config.
    cluster.add_node(
        num_cpus=1,
        object_store_memory=store_bytes,
        _system_config={
            "max_io_workers": 2,
            "automatic_object_spilling_enabled": True,
            "object_store_full_max_retries": 4,
            "object_store_full_initial_delay_ms": 100,
            "object_spilling_config": spilling_config,
        })
    # Two additional worker nodes.
    for _ in range(2):
        cluster.add_node(num_cpus=1, object_store_memory=store_bytes)
    ray.init(address=cluster.address)

    arr = np.random.rand(1024 * 1024)  # 8 MB data

    @ray.remote(num_cpus=1)
    class Actor:
        def __init__(self):
            self.replay_buffer = []

        def ping(self):
            return

        def create_objects(self):
            for _ in range(80):
                ref = None
                while ref is None:
                    ref = ray.put(arr)
                    self.replay_buffer.append(ref)
                # Drop the newest reference again with 60% probability.
                if random.randint(0, 9) < 6:
                    self.replay_buffer.pop()

            # Randomly sample surviving objects and check their contents.
            for _ in range(200):
                ref = random.choice(self.replay_buffer)
                fetched = ray.get(ref, timeout=0)
                assert np.array_equal(fetched, arr)

    actors = [Actor.remote() for _ in range(3)]
    ray.get([actor.create_objects.remote() for actor in actors])

    def wait_until_actor_dead(actor):
        # The actor counts as dead once a call on it raises RayActorError.
        try:
            ray.get(actor.ping.remote())
        except ray.exceptions.RayActorError:
            return True
        else:
            return False

    def is_dir_empty():
        # True when the spill directory contains no entries at all.
        return not any(spill_dir.iterdir())

    # Kill the actors one at a time so that all references go away.
    for actor in actors:
        ray.kill(actor)
        wait_for_condition(lambda: wait_until_actor_dead(actor))
    # Deletion must also work across multiple nodes.
    wait_for_condition(is_dir_empty)
def test_fusion_objects(tmp_path, shutdown_only):
# Limit our object store to 75 MiB of memory.
temp_folder = tmp_path / "spill"