mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 10:33:16 +08:00
Limit default redis max memory to 10GB. (#3630)
* Limit Redis max memory to 10GB/shard by default. * Update stress tests. * Reorganize * Update * Add minimum cap size for object store and redis. * Small test update.
This commit is contained in:
committed by
Philipp Moritz
parent
4b23a34c93
commit
586a5c9ffa
@@ -18,6 +18,17 @@ ID_SIZE = 20
|
||||
NIL_JOB_ID = ObjectID(ID_SIZE * b"\xff")
|
||||
NIL_FUNCTION_ID = NIL_JOB_ID
|
||||
|
||||
# The default maximum number of bytes to allocate to the object store unless
|
||||
# overridden by the user.
|
||||
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 20 * 10**9
|
||||
# The smallest cap on the memory used by the object store that we allow.
|
||||
OBJECT_STORE_MINIMUM_MEMORY_BYTES = 10**7
|
||||
# The default maximum number of bytes that the non-primary Redis shards are
|
||||
# allowed to use unless overridden by the user.
|
||||
DEFAULT_REDIS_MAX_MEMORY_BYTES = 10**10
|
||||
# The smallest cap on the memory used by Redis that we allow.
|
||||
REDIS_MINIMUM_MEMORY_BYTES = 10**7
|
||||
|
||||
# If a remote function or actor (or some other export) has serialized size
|
||||
# greater than this quantity, print a warning.
|
||||
PICKLE_OBJECT_WARNING_SIZE = 10**7
|
||||
|
||||
@@ -116,16 +116,16 @@ def cli(logging_level, logging_format):
|
||||
"--object-store-memory",
|
||||
required=False,
|
||||
type=int,
|
||||
help="the maximum amount of memory (in bytes) to allow the "
|
||||
"object store to use")
|
||||
help="The amount of memory (in bytes) to start the object store with. "
|
||||
"By default, this is capped at 20GB but can be set higher.")
|
||||
@click.option(
|
||||
"--redis-max-memory",
|
||||
required=False,
|
||||
type=int,
|
||||
help=("The max amount of memory (in bytes) to allow redis to use, or None "
|
||||
"for no limit. Once the limit is exceeded, redis will start LRU "
|
||||
"eviction of entries. This only applies to the sharded "
|
||||
"redis tables (task and object tables)."))
|
||||
help="The max amount of memory (in bytes) to allow redis to use. Once the "
|
||||
"limit is exceeded, redis will start LRU eviction of entries. This only "
|
||||
"applies to the sharded redis tables (task, object, and profile tables). "
|
||||
"By default this is capped at 10GB but can be set higher.")
|
||||
@click.option(
|
||||
"--num-workers",
|
||||
required=False,
|
||||
|
||||
+33
-14
@@ -19,7 +19,7 @@ import redis
|
||||
|
||||
import pyarrow
|
||||
# Ray modules
|
||||
import ray.ray_constants
|
||||
import ray.ray_constants as ray_constants
|
||||
import ray.plasma
|
||||
|
||||
from ray.tempfile_services import (
|
||||
@@ -36,9 +36,6 @@ PROCESS_TYPE_PLASMA_STORE = "plasma_store"
|
||||
PROCESS_TYPE_REDIS_SERVER = "redis_server"
|
||||
PROCESS_TYPE_WEB_UI = "web_ui"
|
||||
|
||||
# Max bytes to allocate to plasma unless overriden by the user
|
||||
MAX_DEFAULT_MEM = 20 * 1000 * 1000 * 1000
|
||||
|
||||
# This is a dictionary tracking all of the processes of different types that
|
||||
# have been started by this services module. Note that the order of the keys is
|
||||
# important because it determines the order in which these processes will be
|
||||
@@ -446,10 +443,11 @@ def start_redis(node_ip_address,
|
||||
use_credis: If True, additionally load the chain-replicated libraries
|
||||
into the redis servers. Defaults to None, which means its value is
|
||||
set by the presence of "RAY_USE_NEW_GCS" in os.environ.
|
||||
redis_max_memory: The max amount of memory (in bytes) to allow redis
|
||||
to use, or None for no limit. Once the limit is exceeded, redis
|
||||
will start LRU eviction of entries. This only applies to the
|
||||
sharded redis tables (task and object tables).
|
||||
redis_max_memory: The max amount of memory (in bytes) to allow each
|
||||
redis shard to use. Once the limit is exceeded, redis will start
|
||||
LRU eviction of entries. This only applies to the sharded redis
|
||||
tables (task, object, and profile tables). By default, this is
|
||||
capped at 10GB but can be set higher.
|
||||
|
||||
Returns:
|
||||
A tuple of the address for the primary Redis shard and a list of
|
||||
@@ -481,6 +479,8 @@ def start_redis(node_ip_address,
|
||||
stderr_file=redis_stderr_file,
|
||||
cleanup=cleanup,
|
||||
password=password,
|
||||
# Below we use None to indicate no limit on the memory of the
|
||||
# primary Redis shard.
|
||||
redis_max_memory=None)
|
||||
else:
|
||||
assigned_port, _ = _start_redis_instance(
|
||||
@@ -496,6 +496,8 @@ def start_redis(node_ip_address,
|
||||
# supplies.
|
||||
modules=[CREDIS_MASTER_MODULE, REDIS_MODULE],
|
||||
password=password,
|
||||
# Below we use None to indicate no limit on the memory of the
|
||||
# primary Redis shard.
|
||||
redis_max_memory=None)
|
||||
if port is not None:
|
||||
assert assigned_port == port
|
||||
@@ -516,6 +518,15 @@ def start_redis(node_ip_address,
|
||||
# Store version information in the primary Redis shard.
|
||||
_put_version_info_in_redis(primary_redis_client)
|
||||
|
||||
# Cap the memory of the other redis shards if no limit is provided.
|
||||
redis_max_memory = (redis_max_memory if redis_max_memory is not None else
|
||||
ray_constants.DEFAULT_REDIS_MAX_MEMORY_BYTES)
|
||||
if redis_max_memory < ray_constants.REDIS_MINIMUM_MEMORY_BYTES:
|
||||
raise ValueError("Attempting to cap Redis memory usage at {} bytes, "
|
||||
"but the minimum allowed is {} bytes.".format(
|
||||
redis_max_memory,
|
||||
ray_constants.REDIS_MINIMUM_MEMORY_BYTES))
|
||||
|
||||
# Start other Redis shards. Each Redis shard logs to a separate file,
|
||||
# prefixed by "redis-<shard number>".
|
||||
redis_shards = []
|
||||
@@ -860,9 +871,9 @@ def check_and_update_resources(resources):
|
||||
and not resource_quantity.is_integer()):
|
||||
raise ValueError("Resource quantities must all be whole numbers.")
|
||||
|
||||
if resource_quantity > ray.ray_constants.MAX_RESOURCE_QUANTITY:
|
||||
if resource_quantity > ray_constants.MAX_RESOURCE_QUANTITY:
|
||||
raise ValueError("Resource quantities must be at most {}.".format(
|
||||
ray.ray_constants.MAX_RESOURCE_QUANTITY))
|
||||
ray_constants.MAX_RESOURCE_QUANTITY))
|
||||
|
||||
return resources
|
||||
|
||||
@@ -1033,13 +1044,15 @@ def determine_plasma_store_config(object_store_memory=None,
|
||||
if object_store_memory is None:
|
||||
object_store_memory = int(system_memory * 0.4)
|
||||
# Cap memory to avoid memory waste and perf issues on large nodes
|
||||
if object_store_memory > MAX_DEFAULT_MEM:
|
||||
if (object_store_memory >
|
||||
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES):
|
||||
logger.warning(
|
||||
"Warning: Capping object memory store to {}GB. ".format(
|
||||
MAX_DEFAULT_MEM // 1e9) +
|
||||
"To increase this further, specify `object_store_memory` "
|
||||
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES // 1e9)
|
||||
+ "To increase this further, specify `object_store_memory` "
|
||||
"when calling ray.init() or ray start.")
|
||||
object_store_memory = MAX_DEFAULT_MEM
|
||||
object_store_memory = (
|
||||
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES)
|
||||
|
||||
# Determine which directory to use. By default, use /tmp on MacOS and
|
||||
# /dev/shm on Linux, unless the shared-memory file system is too small,
|
||||
@@ -1122,6 +1135,12 @@ def start_plasma_store(node_ip_address,
|
||||
object_store_memory, plasma_directory = determine_plasma_store_config(
|
||||
object_store_memory, plasma_directory, huge_pages)
|
||||
|
||||
if object_store_memory < ray_constants.OBJECT_STORE_MINIMUM_MEMORY_BYTES:
|
||||
raise ValueError("Attempting to cap object store memory usage at {} "
|
||||
"bytes, but the minimum allowed is {} bytes.".format(
|
||||
object_store_memory,
|
||||
ray_constants.OBJECT_STORE_MINIMUM_MEMORY_BYTES))
|
||||
|
||||
# Print the object store memory using two decimal places.
|
||||
object_store_memory_str = (object_store_memory / 10**7) / 10**2
|
||||
logger.info("Starting the Plasma object store with {} GB memory "
|
||||
|
||||
@@ -1495,11 +1495,13 @@ def init(redis_address=None,
|
||||
resources: A dictionary mapping the name of a resource to the quantity
|
||||
of that resource available.
|
||||
object_store_memory: The amount of memory (in bytes) to start the
|
||||
object store with.
|
||||
redis_max_memory: The max amount of memory (in bytes) to allow redis
|
||||
to use, or None for no limit. Once the limit is exceeded, redis
|
||||
will start LRU eviction of entries. This only applies to the
|
||||
sharded redis tables (task and object tables).
|
||||
object store with. By default, this is capped at 20GB but can be
|
||||
set higher.
|
||||
redis_max_memory: The max amount of memory (in bytes) to allow each
|
||||
redis shard to use. Once the limit is exceeded, redis will start
|
||||
LRU eviction of entries. This only applies to the sharded redis
|
||||
tables (task, object, and profile tables). By default, this is
|
||||
capped at 10GB but can be set higher.
|
||||
node_ip_address (str): The IP address of the node that we are on.
|
||||
object_id_seed (int): Used to seed the deterministic generation of
|
||||
object IDs. The same value can be used across multiple runs of the
|
||||
|
||||
Reference in New Issue
Block a user