Limit default redis max memory to 10GB. (#3630)

* Limit Redis max memory to 10GB/shard by default.

* Update stress tests.

* Reorganize

* Update

* Add minimum cap size for object store and redis.

* Small test update.
This commit is contained in:
Robert Nishihara
2019-01-03 13:23:54 -08:00
committed by Philipp Moritz
parent 4b23a34c93
commit 586a5c9ffa
7 changed files with 121 additions and 119 deletions
+11
View File
@@ -18,6 +18,17 @@ ID_SIZE = 20
NIL_JOB_ID = ObjectID(ID_SIZE * b"\xff")
NIL_FUNCTION_ID = NIL_JOB_ID
# The default maximum number of bytes to allocate to the object store unless
# overridden by the user.
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 20 * 10**9
# The smallest cap on the memory used by the object store that we allow.
OBJECT_STORE_MINIMUM_MEMORY_BYTES = 10**7
# The default maximum number of bytes that the non-primary Redis shards are
# allowed to use unless overridden by the user.
DEFAULT_REDIS_MAX_MEMORY_BYTES = 10**10
# The smallest cap on the memory used by Redis that we allow.
REDIS_MINIMUM_MEMORY_BYTES = 10**7
# If a remote function or actor (or some other export) has serialized size
# greater than this quantity, print a warning.
PICKLE_OBJECT_WARNING_SIZE = 10**7
+6 -6
View File
@@ -116,16 +116,16 @@ def cli(logging_level, logging_format):
"--object-store-memory",
required=False,
type=int,
help="the maximum amount of memory (in bytes) to allow the "
"object store to use")
help="The amount of memory (in bytes) to start the object store with. "
"By default, this is capped at 20GB but can be set higher.")
@click.option(
"--redis-max-memory",
required=False,
type=int,
help=("The max amount of memory (in bytes) to allow redis to use, or None "
"for no limit. Once the limit is exceeded, redis will start LRU "
"eviction of entries. This only applies to the sharded "
"redis tables (task and object tables)."))
help="The max amount of memory (in bytes) to allow redis to use. Once the "
"limit is exceeded, redis will start LRU eviction of entries. This only "
"applies to the sharded redis tables (task, object, and profile tables). "
"By default this is capped at 10GB but can be set higher.")
@click.option(
"--num-workers",
required=False,
+33 -14
View File
@@ -19,7 +19,7 @@ import redis
import pyarrow
# Ray modules
import ray.ray_constants
import ray.ray_constants as ray_constants
import ray.plasma
from ray.tempfile_services import (
@@ -36,9 +36,6 @@ PROCESS_TYPE_PLASMA_STORE = "plasma_store"
PROCESS_TYPE_REDIS_SERVER = "redis_server"
PROCESS_TYPE_WEB_UI = "web_ui"
# Max bytes to allocate to plasma unless overridden by the user
MAX_DEFAULT_MEM = 20 * 1000 * 1000 * 1000
# This is a dictionary tracking all of the processes of different types that
# have been started by this services module. Note that the order of the keys is
# important because it determines the order in which these processes will be
@@ -446,10 +443,11 @@ def start_redis(node_ip_address,
use_credis: If True, additionally load the chain-replicated libraries
into the redis servers. Defaults to None, which means its value is
set by the presence of "RAY_USE_NEW_GCS" in os.environ.
redis_max_memory: The max amount of memory (in bytes) to allow redis
to use, or None for no limit. Once the limit is exceeded, redis
will start LRU eviction of entries. This only applies to the
sharded redis tables (task and object tables).
redis_max_memory: The max amount of memory (in bytes) to allow each
redis shard to use. Once the limit is exceeded, redis will start
LRU eviction of entries. This only applies to the sharded redis
tables (task, object, and profile tables). By default, this is
capped at 10GB but can be set higher.
Returns:
A tuple of the address for the primary Redis shard and a list of
@@ -481,6 +479,8 @@ def start_redis(node_ip_address,
stderr_file=redis_stderr_file,
cleanup=cleanup,
password=password,
# Below we use None to indicate no limit on the memory of the
# primary Redis shard.
redis_max_memory=None)
else:
assigned_port, _ = _start_redis_instance(
@@ -496,6 +496,8 @@ def start_redis(node_ip_address,
# supplies.
modules=[CREDIS_MASTER_MODULE, REDIS_MODULE],
password=password,
# Below we use None to indicate no limit on the memory of the
# primary Redis shard.
redis_max_memory=None)
if port is not None:
assert assigned_port == port
@@ -516,6 +518,15 @@ def start_redis(node_ip_address,
# Store version information in the primary Redis shard.
_put_version_info_in_redis(primary_redis_client)
# Cap the memory of the other redis shards if no limit is provided.
redis_max_memory = (redis_max_memory if redis_max_memory is not None else
ray_constants.DEFAULT_REDIS_MAX_MEMORY_BYTES)
if redis_max_memory < ray_constants.REDIS_MINIMUM_MEMORY_BYTES:
raise ValueError("Attempting to cap Redis memory usage at {} bytes, "
"but the minimum allowed is {} bytes.".format(
redis_max_memory,
ray_constants.REDIS_MINIMUM_MEMORY_BYTES))
# Start other Redis shards. Each Redis shard logs to a separate file,
# prefixed by "redis-<shard number>".
redis_shards = []
@@ -860,9 +871,9 @@ def check_and_update_resources(resources):
and not resource_quantity.is_integer()):
raise ValueError("Resource quantities must all be whole numbers.")
if resource_quantity > ray.ray_constants.MAX_RESOURCE_QUANTITY:
if resource_quantity > ray_constants.MAX_RESOURCE_QUANTITY:
raise ValueError("Resource quantities must be at most {}.".format(
ray.ray_constants.MAX_RESOURCE_QUANTITY))
ray_constants.MAX_RESOURCE_QUANTITY))
return resources
@@ -1033,13 +1044,15 @@ def determine_plasma_store_config(object_store_memory=None,
if object_store_memory is None:
object_store_memory = int(system_memory * 0.4)
# Cap memory to avoid memory waste and perf issues on large nodes
if object_store_memory > MAX_DEFAULT_MEM:
if (object_store_memory >
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES):
logger.warning(
"Warning: Capping object memory store to {}GB. ".format(
MAX_DEFAULT_MEM // 1e9) +
"To increase this further, specify `object_store_memory` "
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES // 1e9)
+ "To increase this further, specify `object_store_memory` "
"when calling ray.init() or ray start.")
object_store_memory = MAX_DEFAULT_MEM
object_store_memory = (
ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES)
# Determine which directory to use. By default, use /tmp on MacOS and
# /dev/shm on Linux, unless the shared-memory file system is too small,
@@ -1122,6 +1135,12 @@ def start_plasma_store(node_ip_address,
object_store_memory, plasma_directory = determine_plasma_store_config(
object_store_memory, plasma_directory, huge_pages)
if object_store_memory < ray_constants.OBJECT_STORE_MINIMUM_MEMORY_BYTES:
raise ValueError("Attempting to cap object store memory usage at {} "
"bytes, but the minimum allowed is {} bytes.".format(
object_store_memory,
ray_constants.OBJECT_STORE_MINIMUM_MEMORY_BYTES))
# Print the object store memory using two decimal places.
object_store_memory_str = (object_store_memory / 10**7) / 10**2
logger.info("Starting the Plasma object store with {} GB memory "
+7 -5
View File
@@ -1495,11 +1495,13 @@ def init(redis_address=None,
resources: A dictionary mapping the name of a resource to the quantity
of that resource available.
object_store_memory: The amount of memory (in bytes) to start the
object store with.
redis_max_memory: The max amount of memory (in bytes) to allow redis
to use, or None for no limit. Once the limit is exceeded, redis
will start LRU eviction of entries. This only applies to the
sharded redis tables (task and object tables).
object store with. By default, this is capped at 20GB but can be
set higher.
redis_max_memory: The max amount of memory (in bytes) to allow each
redis shard to use. Once the limit is exceeded, redis will start
LRU eviction of entries. This only applies to the sharded redis
tables (task, object, and profile tables). By default, this is
capped at 10GB but can be set higher.
node_ip_address (str): The IP address of the node that we are on.
object_id_seed (int): Used to seed the deterministic generation of
object IDs. The same value can be used across multiple runs of the