Ray, Tune, and RLlib support for memory, object_store_memory options (#5226)

This commit is contained in:
Eric Liang
2019-08-22 14:01:10 +08:00
committed by Robert Nishihara
parent c852213b83
commit e2e30ca507
40 changed files with 1006 additions and 296 deletions
+16 -6
View File
@@ -18,6 +18,7 @@ import time
import ray
import ray.ray_constants as ray_constants
import ray.services
from ray.resource_spec import ResourceSpec
from ray.utils import try_to_create_directory
# Logger for this module. It should be configured at the entry point
@@ -84,6 +85,7 @@ class Node(object):
os.path.dirname(os.path.abspath(__file__)),
"workers/default_worker.py"))
self._resource_spec = None
self._ray_params = ray_params
self._redis_address = ray_params.redis_address
self._config = (json.loads(ray_params._internal_config)
@@ -179,6 +181,16 @@ class Node(object):
self._logs_dir = os.path.join(self._session_dir, "logs")
try_to_create_directory(self._logs_dir, warn_if_exist=False)
def get_resource_spec(self):
    """Resolve and return the current resource spec for the node.

    On first use, a ``ResourceSpec`` is built from the node's
    ``_ray_params`` (``num_cpus``, ``num_gpus``, ``memory``,
    ``object_store_memory``, ``resources`` and ``redis_max_memory``) and
    resolved with ``is_head=self.head``. The result is cached on
    ``self._resource_spec`` and returned unchanged by later calls.

    Returns:
        The object returned by ``ResourceSpec(...).resolve(...)``.
    """
    # ``None`` is the "not resolved yet" sentinel; test for it explicitly
    # instead of relying on the truthiness of the cached spec object.
    if self._resource_spec is None:
        params = self._ray_params
        self._resource_spec = ResourceSpec(
            params.num_cpus, params.num_gpus, params.memory,
            params.object_store_memory, params.resources,
            params.redis_max_memory).resolve(is_head=self.head)
    return self._resource_spec
@property
def node_ip_address(self):
"""Get the cluster Redis address."""
@@ -344,14 +356,14 @@ class Node(object):
process_infos) = ray.services.start_redis(
self._node_ip_address,
redis_log_files,
self.get_resource_spec(),
port=self._ray_params.redis_port,
redis_shard_ports=self._ray_params.redis_shard_ports,
num_redis_shards=self._ray_params.num_redis_shards,
redis_max_clients=self._ray_params.redis_max_clients,
redirect_worker_output=True,
password=self._ray_params.redis_password,
include_java=self._ray_params.include_java,
redis_max_memory=self._ray_params.redis_max_memory)
include_java=self._ray_params.include_java)
assert (
ray_constants.PROCESS_TYPE_REDIS_SERVER not in self.all_processes)
self.all_processes[ray_constants.PROCESS_TYPE_REDIS_SERVER] = (
@@ -406,9 +418,9 @@ class Node(object):
"""Start the plasma store."""
stdout_file, stderr_file = self.new_log_files("plasma_store")
process_info = ray.services.start_plasma_store(
self.get_resource_spec(),
stdout_file=stdout_file,
stderr_file=stderr_file,
object_store_memory=self._ray_params.object_store_memory,
plasma_directory=self._ray_params.plasma_directory,
huge_pages=self._ray_params.huge_pages,
plasma_store_socket_name=self._plasma_store_socket_name)
@@ -436,9 +448,7 @@ class Node(object):
self._ray_params.worker_path,
self._temp_dir,
self._session_dir,
self._ray_params.num_cpus,
self._ray_params.num_gpus,
self._ray_params.resources,
self.get_resource_spec(),
self._ray_params.object_manager_port,
self._ray_params.node_manager_port,
self._ray_params.redis_password,