Moving Local Mode to C++ (#7670)

This commit is contained in:
ijrsvt
2020-04-01 13:50:57 -07:00
committed by GitHub
parent 65054a2c7c
commit 9bfc2c4b54
17 changed files with 489 additions and 648 deletions
+36 -65
View File
@@ -15,7 +15,6 @@ import sys
import threading
import time
import traceback
import random
# Ray modules
import ray.cloudpickle as pickle
@@ -54,7 +53,6 @@ from ray.utils import (
is_cython,
setup_logger,
)
from ray.local_mode_manager import LocalModeManager
SCRIPT_MODE = 0
WORKER_MODE = 1
@@ -264,6 +262,10 @@ class Worker:
"do this, you can wrap the ray.ObjectID in a list and "
"call 'put' on it (or return it).")
if self.mode == LOCAL_MODE:
assert object_id is None, ("Local Mode does not support "
"inserting with an objectID")
serialized_value = self.get_serialization_context().serialize(value)
# This *must* be the first place that we construct this python
# ObjectID because an entry with 0 local references is created when
@@ -291,10 +293,6 @@ class Worker:
whose values should be retrieved.
timeout (float): timeout (float): The maximum amount of time in
seconds to wait before returning.
Raises:
Exception if running in LOCAL_MODE and any of the object IDs do not
exist in the emulated object store.
"""
# Make sure that the values are object IDs.
for object_id in object_ids:
@@ -303,9 +301,6 @@ class Worker:
"Attempting to call `get` on the value {}, "
"which is not an ray.ObjectID.".format(object_id))
if self.mode == LOCAL_MODE:
return self.local_mode_manager.get_objects(object_ids)
timeout_ms = int(timeout * 1000) if timeout else -1
data_metadata_pairs = self.core_worker.get_objects(
object_ids, self.current_task_id, timeout_ms)
@@ -438,9 +433,11 @@ def get_gpu_ids():
Returns:
A list of GPU IDs.
"""
# TODO(ilr) Handle inserting resources in local mode
if _mode() == LOCAL_MODE:
raise RuntimeError("ray.get_gpu_ids() currently does not work in "
"local_mode.")
logger.info("ray.get_gpu_ids() currently does not work in LOCAL "
"MODE.")
all_resource_ids = global_worker.core_worker.resource_ids()
assigned_ids = [
@@ -600,8 +597,8 @@ def init(address=None,
same driver in order to generate the object IDs in a consistent
manner. However, the same ID should not be used for different
drivers.
local_mode (bool): True if the code should be executed serially
without Ray. This is useful for debugging.
local_mode (bool): True if the code should be executed serially. This
is useful for debugging.
driver_object_store_memory (int): Limit the amount of memory the driver
can use in the object store for creating objects. By default, this
is autoset based on available system memory, subject to a 20GB cap.
@@ -705,17 +702,13 @@ def init(address=None,
_internal_config["free_objects_period_milliseconds"] = 1000
global _global_node
if driver_mode == LOCAL_MODE:
# If starting Ray in LOCAL_MODE, don't start any other processes.
_global_node = ray.node.LocalNode()
elif redis_address is None:
if redis_address is None:
# In this case, we need to start a new cluster.
ray_params = ray.parameter.RayParams(
redis_address=redis_address,
redis_port=redis_port,
node_ip_address=node_ip_address,
object_id_seed=object_id_seed,
local_mode=local_mode,
driver_mode=driver_mode,
redirect_worker_output=redirect_worker_output,
redirect_output=redirect_output,
@@ -1122,12 +1115,11 @@ def connect(node,
ray._raylet.set_internal_config(internal_config)
if mode is not LOCAL_MODE:
# Create a Redis client to primary.
# The Redis client can safely be shared between threads. However,
# that is not true of Redis pubsub clients. See the documentation at
# https://github.com/andymccurdy/redis-py#thread-safety.
worker.redis_client = node.create_redis_client()
# Create a Redis client to primary.
# The Redis client can safely be shared between threads. However,
# that is not true of Redis pubsub clients. See the documentation at
# https://github.com/andymccurdy/redis-py#thread-safety.
worker.redis_client = node.create_redis_client()
# Initialize some fields.
if mode is WORKER_MODE:
@@ -1136,12 +1128,6 @@ def connect(node,
job_id = JobID.nil()
# TODO(qwang): Rename this to `worker_id_str` or type to `WorkerID`
worker.worker_id = _random_string()
setproctitle.setproctitle("ray::IDLE")
elif mode is LOCAL_MODE:
if job_id is None:
job_id = JobID.from_int(random.randint(1, 65535))
worker.worker_id = ray.utils.compute_driver_id_from_job(
job_id).binary()
else:
# This is the code path of driver mode.
if job_id is None:
@@ -1155,6 +1141,9 @@ def connect(node,
worker.worker_id = ray.utils.compute_driver_id_from_job(
job_id).binary()
if mode is not SCRIPT_MODE and setproctitle:
setproctitle.setproctitle("ray::IDLE")
if not isinstance(job_id, JobID):
raise TypeError("The type of given job id must be JobID.")
@@ -1163,12 +1152,6 @@ def connect(node,
worker.node = node
worker.set_mode(mode)
# If running Ray in LOCAL_MODE, there is no need to create call
# create_worker or to start the worker service.
if mode == LOCAL_MODE:
worker.local_mode_manager = LocalModeManager()
return
# For driver's check that the version information matches the version
# information that the Ray cluster was started with.
try:
@@ -1249,9 +1232,9 @@ def connect(node,
(log_stderr_file
if log_stderr_file is not None else sys.stderr).name)
worker.redis_client.hmset(b"Workers:" + worker.worker_id, worker_dict)
else:
raise ValueError("Invalid worker mode. Expected DRIVER or WORKER.")
elif not LOCAL_MODE:
raise ValueError(
"Invalid worker mode. Expected DRIVER, WORKER or LOCAL.")
redis_address, redis_port = node.redis_address.split(":")
gcs_options = ray._raylet.GcsClientOptions(
redis_address,
@@ -1259,15 +1242,9 @@ def connect(node,
node.redis_password,
)
worker.core_worker = ray._raylet.CoreWorker(
(mode == SCRIPT_MODE),
node.plasma_store_socket_name,
node.raylet_socket_name,
job_id,
gcs_options,
node.get_logs_dir_path(),
node.node_ip_address,
node.node_manager_port,
)
(mode == SCRIPT_MODE), node.plasma_store_socket_name,
node.raylet_socket_name, job_id, gcs_options, node.get_logs_dir_path(),
node.node_ip_address, node.node_manager_port, mode == LOCAL_MODE)
if driver_object_store_memory is not None:
worker.core_worker.set_object_store_client_options(
@@ -1276,9 +1253,10 @@ def connect(node,
# Put something in the plasma store so that subsequent plasma store
# accesses will be faster. Currently the first access is always slow, and
# we don't want the user to experience this.
temporary_object_id = ray.ObjectID.from_random()
worker.put_object(1, object_id=temporary_object_id)
ray.internal.free([temporary_object_id])
if mode != LOCAL_MODE:
temporary_object_id = ray.ObjectID.from_random()
worker.put_object(1, object_id=temporary_object_id)
ray.internal.free([temporary_object_id])
# Start the import thread
worker.import_thread = import_thread.ImportThread(worker, mode,
@@ -1540,16 +1518,13 @@ def put(value, weakref=False):
worker = global_worker
worker.check_connected()
with profiling.profile("ray.put"):
if worker.mode == LOCAL_MODE:
object_id = worker.local_mode_manager.put_object(value)
else:
try:
object_id = worker.put_object(value, pin_object=not weakref)
except ObjectStoreFullError:
logger.info(
"Put failed since the value was either too large or the "
"store was full of pinned objects.")
raise
try:
object_id = worker.put_object(value, pin_object=not weakref)
except ObjectStoreFullError:
logger.info(
"Put failed since the value was either too large or the "
"store was full of pinned objects.")
raise
return object_id
@@ -1624,10 +1599,6 @@ def wait(object_ids, num_returns=1, timeout=None):
worker.check_connected()
# TODO(swang): Check main thread.
with profiling.profile("ray.wait"):
# When Ray is run in LOCAL_MODE, all functions are run immediately,
# so all objects in object_id are ready.
if worker.mode == LOCAL_MODE:
return object_ids[:num_returns], object_ids[num_returns:]
# TODO(rkn): This is a temporary workaround for
# https://github.com/ray-project/ray/issues/997. However, it should be