Fetch internal config from raylet (#8195)

This commit is contained in:
Edward Oakes
2020-04-28 13:12:11 -05:00
committed by GitHub
parent 1775e89f26
commit ebdccde030
12 changed files with 115 additions and 85 deletions
-13
View File
@@ -112,19 +112,6 @@ include "includes/libcoreworker.pxi"
logger = logging.getLogger(__name__)
def set_internal_config(dict options):
    """Forward ``options`` to ``RayConfig.instance().initialize``.

    Every key and value is converted with ``str()`` and ASCII-encoded
    before being stored in the C++ string map handed to ``initialize``.

    Args:
        options: Mapping of config names to values, or None. When None,
            the function returns without touching the config.
    """
    cdef unordered_map[c_string, c_string] c_options

    if options is None:
        return

    for name, setting in options.items():
        # Encode the value first, then the key, matching the evaluation
        # order of a plain subscript assignment.
        encoded_setting = str(setting).encode("ascii")
        c_options[str(name).encode("ascii")] = encoded_setting

    RayConfig.instance().initialize(c_options)
cdef int check_status(const CRayStatus& status) nogil except -1:
if status.ok():
return 0
-2
View File
@@ -83,5 +83,3 @@ cdef extern from "ray/common/ray_config.h" nogil:
uint32_t maximum_gcs_deletion_batch_size() const
int64_t max_direct_call_object_size() const
void initialize(const unordered_map[c_string, c_string] &config_map)
+26
View File
@@ -1869,6 +1869,32 @@ def test_duplicate_args(ray_start_regular):
arg1, arg2, arg1, kwarg1=arg1, kwarg2=arg2, kwarg1_duplicate=arg1))
def test_internal_config_when_connecting(ray_start_cluster):
    """Check how ``_internal_config`` behaves when a driver connects.

    A node is started with an internal config that disables object
    pinning. Passing ``_internal_config`` to ``ray.init`` while connecting
    to that cluster must raise ``ValueError``; connecting without it must
    still pick up the node's config.
    """
    large_array_bytes = 40 * 1024 * 1024
    config = json.dumps({
        "object_pinning_enabled": 0,
        "initial_reconstruction_timeout_milliseconds": 200
    })

    cluster = ray.cluster_utils.Cluster()
    cluster.add_node(
        _internal_config=config, object_store_memory=100 * 1024 * 1024)
    cluster.wait_for_nodes()

    # Specifying _internal_config when connecting to a cluster is disallowed.
    with pytest.raises(ValueError):
        ray.init(address=cluster.address, _internal_config=config)

    # Check that the config was picked up (object pinning is disabled).
    ray.init(address=cluster.address)
    first_oid = ray.put(np.zeros(large_array_bytes, dtype=np.uint8))
    # Put five more 40 MiB arrays into the 100 MiB store; presumably this
    # pushes the first object out once pinning is off.
    for _ in range(5):
        ray.put(np.zeros(large_array_bytes, dtype=np.uint8))

    # This would not raise an exception if object pinning was enabled.
    with pytest.raises(ray.exceptions.UnreconstructableError):
        ray.get(first_oid)
if __name__ == "__main__":
    import pytest

    # Run this file's tests verbosely and exit with pytest's return code.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
-6
View File
@@ -973,12 +973,6 @@ def test_fill_object_store_lru_fallback(shutdown_only):
ray.get(oid)
oids.append(oid)
# NOTE: Needed to unset the config set by the lru_evict flag, for Travis.
ray._raylet.set_internal_config({
"object_pinning_enabled": 1,
"object_store_full_max_retries": 5,
})
@pytest.mark.parametrize(
"ray_start_cluster", [{
+5 -5
View File
@@ -73,7 +73,7 @@ def test_reconstruction_cached_dependency(ray_start_cluster,
object_store_memory=10**8,
_internal_config=config)
cluster.wait_for_nodes()
ray.init(address=cluster.address, _internal_config=config)
ray.init(address=cluster.address)
@ray.remote(max_retries=0)
def large_object():
@@ -135,7 +135,7 @@ def test_basic_reconstruction(ray_start_cluster, reconstruction_enabled):
object_store_memory=10**8,
_internal_config=config)
cluster.wait_for_nodes()
ray.init(address=cluster.address, _internal_config=config)
ray.init(address=cluster.address)
@ray.remote(max_retries=1 if reconstruction_enabled else 0)
def large_object():
@@ -187,7 +187,7 @@ def test_basic_reconstruction_put(ray_start_cluster, reconstruction_enabled):
object_store_memory=10**8,
_internal_config=config)
cluster.wait_for_nodes()
ray.init(address=cluster.address, _internal_config=config)
ray.init(address=cluster.address)
@ray.remote(max_retries=1 if reconstruction_enabled else 0)
def large_object():
@@ -242,7 +242,7 @@ def test_multiple_downstream_tasks(ray_start_cluster, reconstruction_enabled):
object_store_memory=10**8,
_internal_config=config)
cluster.wait_for_nodes()
ray.init(address=cluster.address, _internal_config=config)
ray.init(address=cluster.address)
@ray.remote(max_retries=1 if reconstruction_enabled else 0)
def large_object():
@@ -297,7 +297,7 @@ def test_reconstruction_chain(ray_start_cluster, reconstruction_enabled):
node_to_kill = cluster.add_node(
num_cpus=1, object_store_memory=10**8, _internal_config=config)
cluster.wait_for_nodes()
ray.init(address=cluster.address, _internal_config=config)
ray.init(address=cluster.address)
@ray.remote(max_retries=1 if reconstruction_enabled else 0)
def large_object():
+5 -10
View File
@@ -808,10 +808,9 @@ def init(address=None,
if raylet_socket_name is not None:
raise ValueError("When connecting to an existing cluster, "
"raylet_socket_name must not be provided.")
if _internal_config is not None:
logger.warning(
"When connecting to an existing cluster, "
"_internal_config must match the cluster's _internal_config.")
if _internal_config is not None and len(_internal_config) != 0:
raise ValueError("When connecting to an existing cluster, "
"_internal_config must not be provided.")
# In this case, we only need to connect the node.
ray_params = ray.parameter.RayParams(
@@ -836,8 +835,7 @@ def init(address=None,
log_to_driver=log_to_driver,
worker=global_worker,
driver_object_store_memory=driver_object_store_memory,
job_id=job_id,
internal_config=_internal_config)
job_id=job_id)
for hook in _post_init_hooks:
hook()
@@ -1113,8 +1111,7 @@ def connect(node,
log_to_driver=False,
worker=global_worker,
driver_object_store_memory=None,
job_id=None,
internal_config=None):
job_id=None):
"""Connect this worker to the raylet, to Plasma, and to Redis.
Args:
@@ -1142,8 +1139,6 @@ def connect(node,
except io.UnsupportedOperation:
pass # ignore
ray._raylet.set_internal_config(internal_config)
# Create a Redis client to primary.
# The Redis client can safely be shared between threads. However,
# that is not true of Redis pubsub clients. See the documentation at
+1 -2
View File
@@ -119,6 +119,5 @@ if __name__ == "__main__":
spawn_reaper=False,
connect_only=True)
ray.worker._global_node = node
ray.worker.connect(
node, mode=ray.WORKER_MODE, internal_config=internal_config)
ray.worker.connect(node, mode=ray.WORKER_MODE)
ray.worker.global_worker.main_loop()