Move all config constants into single file. (#1192)

* Initial pass at factoring out C++ configuration into a single file.

* Expose config through Python.

* Forward declarations.

* Fixes with Python extensions

* Remove old code.

* Consistent naming for constants.

* Fixes

* Fix linting.

* More linting.

* Whitespace

* Rename config -> _config.

* Move config inside a class.

* Update naming convention.

* Fix linting.

* More linting

* More linting.

* Add in some more constants.

* Fix linting
This commit is contained in:
Robert Nishihara
2017-11-08 11:10:38 -08:00
committed by Philipp Moritz
parent a8032b9ca1
commit 1c6b30b5e2
27 changed files with 659 additions and 178 deletions
+2 -1
View File
@@ -40,6 +40,7 @@ except ImportError as e:
e.args += (helpful_message,)
raise
from ray.local_scheduler import _config # noqa: E402
from ray.worker import (error_info, init, connect, disconnect,
get, put, wait, remote, log_event, log_span,
flush_log, get_gpu_ids, get_webui_url,
@@ -59,7 +60,7 @@ __all__ = ["error_info", "init", "connect", "disconnect", "get", "put", "wait",
"remote", "log_event", "log_span", "flush_log", "actor",
"get_gpu_ids", "get_webui_url", "register_custom_serializer",
"SCRIPT_MODE", "WORKER_MODE", "PYTHON_MODE", "SILENT_MODE",
"global_state", "__version__"]
"global_state", "_config", "__version__"]
import ctypes # noqa: E402
# Windows only
+3 -2
View File
@@ -4,8 +4,9 @@ from __future__ import print_function
from ray.core.src.local_scheduler.liblocal_scheduler_library import (
Task, LocalSchedulerClient, ObjectID, check_simple_value, task_from_string,
task_to_string)
task_to_string, _config)
from .local_scheduler_services import start_local_scheduler
__all__ = ["Task", "LocalSchedulerClient", "ObjectID", "check_simple_value",
"task_from_string", "task_to_string", "start_local_scheduler"]
"task_from_string", "task_to_string", "start_local_scheduler",
"_config"]
+2 -4
View File
@@ -22,8 +22,6 @@ from ray.worker import NIL_ACTOR_ID
# These variables must be kept in sync with the C codebase.
# common/common.h
HEARTBEAT_TIMEOUT_MILLISECONDS = 100
NUM_HEARTBEATS_TIMEOUT = 100
DB_CLIENT_ID_SIZE = 20
NIL_ID = b"\xff" * DB_CLIENT_ID_SIZE
@@ -580,7 +578,7 @@ class Monitor(object):
plasma_manager_ids = list(self.live_plasma_managers.keys())
for plasma_manager_id in plasma_manager_ids:
if ((self.live_plasma_managers[plasma_manager_id]) >=
NUM_HEARTBEATS_TIMEOUT):
ray._config.num_heartbeats_timeout()):
log.warn("Timed out {}".format(PLASMA_MANAGER_CLIENT_TYPE))
# Remove the plasma manager from the managers whose
# heartbeats we're tracking.
@@ -599,7 +597,7 @@ class Monitor(object):
# Wait for a heartbeat interval before processing the next round of
# messages.
time.sleep(HEARTBEAT_TIMEOUT_MILLISECONDS * 1e-3)
time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)
if __name__ == "__main__":
+15 -14
View File
@@ -49,10 +49,6 @@ NIL_LOCAL_SCHEDULER_ID = NIL_ID
NIL_FUNCTION_ID = NIL_ID
NIL_ACTOR_ID = NIL_ID
# When performing ray.get, wait 1 second before attempting to reconstruct
# and fetch the object again.
GET_TIMEOUT_MILLISECONDS = 1000
# This must be kept in sync with the `error_types` array in
# common/state/error_table.h.
OBJECT_HASH_MISMATCH_ERROR_TYPE = b"object_hash_mismatch"
@@ -372,10 +368,11 @@ class Worker(object):
# long time, if the store is blocked, it can block the manager
# as well as a consequence.
results = []
get_request_size = 10000
for i in range(0, len(object_ids), get_request_size):
for i in range(0, len(object_ids),
ray._config.worker_get_request_size()):
results += self.plasma_client.get(
object_ids[i:(i + get_request_size)],
object_ids[i:(i +
ray._config.worker_get_request_size())],
timeout,
self.serialization_context)
return results
@@ -420,12 +417,13 @@ class Worker(object):
# Do an initial fetch for remote objects. We divide the fetch into
# smaller fetches so as to not block the manager for a prolonged period
# of time in a single call.
fetch_request_size = 10000
plain_object_ids = [plasma.ObjectID(object_id.id())
for object_id in object_ids]
for i in range(0, len(object_ids), fetch_request_size):
for i in range(0, len(object_ids),
ray._config.worker_fetch_request_size()):
self.plasma_client.fetch(
plain_object_ids[i:(i + fetch_request_size)])
plain_object_ids[i:(i +
ray._config.worker_fetch_request_size())])
# Get the objects. We initially try to get the objects immediately.
final_results = self.retrieve_and_deserialize(plain_object_ids, 0)
@@ -436,7 +434,7 @@ class Worker(object):
if val is plasma.ObjectNotAvailable)
was_blocked = (len(unready_ids) > 0)
# Try reconstructing any objects we haven't gotten yet. Try to get them
# until at least GET_TIMEOUT_MILLISECONDS milliseconds pass, then
# until at least get_timeout_milliseconds milliseconds pass, then
# repeat.
while len(unready_ids) > 0:
for unready_id in unready_ids:
@@ -447,12 +445,15 @@ class Worker(object):
# prolonged period of time in a single call.
object_ids_to_fetch = list(map(
plasma.ObjectID, unready_ids.keys()))
for i in range(0, len(object_ids_to_fetch), fetch_request_size):
for i in range(0, len(object_ids_to_fetch),
ray._config.worker_fetch_request_size()):
self.plasma_client.fetch(
object_ids_to_fetch[i:(i + fetch_request_size)])
object_ids_to_fetch[i:(
i + ray._config.worker_fetch_request_size())])
results = self.retrieve_and_deserialize(
object_ids_to_fetch,
max([GET_TIMEOUT_MILLISECONDS, int(0.01 * len(unready_ids))]))
max([ray._config.get_timeout_milliseconds(),
int(0.01 * len(unready_ids))]))
# Remove any entries for objects we received during this iteration
# so we don't retrieve the same object twice.
for i, val in enumerate(results):