Move all config constants into single file. (#1192)

* Initial pass at factoring out C++ configuration into a single file. * Expose config through Python. * Forward declarations. * Fixes with Python extensions. * Remove old code. * Consistent naming for constants. * Fixes. * Fix linting. * More linting. * Whitespace. * Rename config -> _config. * Move config inside a class. * Update naming convention. * Fix linting. * More linting. * More linting. * Add in some more constants. * Fix linting.
2026-07-03 23:50:20 +08:00 · 2017-11-08 11:10:38 -08:00
parent a8032b9ca1
commit 1c6b30b5e2
27 changed files with 659 additions and 178 deletions
@@ -40,6 +40,7 @@ except ImportError as e:
            e.args += (helpful_message,)
    raise

+from ray.local_scheduler import _config  # noqa: E402
 from ray.worker import (error_info, init, connect, disconnect,
                        get, put, wait, remote, log_event, log_span,
                        flush_log, get_gpu_ids, get_webui_url,
@@ -59,7 +60,7 @@ __all__ = ["error_info", "init", "connect", "disconnect", "get", "put", "wait",
           "remote", "log_event", "log_span", "flush_log", "actor",
           "get_gpu_ids", "get_webui_url", "register_custom_serializer",
           "SCRIPT_MODE", "WORKER_MODE", "PYTHON_MODE", "SILENT_MODE",
-           "global_state", "__version__"]
+           "global_state", "_config", "__version__"]

 import ctypes  # noqa: E402
 # Windows only
@@ -4,8 +4,9 @@ from __future__ import print_function

 from ray.core.src.local_scheduler.liblocal_scheduler_library import (
    Task, LocalSchedulerClient, ObjectID, check_simple_value, task_from_string,
-    task_to_string)
+    task_to_string, _config)
 from .local_scheduler_services import start_local_scheduler

 __all__ = ["Task", "LocalSchedulerClient", "ObjectID", "check_simple_value",
-           "task_from_string", "task_to_string", "start_local_scheduler"]
+           "task_from_string", "task_to_string", "start_local_scheduler",
+           "_config"]
@@ -22,8 +22,6 @@ from ray.worker import NIL_ACTOR_ID

 # These variables must be kept in sync with the C codebase.
 # common/common.h
-HEARTBEAT_TIMEOUT_MILLISECONDS = 100
-NUM_HEARTBEATS_TIMEOUT = 100
 DB_CLIENT_ID_SIZE = 20
 NIL_ID = b"\xff" * DB_CLIENT_ID_SIZE

@@ -580,7 +578,7 @@ class Monitor(object):
            plasma_manager_ids = list(self.live_plasma_managers.keys())
            for plasma_manager_id in plasma_manager_ids:
                if ((self.live_plasma_managers[plasma_manager_id]) >=
-                        NUM_HEARTBEATS_TIMEOUT):
+                        ray._config.num_heartbeats_timeout()):
                    log.warn("Timed out {}".format(PLASMA_MANAGER_CLIENT_TYPE))
                    # Remove the plasma manager from the managers whose
                    # heartbeats we're tracking.
@@ -599,7 +597,7 @@ class Monitor(object):

            # Wait for a heartbeat interval before processing the next round of
            # messages.
-            time.sleep(HEARTBEAT_TIMEOUT_MILLISECONDS * 1e-3)
+            time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)


 if __name__ == "__main__":
@@ -49,10 +49,6 @@ NIL_LOCAL_SCHEDULER_ID = NIL_ID
 NIL_FUNCTION_ID = NIL_ID
 NIL_ACTOR_ID = NIL_ID

-# When performing ray.get, wait 1 second before attemping to reconstruct and
-# fetch the object again.
-GET_TIMEOUT_MILLISECONDS = 1000
-
 # This must be kept in sync with the `error_types` array in
 # common/state/error_table.h.
 OBJECT_HASH_MISMATCH_ERROR_TYPE = b"object_hash_mismatch"
@@ -372,10 +368,11 @@ class Worker(object):
                # long time, if the store is blocked, it can block the manager
                # as well as a consequence.
                results = []
-                get_request_size = 10000
-                for i in range(0, len(object_ids), get_request_size):
+                for i in range(0, len(object_ids),
+                               ray._config.worker_get_request_size()):
                    results += self.plasma_client.get(
-                        object_ids[i:(i + get_request_size)],
+                        object_ids[i:(i +
+                                      ray._config.worker_get_request_size())],
                        timeout,
                        self.serialization_context)
                return results
@@ -420,12 +417,13 @@ class Worker(object):
        # Do an initial fetch for remote objects. We divide the fetch into
        # smaller fetches so as to not block the manager for a prolonged period
        # of time in a single call.
-        fetch_request_size = 10000
        plain_object_ids = [plasma.ObjectID(object_id.id())
                            for object_id in object_ids]
-        for i in range(0, len(object_ids), fetch_request_size):
+        for i in range(0, len(object_ids),
+                       ray._config.worker_fetch_request_size()):
            self.plasma_client.fetch(
-                plain_object_ids[i:(i + fetch_request_size)])
+                plain_object_ids[i:(i +
+                                    ray._config.worker_fetch_request_size())])

        # Get the objects. We initially try to get the objects immediately.
        final_results = self.retrieve_and_deserialize(plain_object_ids, 0)
@@ -436,7 +434,7 @@ class Worker(object):
                           if val is plasma.ObjectNotAvailable)
        was_blocked = (len(unready_ids) > 0)
        # Try reconstructing any objects we haven't gotten yet. Try to get them
-        # until at least GET_TIMEOUT_MILLISECONDS milliseconds passes, then
+        # until at least get_timeout_milliseconds() milliseconds pass, then
        # repeat.
        while len(unready_ids) > 0:
            for unready_id in unready_ids:
@@ -447,12 +445,15 @@ class Worker(object):
            # prolonged period of time in a single call.
            object_ids_to_fetch = list(map(
                plasma.ObjectID, unready_ids.keys()))
-            for i in range(0, len(object_ids_to_fetch), fetch_request_size):
+            for i in range(0, len(object_ids_to_fetch),
+                           ray._config.worker_fetch_request_size()):
                self.plasma_client.fetch(
-                    object_ids_to_fetch[i:(i + fetch_request_size)])
+                    object_ids_to_fetch[i:(
+                        i + ray._config.worker_fetch_request_size())])
            results = self.retrieve_and_deserialize(
                object_ids_to_fetch,
-                max([GET_TIMEOUT_MILLISECONDS, int(0.01 * len(unready_ids))]))
+                max([ray._config.get_timeout_milliseconds(),
+                     int(0.01 * len(unready_ids))]))
            # Remove any entries for objects we received during this iteration
            # so we don't retrieve the same object twice.
            for i, val in enumerate(results):