mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 23:50:20 +08:00
Move all config constants into single file. (#1192)
* Initial pass at factoring out C++ configuration into a single file. * Expose config through Python. * Forward declarations. * Fixes with Python extensions. * Remove old code. * Consistent naming for constants. * Fixes. * Fix linting. * More linting. * Whitespace. * Rename config -> _config. * Move config inside a class. * Update naming convention. * Fix linting. * More linting. * More linting. * Add in some more constants. * Fix linting.
This commit is contained in:
committed by
Philipp Moritz
parent
a8032b9ca1
commit
1c6b30b5e2
@@ -40,6 +40,7 @@ except ImportError as e:
|
||||
e.args += (helpful_message,)
|
||||
raise
|
||||
|
||||
from ray.local_scheduler import _config # noqa: E402
|
||||
from ray.worker import (error_info, init, connect, disconnect,
|
||||
get, put, wait, remote, log_event, log_span,
|
||||
flush_log, get_gpu_ids, get_webui_url,
|
||||
@@ -59,7 +60,7 @@ __all__ = ["error_info", "init", "connect", "disconnect", "get", "put", "wait",
|
||||
"remote", "log_event", "log_span", "flush_log", "actor",
|
||||
"get_gpu_ids", "get_webui_url", "register_custom_serializer",
|
||||
"SCRIPT_MODE", "WORKER_MODE", "PYTHON_MODE", "SILENT_MODE",
|
||||
"global_state", "__version__"]
|
||||
"global_state", "_config", "__version__"]
|
||||
|
||||
import ctypes # noqa: E402
|
||||
# Windows only
|
||||
|
||||
@@ -4,8 +4,9 @@ from __future__ import print_function
|
||||
|
||||
from ray.core.src.local_scheduler.liblocal_scheduler_library import (
|
||||
Task, LocalSchedulerClient, ObjectID, check_simple_value, task_from_string,
|
||||
task_to_string)
|
||||
task_to_string, _config)
|
||||
from .local_scheduler_services import start_local_scheduler
|
||||
|
||||
__all__ = ["Task", "LocalSchedulerClient", "ObjectID", "check_simple_value",
|
||||
"task_from_string", "task_to_string", "start_local_scheduler"]
|
||||
"task_from_string", "task_to_string", "start_local_scheduler",
|
||||
"_config"]
|
||||
|
||||
@@ -22,8 +22,6 @@ from ray.worker import NIL_ACTOR_ID
|
||||
|
||||
# These variables must be kept in sync with the C codebase.
|
||||
# common/common.h
|
||||
HEARTBEAT_TIMEOUT_MILLISECONDS = 100
|
||||
NUM_HEARTBEATS_TIMEOUT = 100
|
||||
DB_CLIENT_ID_SIZE = 20
|
||||
NIL_ID = b"\xff" * DB_CLIENT_ID_SIZE
|
||||
|
||||
@@ -580,7 +578,7 @@ class Monitor(object):
|
||||
plasma_manager_ids = list(self.live_plasma_managers.keys())
|
||||
for plasma_manager_id in plasma_manager_ids:
|
||||
if ((self.live_plasma_managers[plasma_manager_id]) >=
|
||||
NUM_HEARTBEATS_TIMEOUT):
|
||||
ray._config.num_heartbeats_timeout()):
|
||||
log.warn("Timed out {}".format(PLASMA_MANAGER_CLIENT_TYPE))
|
||||
# Remove the plasma manager from the managers whose
|
||||
# heartbeats we're tracking.
|
||||
@@ -599,7 +597,7 @@ class Monitor(object):
|
||||
|
||||
# Wait for a heartbeat interval before processing the next round of
|
||||
# messages.
|
||||
time.sleep(HEARTBEAT_TIMEOUT_MILLISECONDS * 1e-3)
|
||||
time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
+15
-14
@@ -49,10 +49,6 @@ NIL_LOCAL_SCHEDULER_ID = NIL_ID
|
||||
NIL_FUNCTION_ID = NIL_ID
|
||||
NIL_ACTOR_ID = NIL_ID
|
||||
|
||||
# When performing ray.get, wait 1 second before attempting to reconstruct and
|
||||
# fetch the object again.
|
||||
GET_TIMEOUT_MILLISECONDS = 1000
|
||||
|
||||
# This must be kept in sync with the `error_types` array in
|
||||
# common/state/error_table.h.
|
||||
OBJECT_HASH_MISMATCH_ERROR_TYPE = b"object_hash_mismatch"
|
||||
@@ -372,10 +368,11 @@ class Worker(object):
|
||||
# long time, if the store is blocked, it can block the manager
|
||||
# as well as a consequence.
|
||||
results = []
|
||||
get_request_size = 10000
|
||||
for i in range(0, len(object_ids), get_request_size):
|
||||
for i in range(0, len(object_ids),
|
||||
ray._config.worker_get_request_size()):
|
||||
results += self.plasma_client.get(
|
||||
object_ids[i:(i + get_request_size)],
|
||||
object_ids[i:(i +
|
||||
ray._config.worker_get_request_size())],
|
||||
timeout,
|
||||
self.serialization_context)
|
||||
return results
|
||||
@@ -420,12 +417,13 @@ class Worker(object):
|
||||
# Do an initial fetch for remote objects. We divide the fetch into
|
||||
# smaller fetches so as to not block the manager for a prolonged period
|
||||
# of time in a single call.
|
||||
fetch_request_size = 10000
|
||||
plain_object_ids = [plasma.ObjectID(object_id.id())
|
||||
for object_id in object_ids]
|
||||
for i in range(0, len(object_ids), fetch_request_size):
|
||||
for i in range(0, len(object_ids),
|
||||
ray._config.worker_fetch_request_size()):
|
||||
self.plasma_client.fetch(
|
||||
plain_object_ids[i:(i + fetch_request_size)])
|
||||
plain_object_ids[i:(i +
|
||||
ray._config.worker_fetch_request_size())])
|
||||
|
||||
# Get the objects. We initially try to get the objects immediately.
|
||||
final_results = self.retrieve_and_deserialize(plain_object_ids, 0)
|
||||
@@ -436,7 +434,7 @@ class Worker(object):
|
||||
if val is plasma.ObjectNotAvailable)
|
||||
was_blocked = (len(unready_ids) > 0)
|
||||
# Try reconstructing any objects we haven't gotten yet. Try to get them
|
||||
# until at least GET_TIMEOUT_MILLISECONDS milliseconds passes, then
|
||||
# until at least get_timeout_milliseconds milliseconds passes, then
|
||||
# repeat.
|
||||
while len(unready_ids) > 0:
|
||||
for unready_id in unready_ids:
|
||||
@@ -447,12 +445,15 @@ class Worker(object):
|
||||
# prolonged period of time in a single call.
|
||||
object_ids_to_fetch = list(map(
|
||||
plasma.ObjectID, unready_ids.keys()))
|
||||
for i in range(0, len(object_ids_to_fetch), fetch_request_size):
|
||||
for i in range(0, len(object_ids_to_fetch),
|
||||
ray._config.worker_fetch_request_size()):
|
||||
self.plasma_client.fetch(
|
||||
object_ids_to_fetch[i:(i + fetch_request_size)])
|
||||
object_ids_to_fetch[i:(
|
||||
i + ray._config.worker_fetch_request_size())])
|
||||
results = self.retrieve_and_deserialize(
|
||||
object_ids_to_fetch,
|
||||
max([GET_TIMEOUT_MILLISECONDS, int(0.01 * len(unready_ids))]))
|
||||
max([ray._config.get_timeout_milliseconds(),
|
||||
int(0.01 * len(unready_ids))]))
|
||||
# Remove any entries for objects we received during this iteration
|
||||
# so we don't retrieve the same object twice.
|
||||
for i, val in enumerate(results):
|
||||
|
||||
Reference in New Issue
Block a user