mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 03:10:54 +08:00
ff2217251f
* Fix documentation indentation. * Add error table to GCS and push error messages through node manager. * Add type to error data. * Linting * Fix failure_test bug. * Linting. * Enable one more test. * Attempt to fix doc building. * Restructuring * Fixes * More fixes. * Move current_time_ms function into util.h.
81 lines
2.6 KiB
Python
81 lines
2.6 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
import traceback
|
|
|
|
import ray
|
|
import ray.actor
|
|
|
|
# Command-line interface of the worker process. The node IP address, the
# Redis address and the object store name are mandatory; the object store
# manager, local scheduler and raylet names are optional and default to None
# when they are not supplied.
parser = argparse.ArgumentParser(
    description=("Parse addresses for the worker "
                 "to connect to."))
parser.add_argument(
    "--node-ip-address",
    required=True,
    type=str,
    help="the ip address of the worker's node")
parser.add_argument(
    "--redis-address",
    required=True,
    type=str,
    help="the address to use for Redis")
parser.add_argument(
    "--object-store-name",
    required=True,
    type=str,
    help="the object store's name")
parser.add_argument(
    "--object-store-manager-name",
    required=False,
    type=str,
    help="the object store manager's name")
parser.add_argument(
    "--local-scheduler-name",
    required=False,
    type=str,
    help="the local scheduler's name")
parser.add_argument(
    "--raylet-name", required=False, type=str, help="the raylet's name")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, connect this process to
    # the cluster as a worker, then run the worker's main loop.
    args = parser.parse_args()

    # Connection details passed to ray.worker.connect. The last three
    # entries are None when the corresponding optional flag was not given.
    info = {
        "node_ip_address": args.node_ip_address,
        "redis_address": args.redis_address,
        "store_socket_name": args.object_store_name,
        "manager_socket_name": args.object_store_manager_name,
        "local_scheduler_socket_name": args.local_scheduler_name,
        "raylet_socket_name": args.raylet_name
    }

    # use_raylet is enabled exactly when --raylet-name was supplied.
    ray.worker.connect(
        info, mode=ray.WORKER_MODE, use_raylet=(args.raylet_name is not None))

    # Appended to the traceback that is pushed to the driver below.
    error_explanation = """
This error is unexpected and should not have happened. Somehow a worker
crashed in an unanticipated way causing the main_loop to throw an exception,
which is being caught in "python/ray/workers/default_worker.py".
"""

    try:
        # This call to main_loop should never return if things are working.
        # Most exceptions that are thrown (e.g., inside the execution of a
        # task) should be caught and handled inside of the call to
        # main_loop. If an exception is thrown here, then that means that
        # there is some error that we didn't anticipate.
        ray.worker.global_worker.main_loop()
    except Exception:
        # The exception object itself is not needed: format_exc() reads the
        # exception currently being handled.
        traceback_str = traceback.format_exc() + error_explanation
        ray.utils.push_error_to_driver(
            ray.worker.global_worker,
            "worker_crash",
            traceback_str,
            driver_id=None)
        # TODO(rkn): Note that if the worker was in the middle of executing
        # a task, then any worker or driver that is blocking in a get call
        # and waiting for the output of that task will hang. We need to
        # address this.