mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 17:58:35 +08:00
[xray] Add error table and push error messages to driver through node manager. (#2256)
* Fix documentation indentation. * Add error table to GCS and push error messages through node manager. * Add type to error data. * Linting * Fix failure_test bug. * Linting. * Enable one more test. * Attempt to fix doc building. * Restructuring * Fixes * More fixes. * Move current_time_ms function into util.h.
This commit is contained in:
committed by
Philipp Moritz
parent
6bf48f47bc
commit
ff2217251f
+62
-9
@@ -7,9 +7,12 @@ import hashlib
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import ray.gcs_utils
|
||||
import ray.local_scheduler
|
||||
import ray.ray_constants as ray_constants
|
||||
|
||||
ERROR_KEY_PREFIX = b"Error:"
|
||||
DRIVER_ID_LENGTH = 20
|
||||
@@ -45,7 +48,7 @@ def format_error_message(exception_message, task_exception=False):
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def push_error_to_driver(redis_client,
|
||||
def push_error_to_driver(worker,
|
||||
error_type,
|
||||
message,
|
||||
driver_id=None,
|
||||
@@ -53,7 +56,7 @@ def push_error_to_driver(redis_client,
|
||||
"""Push an error message to the driver to be printed in the background.
|
||||
|
||||
Args:
|
||||
redis_client: The redis client to use.
|
||||
worker: The worker to use.
|
||||
error_type (str): The type of the error.
|
||||
message (str): The message that will be printed in the background
|
||||
on the driver.
|
||||
@@ -63,15 +66,65 @@ def push_error_to_driver(redis_client,
|
||||
will be serialized with json and stored in Redis.
|
||||
"""
|
||||
if driver_id is None:
|
||||
driver_id = DRIVER_ID_LENGTH * b"\x00"
|
||||
driver_id = ray_constants.NIL_JOB_ID.id()
|
||||
error_key = ERROR_KEY_PREFIX + driver_id + b":" + _random_string()
|
||||
data = {} if data is None else data
|
||||
redis_client.hmset(error_key, {
|
||||
"type": error_type,
|
||||
"message": message,
|
||||
"data": data
|
||||
})
|
||||
redis_client.rpush("ErrorKeys", error_key)
|
||||
if not worker.use_raylet:
|
||||
worker.redis_client.hmset(error_key, {
|
||||
"type": error_type,
|
||||
"message": message,
|
||||
"data": data
|
||||
})
|
||||
worker.redis_client.rpush("ErrorKeys", error_key)
|
||||
else:
|
||||
worker.local_scheduler_client.push_error(
|
||||
ray.ObjectID(driver_id), error_type, message, time.time())
|
||||
|
||||
|
||||
def push_error_to_driver_through_redis(redis_client,
|
||||
use_raylet,
|
||||
error_type,
|
||||
message,
|
||||
driver_id=None,
|
||||
data=None):
|
||||
"""Push an error message to the driver to be printed in the background.
|
||||
|
||||
Normally the push_error_to_driver function should be used. However, in some
|
||||
instances, the local scheduler client is not available, e.g., because the
|
||||
error happens in Python before the driver or worker has connected to the
|
||||
backend processes.
|
||||
|
||||
Args:
|
||||
redis_client: The redis client to use.
|
||||
use_raylet: True if we are using the Raylet code path and false
|
||||
otherwise.
|
||||
error_type (str): The type of the error.
|
||||
message (str): The message that will be printed in the background
|
||||
on the driver.
|
||||
driver_id: The ID of the driver to push the error message to. If this
|
||||
is None, then the message will be pushed to all drivers.
|
||||
data: This should be a dictionary mapping strings to strings. It
|
||||
will be serialized with json and stored in Redis.
|
||||
"""
|
||||
if driver_id is None:
|
||||
driver_id = ray_constants.NIL_JOB_ID.id()
|
||||
error_key = ERROR_KEY_PREFIX + driver_id + b":" + _random_string()
|
||||
data = {} if data is None else data
|
||||
if not use_raylet:
|
||||
redis_client.hmset(error_key, {
|
||||
"type": error_type,
|
||||
"message": message,
|
||||
"data": data
|
||||
})
|
||||
redis_client.rpush("ErrorKeys", error_key)
|
||||
else:
|
||||
# Do everything in Python and through the Python Redis client instead
|
||||
# of through the raylet.
|
||||
error_data = ray.gcs_utils.construct_error_message(
|
||||
error_type, message, time.time())
|
||||
redis_client.execute_command(
|
||||
"RAY.TABLE_APPEND", ray.gcs_utils.TablePrefix.ERROR_INFO,
|
||||
ray.gcs_utils.TablePubsub.ERROR_INFO, driver_id, error_data)
|
||||
|
||||
|
||||
def is_cython(obj):
|
||||
|
||||
Reference in New Issue
Block a user