[xray] Add error table and push error messages to driver through node manager. (#2256)

* Fix documentation indentation.

* Add error table to GCS and push error messages through node manager.

* Add type to error data.

* Linting

* Fix failure_test bug.

* Linting.

* Enable one more test.

* Attempt to fix doc building.

* Restructuring

* Fixes

* More fixes.

* Move current_time_ms function into util.h.
This commit is contained in:
Robert Nishihara
2018-06-20 21:29:28 -07:00
committed by Philipp Moritz
parent 6bf48f47bc
commit ff2217251f
27 changed files with 610 additions and 204 deletions
+62 -9
View File
@@ -7,9 +7,12 @@ import hashlib
import numpy as np
import os
import sys
import time
import uuid
import ray.gcs_utils
import ray.local_scheduler
import ray.ray_constants as ray_constants
ERROR_KEY_PREFIX = b"Error:"
DRIVER_ID_LENGTH = 20
@@ -45,7 +48,7 @@ def format_error_message(exception_message, task_exception=False):
return "\n".join(lines)
def push_error_to_driver(redis_client,
def push_error_to_driver(worker,
error_type,
message,
driver_id=None,
@@ -53,7 +56,7 @@ def push_error_to_driver(redis_client,
"""Push an error message to the driver to be printed in the background.
Args:
redis_client: The redis client to use.
worker: The worker to use.
error_type (str): The type of the error.
message (str): The message that will be printed in the background
on the driver.
@@ -63,15 +66,65 @@ def push_error_to_driver(redis_client,
will be serialized with json and stored in Redis.
"""
if driver_id is None:
driver_id = DRIVER_ID_LENGTH * b"\x00"
driver_id = ray_constants.NIL_JOB_ID.id()
error_key = ERROR_KEY_PREFIX + driver_id + b":" + _random_string()
data = {} if data is None else data
redis_client.hmset(error_key, {
"type": error_type,
"message": message,
"data": data
})
redis_client.rpush("ErrorKeys", error_key)
if not worker.use_raylet:
worker.redis_client.hmset(error_key, {
"type": error_type,
"message": message,
"data": data
})
worker.redis_client.rpush("ErrorKeys", error_key)
else:
worker.local_scheduler_client.push_error(
ray.ObjectID(driver_id), error_type, message, time.time())
def push_error_to_driver_through_redis(redis_client,
                                       use_raylet,
                                       error_type,
                                       message,
                                       driver_id=None,
                                       data=None):
    """Publish an error for a driver using only a Redis client.

    This is the fallback for push_error_to_driver. It is meant for situations
    in which no local scheduler client exists yet, e.g., the error is raised
    in Python before the driver or worker has connected to the backend
    processes.

    Args:
        redis_client: The redis client used to issue the commands.
        use_raylet: Truthy to take the Raylet code path (append to the error
            table), falsy to take the legacy path (write a Redis hash and
            push its key onto the "ErrorKeys" list).
        error_type (str): The type of the error.
        message (str): The message that will be printed in the background
            on the driver.
        driver_id: The ID of the driver to push the error message to. If this
            is None, the nil job ID is used instead, which is intended to
            address all drivers.
        data: A dictionary mapping strings to strings. It is only stored on
            the legacy path (under the "data" field of the hash); the Raylet
            path does not forward it.
    """
    if driver_id is None:
        driver_id = ray_constants.NIL_JOB_ID.id()
    # The key is built before branching, although only the legacy path
    # consumes it.
    error_key = ERROR_KEY_PREFIX + driver_id + b":" + _random_string()
    if data is None:
        data = {}

    if use_raylet:
        # Do everything in Python and through the Python Redis client instead
        # of through the raylet.
        error_data = ray.gcs_utils.construct_error_message(
            error_type, message, time.time())
        redis_client.execute_command(
            "RAY.TABLE_APPEND",
            ray.gcs_utils.TablePrefix.ERROR_INFO,
            ray.gcs_utils.TablePubsub.ERROR_INFO,
            driver_id,
            error_data)
        return

    # Legacy path: store the error as a hash, then register its key.
    error_fields = {
        "type": error_type,
        "message": message,
        "data": data
    }
    redis_client.hmset(error_key, error_fields)
    redis_client.rpush("ErrorKeys", error_key)
def is_cython(obj):