From 0fcceef772a3126e027b0817988022ebda1a506e Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Wed, 28 Feb 2018 15:13:00 -0800 Subject: [PATCH] Update logging and check macros. (#1627) * Update logging and check macros. * Fix linting. * Fix RAY_DCHECK and unused variable. * Fix linting --- src/common/common.h | 64 +--- src/common/common_protocol.cc | 2 +- src/common/io.cc | 63 ++-- src/common/lib/python/common_extension.cc | 14 +- src/common/redis_module/ray_redis_module.cc | 16 +- src/common/state/db_client_table.cc | 11 +- src/common/state/error_table.cc | 4 +- src/common/state/object_table.cc | 12 +- src/common/state/redis.cc | 275 +++++++++--------- src/common/state/redis.h | 10 +- src/common/state/table.cc | 34 +-- src/common/task.cc | 48 +-- src/common/test/db_tests.cc | 18 +- src/common/test/object_table_tests.cc | 73 ++--- src/common/test/redis_tests.cc | 12 +- src/common/test/task_table_tests.cc | 36 +-- src/common/test/test_common.h | 3 +- src/global_scheduler/global_scheduler.cc | 79 +++-- .../global_scheduler_algorithm.cc | 52 ++-- src/local_scheduler/local_scheduler.cc | 216 +++++++------- .../local_scheduler_algorithm.cc | 186 ++++++------ src/local_scheduler/local_scheduler_client.cc | 16 +- .../test/local_scheduler_tests.cc | 4 +- src/plasma/plasma_manager.cc | 166 +++++------ src/plasma/plasma_protocol.cc | 54 ++-- src/plasma/test/manager_tests.cc | 6 +- src/ray/id.cc | 5 + src/ray/id.h | 2 + src/ray/util/logging.h | 14 + 29 files changed, 721 insertions(+), 774 deletions(-) diff --git a/src/common/common.h b/src/common/common.h index 0ea258113..f95bfcca5 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -25,6 +25,7 @@ extern "C" { #include "arrow/util/macros.h" #include "plasma/common.h" #include "ray/id.h" +#include "ray/util/logging.h" #include "state/ray_config.h" @@ -44,69 +45,6 @@ extern "C" { #define RAY_COMMON_LOG_LEVEL RAY_COMMON_INFO #endif -/** - * Macros to enable each level of Ray logging statements depending on the - * current logging level. */ -#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_DEBUG) -#define LOG_DEBUG(M, ...) -#else -#define LOG_DEBUG(M, ...) \ - fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) -#endif - -#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_INFO) -#define LOG_INFO(M, ...) -#else -#define LOG_INFO(M, ...) \ - fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) -#endif - -#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_WARNING) -#define LOG_WARN(M, ...) -#else -#define LOG_WARN(M, ...) \ - fprintf(stderr, "[WARN] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) -#endif - -#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_ERROR) -#define LOG_ERROR(M, ...) -#else -#define LOG_ERROR(M, ...) \ - fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \ - errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__) -#endif - -#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_FATAL) -#define LOG_FATAL(M, ...) -#elif defined(_EXECINFO_H) || !defined(_WIN32) -#define LOG_FATAL(M, ...) \ - do { \ - fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \ - errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \ - void *buffer[255]; \ - const int calls = backtrace(buffer, sizeof(buffer) / sizeof(void *)); \ - backtrace_symbols_fd(buffer, calls, 1); \ - abort(); \ - } while (0) -#else -#define LOG_FATAL(M, ...) \ - do { \ - fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \ - errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \ - exit(-1); \ - } while (0) -#endif - -/** Assertion definitions, with optional logging. */ -#define CHECKM(COND, M, ...) \ - if (!(COND)) { \ - LOG_FATAL("Check failure: %s \n" M, #COND, ##__VA_ARGS__); \ - } - -#define CHECK(COND) CHECKM(COND, "") - -#define RAY_DCHECK(COND) CHECK(COND) - /* These are exit codes for common errors that can occur in Ray components. */ #define EXIT_COULD_NOT_BIND_PORT -2 diff --git a/src/common/common_protocol.cc b/src/common/common_protocol.cc index 49716bbb0..05f5c2a2f 100644 --- a/src/common/common_protocol.cc +++ b/src/common/common_protocol.cc @@ -9,7 +9,7 @@ flatbuffers::Offset to_flatbuf( ray::ObjectID from_flatbuf(const flatbuffers::String &string) { ray::ObjectID object_id; - CHECK(string.size() == sizeof(ray::ObjectID)); + RAY_CHECK(string.size() == sizeof(ray::ObjectID)); memcpy(object_id.mutable_data(), string.data(), sizeof(ray::ObjectID)); return object_id; } diff --git a/src/common/io.cc b/src/common/io.cc index f39e4c3fd..4a5970d8c 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -24,7 +24,7 @@ int bind_inet_sock(const int port, bool shall_listen) { struct sockaddr_in name; int socket_fd = socket(PF_INET, SOCK_STREAM, 0); if (socket_fd < 0) { - LOG_ERROR("socket() failed for port %d.", port); + RAY_LOG(ERROR) << "socket() failed for port " << port; return -1; } name.sin_family = AF_INET; @@ -33,23 +33,23 @@ int bind_inet_sock(const int port, bool shall_listen) { int on = 1; /* TODO(pcm): http://stackoverflow.com/q/1150635 */ if (ioctl(socket_fd, FIONBIO, (char *) &on) < 0) { - LOG_ERROR("ioctl failed"); + RAY_LOG(ERROR) << "ioctl failed"; close(socket_fd); return -1; } int *const pon = (int *const) & on; if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, pon, sizeof(on)) < 0) { - LOG_ERROR("setsockopt failed for port %d", port); + RAY_LOG(ERROR) << "setsockopt failed for port " << port; close(socket_fd); return -1; } if (bind(socket_fd, (struct sockaddr *) &name, sizeof(name)) < 0) { - LOG_ERROR("Bind failed for port %d", port); + RAY_LOG(ERROR) << "Bind failed for port " << port; close(socket_fd); return -1; } if (shall_listen && listen(socket_fd, 128) == -1) { - LOG_ERROR("Could not listen to socket %d", port); + RAY_LOG(ERROR) << "Could not listen to socket " << port; close(socket_fd); return -1; } @@ -60,14 +60,14 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) { struct sockaddr_un socket_address; int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0); if (socket_fd < 0) { - LOG_ERROR("socket() failed for pathname %s.", socket_pathname); + RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname; return -1; } /* Tell the system to allow the port to be reused. */ int on = 1; if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)) < 0) { - LOG_ERROR("setsockopt failed for pathname %s", socket_pathname); + RAY_LOG(ERROR) << "setsockopt failed for pathname " << socket_pathname; close(socket_fd); return -1; } @@ -76,7 +76,7 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) { memset(&socket_address, 0, sizeof(socket_address)); socket_address.sun_family = AF_UNIX; if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) { - LOG_ERROR("Socket pathname is too long."); + RAY_LOG(ERROR) << "Socket pathname is too long."; close(socket_fd); return -1; } @@ -85,12 +85,12 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) { if (bind(socket_fd, (struct sockaddr *) &socket_address, sizeof(socket_address)) != 0) { - LOG_ERROR("Bind failed for pathname %s.", socket_pathname); + RAY_LOG(ERROR) << "Bind failed for pathname " << socket_pathname; close(socket_fd); return -1; } if (shall_listen && listen(socket_fd, 128) == -1) { - LOG_ERROR("Could not listen to socket %s", socket_pathname); + RAY_LOG(ERROR) << "Could not listen to socket " << socket_pathname; close(socket_fd); return -1; } @@ -108,7 +108,7 @@ int connect_ipc_sock_retry(const char *socket_pathname, timeout = RayConfig::instance().connect_timeout_milliseconds(); } - CHECK(socket_pathname); + RAY_CHECK(socket_pathname); int fd = -1; for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) { fd = connect_ipc_sock(socket_pathname); @@ -116,15 +116,15 @@ int connect_ipc_sock_retry(const char *socket_pathname, break; } if (num_attempts == 0) { - LOG_ERROR("Connection to socket failed for pathname %s.", - socket_pathname); + RAY_LOG(ERROR) << "Connection to socket failed for pathname " + << socket_pathname; } /* Sleep for timeout milliseconds. */ usleep(timeout * 1000); } /* If we could not connect to the socket, exit. */ if (fd == -1) { - LOG_FATAL("Could not connect to socket %s", socket_pathname); + RAY_LOG(FATAL) << "Could not connect to socket " << socket_pathname; } return fd; } @@ -135,14 +135,14 @@ int connect_ipc_sock(const char *socket_pathname) { socket_fd = socket(AF_UNIX, SOCK_STREAM, 0); if (socket_fd < 0) { - LOG_ERROR("socket() failed for pathname %s.", socket_pathname); + RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname; return -1; } memset(&socket_address, 0, sizeof(socket_address)); socket_address.sun_family = AF_UNIX; if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) { - LOG_ERROR("Socket pathname is too long."); + RAY_LOG(ERROR) << "Socket pathname is too long."; return -1; } strncpy(socket_address.sun_path, socket_pathname, @@ -169,7 +169,7 @@ int connect_inet_sock_retry(const char *ip_addr, timeout = RayConfig::instance().connect_timeout_milliseconds(); } - CHECK(ip_addr); + RAY_CHECK(ip_addr); int fd = -1; for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) { fd = connect_inet_sock(ip_addr, port); @@ -177,15 +177,15 @@ int connect_inet_sock_retry(const char *ip_addr, break; } if (num_attempts == 0) { - LOG_ERROR("Connection to socket failed for address %s:%d.", ip_addr, - port); + RAY_LOG(ERROR) << "Connection to socket failed for address " << ip_addr + << ":" << port; } /* Sleep for timeout milliseconds. */ usleep(timeout * 1000); } /* If we could not connect to the socket, exit. */ if (fd == -1) { - LOG_FATAL("Could not connect to address %s:%d", ip_addr, port); + RAY_LOG(FATAL) << "Could not connect to address " << ip_addr << ":" << port; } return fd; } @@ -193,13 +193,14 @@ int connect_inet_sock_retry(const char *ip_addr, int connect_inet_sock(const char *ip_addr, int port) { int fd = socket(PF_INET, SOCK_STREAM, 0); if (fd < 0) { - LOG_ERROR("socket() failed for address %s:%d.", ip_addr, port); + RAY_LOG(ERROR) << "socket() failed for address " << ip_addr << ":" << port; return -1; } struct hostent *manager = gethostbyname(ip_addr); /* TODO(pcm): cache this */ if (!manager) { - LOG_ERROR("Failed to get hostname from address %s:%d.", ip_addr, port); + RAY_LOG(ERROR) << "Failed to get hostname from address " << ip_addr << ":" + << port; close(fd); return -1; } @@ -219,7 +220,7 @@ int connect_inet_sock(const char *ip_addr, int port) { int accept_client(int socket_fd) { int client_fd = accept(socket_fd, NULL, NULL); if (client_fd < 0) { - LOG_ERROR("Error reading from socket."); + RAY_LOG(ERROR) << "Error reading from socket."; return -1; } return client_fd; @@ -242,7 +243,7 @@ int write_bytes(int fd, uint8_t *cursor, size_t length) { /* Encountered early EOF. */ return -1; } - CHECK(nbytes > 0); + RAY_CHECK(nbytes > 0); bytesleft -= nbytes; offset += nbytes; } @@ -288,7 +289,7 @@ int read_bytes(int fd, uint8_t *cursor, size_t length) { /* Encountered early EOF. */ return -1; } - CHECK(nbytes > 0); + RAY_CHECK(nbytes > 0); bytesleft -= nbytes; offset += nbytes; } @@ -302,7 +303,7 @@ void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) { if (closed) { goto disconnected; } - CHECK(version == RayConfig::instance().ray_protocol_version()); + RAY_CHECK(version == RayConfig::instance().ray_protocol_version()); closed = read_bytes(fd, (uint8_t *) type, sizeof(*type)); if (closed) { goto disconnected; @@ -332,7 +333,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) { int error = read_bytes(sock, (uint8_t *) &size, sizeof(int64_t)); if (error < 0) { /* The other side has closed the socket. */ - LOG_DEBUG("Socket has been closed, or some other error has occurred."); + RAY_LOG(DEBUG) << "Socket has been closed, or some other error has " + << "occurred."; if (loop != NULL) { event_loop_remove_file(loop, sock); } @@ -343,7 +345,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) { error = read_bytes(sock, message, size); if (error < 0) { /* The other side has closed the socket. */ - LOG_DEBUG("Socket has been closed, or some other error has occurred."); + RAY_LOG(DEBUG) << "Socket has been closed, or some other error has " + << "occurred."; if (loop != NULL) { event_loop_remove_file(loop, sock); } @@ -359,7 +362,7 @@ int64_t read_vector(int fd, int64_t *type, std::vector &buffer) { if (closed) { goto disconnected; } - CHECK(version == RayConfig::instance().ray_protocol_version()); + RAY_CHECK(version == RayConfig::instance().ray_protocol_version()); int64_t length; closed = read_bytes(fd, (uint8_t *) type, sizeof(*type)); if (closed) { @@ -393,6 +396,6 @@ char *read_log_message(int fd) { int64_t type; int64_t length; read_message(fd, &type, &length, &bytes); - CHECK(type == LOG_MESSAGE); + RAY_CHECK(type == LOG_MESSAGE); return (char *) bytes; } diff --git a/src/common/lib/python/common_extension.cc b/src/common/lib/python/common_extension.cc index a93a7ab48..d5f92b695 100644 --- a/src/common/lib/python/common_extension.cc +++ b/src/common/lib/python/common_extension.cc @@ -28,14 +28,14 @@ void init_pickle_module(void) { #else pickle_module = PyImport_ImportModuleNoBlock("cPickle"); #endif - CHECK(pickle_module != NULL); - CHECK(PyObject_HasAttrString(pickle_module, "loads")); - CHECK(PyObject_HasAttrString(pickle_module, "dumps")); - CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL")); + RAY_CHECK(pickle_module != NULL); + RAY_CHECK(PyObject_HasAttrString(pickle_module, "loads")); + RAY_CHECK(PyObject_HasAttrString(pickle_module, "dumps")); + RAY_CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL")); pickle_loads = PyUnicode_FromString("loads"); pickle_dumps = PyUnicode_FromString("dumps"); pickle_protocol = PyObject_GetAttrString(pickle_module, "HIGHEST_PROTOCOL"); - CHECK(pickle_protocol != NULL); + RAY_CHECK(pickle_protocol != NULL); } TaskBuilder *g_task_builder = NULL; @@ -449,8 +449,8 @@ static PyObject *PyTask_arguments(PyObject *self) { assert(count == 1); PyList_SetItem(arg_list, i, PyObjectID_make(TaskSpec_arg_id(task, i, 0))); } else { - CHECK(pickle_module != NULL); - CHECK(pickle_loads != NULL); + RAY_CHECK(pickle_module != NULL); + RAY_CHECK(pickle_loads != NULL); PyObject *str = PyBytes_FromStringAndSize((char *) TaskSpec_arg_val(task, i), (Py_ssize_t) TaskSpec_arg_length(task, i)); diff --git a/src/common/redis_module/ray_redis_module.cc b/src/common/redis_module/ray_redis_module.cc index 513072631..e65f6256c 100644 --- a/src/common/redis_module/ray_redis_module.cc +++ b/src/common/redis_module/ray_redis_module.cc @@ -441,7 +441,8 @@ int TableAdd_RedisCommand(RedisModuleCtx *ctx, /* See how many clients received this publish. */ long long num_clients = RedisModule_CallReplyInteger(reply); - CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients); + RAY_CHECK(num_clients <= 1) << "Published to " << num_clients + << " clients."; RedisModule_FreeString(ctx, publish_message); RedisModule_FreeString(ctx, publish_topic); @@ -473,7 +474,7 @@ int TableLookup_RedisCommand(RedisModuleCtx *ctx, } bool is_nil(const std::string &data) { - CHECK(data.size() == kUniqueIDSize); + RAY_CHECK(data.size() == kUniqueIDSize); const uint8_t *d = reinterpret_cast(data.data()); for (int i = 0; i < kUniqueIDSize; ++i) { if (d[i] != 255) { @@ -518,9 +519,9 @@ int TableTestAndUpdate_RedisCommand(RedisModuleCtx *ctx, } if (do_update) { - CHECK(data->mutate_scheduling_state(update->update_state())); + RAY_CHECK(data->mutate_scheduling_state(update->update_state())); } - CHECK(data->mutate_updated(do_update)); + RAY_CHECK(data->mutate_updated(do_update)); int result = RedisModule_ReplyWithStringBuffer(ctx, value_buf, value_len); @@ -978,8 +979,8 @@ int ResultTableLookup_RedisCommand(RedisModuleCtx *ctx, data_size_value = -1; } else { RedisModule_StringToLongLong(data_size, &data_size_value); - CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) == - REDISMODULE_OK); + RAY_CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) == + REDISMODULE_OK); } flatbuffers::Offset hash_str; @@ -1091,7 +1092,8 @@ int TaskTableWrite(RedisModuleCtx *ctx, /* See how many clients received this publish. */ long long num_clients = RedisModule_CallReplyInteger(reply); - CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients); + RAY_CHECK(num_clients <= 1) << "Published to " << num_clients + << " clients."; RedisModule_FreeString(ctx, publish_message); RedisModule_FreeString(ctx, publish_topic); diff --git a/src/common/state/db_client_table.cc b/src/common/state/db_client_table.cc index ac9a809b9..0d53ad2a6 100644 --- a/src/common/state/db_client_table.cc +++ b/src/common/state/db_client_table.cc @@ -44,16 +44,15 @@ const std::vector db_client_table_get_ip_addresses( for (auto const &manager_id : manager_ids) { DBClient client = redis_cache_get_db_client(db_handle, manager_id); - CHECK(!client.manager_address.empty()); + RAY_CHECK(!client.manager_address.empty()); manager_vector.push_back(client.manager_address); } int64_t end_time = current_time_ms(); if (end_time - start_time > RayConfig::instance().max_time_for_loop()) { - LOG_WARN( - "calling redis_get_cached_db_client in a loop in with %zu manager IDs " - "took %" PRId64 " milliseconds.", - manager_ids.size(), end_time - start_time); + RAY_LOG(WARNING) << "calling redis_get_cached_db_client in a loop in with " + << manager_ids.size() << " manager IDs took " + << end_time - start_time << " milliseconds."; } return manager_vector; @@ -71,7 +70,7 @@ void db_client_table_cache_init(DBHandle *db_handle) { } DBClient db_client_table_cache_get(DBHandle *db_handle, DBClientID client_id) { - CHECK(!client_id.is_nil()); + RAY_CHECK(!client_id.is_nil()); return redis_cache_get_db_client(db_handle, client_id); } diff --git a/src/common/state/error_table.cc b/src/common/state/error_table.cc index 582c84884..98e85e340 100644 --- a/src/common/state/error_table.cc +++ b/src/common/state/error_table.cc @@ -14,7 +14,7 @@ void push_error(DBHandle *db_handle, int error_index, size_t data_length, const unsigned char *data) { - CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX); + RAY_CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX); /* Allocate a struct to hold the error information. */ ErrorInfo *info = (ErrorInfo *) malloc(sizeof(ErrorInfo) + data_length); info->driver_id = driver_id; @@ -22,7 +22,7 @@ void push_error(DBHandle *db_handle, info->data_length = data_length; memcpy(info->data, data, data_length); /* Generate a random key to identify this error message. */ - CHECK(sizeof(info->error_key) >= sizeof(UniqueID)); + RAY_CHECK(sizeof(info->error_key) >= sizeof(UniqueID)); UniqueID error_key = UniqueID::from_random(); memcpy(info->error_key, error_key.data(), sizeof(info->error_key)); diff --git a/src/common/state/object_table.cc b/src/common/state/object_table.cc index ce006d585..fcd527e62 100644 --- a/src/common/state/object_table.cc +++ b/src/common/state/object_table.cc @@ -6,7 +6,7 @@ void object_table_lookup(DBHandle *db_handle, RetryInfo *retry, object_table_lookup_done_callback done_callback, void *user_context) { - CHECK(db_handle != NULL); + RAY_CHECK(db_handle != NULL); init_table_callback(db_handle, object_id, __func__, new CommonCallbackData(NULL), retry, (table_done_callback) done_callback, @@ -20,7 +20,7 @@ void object_table_add(DBHandle *db_handle, RetryInfo *retry, object_table_done_callback done_callback, void *user_context) { - CHECK(db_handle != NULL); + RAY_CHECK(db_handle != NULL); ObjectTableAddData *info = (ObjectTableAddData *) malloc(sizeof(ObjectTableAddData)); @@ -38,7 +38,7 @@ void object_table_remove(DBHandle *db_handle, RetryInfo *retry, object_table_done_callback done_callback, void *user_context) { - CHECK(db_handle != NULL); + RAY_CHECK(db_handle != NULL); /* Copy the client ID, if one was provided. */ DBClientID *client_id_copy = NULL; if (client_id != NULL) { @@ -59,7 +59,7 @@ void object_table_subscribe_to_notifications( RetryInfo *retry, object_table_lookup_done_callback done_callback, void *user_context) { - CHECK(db_handle != NULL); + RAY_CHECK(db_handle != NULL); ObjectTableSubscribeData *sub_data = (ObjectTableSubscribeData *) malloc(sizeof(ObjectTableSubscribeData)); sub_data->object_available_callback = object_available_callback; @@ -76,8 +76,8 @@ void object_table_request_notifications(DBHandle *db_handle, int num_object_ids, ObjectID object_ids[], RetryInfo *retry) { - CHECK(db_handle != NULL); - CHECK(num_object_ids > 0); + RAY_CHECK(db_handle != NULL); + RAY_CHECK(num_object_ids > 0); ObjectTableRequestNotificationsData *data = (ObjectTableRequestNotificationsData *) malloc( sizeof(ObjectTableRequestNotificationsData) + diff --git a/src/common/state/redis.cc b/src/common/state/redis.cc index f3eee6d00..d3167ab9b 100644 --- a/src/common/state/redis.cc +++ b/src/common/state/redis.cc @@ -35,17 +35,17 @@ extern "C" { extern int usleep(useconds_t usec); #endif -#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \ - do { \ - CONTEXT_TYPE *_context = (context); \ - if (!_context) { \ - LOG_FATAL("could not allocate redis context"); \ - } \ - if (_context->err) { \ - LOG_ERROR(M, ##__VA_ARGS__); \ - LOG_REDIS_ERROR(_context, ""); \ - exit(-1); \ - } \ +#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \ + do { \ + CONTEXT_TYPE *_context = (context); \ + if (!_context) { \ + RAY_LOG(FATAL) << "could not allocate redis context"; \ + } \ + if (_context->err) { \ + RAY_LOG(ERROR) << M; \ + LOG_REDIS_ERROR(_context, ""); \ + exit(-1); \ + } \ } while (0) /** @@ -110,14 +110,14 @@ void get_redis_shards(redisContext *context, num_attempts++; continue; } - CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(), - "No entry found for NumRedisShards"); - CHECKM(reply->type == REDIS_REPLY_STRING, - "Expected string, found Redis type %d for NumRedisShards", - reply->type); + RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries()) + << "No entry found for NumRedisShards"; + RAY_CHECK(reply->type == REDIS_REPLY_STRING) + << "Expected string, found Redis type " << reply->type + << " for NumRedisShards"; int num_redis_shards = atoi(reply->str); - CHECKM(num_redis_shards >= 1, "Expected at least one Redis shard, found %d.", - num_redis_shards); + RAY_CHECK(num_redis_shards >= 1) << "Expected at least one Redis shard, " + << "found " << num_redis_shards; freeReplyObject(reply); /* Get the addresses of all of the Redis shards. */ @@ -137,18 +137,18 @@ void get_redis_shards(redisContext *context, num_attempts++; continue; } - CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(), - "Expected %d Redis shard addresses, found %d", num_redis_shards, - (int) reply->elements); + RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries()) + << "Expected " << num_redis_shards << " Redis shard addresses, found " + << reply->elements; /* Parse the Redis shard addresses. */ char db_shard_address[16]; int db_shard_port; for (size_t i = 0; i < reply->elements; ++i) { /* Parse the shard addresses and ports. */ - CHECK(reply->element[i]->type == REDIS_REPLY_STRING); - CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address, - &db_shard_port) == 0); + RAY_CHECK(reply->element[i]->type == REDIS_REPLY_STRING); + RAY_CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address, + &db_shard_port) == 0); db_shards_addresses.push_back(std::string(db_shard_address)); db_shards_ports.push_back(db_shard_port); } @@ -174,7 +174,7 @@ void db_connect_shard(const std::string &db_address, RayConfig::instance().redis_db_connect_retries()) { break; } - LOG_WARN("Failed to connect to Redis, retrying."); + RAY_LOG(WARNING) << "Failed to connect to Redis, retrying."; /* Sleep for a little. */ usleep(RayConfig::instance().redis_db_connect_wait_milliseconds() * 1000); sync_context = redisConnect(db_address.c_str(), db_port); @@ -190,13 +190,13 @@ void db_connect_shard(const std::string &db_address, * processes by hand), it is easier to do it multiple times. */ reply = (redisReply *) redisCommand(sync_context, "CONFIG SET notify-keyspace-events Kl"); - CHECKM(reply != NULL, "db_connect failed on CONFIG SET"); + RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET"; freeReplyObject(reply); /* Also configure Redis to not run in protected mode, so clients on other * hosts can connect to it. */ reply = (redisReply *) redisCommand(sync_context, "CONFIG SET protected-mode no"); - CHECKM(reply != NULL, "db_connect failed on CONFIG SET"); + RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET"; freeReplyObject(reply); /* Construct the argument arrays for RAY.CONNECT. */ @@ -224,9 +224,9 @@ void db_connect_shard(const std::string &db_address, /* Register this client with Redis. RAY.CONNECT is a custom Redis command that * we've defined. */ reply = (redisReply *) redisCommandArgv(sync_context, argc, argv, argvlen); - CHECKM(reply != NULL, "db_connect failed on RAY.CONNECT"); - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply != NULL) << "db_connect failed on RAY.CONNECT"; + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; freeReplyObject(reply); free(argv); free(argvlen); @@ -261,7 +261,7 @@ DBHandle *db_connect(const std::string &db_primary_address, /* Check that the number of args is even. These args will be passed to the * RAY.CONNECT Redis command, which takes arguments in pairs. */ if (args.size() % 2 != 0) { - LOG_FATAL("The number of extra args must be divisible by two."); + RAY_LOG(FATAL) << "The number of extra args must be divisible by two."; } /* Create a client ID for this client. */ @@ -288,7 +288,7 @@ DBHandle *db_connect(const std::string &db_primary_address, std::vector db_shards_addresses; std::vector db_shards_ports; get_redis_shards(db->sync_context, db_shards_addresses, db_shards_ports); - CHECKM(db_shards_addresses.size() > 0, "No Redis shards found"); + RAY_CHECK(db_shards_addresses.size() > 0) << "No Redis shards found"; /* Connect to the shards. */ for (size_t i = 0; i < db_shards_addresses.size(); ++i) { db_connect_shard(db_shards_addresses[i], db_shards_ports[i], client, @@ -309,7 +309,7 @@ void DBHandle_free(DBHandle *db) { redisAsyncFree(db->subscribe_context); /* Clean up the Redis shards. */ - CHECK(db->contexts.size() == db->subscribe_contexts.size()); + RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size()); for (size_t i = 0; i < db->contexts.size(); ++i) { redisAsyncFree(db->contexts[i]); redisAsyncFree(db->subscribe_contexts[i]); @@ -326,8 +326,8 @@ void db_disconnect(DBHandle *db) { redisReply *reply = (redisReply *) redisCommand(db->sync_context, "RAY.DISCONNECT %b", db->client.data(), sizeof(db->client)); - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; freeReplyObject(reply); DBHandle_free(db); @@ -340,24 +340,24 @@ void db_attach(DBHandle *db, event_loop *loop, bool reattach) { /* If the database is reattached in the tests, redis normally gives * an error which we can safely ignore. */ if (!reattach) { - CHECKM(err == REDIS_OK, "failed to attach the event loop"); + RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop"; } err = redisAeAttach(loop, db->subscribe_context); if (!reattach) { - CHECKM(err == REDIS_OK, "failed to attach the event loop"); + RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop"; } /* Attach other redis shards to the event loop. */ - CHECK(db->contexts.size() == db->subscribe_contexts.size()); + RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size()); for (size_t i = 0; i < db->contexts.size(); ++i) { int err = redisAeAttach(loop, db->contexts[i]); /* If the database is reattached in the tests, redis normally gives * an error which we can safely ignore. */ if (!reattach) { - CHECKM(err == REDIS_OK, "failed to attach the event loop"); + RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop"; } err = redisAeAttach(loop, db->subscribe_contexts[i]); if (!reattach) { - CHECKM(err == REDIS_OK, "failed to attach the event loop"); + RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop"; } } } @@ -377,13 +377,14 @@ void redis_object_table_add_callback(redisAsyncContext *c, if (!success) { /* If our object hash doesn't match the one recorded in the table, report * the error back to the user and exit immediately. */ - LOG_WARN( - "Found objects with different value but same object ID, most likely " - "because a nondeterministic task was executed twice, either for " - "reconstruction or for speculation."); + RAY_LOG(WARNING) << "Found objects with different value but same object " + << "ID, most likely because a nondeterministic task was " + << "executed twice, either for reconstruction or for " + << "speculation."; } else { - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " + << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; } /* Call the done callback if there is one. */ if (callback_data->done_callback != NULL) { @@ -428,8 +429,8 @@ void redis_object_table_remove_callback(redisAsyncContext *c, * condition with an object_table_add. */ return; } - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; /* Call the done callback if there is one. */ if (callback_data->done_callback != NULL) { object_table_done_callback done_callback = @@ -464,7 +465,7 @@ void redis_object_table_remove(TableCallbackData *callback_data) { } void redis_object_table_lookup(TableCallbackData *callback_data) { - CHECK(callback_data); + RAY_CHECK(callback_data); DBHandle *db = callback_data->db_handle; ObjectID obj_id = callback_data->id; @@ -486,9 +487,9 @@ void redis_result_table_add_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; /* Check that the command succeeded. */ - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strncmp(reply->str, "OK", strlen("OK")) == 0, "reply->str is %s", - reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strncmp(reply->str, "OK", strlen("OK")) == 0) << "reply->str is " + << reply->str; /* Call the done callback if there is one. */ if (callback_data->done_callback) { result_table_done_callback done_callback = @@ -499,7 +500,7 @@ void redis_result_table_add_callback(redisAsyncContext *c, } void redis_result_table_add(TableCallbackData *callback_data) { - CHECK(callback_data); + RAY_CHECK(callback_data); DBHandle *db = callback_data->db_handle; ObjectID id = callback_data->id; ResultTableAddInfo *info = (ResultTableAddInfo *) callback_data->data->Get(); @@ -522,10 +523,9 @@ void redis_result_table_add(TableCallbackData *callback_data) { * task is NULL. This is used by both redis_result_table_lookup_callback and * redis_task_table_get_task_callback. */ Task *parse_and_construct_task_from_redis_reply(redisReply *reply) { - Task *task; + Task *task = NULL; if (reply->type == REDIS_REPLY_NIL) { /* There is no task in the reply, so return NULL. */ - task = NULL; } else if (reply->type == REDIS_REPLY_STRING) { /* The reply is a flatbuffer TaskReply object. Parse it and construct the * task. */ @@ -540,7 +540,7 @@ Task *parse_and_construct_task_from_redis_reply(redisReply *reply) { from_flatbuf(*message->local_scheduler_id()), from_flatbuf(*execution_dependencies->execution_dependencies())); } else { - LOG_FATAL("Unexpected reply type %d", reply->type); + RAY_LOG(FATAL) << "Unexpected reply type " << reply->type; } /* Return the task. If it is not NULL, then it must be freed by the caller. */ return task; @@ -551,9 +551,9 @@ void redis_result_table_lookup_callback(redisAsyncContext *c, void *privdata) { REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECKM(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING, - "Unexpected reply type %d in redis_result_table_lookup_callback", - reply->type); + RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING) + << "Unexpected reply type " << reply->type << " in " + << "redis_result_table_lookup_callback"; /* Parse the task from the reply. */ TaskID result_id = TaskID::nil(); bool is_put = false; @@ -575,7 +575,7 @@ void redis_result_table_lookup_callback(redisAsyncContext *c, } void redis_result_table_lookup(TableCallbackData *callback_data) { - CHECK(callback_data); + RAY_CHECK(callback_data); DBHandle *db = callback_data->db_handle; ObjectID id = callback_data->id; redisAsyncContext *context = get_redis_context(db, id); @@ -594,8 +594,8 @@ DBClient redis_db_client_table_get(DBHandle *db, redisReply *reply = (redisReply *) redisCommand(db->sync_context, "HGETALL %s%b", DB_CLIENT_PREFIX, client_id, client_id_len); - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements > 0); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements > 0); DBClient db_client; int num_fields = 0; /* Parse the fields into a DBClient. */ @@ -620,7 +620,7 @@ DBClient redis_db_client_table_get(DBHandle *db, freeReplyObject(reply); /* The client ID, type, and whether it is deleted are all * mandatory fields. Auxiliary address is optional. */ - CHECK(num_fields >= 3); + RAY_CHECK(num_fields >= 3); return db_client; } @@ -651,8 +651,8 @@ void redis_object_table_lookup_callback(redisAsyncContext *c, void *privdata) { REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - LOG_DEBUG("Object table lookup callback"); - CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY); + RAY_LOG(DEBUG) << "Object table lookup callback"; + RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY); object_table_lookup_done_callback done_callback = (object_table_lookup_done_callback) callback_data->done_callback; @@ -671,7 +671,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c, std::vector manager_ids; for (size_t j = 0; j < reply->elements; ++j) { - CHECK(reply->element[j]->type == REDIS_REPLY_STRING); + RAY_CHECK(reply->element[j]->type == REDIS_REPLY_STRING); DBClientID manager_id; memcpy(manager_id.mutable_data(), reply->element[j]->str, sizeof(manager_id)); @@ -682,7 +682,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c, done_callback(obj_id, false, manager_ids, callback_data->user_context); } } else { - LOG_FATAL("Unexpected reply type from object table lookup."); + RAY_LOG(FATAL) << "Unexpected reply type from object table lookup."; } /* Clean up timer and callback. */ @@ -708,11 +708,11 @@ void object_table_redis_subscribe_to_notifications_callback( * - reply->emement[2]->str is the contents of the message. */ redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements == 3); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements == 3); redisReply *message_type = reply->element[0]; - LOG_DEBUG("Object table subscribe to notifications callback, message %s", - message_type->str); + RAY_LOG(DEBUG) << "Object table subscribe to notifications callback, message" + << message_type->str; if (strcmp(message_type->str, "message") == 0) { /* We received an object notification. Parse the payload. */ @@ -752,8 +752,8 @@ void object_table_redis_subscribe_to_notifications_callback( * destroy the callback data. */ remove_timer_callback(db->loop, callback_data); } else { - LOG_FATAL( - "Unexpected reply type from object table subscribe to notifications."); + RAY_LOG(FATAL) << "Unexpected reply type from object table subscribe to " + << "notifications."; } } @@ -770,8 +770,8 @@ void redis_object_table_subscribe_to_notifications( * as the channel name so this channel is specific to this client. * TODO(rkn): * The channel name should probably be the client ID with some prefix. */ - CHECKM(callback_data->data->Get() != NULL, - "Object table subscribe data passed as NULL."); + RAY_CHECK(callback_data->data->Get() != NULL) + << "Object table subscribe data passed as NULL."; if (((ObjectTableSubscribeData *) (callback_data->data->Get())) ->subscribe_all) { /* Subscribe to the object broadcast channel. */ @@ -802,9 +802,9 @@ void redis_object_table_request_notifications_callback(redisAsyncContext *c, /* Do some minimal checking. */ redisReply *reply = (redisReply *) r; - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; + RAY_CHECK(callback_data->done_callback == NULL); /* Clean up the timer and callback. */ destroy_timer_callback(db->loop, callback_data); } @@ -876,7 +876,7 @@ void redis_task_table_get_task_callback(redisAsyncContext *c, void redis_task_table_get_task(TableCallbackData *callback_data) { DBHandle *db = callback_data->db_handle; - CHECK(callback_data->data->Get() == NULL); + RAY_CHECK(callback_data->data->Get() == NULL); TaskID task_id = callback_data->id; redisAsyncContext *context = get_redis_context(db, task_id); @@ -902,15 +902,16 @@ void redis_task_table_add_task_callback(redisAsyncContext *c, // db_client table before retrying the add. if (reply->type == REDIS_REPLY_ERROR && strcmp(reply->str, "No subscribers received message.") == 0) { - LOG_WARN("No subscribers received the task_table_add message."); + RAY_LOG(WARNING) << "No subscribers received the task_table_add message."; if (callback_data->retry.fail_callback != NULL) { callback_data->retry.fail_callback(callback_data->id, callback_data->user_context, callback_data->data->Get()); } } else { - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " + << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; /* Call the done callback if there is one. */ if (callback_data->done_callback != NULL) { task_table_done_callback done_callback = @@ -926,7 +927,7 @@ void redis_task_table_add_task_callback(redisAsyncContext *c, void redis_task_table_add_task(TableCallbackData *callback_data) { DBHandle *db = callback_data->db_handle; Task *task = (Task *) callback_data->data->Get(); - CHECKM(task != NULL, "NULL task passed to redis_task_table_add_task."); + RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_add_task."; TaskID task_id = Task_task_id(task); DBClientID local_scheduler_id = Task_local_scheduler(task); @@ -967,15 +968,17 @@ void redis_task_table_update_callback(redisAsyncContext *c, // alive in the db_client table. if (reply->type == REDIS_REPLY_ERROR && strcmp(reply->str, "No subscribers received message.") == 0) { - LOG_WARN("No subscribers received the task_table_update message."); + RAY_LOG(WARNING) << "No subscribers received the task_table_update " + << "message."; if (callback_data->retry.fail_callback != NULL) { callback_data->retry.fail_callback(callback_data->id, callback_data->user_context, callback_data->data->Get()); } } else { - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " + << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; /* Call the done callback if there is one. */ if (callback_data->done_callback != NULL) { @@ -992,7 +995,7 @@ void redis_task_table_update_callback(redisAsyncContext *c, void redis_task_table_update(TableCallbackData *callback_data) { DBHandle *db = callback_data->db_handle; Task *task = (Task *) callback_data->data->Get(); - CHECKM(task != NULL, "NULL task passed to redis_task_table_update."); + RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_update."; TaskID task_id = Task_task_id(task); redisAsyncContext *context = get_redis_context(db, task_id); @@ -1030,7 +1033,7 @@ void redis_task_table_test_and_update_callback(redisAsyncContext *c, * delayed when added to the task table if they are submitted to a local * scheduler before it receives the notification that maps the actor to a * local scheduler. */ - LOG_ERROR("No task found during task_table_test_and_update"); + RAY_LOG(ERROR) << "No task found during task_table_test_and_update"; return; } /* Determine whether the update happened. */ @@ -1091,11 +1094,11 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); /* The number of elements is 3 for a reply to SUBSCRIBE, and 4 for a reply to * PSUBSCRIBE. */ - CHECKM(reply->elements == 3 || reply->elements == 4, "reply->elements is %zu", - reply->elements); + RAY_CHECK(reply->elements == 3 || reply->elements == 4) + << "reply->elements is " << reply->elements; /* The first element is the message type and the last entry is the payload. * The middle one or middle two elements describe the channel that was * published on. */ @@ -1148,9 +1151,8 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c, * subscription callback needs this data. */ remove_timer_callback(db->loop, callback_data); } else { - LOG_FATAL( - "Unexpected reply type from task table subscribe. Message type is %s.", - message_type->str); + RAY_LOG(FATAL) << "Unexpected reply type from task table subscribe. " + << "Message type is " << message_type->str; } } @@ -1200,8 +1202,8 @@ void redis_db_client_table_remove_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; /* Call the done callback if there is one. */ db_client_table_done_callback done_callback = @@ -1235,7 +1237,7 @@ void redis_db_client_table_scan(DBHandle *db, return; } /* Get all the database client information. */ - CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); for (size_t i = 0; i < reply->elements; ++i) { /* Strip the database client table prefix. */ unsigned char *key = (unsigned char *) reply->element[i]->str; @@ -1255,8 +1257,8 @@ void redis_db_client_table_subscribe_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements > 2); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements > 2); /* First entry is message type, then possibly the regex we psubscribed to, * then topic, then payload. */ redisReply *payload = reply->element[reply->elements - 1]; @@ -1323,11 +1325,11 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements == 3); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements == 3); redisReply *message_type = reply->element[0]; - LOG_DEBUG("Local scheduler table subscribe callback, message %s", - message_type->str); + RAY_LOG(DEBUG) << "Local scheduler table subscribe callback, message " + << message_type->str; if (strcmp(message_type->str, "message") == 0) { /* Handle a local scheduler heartbeat. Parse the payload and call the @@ -1362,13 +1364,13 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c, } } else if (strcmp(message_type->str, "subscribe") == 0) { /* The reply for the initial SUBSCRIBE command. */ - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(callback_data->done_callback == NULL); /* If the initial SUBSCRIBE was successful, clean up the timer, but don't * destroy the callback data. */ remove_timer_callback(db->loop, callback_data); } else { - LOG_FATAL("Unexpected reply type from local scheduler subscribe."); + RAY_LOG(FATAL) << "Unexpected reply type from local scheduler subscribe."; } } @@ -1389,10 +1391,10 @@ void redis_local_scheduler_table_send_info_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_INTEGER); - LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer); + RAY_CHECK(reply->type == REDIS_REPLY_INTEGER); + RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish."; - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(callback_data->done_callback == NULL); /* Clean up the timer and callback. */ destroy_timer_callback(db->loop, callback_data); } @@ -1430,9 +1432,9 @@ void redis_local_scheduler_table_disconnect(DBHandle *db) { redisReply *reply = (redisReply *) redisCommand( db->sync_context, "PUBLISH local_schedulers %b", fbb.GetBufferPointer(), (size_t) fbb.GetSize()); - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECK(reply->type == REDIS_REPLY_INTEGER); - LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(reply->type == REDIS_REPLY_INTEGER); + RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish."; freeReplyObject(reply); } @@ -1442,10 +1444,11 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements == 3); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements == 3); redisReply *message_type = reply->element[0]; - LOG_DEBUG("Driver table subscribe callback, message %s", message_type->str); + RAY_LOG(DEBUG) << "Driver table subscribe callback, message " + << message_type->str; if (strcmp(message_type->str, "message") == 0) { /* Handle a driver heartbeat. Parse the payload and call the subscribe @@ -1463,13 +1466,13 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c, } } else if (strcmp(message_type->str, "subscribe") == 0) { /* The reply for the initial SUBSCRIBE command. */ - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(callback_data->done_callback == NULL); /* If the initial SUBSCRIBE was successful, clean up the timer, but don't * destroy the callback data. */ remove_timer_callback(db->loop, callback_data); } else { - LOG_FATAL("Unexpected reply type from driver subscribe."); + RAY_LOG(FATAL) << "Unexpected reply type from driver subscribe."; } } @@ -1490,13 +1493,13 @@ void redis_driver_table_send_driver_death_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_INTEGER); - LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer); + RAY_CHECK(reply->type == REDIS_REPLY_INTEGER); + RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish."; /* At the very least, the local scheduler that publishes this message should * also receive it. */ - CHECK(reply->integer >= 1); + RAY_CHECK(reply->integer >= 1); - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(callback_data->done_callback == NULL); /* Clean up the timer and callback. */ destroy_timer_callback(db->loop, callback_data); } @@ -1544,11 +1547,11 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; - CHECK(reply->type == REDIS_REPLY_ARRAY); - CHECK(reply->elements == 3); + RAY_CHECK(reply->type == REDIS_REPLY_ARRAY); + RAY_CHECK(reply->elements == 3); redisReply *message_type = reply->element[0]; - LOG_DEBUG("Local scheduler table subscribe callback, message %s", - message_type->str); + RAY_LOG(DEBUG) << "Local scheduler table subscribe callback, message " + << message_type->str; if (strcmp(message_type->str, "message") == 0) { /* Handle an actor notification message. Parse the payload and call the @@ -1561,9 +1564,9 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c, WorkerID driver_id; DBClientID local_scheduler_id; bool reconstruct; - CHECK(sizeof(actor_id) + sizeof(driver_id) + sizeof(local_scheduler_id) + - 1 == - payload->len); + RAY_CHECK(sizeof(actor_id) + sizeof(driver_id) + + sizeof(local_scheduler_id) + 1 == + payload->len); char *current_ptr = payload->str; /* Parse the actor ID. */ memcpy(&actor_id, current_ptr, sizeof(actor_id)); @@ -1580,7 +1583,8 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c, } else if (*current_ptr == '0') { reconstruct = false; } else { - LOG_FATAL("This code should be unreachable."); + reconstruct = false; // We set this value to avoid a compiler warning. + RAY_LOG(FATAL) << "This code should be unreachable."; } current_ptr += 1; @@ -1590,13 +1594,14 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c, } } else if (strcmp(message_type->str, "subscribe") == 0) { /* The reply for the initial SUBSCRIBE command. */ - CHECK(callback_data->done_callback == NULL); + RAY_CHECK(callback_data->done_callback == NULL); /* If the initial SUBSCRIBE was successful, clean up the timer, but don't * destroy the callback data. */ remove_timer_callback(db->loop, callback_data); } else { - LOG_FATAL("Unexpected reply type from actor notification subscribe."); + RAY_LOG(FATAL) << "Unexpected reply type from actor notification " + << "subscribe."; } } @@ -1627,7 +1632,7 @@ void redis_push_error_rpush_callback(redisAsyncContext *c, REDIS_CALLBACK_HEADER(db, callback_data, r); redisReply *reply = (redisReply *) r; /* The reply should be the length of the errors list after our RPUSH. */ - CHECK(reply->type == REDIS_REPLY_INTEGER); + RAY_CHECK(reply->type == REDIS_REPLY_INTEGER); destroy_timer_callback(db->loop, callback_data); } @@ -1638,8 +1643,8 @@ void redis_push_error_hmset_callback(redisAsyncContext *c, redisReply *reply = (redisReply *) r; /* Make sure we were able to add the error information. */ - CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str); - CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str); + RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str; + RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str; /* Add the error to this driver's list of errors. */ ErrorInfo *info = (ErrorInfo *) callback_data->data->Get(); @@ -1656,7 +1661,7 @@ void redis_push_error_hmset_callback(redisAsyncContext *c, void redis_push_error(TableCallbackData *callback_data) { DBHandle *db = callback_data->db_handle; ErrorInfo *info = (ErrorInfo *) callback_data->data->Get(); - CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0); + RAY_CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0); /* Look up the error type. */ const char *error_type = error_types[info->error_index]; const char *error_message = error_messages[info->error_index]; @@ -1674,6 +1679,6 @@ void redis_push_error(TableCallbackData *callback_data) { } DBClientID get_db_client_id(DBHandle *db) { - CHECK(db != NULL); + RAY_CHECK(db != NULL); return db->client; } diff --git a/src/common/state/redis.h b/src/common/state/redis.h index ad2a40442..ff324a5ac 100644 --- a/src/common/state/redis.h +++ b/src/common/state/redis.h @@ -11,11 +11,13 @@ #include "hiredis/hiredis.h" #include "hiredis/async.h" -#define LOG_REDIS_ERROR(context, M, ...) \ - LOG_ERROR("Redis error %d %s; %s", context->err, context->errstr, M) +#define LOG_REDIS_ERROR(context, M, ...) \ + RAY_LOG(ERROR) << "Redis error " << context->err << " " << context->errstr \ + << "; " << M -#define LOG_REDIS_DEBUG(context, M, ...) \ - LOG_DEBUG("Redis error %d %s; %s", context->err, context->errstr, M) +#define LOG_REDIS_DEBUG(context, M, ...) \ + RAY_LOG(DEBUG) << "Redis error " << context->err << " " << context->errstr \ + << "; " << M; struct DBHandle { /** String that identifies this client type. */ diff --git a/src/common/state/table.cc b/src/common/state/table.cc index a70eaa7bc..8269c2b1e 100644 --- a/src/common/state/table.cc +++ b/src/common/state/table.cc @@ -43,18 +43,18 @@ TableCallbackData *init_table_callback(DBHandle *db_handle, table_done_callback done_callback, table_retry_callback retry_callback, void *user_context) { - CHECK(db_handle); - CHECK(db_handle->loop); - CHECK(data); + RAY_CHECK(db_handle); + RAY_CHECK(db_handle->loop); + RAY_CHECK(data); /* If no retry info is provided, use the default retry info. */ if (retry == NULL) { retry = (RetryInfo *) &default_retry; } - CHECK(retry); + RAY_CHECK(retry); /* Allocate and initialize callback data structure for object table */ TableCallbackData *callback_data = (TableCallbackData *) malloc(sizeof(TableCallbackData)); - CHECKM(callback_data != NULL, "Memory allocation error!") + RAY_CHECK(callback_data != NULL) << "Memory allocation error!"; callback_data->id = id; callback_data->label = label; callback_data->retry = *retry; @@ -70,8 +70,8 @@ TableCallbackData *init_table_callback(DBHandle *db_handle, callback_data->timer_id = callback_data_id++; outstanding_callbacks_add(callback_data); - LOG_DEBUG("Initializing table command %s with timer ID %" PRId64, - callback_data->label, callback_data->timer_id); + RAY_LOG(DEBUG) << "Initializing table command " << callback_data->label + << " with timer ID " << callback_data->timer_id; callback_data->retry_callback(callback_data); return callback_data; @@ -92,12 +92,12 @@ void remove_timer_callback(event_loop *loop, TableCallbackData *callback_data) { } void destroy_table_callback(TableCallbackData *callback_data) { - CHECK(callback_data != NULL); + RAY_CHECK(callback_data != NULL); if (callback_data->requests_info) free(callback_data->requests_info); - CHECK(callback_data->data != NULL); + RAY_CHECK(callback_data->data != NULL); delete callback_data->data; callback_data->data = NULL; @@ -110,20 +110,20 @@ void destroy_table_callback(TableCallbackData *callback_data) { int64_t table_timeout_handler(event_loop *loop, int64_t timer_id, void *user_context) { - CHECK(loop != NULL); - CHECK(user_context != NULL); + RAY_CHECK(loop != NULL); + RAY_CHECK(user_context != NULL); TableCallbackData *callback_data = (TableCallbackData *) user_context; - CHECK(callback_data->retry.num_retries >= 0 || - callback_data->retry.num_retries == -1); - LOG_WARN("retrying operation %s, retry_count = %d", callback_data->label, - callback_data->retry.num_retries); + RAY_CHECK(callback_data->retry.num_retries >= 0 || + callback_data->retry.num_retries == -1); + RAY_LOG(WARNING) << "retrying operation " << callback_data->label + << ", retry_count = " << callback_data->retry.num_retries; if (callback_data->retry.num_retries == 0) { /* We didn't get a response from the database after exhausting all retries; * let user know, cleanup the state, and remove the timer. */ - LOG_WARN("Table command %s with timer ID %" PRId64 " failed", - callback_data->label, timer_id); + RAY_LOG(WARNING) << "Table command " << callback_data->label + << " with timer ID " << timer_id << " failed"; if (callback_data->retry.fail_callback) { callback_data->retry.fail_callback(callback_data->id, callback_data->user_context, diff --git a/src/common/task.cc b/src/common/task.cc index e85effc64..702eaab95 100644 --- a/src/common/task.cc +++ b/src/common/task.cc @@ -80,7 +80,7 @@ class TaskBuilder { } void SetRequiredResource(const std::string &resource_name, double value) { - CHECK(resource_map_.count(resource_name) == 0); + RAY_CHECK(resource_map_.count(resource_name) == 0); resource_map_[resource_name] = value; } @@ -91,7 +91,7 @@ class TaskBuilder { BYTE buff[DIGEST_SIZE]; sha256_final(&ctx, buff); TaskID task_id; - CHECK(sizeof(task_id) <= DIGEST_SIZE); + RAY_CHECK(sizeof(task_id) <= DIGEST_SIZE); memcpy(&task_id, buff, sizeof(task_id)); /* Add return object IDs. */ std::vector> returns; @@ -206,25 +206,25 @@ void TaskSpec_set_required_resource(TaskBuilder *builder, /* Functions for reading tasks. */ TaskID TaskSpec_task_id(const TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->task_id()); } FunctionID TaskSpec_function(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->function_id()); } ActorID TaskSpec_actor_id(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->actor_id()); } ActorID TaskSpec_actor_handle_id(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->actor_handle_id()); } @@ -234,19 +234,19 @@ bool TaskSpec_is_actor_task(TaskSpec *spec) { } int64_t TaskSpec_actor_counter(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return std::abs(message->actor_counter()); } bool TaskSpec_is_actor_checkpoint_method(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->is_actor_checkpoint_method(); } ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) { - CHECK(TaskSpec_is_actor_task(spec)); + RAY_CHECK(TaskSpec_is_actor_task(spec)); /* The last return value for actor tasks is the dummy object that * represents that this task has completed execution. */ int64_t num_returns = TaskSpec_num_returns(spec); @@ -254,25 +254,25 @@ ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) { } UniqueID TaskSpec_driver_id(const TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->driver_id()); } TaskID TaskSpec_parent_task_id(const TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->parent_task_id()); } int64_t TaskSpec_parent_counter(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->parent_counter(); } int64_t TaskSpec_num_args(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->args()->size(); } @@ -289,45 +289,45 @@ int64_t TaskSpec_num_args_by_ref(TaskSpec *spec) { } int TaskSpec_arg_id_count(TaskSpec *spec, int64_t arg_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); auto ids = message->args()->Get(arg_index)->object_ids(); return ids->size(); } ObjectID TaskSpec_arg_id(TaskSpec *spec, int64_t arg_index, int64_t id_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf( *message->args()->Get(arg_index)->object_ids()->Get(id_index)); } const uint8_t *TaskSpec_arg_val(TaskSpec *spec, int64_t arg_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return (uint8_t *) message->args()->Get(arg_index)->data()->c_str(); } int64_t TaskSpec_arg_length(TaskSpec *spec, int64_t arg_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->args()->Get(arg_index)->data()->size(); } int64_t TaskSpec_num_returns(TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->returns()->size(); } bool TaskSpec_arg_by_ref(TaskSpec *spec, int64_t arg_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return message->args()->Get(arg_index)->object_ids()->size() != 0; } ObjectID TaskSpec_return(TaskSpec *spec, int64_t return_index) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return from_flatbuf(*message->returns()->Get(return_index)); } @@ -336,7 +336,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec, const std::string &resource_name) { // This is a bit ugly. However it shouldn't be much of a performance issue // because there shouldn't be many distinct resources in a single task spec. - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); for (size_t i = 0; i < message->required_resources()->size(); i++) { const ResourcePair *resource_pair = message->required_resources()->Get(i); @@ -349,7 +349,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec, const std::unordered_map TaskSpec_get_required_resources( const TaskSpec *spec) { - CHECK(spec); + RAY_CHECK(spec); auto message = flatbuffers::GetRoot(spec); return map_from_flatbuf(*message->required_resources()); } @@ -446,7 +446,7 @@ int TaskExecutionSpec::DependencyIdCount(int64_t dependency_index) const { } else { /* Index into the execution dependencies. */ dependency_index -= num_args; - CHECK((size_t) dependency_index < execution_dependencies_.size()); + RAY_CHECK((size_t) dependency_index < execution_dependencies_.size()); /* All elements in the execution dependency list have exactly one ID. */ return 1; } @@ -465,7 +465,7 @@ ObjectID TaskExecutionSpec::DependencyId(int64_t dependency_index, } else { /* Index into the execution dependencies. */ dependency_index -= num_args; - CHECK((size_t) dependency_index < execution_dependencies_.size()); + RAY_CHECK((size_t) dependency_index < execution_dependencies_.size()); return execution_dependencies_[dependency_index]; } } diff --git a/src/common/test/db_tests.cc b/src/common/test/db_tests.cc index 1df7a37a4..b507b4575 100644 --- a/src/common/test/db_tests.cc +++ b/src/common/test/db_tests.cc @@ -42,13 +42,13 @@ void lookup_done_callback(ObjectID object_id, const std::vector &manager_ids, void *user_context) { DBHandle *db = (DBHandle *) user_context; - CHECK(manager_ids.size() == 2); + RAY_CHECK(manager_ids.size() == 2); const std::vector managers = db_client_table_get_ip_addresses(db, manager_ids); - CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1, - &received_port1) == 0); - CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2, - &received_port2) == 0); + RAY_CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1, + &received_port1) == 0); + RAY_CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2, + &received_port2) == 0); } /* Entry added to database successfully. */ @@ -57,7 +57,7 @@ void add_done_callback(ObjectID object_id, bool success, void *user_context) {} /* Test if we got a timeout callback if we couldn't connect database. */ void timeout_callback(ObjectID object_id, void *context, void *user_data) { user_context *uc = (user_context *) context; - CHECK(uc->test_number == TEST_NUMBER) + RAY_CHECK(uc->test_number == TEST_NUMBER); } int64_t timeout_handler(event_loop *loop, int64_t id, void *context) { @@ -136,9 +136,9 @@ int64_t task_table_delayed_add_task(event_loop *loop, void task_table_test_callback(Task *callback_task, void *user_data) { task_table_test_callback_called = 1; - CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED); - CHECK(Task_size(callback_task) == Task_size(task_table_test_task)); - CHECK(Task_equals(callback_task, task_table_test_task)); + RAY_CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED); + RAY_CHECK(Task_size(callback_task) == Task_size(task_table_test_task)); + RAY_CHECK(Task_equals(callback_task, task_table_test_task)); event_loop *loop = (event_loop *) user_data; event_loop_stop(loop); } diff --git a/src/common/test/object_table_tests.cc b/src/common/test/object_table_tests.cc index 4999c0037..2c18a8921 100644 --- a/src/common/test/object_table_tests.cc +++ b/src/common/test/object_table_tests.cc @@ -38,13 +38,13 @@ void new_object_done_callback(ObjectID object_id, bool is_put, void *user_context) { new_object_succeeded = 1; - CHECK(object_id == new_object_id); - CHECK(task_id == new_object_task_id); + RAY_CHECK(object_id == new_object_id); + RAY_CHECK(task_id == new_object_task_id); event_loop_stop(g_loop); } void new_object_lookup_callback(ObjectID object_id, void *user_context) { - CHECK(object_id == new_object_id); + RAY_CHECK(object_id == new_object_id); RetryInfo retry = { .num_retries = 5, .timeout = 100, @@ -109,7 +109,7 @@ void new_object_no_task_callback(ObjectID object_id, bool is_put, void *user_context) { new_object_succeeded = 1; - CHECK(task_id.is_nil()); + RAY_CHECK(task_id.is_nil()); event_loop_stop(g_loop); } @@ -150,12 +150,12 @@ void lookup_done_callback(ObjectID object_id, const std::vector &manager_vector, void *context) { /* The done callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void lookup_fail_callback(UniqueID id, void *user_context, void *user_data) { lookup_failed = 1; - CHECK(user_context == (void *) lookup_timeout_context); + RAY_CHECK(user_context == (void *) lookup_timeout_context); event_loop_stop(g_loop); } @@ -189,12 +189,12 @@ int add_failed = 0; void add_done_callback(ObjectID object_id, bool success, void *user_context) { /* The done callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void add_fail_callback(UniqueID id, void *user_context, void *user_data) { add_failed = 1; - CHECK(user_context == (void *) add_timeout_context); + RAY_CHECK(user_context == (void *) add_timeout_context); event_loop_stop(g_loop); } @@ -230,7 +230,7 @@ void subscribe_done_callback(ObjectID object_id, const std::vector &manager_vector, void *user_context) { /* The done callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) { @@ -277,7 +277,7 @@ int64_t reconnect_context_callback(event_loop *loop, db->sync_context = redisConnect("127.0.0.1", 6379); /* Re-attach the database to the event loop (the file descriptor changed). */ db_attach(db, loop, true); - LOG_DEBUG("Reconnected to Redis"); + RAY_LOG(DEBUG) << "Reconnected to Redis"; return EVENT_LOOP_TIMER_DONE; } @@ -297,7 +297,7 @@ void lookup_retry_fail_callback(UniqueID id, void *user_context, void *user_data) { /* The fail callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } /* === Test add retry === */ @@ -312,15 +312,15 @@ void add_lookup_done_callback(ObjectID object_id, const std::vector &manager_ids, void *context) { DBHandle *db = (DBHandle *) context; - CHECK(manager_ids.size() == 1); + RAY_CHECK(manager_ids.size() == 1); const std::vector managers = db_client_table_get_ip_addresses(db, manager_ids); - CHECK(managers.at(0) == "127.0.0.1:11235"); + RAY_CHECK(managers.at(0) == "127.0.0.1:11235"); lookup_retry_succeeded = 1; } void add_lookup_callback(ObjectID object_id, bool success, void *user_context) { - CHECK(success); + RAY_CHECK(success); DBHandle *db = (DBHandle *) user_context; RetryInfo retry = { .num_retries = 5, @@ -366,15 +366,15 @@ void add_remove_lookup_done_callback( bool never_created, const std::vector &manager_vector, void *context) { - CHECK(context == (void *) lookup_retry_context); - CHECK(manager_vector.size() == 0); + RAY_CHECK(context == (void *) lookup_retry_context); + RAY_CHECK(manager_vector.size() == 0); lookup_retry_succeeded = 1; } void add_remove_lookup_callback(ObjectID object_id, bool success, void *user_context) { - CHECK(success); + RAY_CHECK(success); DBHandle *db = (DBHandle *) user_context; RetryInfo retry = { .num_retries = 5, @@ -387,7 +387,7 @@ void add_remove_lookup_callback(ObjectID object_id, } void add_remove_callback(ObjectID object_id, bool success, void *user_context) { - CHECK(success); + RAY_CHECK(success); DBHandle *db = (DBHandle *) user_context; RetryInfo retry = { .num_retries = 5, @@ -433,7 +433,7 @@ int lookup_late_failed = 0; void lookup_late_fail_callback(UniqueID id, void *user_context, void *user_data) { - CHECK(user_context == (void *) lookup_late_context); + RAY_CHECK(user_context == (void *) lookup_late_context); lookup_late_failed = 1; } @@ -442,7 +442,7 @@ void lookup_late_done_callback(ObjectID object_id, const std::vector &manager_vector, void *context) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST lookup_late_test(void) { @@ -478,7 +478,7 @@ const char *add_late_context = "add_late"; int add_late_failed = 0; void add_late_fail_callback(UniqueID id, void *user_context, void *user_data) { - CHECK(user_context == (void *) add_late_context); + RAY_CHECK(user_context == (void *) add_late_context); add_late_failed = 1; } @@ -486,7 +486,7 @@ void add_late_done_callback(ObjectID object_id, bool success, void *user_context) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST add_late_test(void) { @@ -522,7 +522,7 @@ int subscribe_late_failed = 0; void subscribe_late_fail_callback(UniqueID id, void *user_context, void *user_data) { - CHECK(user_context == (void *) subscribe_late_context); + RAY_CHECK(user_context == (void *) subscribe_late_context); subscribe_late_failed = 1; } @@ -531,7 +531,7 @@ void subscribe_late_done_callback(ObjectID object_id, const std::vector &manager_vector, void *user_context) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST subscribe_late_test(void) { @@ -573,7 +573,7 @@ void subscribe_success_fail_callback(UniqueID id, void *user_context, void *user_data) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } void subscribe_success_done_callback( @@ -594,9 +594,9 @@ void subscribe_success_object_available_callback( int64_t data_size, const std::vector &manager_vector, void *user_context) { - CHECK(user_context == (void *) subscribe_success_context); - CHECK(object_id == subscribe_id); - CHECK(manager_vector.size() == 1); + RAY_CHECK(user_context == (void *) subscribe_success_context); + RAY_CHECK(object_id == subscribe_id); + RAY_CHECK(manager_vector.size() == 1); subscribe_success_succeeded = 1; } @@ -656,15 +656,15 @@ void subscribe_object_present_object_available_callback( void *user_context) { subscribe_object_present_context_t *ctx = (subscribe_object_present_context_t *) user_context; - CHECK(ctx->data_size == data_size); - CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0); + RAY_CHECK(ctx->data_size == data_size); + RAY_CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0); subscribe_object_present_succeeded = 1; - CHECK(manager_vector.size() == 1); + RAY_CHECK(manager_vector.size() == 1); } void fatal_fail_callback(UniqueID id, void *user_context, void *user_data) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST subscribe_object_present_test(void) { @@ -723,7 +723,7 @@ void subscribe_object_not_present_object_available_callback( const std::vector &manager_vector, void *user_context) { /* This should not be called. */ - CHECK(0); + RAY_CHECK(0); } TEST subscribe_object_not_present_test(void) { @@ -773,11 +773,12 @@ void subscribe_object_available_later_object_available_callback( void *user_context) { subscribe_object_present_context_t *myctx = (subscribe_object_present_context_t *) user_context; - CHECK(myctx->data_size == data_size); - CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) == 0); + RAY_CHECK(myctx->data_size == data_size); + RAY_CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) == + 0); /* Make sure the callback is only called once. */ subscribe_object_available_later_succeeded += 1; - CHECK(manager_vector.size() == 1); + RAY_CHECK(manager_vector.size() == 1); } TEST subscribe_object_available_later_test(void) { diff --git a/src/common/test/redis_tests.cc b/src/common/test/redis_tests.cc index 9acd82981..da2eb6b64 100644 --- a/src/common/test/redis_tests.cc +++ b/src/common/test/redis_tests.cc @@ -47,10 +47,10 @@ void async_redis_socket_test_callback(redisAsyncContext *ac, redisReply *reply = (redisReply *) redisCommand(context, test_get_format, test_key); redisFree(context); - CHECK(reply != NULL); + RAY_CHECK(reply != NULL); if (strcmp(reply->str, test_value)) { freeReplyObject(reply); - CHECK(0); + RAY_CHECK(0); } freeReplyObject(reply); } @@ -97,7 +97,7 @@ void redis_accept_callback(event_loop *loop, void *context, int events) { int accept_fd = accept_client(socket_fd); - CHECK(accept_fd >= 0); + RAY_CHECK(accept_fd >= 0); connections.push_back(accept_fd); event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, redis_read_callback, context); @@ -155,8 +155,8 @@ void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) { redisContext *context = redisConnect("127.0.0.1", 6379); redisReply *reply = (redisReply *) redisCommand(context, "KEYS %s", "log:*"); redisFree(context); - CHECK(reply != NULL); - CHECK(reply->elements > 0); + RAY_CHECK(reply != NULL); + RAY_CHECK(reply->elements > 0); freeReplyObject(reply); } @@ -176,7 +176,7 @@ void logging_accept_callback(event_loop *loop, void *context, int events) { int accept_fd = accept_client(socket_fd); - CHECK(accept_fd >= 0); + RAY_CHECK(accept_fd >= 0); connections.push_back(accept_fd); event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, logging_read_callback, context); diff --git a/src/common/test/task_table_tests.cc b/src/common/test/task_table_tests.cc index e9adf466f..9324cc164 100644 --- a/src/common/test/task_table_tests.cc +++ b/src/common/test/task_table_tests.cc @@ -27,13 +27,13 @@ void lookup_nil_fail_callback(UniqueID id, void *user_context, void *user_data) { /* The fail callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void lookup_nil_success_callback(Task *task, void *context) { lookup_nil_success = 1; - CHECK(task == NULL); - CHECK(context == (void *) lookup_nil_context); + RAY_CHECK(task == NULL); + RAY_CHECK(context == (void *) lookup_nil_context); event_loop_stop(g_loop); } @@ -70,18 +70,18 @@ void add_lookup_fail_callback(UniqueID id, void *user_context, void *user_data) { /* The fail callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void lookup_success_callback(Task *task, void *context) { lookup_success = 1; - CHECK(Task_equals(task, add_lookup_task)); + RAY_CHECK(Task_equals(task, add_lookup_task)); event_loop_stop(g_loop); } void add_success_callback(TaskID task_id, void *context) { add_success = 1; - CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task))); + RAY_CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task))); DBHandle *db = (DBHandle *) context; RetryInfo retry = { @@ -137,12 +137,12 @@ int subscribe_failed = 0; void subscribe_done_callback(TaskID task_id, void *user_context) { /* The done callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) { subscribe_failed = 1; - CHECK(user_context == (void *) subscribe_timeout_context); + RAY_CHECK(user_context == (void *) subscribe_timeout_context); event_loop_stop(g_loop); } @@ -180,12 +180,12 @@ int publish_failed = 0; void publish_done_callback(TaskID task_id, void *user_context) { /* The done callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } void publish_fail_callback(UniqueID id, void *user_context, void *user_data) { publish_failed = 1; - CHECK(user_context == (void *) publish_timeout_context); + RAY_CHECK(user_context == (void *) publish_timeout_context); event_loop_stop(g_loop); } @@ -249,7 +249,7 @@ const char *subscribe_retry_context = "subscribe_retry"; int subscribe_retry_succeeded = 0; void subscribe_retry_done_callback(ObjectID object_id, void *user_context) { - CHECK(user_context == (void *) subscribe_retry_context); + RAY_CHECK(user_context == (void *) subscribe_retry_context); subscribe_retry_succeeded = 1; } @@ -257,7 +257,7 @@ void subscribe_retry_fail_callback(UniqueID id, void *user_context, void *user_data) { /* The fail callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } TEST subscribe_retry_test(void) { @@ -299,7 +299,7 @@ const char *publish_retry_context = "publish_retry"; int publish_retry_succeeded = 0; void publish_retry_done_callback(ObjectID object_id, void *user_context) { - CHECK(user_context == (void *) publish_retry_context); + RAY_CHECK(user_context == (void *) publish_retry_context); publish_retry_succeeded = 1; } @@ -307,7 +307,7 @@ void publish_retry_fail_callback(UniqueID id, void *user_context, void *user_data) { /* The fail callback should not be called. */ - CHECK(0); + RAY_CHECK(0); } TEST publish_retry_test(void) { @@ -355,13 +355,13 @@ int subscribe_late_failed = 0; void subscribe_late_fail_callback(UniqueID id, void *user_context, void *user_data) { - CHECK(user_context == (void *) subscribe_late_context); + RAY_CHECK(user_context == (void *) subscribe_late_context); subscribe_late_failed = 1; } void subscribe_late_done_callback(TaskID task_id, void *user_context) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST subscribe_late_test(void) { @@ -400,13 +400,13 @@ int publish_late_failed = 0; void publish_late_fail_callback(UniqueID id, void *user_context, void *user_data) { - CHECK(user_context == (void *) publish_late_context); + RAY_CHECK(user_context == (void *) publish_late_context); publish_late_failed = 1; } void publish_late_done_callback(TaskID task_id, void *user_context) { /* This function should never be called. */ - CHECK(0); + RAY_CHECK(0); } TEST publish_late_test(void) { diff --git a/src/common/test/test_common.h b/src/common/test/test_common.h index 6f6d0da53..03984e6f2 100644 --- a/src/common/test/test_common.h +++ b/src/common/test/test_common.h @@ -22,7 +22,8 @@ static inline std::string bind_ipc_sock_retry(const char *socket_name_format, int *fd) { std::string socket_name; for (int num_retries = 0; num_retries < 5; ++num_retries) { - LOG_INFO("trying to find plasma socket (attempt %d)", num_retries); + RAY_LOG(INFO) << "trying to find plasma socket (attempt " << num_retries + << ")"; size_t size = std::snprintf(nullptr, 0, socket_name_format, rand()) + 1; char socket_name_c_str[size]; std::snprintf(socket_name_c_str, size, socket_name_format, rand()); diff --git a/src/global_scheduler/global_scheduler.cc b/src/global_scheduler/global_scheduler.cc index 2182bd964..b87c58cf9 100644 --- a/src/global_scheduler/global_scheduler.cc +++ b/src/global_scheduler/global_scheduler.cc @@ -31,7 +31,7 @@ void assign_task_to_local_scheduler_retry(UniqueID id, void *user_data) { GlobalSchedulerState *state = (GlobalSchedulerState *) user_context; Task *task = (Task *) user_data; - CHECK(Task_state(task) == TASK_STATUS_SCHEDULED); + RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED); // If the local scheduler has died since we requested the task assignment, do // not retry again. @@ -68,13 +68,13 @@ void assign_task_to_local_scheduler_retry(UniqueID id, void assign_task_to_local_scheduler(GlobalSchedulerState *state, Task *task, DBClientID local_scheduler_id) { - std::string id_string = local_scheduler_id.hex(); TaskSpec *spec = Task_task_execution_spec(task)->Spec(); - LOG_DEBUG("assigning task to local_scheduler_id = %s", id_string.c_str()); + RAY_LOG(DEBUG) << "assigning task to local_scheduler_id = " + << local_scheduler_id; Task_set_state(task, TASK_STATUS_SCHEDULED); Task_set_local_scheduler(task, local_scheduler_id); - id_string = Task_task_id(task).hex(); - LOG_DEBUG("Issuing a task table update for task = %s", id_string.c_str()); + RAY_LOG(DEBUG) << "Issuing a task table update for task = " + << Task_task_id(task); #if !RAY_USE_NEW_GCS auto retryInfo = RetryInfo{ @@ -99,7 +99,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state, /* The value -1 indicates that the size of the object is not known yet. */ obj_info_entry.data_size = -1; } - CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1); + RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1); state->scheduler_object_info_table[return_id].object_locations.push_back( state->local_scheduler_plasma_map[local_scheduler_id]); } @@ -108,7 +108,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state, * instead of db_client_id objects. */ /* Update the local scheduler info. */ auto it = state->local_schedulers.find(local_scheduler_id); - CHECK(it != state->local_schedulers.end()); + RAY_CHECK(it != state->local_schedulers.end()); LocalScheduler &local_scheduler = it->second; local_scheduler.num_tasks_sent += 1; @@ -119,8 +119,9 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state, double resource_quantity = resource_pair.second; // The local scheduler must have this resource because otherwise we wouldn't // be assigning the task to this local scheduler. - CHECK(local_scheduler.info.dynamic_resources.count(resource_name) == 1 || - resource_quantity == 0); + RAY_CHECK(local_scheduler.info.dynamic_resources.count(resource_name) == + 1 || + resource_quantity == 0); // Subtract task's resource from the cached dynamic resource capacity for // this local scheduler. This will be overwritten on the next heartbeat. local_scheduler.info.dynamic_resources[resource_name] = @@ -161,9 +162,8 @@ void GlobalSchedulerState_free(GlobalSchedulerState *state) { /* Free the array of unschedulable tasks. */ int64_t num_pending_tasks = state->pending_tasks.size(); if (num_pending_tasks > 0) { - LOG_WARN("There are %" PRId64 - " remaining tasks in the pending tasks array.", - num_pending_tasks); + RAY_LOG(WARNING) << "There are " << num_pending_tasks + << " remaining tasks in the pending tasks array."; } for (int i = 0; i < num_pending_tasks; ++i) { Task *pending_task = state->pending_tasks[i]; @@ -195,7 +195,7 @@ void signal_handler(int signal) { void process_task_waiting(Task *waiting_task, void *user_context) { GlobalSchedulerState *state = (GlobalSchedulerState *) user_context; - LOG_DEBUG("Task waiting callback is called."); + RAY_LOG(DEBUG) << "Task waiting callback is called."; bool successfully_assigned = handle_task_waiting(state, state->policy_state, waiting_task); /* If the task was not successfully submitted to a local scheduler, add the @@ -238,7 +238,7 @@ remove_local_scheduler( GlobalSchedulerState *state, std::unordered_map::iterator it) { - CHECK(it != state->local_schedulers.end()); + RAY_CHECK(it != state->local_schedulers.end()); DBClientID local_scheduler_id = it->first; it = state->local_schedulers.erase(it); @@ -264,8 +264,8 @@ remove_local_scheduler( */ void process_new_db_client(DBClient *db_client, void *user_context) { GlobalSchedulerState *state = (GlobalSchedulerState *) user_context; - std::string id_string = db_client->id.hex(); - LOG_DEBUG("db client table callback for db client = %s", id_string.c_str()); + RAY_LOG(DEBUG) << "db client table callback for db client = " + << db_client->id; if (strncmp(db_client->client_type.c_str(), "local_scheduler", strlen("local_scheduler")) == 0) { bool local_scheduler_present = @@ -304,15 +304,14 @@ void object_table_subscribe_callback(ObjectID object_id, void *user_context) { /* Extract global scheduler state from the callback context. */ GlobalSchedulerState *state = (GlobalSchedulerState *) user_context; - std::string id_string = object_id.hex(); - LOG_DEBUG("object table subscribe callback for OBJECT = %s", - id_string.c_str()); + RAY_LOG(DEBUG) << "object table subscribe callback for OBJECT = " + << object_id; const std::vector managers = db_client_table_get_ip_addresses(state->db, manager_ids); - LOG_DEBUG("\tManagers<%lu>:", managers.size()); + RAY_LOG(DEBUG) << "\tManagers<" << managers.size() << ">:"; for (size_t i = 0; i < managers.size(); i++) { - LOG_DEBUG("\t\t%s", managers[i].c_str()); + RAY_LOG(DEBUG) << "\t\t" << managers[i]; } if (state->scheduler_object_info_table.find(object_id) == @@ -322,12 +321,11 @@ void object_table_subscribe_callback(ObjectID object_id, state->scheduler_object_info_table[object_id]; obj_info_entry.data_size = data_size; - id_string = object_id.hex(); - LOG_DEBUG("New object added to object_info_table with id = %s", - id_string.c_str()); - LOG_DEBUG("\tmanager locations:"); + RAY_LOG(DEBUG) << "New object added to object_info_table with id = " + << object_id; + RAY_LOG(DEBUG) << "\tmanager locations:"; for (size_t i = 0; i < managers.size(); i++) { - LOG_DEBUG("\t\t%s", managers[i].c_str()); + RAY_LOG(DEBUG) << "\t\t" << managers[i]; } } @@ -347,12 +345,10 @@ void local_scheduler_table_handler(DBClientID client_id, /* Extract global scheduler state from the callback context. */ GlobalSchedulerState *state = (GlobalSchedulerState *) user_context; ARROW_UNUSED(state); - std::string id_string = client_id.hex(); - LOG_DEBUG("Local scheduler heartbeat from db_client_id %s", - id_string.c_str()); - LOG_DEBUG( - "total workers = %d, task queue length = %d, available workers = %d", - info.total_num_workers, info.task_queue_length, info.available_workers); + RAY_LOG(DEBUG) << "Local scheduler heartbeat from db_client_id " << client_id; + RAY_LOG(DEBUG) << "total workers = " << info.total_num_workers + << ", task queue length = " << info.task_queue_length + << ", available workers = " << info.available_workers; /* Update the local scheduler info struct. */ auto it = state->local_schedulers.find(client_id); @@ -371,7 +367,8 @@ void local_scheduler_table_handler(DBClientID client_id, local_scheduler.info = info; } } else { - LOG_WARN("client_id didn't match any cached local scheduler entries"); + RAY_LOG(WARNING) << "client_id didn't match any cached local scheduler " + << "entries"; } } @@ -408,8 +405,8 @@ int heartbeat_timeout_handler(event_loop *loop, timer_id id, void *context) { while (it != state->local_schedulers.end()) { if (it->second.num_heartbeats_missed >= RayConfig::instance().num_heartbeats_timeout()) { - LOG_WARN( - "Missed too many heartbeats from local scheduler, marking as dead."); + RAY_LOG(WARNING) << "Missed too many heartbeats from local scheduler, " + << "marking as dead."; /* Notify others by updating the global state. */ db_client_table_remove(state->db, it->second.id, NULL, NULL, NULL); /* Remove the scheduler from the local state. The call to @@ -484,22 +481,20 @@ int main(int argc, char *argv[]) { node_ip_address = optarg; break; default: - LOG_ERROR("unknown option %c", c); - exit(-1); + RAY_LOG(FATAL) << "unknown option " << c; } } char redis_primary_addr[16]; - int redis_primary_port; + int redis_primary_port = -1; if (!redis_primary_addr_port || parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr, &redis_primary_port) == -1) { - LOG_FATAL( - "specify the primary redis address like 127.0.0.1:6379 with the -r " - "switch"); + RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 " + << "with the -r switch"; } if (!node_ip_address) { - LOG_FATAL("specify the node IP address with the -h switch"); + RAY_LOG(FATAL) << "specify the node IP address with the -h switch"; } start_server(node_ip_address, redis_primary_addr, redis_primary_port); } diff --git a/src/global_scheduler/global_scheduler_algorithm.cc b/src/global_scheduler/global_scheduler_algorithm.cc index 764f90ca0..c7ba0f7d0 100644 --- a/src/global_scheduler/global_scheduler_algorithm.cc +++ b/src/global_scheduler/global_scheduler_algorithm.cc @@ -55,7 +55,7 @@ int64_t locally_available_data_size(const GlobalSchedulerState *state, * local scheduler. */ int64_t task_data_size = 0; - CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1); + RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1); const std::string &plasma_manager = state->local_scheduler_plasma_map.at(local_scheduler_id); @@ -120,8 +120,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state, GlobalSchedulerPolicyState *policy_state, Task *task) { TaskSpec *task_spec = Task_task_execution_spec(task)->Spec(); - CHECKM(task_spec != NULL, - "task wait handler encounted a task with NULL spec"); + RAY_CHECK(task_spec != NULL) + << "task wait handler encounted a task with NULL spec"; std::vector feasible_nodes; @@ -136,10 +136,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state, } if (feasible_nodes.size() == 0) { - std::string id_string = Task_task_id(task).hex(); - LOG_ERROR( - "Infeasible task. No nodes satisfy hard constraints for task = %s", - id_string.c_str()); + RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for " + << "task = " << Task_task_id(task); return false; } @@ -148,8 +146,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state, std::uniform_int_distribution<> dis(0, feasible_nodes.size() - 1); DBClientID local_scheduler_id = feasible_nodes[dis(policy_state->getRandomGenerator())]; - CHECKM(!local_scheduler_id.is_nil(), - "Task is feasible, but doesn't have a local scheduler assigned."); + RAY_CHECK(!local_scheduler_id.is_nil()) + << "Task is feasible, but doesn't have a local scheduler assigned."; // A local scheduler ID was found, so assign the task. assign_task_to_local_scheduler(state, task, local_scheduler_id); return true; @@ -161,15 +159,15 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state, TaskSpec *task_spec = Task_task_execution_spec(task)->Spec(); int64_t curtime = current_time_ms(); - CHECKM(task_spec != NULL, - "task wait handler encounted a task with NULL spec"); + RAY_CHECK(task_spec != NULL) + << "task wait handler encounted a task with NULL spec"; // For tasks already seen by the global scheduler (spillback > 1), // adjust scheduled task counts for the source local scheduler. if (task->execution_spec->SpillbackCount() > 1) { auto it = state->local_schedulers.find(task->local_scheduler_id); // Task's previous local scheduler must be present and known. - CHECK(it != state->local_schedulers.end()); + RAY_CHECK(it != state->local_schedulers.end()); LocalScheduler &src_local_scheduler = it->second; src_local_scheduler.num_recent_tasks_sent -= 1; } @@ -178,12 +176,11 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state, // Go through all the nodes, calculate the score for each, pick max score. double best_local_scheduler_score = INT32_MIN; - CHECKM(best_local_scheduler_score < 0, - "We might have a floating point underflow"); - std::string id_string_fromlocalsched = task->local_scheduler_id.hex(); - LOG_INFO("ct[%" PRId64 "] task from %s spillback %d", curtime, - id_string_fromlocalsched.c_str(), - task->execution_spec->SpillbackCount()); + RAY_CHECK(best_local_scheduler_score < 0) + << "We might have a floating point underflow"; + RAY_LOG(INFO) << "ct[" << curtime << "] task from " + << task->local_scheduler_id << " spillback " + << task->execution_spec->SpillbackCount(); // The best node to send this task. DBClientID best_local_scheduler_id = DBClientID::nil(); @@ -200,14 +197,13 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state, if (task->local_scheduler_id == scheduler->id) { continue; } - std::string id_string = scheduler->id.hex(); task_feasible = true; // This node satisfies the hard capacity constraint. Calculate its score. double score = -1 * calculate_cost_pending(state, scheduler, task_spec); - LOG_INFO("ct[%" PRId64 "][%s][q%d][w%d]: score %f bestscore %f\n", curtime, - id_string.c_str(), scheduler->info.task_queue_length, - scheduler->info.available_workers, score, - best_local_scheduler_score); + RAY_LOG(INFO) << "ct[" << curtime << "][" << scheduler->id << "][q" + << scheduler->info.task_queue_length << "][w" + << scheduler->info.available_workers << "]: score " << score + << " bestscore " << best_local_scheduler_score; if (score >= best_local_scheduler_score) { best_local_scheduler_score = score; best_local_scheduler_id = scheduler->id; @@ -215,16 +211,14 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state, } if (!task_feasible) { - std::string id_string = Task_task_id(task).hex(); - LOG_ERROR( - "Infeasible task. No nodes satisfy hard constraints for task = %s", - id_string.c_str()); + RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for " + << "task = " << Task_task_id(task); // TODO(atumanov): propagate this error to the task's driver and/or // cache the task in case new local schedulers satisfy it in the future. return false; } - CHECKM(!best_local_scheduler_id.is_nil(), - "Task is feasible, but doesn't have a local scheduler assigned."); + RAY_CHECK(!best_local_scheduler_id.is_nil()) + << "Task is feasible, but doesn't have a local scheduler assigned."; // A local scheduler ID was found, so assign the task. assign_task_to_local_scheduler(state, task, best_local_scheduler_id); return true; diff --git a/src/local_scheduler/local_scheduler.cc b/src/local_scheduler/local_scheduler.cc index d707dff7d..2ae75f0fe 100644 --- a/src/local_scheduler/local_scheduler.cc +++ b/src/local_scheduler/local_scheduler.cc @@ -73,12 +73,12 @@ void kill_worker(LocalSchedulerState *state, bool suppress_warning) { /* Erase the local scheduler's reference to the worker. */ auto it = std::find(state->workers.begin(), state->workers.end(), worker); - CHECK(it != state->workers.end()); + RAY_CHECK(it != state->workers.end()); state->workers.erase(it); /* Make sure that we removed the worker. */ it = std::find(state->workers.begin(), state->workers.end(), worker); - CHECK(it == state->workers.end()); + RAY_CHECK(it == state->workers.end()); /* Release any resources held by the worker. It's important to do this before * calling handle_worker_removed and handle_actor_worker_disconnect because @@ -121,7 +121,7 @@ void kill_worker(LocalSchedulerState *state, force_kill_worker, (void *) worker); free_worker = false; } - LOG_DEBUG("Killed worker with pid %d", worker->pid); + RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid; } /* If this worker is still running a task and we aren't cleaning up, push an @@ -149,7 +149,7 @@ void kill_worker(LocalSchedulerState *state, } } - LOG_DEBUG("Killed worker with pid %d", worker->pid); + RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid; if (free_worker) { /* Clean up the client socket after killing the worker so that the worker * can't receive the SIGPIPE before exiting. */ @@ -173,7 +173,8 @@ void LocalSchedulerState_free(LocalSchedulerState *state) { for (auto const &worker_pid : state->child_pids) { kill(worker_pid, SIGKILL); waitpid(worker_pid, NULL, 0); - LOG_INFO("Killed worker pid %d which hadn't started yet.", worker_pid); + RAY_LOG(INFO) << "Killed worker pid " << worker_pid + << " which hadn't started yet."; } /* Kill any registered workers. */ @@ -236,19 +237,19 @@ void start_worker(LocalSchedulerState *state, bool reconstruct) { /* Non-actors can't be started in reconstruct mode. */ if (actor_id.is_nil()) { - CHECK(!reconstruct); + RAY_CHECK(!reconstruct); } /* We can't start a worker if we don't have the path to the worker script. */ if (state->config.start_worker_command == NULL) { - LOG_DEBUG( - "No valid command to start worker provided. Cannot start worker."); + RAY_LOG(DEBUG) << "No valid command to start worker provided. Cannot start " + << "worker."; return; } /* Launch the process to create the worker. */ pid_t pid = fork(); if (pid != 0) { state->child_pids.push_back(pid); - LOG_DEBUG("Started worker with pid %d", pid); + RAY_LOG(DEBUG) << "Started worker with pid " << pid; return; } @@ -279,7 +280,7 @@ void start_worker(LocalSchedulerState *state, execvp(command_vector[0], (char *const *) command_vector.data()); LocalSchedulerState_free(state); - LOG_FATAL("Failed to start worker"); + RAY_LOG(FATAL) << "Failed to start worker"; } /** @@ -320,7 +321,7 @@ const char **parse_command(const char *command) { } free(command_copy); - CHECK(num_args == i); + RAY_CHECK(num_args == i); return command_args; } @@ -345,9 +346,8 @@ LocalSchedulerState *LocalSchedulerState_init( state->config.start_worker_command = NULL; } if (start_worker_command == NULL) { - LOG_WARN( - "No valid command to start a worker provided, local scheduler will not " - "start any workers."); + RAY_LOG(WARNING) << "No valid command to start a worker provided, local " + << "scheduler will not start any workers."; } state->config.global_scheduler_exists = global_scheduler_exists; @@ -443,14 +443,14 @@ void resource_sanity_checks(LocalSchedulerState *state, const std::string resource_name = resource_pair.first; double resource_quantity = resource_pair.second; - CHECK(state->dynamic_resources[resource_name] <= - state->static_resources[resource_name]); + RAY_CHECK(state->dynamic_resources[resource_name] <= + state->static_resources[resource_name]); if (resource_name != std::string("CPU")) { - CHECK(state->dynamic_resources[resource_name] >= 0); + RAY_CHECK(state->dynamic_resources[resource_name] >= 0); } - CHECK(resource_quantity >= 0); - CHECK(resource_quantity <= state->static_resources[resource_name]); + RAY_CHECK(resource_quantity >= 0); + RAY_CHECK(resource_quantity <= state->static_resources[resource_name]); } } @@ -468,8 +468,8 @@ void acquire_resources( if (resource_name == std::string("GPU")) { if (resource_quantity != 0) { // Make sure that the worker isn't using any GPUs already. - CHECK(worker->gpus_in_use.size() == 0); - CHECK(state->available_gpus.size() >= resource_quantity); + RAY_CHECK(worker->gpus_in_use.size() == 0); + RAY_CHECK(state->available_gpus.size() >= resource_quantity); // Reserve GPUs for the worker. for (int i = 0; i < resource_quantity; i++) { worker->gpus_in_use.push_back(state->available_gpus.back()); @@ -480,11 +480,11 @@ void acquire_resources( // Do bookkeeping for general resource types. if (resource_name != std::string("CPU")) { - CHECK(state->dynamic_resources[resource_name] >= resource_quantity); + RAY_CHECK(state->dynamic_resources[resource_name] >= resource_quantity); } state->dynamic_resources[resource_name] -= resource_quantity; if (resource_name == std::string("CPU")) { - CHECK(worker->resources_in_use[resource_name] == 0); + RAY_CHECK(worker->resources_in_use[resource_name] == 0); } worker->resources_in_use[resource_name] += resource_quantity; } @@ -504,7 +504,7 @@ void release_resources( // Do some special handling for GPU resources. if (resource_name == std::string("GPU")) { if (resource_quantity != 0) { - CHECK(resource_quantity == worker->gpus_in_use.size()); + RAY_CHECK(resource_quantity == worker->gpus_in_use.size()); // Move the GPU IDs the worker was using back to the local scheduler. for (auto const &gpu_id : worker->gpus_in_use) { state->available_gpus.push_back(gpu_id); @@ -515,7 +515,7 @@ void release_resources( // Do bookkeeping for general resources types. if (resource_name == std::string("CPU")) { - CHECK(resource_quantity == worker->resources_in_use[resource_name]); + RAY_CHECK(resource_quantity == worker->resources_in_use[resource_name]); } state->dynamic_resources[resource_name] += resource_quantity; worker->resources_in_use[resource_name] -= resource_quantity; @@ -542,14 +542,14 @@ void assign_task_to_worker(LocalSchedulerState *state, // non-CPU resources (in particular, GPUs) should already have been acquired // by the actor worker. if (!worker->actor_id.is_nil()) { - CHECK(required_resources.size() == 1); - CHECK(required_resources.count("CPU") == 1); + RAY_CHECK(required_resources.size() == 1); + RAY_CHECK(required_resources.count("CPU") == 1); } - CHECK(worker->actor_id == TaskSpec_actor_id(spec)); + RAY_CHECK(worker->actor_id == TaskSpec_actor_id(spec)); /* Make sure the driver for this task is still alive. */ WorkerID driver_id = TaskSpec_driver_id(spec); - CHECK(is_driver_alive(state, driver_id)); + RAY_CHECK(is_driver_alive(state, driver_id)); /* Construct a flatbuffer object to send to the worker. */ flatbuffers::FlatBufferBuilder fbb; @@ -563,12 +563,10 @@ void assign_task_to_worker(LocalSchedulerState *state, if (errno == EPIPE || errno == EBADF) { /* Something went wrong, so kill the worker. */ kill_worker(state, worker, false, false); - LOG_WARN( - "Failed to give task to worker on fd %d. The client may have hung " - "up.", - worker->sock); + RAY_LOG(WARNING) << "Failed to give task to worker on fd " << worker->sock + << ". The client may have hung up."; } else { - LOG_FATAL("Failed to give task to client on fd %d.", worker->sock); + RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock; } } @@ -596,15 +594,15 @@ void finish_task(LocalSchedulerState *state, LocalSchedulerClient *worker) { if (worker->task_in_progress != NULL) { TaskSpec *spec = Task_task_execution_spec(worker->task_in_progress)->Spec(); /* Return dynamic resources back for the task in progress. */ - CHECK(worker->resources_in_use["CPU"] == - TaskSpec_get_required_resource(spec, "CPU")); + RAY_CHECK(worker->resources_in_use["CPU"] == + TaskSpec_get_required_resource(spec, "CPU")); if (worker->actor_id.is_nil()) { - CHECK(worker->gpus_in_use.size() == - TaskSpec_get_required_resource(spec, "GPU")); + RAY_CHECK(worker->gpus_in_use.size() == + TaskSpec_get_required_resource(spec, "GPU")); release_resources(state, worker, worker->resources_in_use); } else { // Actor tasks should only specify CPU requirements. - CHECK(0 == TaskSpec_get_required_resource(spec, "GPU")); + RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU")); std::unordered_map cpu_resources; cpu_resources["CPU"] = worker->resources_in_use["CPU"]; std::unordered_map resources_to_release = @@ -641,8 +639,8 @@ void process_plasma_notification(event_loop *loop, if (!notification) { /* The store has closed the socket. */ LocalSchedulerState_free(state); - LOG_FATAL( - "Lost connection to the plasma store, local scheduler is exiting!"); + RAY_LOG(FATAL) << "Lost connection to the plasma store, local scheduler is " + << "exiting!"; } auto object_info = flatbuffers::GetRoot(notification); ObjectID object_id = from_flatbuf(*object_info->object_id()); @@ -773,8 +771,8 @@ void reconstruct_evicted_result_lookup_callback(ObjectID reconstruct_object_id, TaskID task_id, bool is_put, void *user_context) { - CHECKM(!task_id.is_nil(), - "No task information found for object during reconstruction"); + RAY_CHECK(!task_id.is_nil()) + << "No task information found for object during reconstruction"; LocalSchedulerState *state = (LocalSchedulerState *) user_context; task_table_test_and_update_callback done_callback; @@ -820,9 +818,8 @@ void reconstruct_failed_result_lookup_callback(ObjectID reconstruct_object_id, * after this lookup returns, possibly due to concurrent clients. In most * cases, this is okay because the initial execution is probably still * pending, so for now, we log a warning and suppress reconstruction. */ - LOG_WARN( - "No task information found for object during reconstruction (no object " - "entry yet)"); + RAY_LOG(WARNING) << "No task information found for object during " + << "reconstruction (no object entry yet)"; return; } LocalSchedulerState *state = (LocalSchedulerState *) user_context; @@ -852,7 +849,7 @@ void reconstruct_object_lookup_callback( bool never_created, const std::vector &manager_ids, void *user_context) { - LOG_DEBUG("Manager count was %lu", manager_ids.size()); + RAY_LOG(DEBUG) << "Manager count was " << manager_ids.size(); /* Only continue reconstruction if we find that the object doesn't exist on * any nodes. NOTE: This codepath is not responsible for checking if the * object table entry is up-to-date. */ @@ -887,14 +884,14 @@ void reconstruct_object_lookup_callback( void reconstruct_object(LocalSchedulerState *state, ObjectID reconstruct_object_id) { - LOG_DEBUG("Starting reconstruction"); + RAY_LOG(DEBUG) << "Starting reconstruction"; /* If the object is locally available, no need to reconstruct. */ if (object_locally_available(state->algorithm_state, reconstruct_object_id)) { return; } /* Determine if reconstruction is necessary by checking if the object exists * on a node. */ - CHECK(state->db != NULL); + RAY_CHECK(state->db != NULL); object_table_lookup(state->db, reconstruct_object_id, NULL, reconstruct_object_lookup_callback, (void *) state); } @@ -912,13 +909,12 @@ void send_client_register_reply(LocalSchedulerState *state, if (errno == EPIPE || errno == EBADF || errno == ECONNRESET) { /* Something went wrong, so kill the worker. */ kill_worker(state, worker, false, false); - LOG_WARN( - "Failed to give send register client reply to worker on fd %d. The " - "client may have hung up.", - worker->sock); + RAY_LOG(WARNING) << "Failed to give send register client reply to worker " + << "on fd " << worker->sock + << ". The client may have hung up."; } else { - LOG_FATAL("Failed to send register client reply to client on fd %d.", - worker->sock); + RAY_LOG(FATAL) << "Failed to send register client reply to client on fd " + << worker->sock; } } } @@ -927,10 +923,10 @@ void handle_client_register(LocalSchedulerState *state, LocalSchedulerClient *worker, const RegisterClientRequest *message) { /* Make sure this worker hasn't already registered. */ - CHECK(!worker->registered); + RAY_CHECK(!worker->registered); worker->registered = true; worker->is_worker = message->is_worker(); - CHECK(worker->client_id.is_nil()); + RAY_CHECK(worker->client_id.is_nil()); worker->client_id = from_flatbuf(*message->client_id()); /* Register the worker or driver. */ @@ -942,11 +938,11 @@ void handle_client_register(LocalSchedulerState *state, if (!actor_id.is_nil()) { /* Make sure that the local scheduler is aware that it is responsible for * this actor. */ - CHECK(state->actor_mapping.count(actor_id) == 1); - CHECK(state->actor_mapping[actor_id].local_scheduler_id == - get_db_client_id(state->db)); + RAY_CHECK(state->actor_mapping.count(actor_id) == 1); + RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id == + get_db_client_id(state->db)); /* Update the worker struct with this actor ID. */ - CHECK(worker->actor_id.is_nil()); + RAY_CHECK(worker->actor_id.is_nil()); worker->actor_id = actor_id; /* Let the scheduling algorithm process the presence of this new * worker. */ @@ -965,16 +961,16 @@ void handle_client_register(LocalSchedulerState *state, /* TODO(rkn): This means that an actor wants to register but that there * aren't enough GPUs for it. We should queue this request, and reply to * the actor when GPUs become available. */ - LOG_WARN( - "Attempting to create an actor but there aren't enough available " - "GPUs. We'll start the worker anyway without any GPUs, but this is " - "incorrect behavior."); + RAY_LOG(WARNING) << "Attempting to create an actor but there aren't " + << "enough available GPUs. We'll start the worker " + << "anyway without any GPUs, but this is incorrect " + << "behavior."; } } /* Register worker process id with the scheduler. */ /* Determine if this worker is one of our child processes. */ - LOG_DEBUG("PID is %d", worker->pid); + RAY_LOG(DEBUG) << "PID is " << worker->pid; auto it = std::find(state->child_pids.begin(), state->child_pids.end(), worker->pid); if (it != state->child_pids.end()) { @@ -983,7 +979,7 @@ void handle_client_register(LocalSchedulerState *state, * cleanup. */ worker->is_child = true; state->child_pids.erase(it); - LOG_DEBUG("Found matching child pid %d", worker->pid); + RAY_LOG(DEBUG) << "Found matching child pid " << worker->pid; } /* If the worker is an actor that corresponds to a driver that has been @@ -1018,16 +1014,17 @@ void handle_driver_removed_callback(WorkerID driver_id, void *user_context) { if (!actor_id.is_nil()) { /* This is an actor. */ - CHECK(state->actor_mapping.count(actor_id) == 1); + RAY_CHECK(state->actor_mapping.count(actor_id) == 1); if (state->actor_mapping[actor_id].driver_id == driver_id) { /* This actor was created by the removed driver, so kill the actor. */ - LOG_DEBUG("Killing an actor for a removed driver."); + RAY_LOG(DEBUG) << "Killing an actor for a removed driver."; kill_worker(state, *it, false, true); } } else if (task != NULL) { TaskSpec *spec = Task_task_execution_spec(task)->Spec(); if (TaskSpec_driver_id(spec) == driver_id) { - LOG_DEBUG("Killing a worker executing a task for a removed driver."); + RAY_LOG(DEBUG) << "Killing a worker executing a task for a removed " + << "driver."; kill_worker(state, *it, false, true); } } @@ -1081,13 +1078,10 @@ void handle_get_actor_frontier(LocalSchedulerState *state, if (errno == EPIPE || errno == EBADF) { /* Something went wrong, so kill the worker. */ kill_worker(state, worker, false, false); - LOG_WARN( - "Failed to return actor frontier to worker on fd %d. The client may " - "have hung " - "up.", - worker->sock); + RAY_LOG(WARNING) << "Failed to return actor frontier to worker on fd " + << worker->sock << ". The client may have hung up."; } else { - LOG_FATAL("Failed to give task to client on fd %d.", worker->sock); + RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock; } } } @@ -1124,7 +1118,7 @@ void process_message(event_loop *loop, read_vector(client_sock, &type, state->input_buffer); uint8_t *input = state->input_buffer.data(); - LOG_DEBUG("New event of type %" PRId64, type); + RAY_LOG(DEBUG) << "New event of type " << type; switch (type) { case MessageType_SubmitTask: { @@ -1159,7 +1153,7 @@ void process_message(event_loop *loop, } break; case MessageType_DisconnectClient: { finish_task(state, worker); - CHECK(!worker->disconnected); + RAY_CHECK(!worker->disconnected); worker->disconnected = true; /* If the disconnected worker was not an actor, start a new worker to make * sure there are enough workers in the pool. */ @@ -1217,7 +1211,7 @@ void process_message(event_loop *loop, reconstruct_object(state, from_flatbuf(*message->object_id())); } break; case DISCONNECT_CLIENT: { - LOG_DEBUG("Disconnecting client on fd %d", client_sock); + RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock; handle_client_disconnect(state, worker); } break; case MessageType_NotifyUnblocked: { @@ -1225,7 +1219,7 @@ void process_message(event_loop *loop, if (worker->task_in_progress != NULL) { /* If the worker was executing a task (i.e. non-driver), update its * state to not blocked. */ - CHECK(worker->is_blocked); + RAY_CHECK(worker->is_blocked); worker->is_blocked = false; /* Lease back the CPU resources that the blocked worker needs (note that * it never released its GPU resources). TODO(swang): Leasing back the @@ -1264,16 +1258,15 @@ void process_message(event_loop *loop, } break; default: /* This code should be unreachable. */ - CHECK(0); + RAY_CHECK(0); } /* Print a warning if this method took too long. */ int64_t end_time = current_time_ms(); if (end_time - start_time > RayConfig::instance().max_time_for_handler_milliseconds()) { - LOG_WARN("process_message of type %" PRId64 " took %" PRId64 - " milliseconds.", - type, end_time - start_time); + RAY_LOG(WARNING) << "process_message of type " << type << " took " + << end_time - start_time << " milliseconds."; } } @@ -1302,7 +1295,7 @@ void new_client_connection(event_loop *loop, state->workers.push_back(worker); event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, worker); - LOG_DEBUG("new connection with fd %d", new_socket); + RAY_LOG(DEBUG) << "new connection with fd " << new_socket; } /* We need this code so we can clean up when we get a SIGTERM signal. */ @@ -1310,7 +1303,7 @@ void new_client_connection(event_loop *loop, LocalSchedulerState *g_state = NULL; void signal_handler(int signal) { - LOG_DEBUG("Signal was %d", signal); + RAY_LOG(DEBUG) << "Signal was " << signal; if (signal == SIGTERM) { /* NOTE(swang): This call removes the SIGTERM handler to ensure that we * free the local scheduler state at most once. If another SIGTERM is @@ -1338,7 +1331,7 @@ void handle_task_scheduled_callback(Task *original_task, * scheduling algorithm. */ WorkerID driver_id = TaskSpec_driver_id(spec); if (!is_driver_alive(state, driver_id)) { - LOG_DEBUG("Ignoring scheduled task for removed driver."); + RAY_LOG(DEBUG) << "Ignoring scheduled task for removed driver."; return; } @@ -1382,14 +1375,14 @@ void handle_actor_creation_callback(ActorID actor_id, * TODO(rkn): We will need to remove this check to handle the case where the * corresponding publish is retried and the case in which a task that * creates an actor is resubmitted due to fault tolerance. */ - CHECK(state->actor_mapping.count(actor_id) == 0); + RAY_CHECK(state->actor_mapping.count(actor_id) == 0); } else { /* In this case, the actor already exists. Check that the driver hasn't * changed but that the local scheduler has. */ auto it = state->actor_mapping.find(actor_id); - CHECK(it != state->actor_mapping.end()); - CHECK(it->second.driver_id == driver_id); - CHECK(!(it->second.local_scheduler_id == local_scheduler_id)); + RAY_CHECK(it != state->actor_mapping.end()); + RAY_CHECK(it->second.driver_id == driver_id); + RAY_CHECK(!(it->second.local_scheduler_id == local_scheduler_id)); /* If the actor was previously assigned to this local scheduler, kill the * actor. */ if (it->second.local_scheduler_id == get_db_client_id(state->db)) { @@ -1428,12 +1421,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) { /* Check that the last heartbeat was not sent too long ago. */ int64_t current_time = current_time_ms(); - CHECK(current_time >= state->previous_heartbeat_time); + RAY_CHECK(current_time >= state->previous_heartbeat_time); if (current_time - state->previous_heartbeat_time > RayConfig::instance().num_heartbeats_timeout() * RayConfig::instance().heartbeat_timeout_milliseconds()) { - LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.", - current_time - state->previous_heartbeat_time); + RAY_LOG(FATAL) << "The last heartbeat was sent " + << current_time - state->previous_heartbeat_time + << " milliseconds ago."; } state->previous_heartbeat_time = current_time; @@ -1579,11 +1573,12 @@ int main(int argc, char *argv[]) { num_workers_str = optarg; break; default: - LOG_FATAL("unknown option %c", c); + RAY_LOG(FATAL) << "unknown option " << c; } } if (!static_resource_list) { - LOG_FATAL("please specify a static resource list with the -c switch"); + RAY_LOG(FATAL) << "please specify a static resource list with the -c " + << "switch"; } // Parse the resource list. std::istringstream resource_string(static_resource_list); @@ -1591,27 +1586,28 @@ int main(int argc, char *argv[]) { std::string resource_quantity; while (std::getline(resource_string, resource_name, ',')) { - CHECK(std::getline(resource_string, resource_quantity, ',')); + RAY_CHECK(std::getline(resource_string, resource_quantity, ',')); // TODO(rkn): The line below could throw an exception. What should we do // about this? static_resource_conf[resource_name] = std::stod(resource_quantity); } if (!scheduler_socket_name) { - LOG_FATAL("please specify socket for incoming connections with -s switch"); + RAY_LOG(FATAL) << "please specify socket for incoming connections with " + << "-s switch"; } if (!plasma_store_socket_name) { - LOG_FATAL( - "please specify socket for connecting to Plasma store with -p switch"); + RAY_LOG(FATAL) << "please specify socket for connecting to Plasma store " + << "with -p switch"; } if (!node_ip_address) { - LOG_FATAL("please specify the node IP address with -h switch"); + RAY_LOG(FATAL) << "please specify the node IP address with -h switch"; } int num_workers = 0; if (num_workers_str) { num_workers = strtol(num_workers_str, NULL, 10); if (num_workers < 0) { - LOG_FATAL("Number of workers must be nonnegative"); + RAY_LOG(FATAL) << "Number of workers must be nonnegative"; } } @@ -1621,9 +1617,9 @@ int main(int argc, char *argv[]) { /* Start the local scheduler without connecting to Redis. In this case, all * submitted tasks will be queued and scheduled locally. */ if (plasma_manager_socket_name) { - LOG_FATAL( - "if a plasma manager socket name is provided with the -m switch, " - "then a redis address must be provided with the -r switch"); + RAY_LOG(FATAL) << "if a plasma manager socket name is provided with the " + << "-m switch, then a redis address must be provided with " + << "the -r switch"; } } else { char redis_primary_addr[16]; @@ -1631,14 +1627,12 @@ int main(int argc, char *argv[]) { /* Parse the primary Redis address into an IP address and a port. */ if (parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr, &redis_primary_port) == -1) { - LOG_FATAL( - "if a redis address is provided with the -r switch, it should be " - "formatted like 127.0.0.1:6379"); + RAY_LOG(FATAL) << "if a redis address is provided with the -r switch, it " + << "should be formatted like 127.0.0.1:6379"; } if (!plasma_manager_socket_name) { - LOG_FATAL( - "please specify socket for connecting to Plasma manager with -m " - "switch"); + RAY_LOG(FATAL) << "please specify socket for connecting to Plasma " + << "manager with -m switch"; } redis_addr = redis_primary_addr; redis_port = redis_primary_port; diff --git a/src/local_scheduler/local_scheduler_algorithm.cc b/src/local_scheduler/local_scheduler_algorithm.cc index 880c7cf5c..e5b146e17 100644 --- a/src/local_scheduler/local_scheduler_algorithm.cc +++ b/src/local_scheduler/local_scheduler_algorithm.cc @@ -224,25 +224,23 @@ void create_actor(SchedulingAlgorithmState *algorithm_state, entry.task_queue = new std::list(); entry.worker = worker; entry.worker_available = false; - CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0) + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0); algorithm_state->local_actor_infos[actor_id] = entry; /* Log some useful information about the actor that we created. */ - std::string id_string = actor_id.hex(); - LOG_DEBUG("Creating actor with ID %s.", id_string.c_str()); + RAY_LOG(DEBUG) << "Creating actor with ID " << actor_id; } void remove_actor(SchedulingAlgorithmState *algorithm_state, ActorID actor_id) { - CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); LocalActorInfo &entry = algorithm_state->local_actor_infos.find(actor_id)->second; /* Log some useful information about the actor that we're removing. */ - std::string id_string = actor_id.hex(); size_t count = entry.task_queue->size(); if (count > 0) { - LOG_WARN("Removing actor with ID %s and %lld remaining tasks.", - id_string.c_str(), (long long) count); + RAY_LOG(WARNING) << "Removing actor with ID " << actor_id << " and " + << count << " remaining tasks."; } entry.task_queue->clear(); @@ -266,7 +264,7 @@ bool dispatch_actor_task(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, ActorID actor_id) { /* Make sure this worker actually is an actor. */ - CHECK(!actor_id.is_nil()); + RAY_CHECK(!actor_id.is_nil()); /* Return if this actor doesn't have any pending tasks. */ if (algorithm_state->actors_with_pending_tasks.find(actor_id) == algorithm_state->actors_with_pending_tasks.end()) { @@ -278,16 +276,16 @@ bool dispatch_actor_task(LocalSchedulerState *state, * scheduler. This should be rare. */ return false; } - CHECK(state->actor_mapping[actor_id].local_scheduler_id == - get_db_client_id(state->db)); + RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id == + get_db_client_id(state->db)); /* Get the local actor entry for this actor. */ - CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); LocalActorInfo &entry = algorithm_state->local_actor_infos.find(actor_id)->second; /* There should be some queued tasks for this actor. */ - CHECK(!entry.task_queue->empty()); + RAY_CHECK(!entry.task_queue->empty()); /* If the worker is not available, we cannot assign a task to it. */ if (!entry.worker_available) { return false; @@ -304,7 +302,7 @@ bool dispatch_actor_task(LocalSchedulerState *state, } /* If there are not enough resources available, we cannot assign the task. */ - CHECK(0 == TaskSpec_get_required_resource(spec, "GPU")); + RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU")); if (!check_dynamic_resources(state, TaskSpec_get_required_resources(spec))) { return false; } @@ -428,7 +426,7 @@ void insert_actor_task_queue(LocalSchedulerState *state, * not been created yet. The correct worker struct will be inserted when the * actor worker connects to the local scheduler. */ create_actor(algorithm_state, actor_id, NULL); - CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); } LocalActorInfo &entry = algorithm_state->local_actor_infos.find(actor_id)->second; @@ -437,7 +435,7 @@ void insert_actor_task_queue(LocalSchedulerState *state, } /* Extend the frontier to include the new handle. */ if (entry.frontier_dependencies.count(task_handle_id) == 0) { - CHECK(task_entry.ExecutionDependencies().size() == 1); + RAY_CHECK(task_entry.ExecutionDependencies().size() == 1); entry.frontier_dependencies[task_handle_id] = task_entry.ExecutionDependencies()[1]; } @@ -448,9 +446,9 @@ void insert_actor_task_queue(LocalSchedulerState *state, * check will fail if the fault-tolerance mechanism resubmits a task on an * actor. */ if (task_counter < entry.task_counters[task_handle_id]) { - LOG_INFO( - "A task that has already been executed has been resubmitted, so we " - "are ignoring it. This should only happen during reconstruction."); + RAY_LOG(INFO) << "A task that has already been executed has been " + << "resubmitted, so we are ignoring it. This should only " + << "happen during reconstruction."; return; } @@ -466,9 +464,8 @@ void insert_actor_task_queue(LocalSchedulerState *state, } /* A duplicate task submitted by the same handle. */ if (task_counter == TaskSpec_actor_counter(pending_task_spec)) { - LOG_INFO( - "A task was resubmitted, so we are ignoring it. This should only " - "happen during reconstruction."); + RAY_LOG(INFO) << "A task was resubmitted, so we are ignoring it. This " + << "should only happen during reconstruction."; return; } /* We found a task with the same handle ID and a greater task counter. */ @@ -501,7 +498,7 @@ void queue_actor_task(LocalSchedulerState *state, bool from_global_scheduler) { TaskSpec *spec = execution_spec.Spec(); ActorID actor_id = TaskSpec_actor_id(spec); - DCHECK(!actor_id.is_nil()); + RAY_CHECK(!actor_id.is_nil()); /* Update the task table. */ if (state->db != NULL) { @@ -567,10 +564,9 @@ void fetch_missing_dependency( /* TODO(swang): Local scheduler should also exit even if there are no * pending fetches. This could be done by subscribing to the db_client * table, or pinging the plasma manager in the heartbeat handler. */ - LOG_FATAL( - "Lost connection to the plasma manager, local scheduler is " - "exiting. Error: %s", - arrow_status.ToString().c_str()); + RAY_LOG(FATAL) << "Lost connection to the plasma manager, local " + << "scheduler is exiting. Error: " + << arrow_status.ToString(); } } /* Create an entry and add it to the list of active fetch requests to @@ -617,7 +613,7 @@ void fetch_missing_dependencies( } } } - CHECK(num_missing_dependencies > 0); + RAY_CHECK(num_missing_dependencies > 0); } /** @@ -700,7 +696,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) { LocalSchedulerState *state = (LocalSchedulerState *) context; /* Only try the fetches if we are connected to the object store manager. */ if (state->plasma_conn->get_manager_fd() == -1) { - LOG_INFO("Local scheduler is not connected to a object store manager"); + RAY_LOG(INFO) + << "Local scheduler is not connected to a object store manager"; return RayConfig::instance().local_scheduler_fetch_timeout_milliseconds(); } @@ -728,10 +725,9 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) { reinterpret_cast(&object_ids[j])); if (!arrow_status.ok()) { LocalSchedulerState_free(state); - LOG_FATAL( - "Lost connection to the plasma manager, local scheduler is exiting. " - "Error: %s", - arrow_status.ToString().c_str()); + RAY_LOG(FATAL) << "Lost connection to the plasma manager, local " + << "scheduler is exiting. Error: " + << arrow_status.ToString(); } } @@ -739,8 +735,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) { int64_t end_time = current_time_ms(); if (end_time - start_time > RayConfig::instance().max_time_for_handler_milliseconds()) { - LOG_WARN("fetch_object_timeout_handler took %" PRId64 " milliseconds.", - end_time - start_time); + RAY_LOG(WARNING) << "fetch_object_timeout_handler took " + << end_time - start_time << " milliseconds."; } /* Wait at least local_scheduler_fetch_timeout_milliseconds before running @@ -795,9 +791,8 @@ int reconstruct_object_timeout_handler(event_loop *loop, int64_t end_time = current_time_ms(); if (end_time - start_time > RayConfig::instance().max_time_for_handler_milliseconds()) { - LOG_WARN("reconstruct_object_timeout_handler took %" PRId64 - " milliseconds.", - end_time - start_time); + RAY_LOG(WARNING) << "reconstruct_object_timeout_handler took " + << end_time - start_time << " milliseconds."; } return RayConfig::instance() @@ -879,7 +874,7 @@ void dispatch_tasks(LocalSchedulerState *state, } /* Dispatch this task to an available worker and dequeue the task. */ - LOG_DEBUG("Dispatching task"); + RAY_LOG(DEBUG) << "Dispatching task"; /* Get the last available worker in the available worker queue. */ LocalSchedulerClient *worker = algorithm_state->available_workers.back(); /* Tell the available worker to execute the task. */ @@ -1004,15 +999,15 @@ void queue_waiting_task(LocalSchedulerState *state, if (task_counter != task_counters.end() && TaskSpec_actor_counter(spec) < task_counter->second) { /* If the task to queue has a lower task counter, do not queue it. */ - LOG_INFO( - "A task that has already been executed has been resubmitted, so we " - "are ignoring it. This should only happen during reconstruction."); + RAY_LOG(INFO) << "A task that has already been executed has been " + << "resubmitted, so we are ignoring it. This should only " + << "happen during reconstruction."; return; } } } - LOG_DEBUG("Queueing task in waiting queue"); + RAY_LOG(DEBUG) << "Queueing task in waiting queue"; auto it = queue_task(state, algorithm_state->waiting_task_queue, execution_spec, from_global_scheduler); fetch_missing_dependencies(state, algorithm_state, it); @@ -1033,7 +1028,7 @@ void queue_dispatch_task(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, TaskExecutionSpec &execution_spec, bool from_global_scheduler) { - LOG_DEBUG("Queueing task in dispatch queue"); + RAY_LOG(DEBUG) << "Queueing task in dispatch queue"; TaskSpec *spec = execution_spec.Spec(); if (TaskSpec_is_actor_task(spec)) { queue_actor_task(state, algorithm_state, execution_spec, @@ -1076,14 +1071,14 @@ void give_task_to_local_scheduler_retry(UniqueID id, void *user_data) { LocalSchedulerState *state = (LocalSchedulerState *) user_context; Task *task = (Task *) user_data; - CHECK(Task_state(task) == TASK_STATUS_SCHEDULED); + RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED); TaskExecutionSpec *execution_spec = Task_task_execution_spec(task); TaskSpec *spec = execution_spec->Spec(); - CHECK(TaskSpec_is_actor_task(spec)); + RAY_CHECK(TaskSpec_is_actor_task(spec)); ActorID actor_id = TaskSpec_actor_id(spec); - CHECK(state->actor_mapping.count(actor_id) == 1); + RAY_CHECK(state->actor_mapping.count(actor_id) == 1); if (state->actor_mapping[actor_id].local_scheduler_id == get_db_client_id(state->db)) { @@ -1113,11 +1108,11 @@ void give_task_to_local_scheduler(LocalSchedulerState *state, TaskExecutionSpec &execution_spec, DBClientID local_scheduler_id) { if (local_scheduler_id == get_db_client_id(state->db)) { - LOG_WARN("Local scheduler is trying to assign a task to itself."); + RAY_LOG(WARNING) << "Local scheduler is trying to assign a task to itself."; } - CHECK(state->db != NULL); + RAY_CHECK(state->db != NULL); /* Assign the task to the relevant local scheduler. */ - DCHECK(state->config.global_scheduler_exists); + RAY_CHECK(state->config.global_scheduler_exists); Task *task = Task_alloc(execution_spec, TASK_STATUS_SCHEDULED, local_scheduler_id); #if !RAY_USE_NEW_GCS @@ -1139,11 +1134,11 @@ void give_task_to_global_scheduler_retry(UniqueID id, void *user_data) { LocalSchedulerState *state = (LocalSchedulerState *) user_context; Task *task = (Task *) user_data; - CHECK(Task_state(task) == TASK_STATUS_WAITING); + RAY_CHECK(Task_state(task) == TASK_STATUS_WAITING); TaskExecutionSpec *execution_spec = Task_task_execution_spec(task); TaskSpec *spec = execution_spec->Spec(); - CHECK(!TaskSpec_is_actor_task(spec)); + RAY_CHECK(!TaskSpec_is_actor_task(spec)); give_task_to_global_scheduler(state, state->algorithm_state, *execution_spec); } @@ -1165,11 +1160,11 @@ void give_task_to_global_scheduler(LocalSchedulerState *state, return; } /* Pass on the task to the global scheduler. */ - DCHECK(state->config.global_scheduler_exists); + RAY_CHECK(state->config.global_scheduler_exists); Task *task = Task_alloc(execution_spec, TASK_STATUS_WAITING, get_db_client_id(state->db)); #if !RAY_USE_NEW_GCS - DCHECK(state->db != NULL); + RAY_CHECK(state->db != NULL); auto retryInfo = RetryInfo{ .num_retries = 0, // This value is unused. .timeout = 0, // This value is unused. @@ -1225,7 +1220,7 @@ void handle_actor_task_submitted(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, TaskExecutionSpec &execution_spec) { TaskSpec *task_spec = execution_spec.Spec(); - CHECK(TaskSpec_is_actor_task(task_spec)); + RAY_CHECK(TaskSpec_is_actor_task(task_spec)); ActorID actor_id = TaskSpec_actor_id(task_spec); if (state->actor_mapping.count(actor_id) == 0) { @@ -1284,8 +1279,8 @@ void handle_task_scheduled(LocalSchedulerState *state, /* This callback handles tasks that were assigned to this local scheduler by * the global scheduler, so we can safely assert that there is a connection to * the database. */ - DCHECK(state->db != NULL); - DCHECK(state->config.global_scheduler_exists); + RAY_CHECK(state->db != NULL); + RAY_CHECK(state->config.global_scheduler_exists); /* Push the task to the appropriate queue. */ queue_task_locally(state, algorithm_state, execution_spec, true); dispatch_tasks(state, algorithm_state); @@ -1298,24 +1293,24 @@ void handle_actor_task_scheduled(LocalSchedulerState *state, /* This callback handles tasks that were assigned to this local scheduler by * the global scheduler or by other workers, so we can safely assert that * there is a connection to the database. */ - DCHECK(state->db != NULL); - DCHECK(state->config.global_scheduler_exists); + RAY_CHECK(state->db != NULL); + RAY_CHECK(state->config.global_scheduler_exists); /* Check that the task is meant to run on an actor that this local scheduler * is responsible for. */ - DCHECK(TaskSpec_is_actor_task(spec)); + RAY_CHECK(TaskSpec_is_actor_task(spec)); ActorID actor_id = TaskSpec_actor_id(spec); if (state->actor_mapping.count(actor_id) == 1) { - DCHECK(state->actor_mapping[actor_id].local_scheduler_id == - get_db_client_id(state->db)); + RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id == + get_db_client_id(state->db)); } else { /* This means that an actor has been assigned to this local scheduler, and a * task for that actor has been received by this local scheduler, but this * local scheduler has not yet processed the notification about the actor * creation. This may be possible though should be very uncommon. If it does * happen, it's ok. */ - LOG_INFO( - "handle_actor_task_scheduled called on local scheduler but the " - "corresponding actor_map_entry is not present. This should be rare."); + RAY_LOG(INFO) << "handle_actor_task_scheduled called on local scheduler " + << "but the corresponding actor_map_entry is not present. " + << "This should be rare."; } /* Push the task to the appropriate queue. */ queue_task_locally(state, algorithm_state, execution_spec, true); @@ -1325,19 +1320,19 @@ void handle_actor_task_scheduled(LocalSchedulerState *state, void handle_worker_available(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, LocalSchedulerClient *worker) { - CHECK(worker->task_in_progress == NULL); + RAY_CHECK(worker->task_in_progress == NULL); /* Check that the worker isn't in the pool of available workers. */ - DCHECK(!worker_in_vector(algorithm_state->available_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker)); /* Check that the worker isn't in the list of blocked workers. */ - DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); /* If the worker was executing a task, it must have finished, so remove it * from the list of executing workers. If the worker is connecting for the * first time, it will not be in the list of executing workers. */ remove_worker_from_vector(algorithm_state->executing_workers, worker); /* Double check that we successfully removed the worker. */ - DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); /* Add worker to the list of available workers. */ algorithm_state->available_workers.push_back(worker); @@ -1350,7 +1345,7 @@ void handle_worker_removed(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, LocalSchedulerClient *worker) { /* Make sure this is not an actor. */ - CHECK(worker->actor_id.is_nil()); + RAY_CHECK(worker->actor_id.is_nil()); /* Make sure that we remove the worker at most once. */ int num_times_removed = 0; @@ -1360,24 +1355,24 @@ void handle_worker_removed(LocalSchedulerState *state, remove_worker_from_vector(algorithm_state->available_workers, worker); num_times_removed += removed_from_available; /* Double check that we actually removed the worker. */ - DCHECK(!worker_in_vector(algorithm_state->available_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker)); /* Remove the worker from executing workers, if it's there. */ bool removed_from_executing = remove_worker_from_vector(algorithm_state->executing_workers, worker); num_times_removed += removed_from_executing; /* Double check that we actually removed the worker. */ - DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); /* Remove the worker from blocked workers, if it's there. */ bool removed_from_blocked = remove_worker_from_vector(algorithm_state->blocked_workers, worker); num_times_removed += removed_from_blocked; /* Double check that we actually removed the worker. */ - DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); /* Make sure we removed the worker at most once. */ - CHECK(num_times_removed <= 1); + RAY_CHECK(num_times_removed <= 1); /* Attempt to dispatch some tasks because some resources may have freed up. */ dispatch_all_tasks(state, algorithm_state); @@ -1400,7 +1395,7 @@ void handle_actor_worker_disconnect(LocalSchedulerState *state, state->removed_actors.insert(worker->actor_id); - CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0); LocalActorInfo &entry = algorithm_state->local_actor_infos.find(worker->actor_id)->second; for (auto &task : *entry.task_queue) { @@ -1421,13 +1416,13 @@ void handle_actor_worker_available(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, LocalSchedulerClient *worker) { ActorID actor_id = worker->actor_id; - CHECK(!actor_id.is_nil()); + RAY_CHECK(!actor_id.is_nil()); /* Get the actor info for this worker. */ - CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1); LocalActorInfo &entry = algorithm_state->local_actor_infos.find(actor_id)->second; - CHECK(worker == entry.worker); - CHECK(!entry.worker_available); + RAY_CHECK(worker == entry.worker); + RAY_CHECK(!entry.worker_available); /* If an actor task was assigned, mark returned dummy object as locally * available. This is not added to the object table, so the update will be * invisible to other nodes. */ @@ -1446,10 +1441,11 @@ void handle_worker_blocked(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, LocalSchedulerClient *worker) { /* Find the worker in the list of executing workers. */ - CHECK(remove_worker_from_vector(algorithm_state->executing_workers, worker)); + RAY_CHECK( + remove_worker_from_vector(algorithm_state->executing_workers, worker)); /* Check that the worker isn't in the list of blocked workers. */ - DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker)); /* Add the worker to the list of blocked workers. */ algorithm_state->blocked_workers.push_back(worker); @@ -1471,10 +1467,11 @@ void handle_worker_unblocked(LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, LocalSchedulerClient *worker) { /* Find the worker in the list of blocked workers. */ - CHECK(remove_worker_from_vector(algorithm_state->blocked_workers, worker)); + RAY_CHECK( + remove_worker_from_vector(algorithm_state->blocked_workers, worker)); /* Check that the worker isn't in the list of executing workers. */ - DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); + RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker)); /* Add the worker to the list of executing workers. */ algorithm_state->executing_workers.push_back(worker); @@ -1499,7 +1496,7 @@ void handle_object_available(LocalSchedulerState *state, } /* Add the entry to the set of locally available objects. */ - CHECK(algorithm_state->local_objects.count(object_id) == 0); + RAY_CHECK(algorithm_state->local_objects.count(object_id) == 0); algorithm_state->local_objects[object_id] = entry; if (!entry.dependent_tasks.empty()) { @@ -1530,7 +1527,7 @@ void handle_object_removed(LocalSchedulerState *state, /* Remove the object from the set of locally available objects. */ SchedulingAlgorithmState *algorithm_state = state->algorithm_state; - CHECK(algorithm_state->local_objects.count(removed_object_id) == 1); + RAY_CHECK(algorithm_state->local_objects.count(removed_object_id) == 1); algorithm_state->local_objects.erase(removed_object_id); /* Track queued tasks that were dependent on this object. @@ -1544,7 +1541,7 @@ void handle_object_removed(LocalSchedulerState *state, it != algorithm_state->dispatch_task_queue->end();) { if (it->DependsOn(removed_object_id)) { /* This task was dependent on the removed object. */ - LOG_DEBUG("Moved task from dispatch queue back to waiting queue"); + RAY_LOG(DEBUG) << "Moved task from dispatch queue back to waiting queue"; algorithm_state->waiting_task_queue->push_back(std::move(*it)); /* Remove the task from the dispatch queue, but do not free the task * spec. */ @@ -1563,7 +1560,8 @@ void handle_object_removed(LocalSchedulerState *state, queue_it != actor_info.task_queue->end();) { if (queue_it->DependsOn(removed_object_id)) { /* This task was dependent on the removed object. */ - LOG_DEBUG("Moved task from actor dispatch queue back to waiting queue"); + RAY_LOG(DEBUG) << "Moved task from actor dispatch queue back to " + << "waiting queue"; algorithm_state->waiting_task_queue->push_back(std::move(*queue_it)); /* Remove the task from the dispatch queue, but do not free the task * spec. */ @@ -1667,16 +1665,16 @@ int num_dispatch_tasks(SchedulingAlgorithmState *algorithm_state) { void print_worker_info(const char *message, SchedulingAlgorithmState *algorithm_state) { - LOG_DEBUG("%s: %lu available, %lu executing, %lu blocked", message, - algorithm_state->available_workers.size(), - algorithm_state->executing_workers.size(), - algorithm_state->blocked_workers.size()); + RAY_LOG(DEBUG) << message << ": " << algorithm_state->available_workers.size() + << " available, " << algorithm_state->executing_workers.size() + << " executing, " << algorithm_state->blocked_workers.size() + << " blocked"; } std::unordered_map get_actor_task_counters(SchedulingAlgorithmState *algorithm_state, ActorID actor_id) { - CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); return algorithm_state->local_actor_infos[actor_id].task_counters; } @@ -1685,7 +1683,7 @@ void set_actor_task_counters( ActorID actor_id, const std::unordered_map &task_counters) { - CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); /* Overwrite the current task counters for the actor. This is necessary * during reconstruction when resuming from a checkpoint so that we can * resume the task frontier at the time that the checkpoint was saved. */ @@ -1731,7 +1729,7 @@ void set_actor_task_counters( std::unordered_map get_actor_frontier( SchedulingAlgorithmState *algorithm_state, ActorID actor_id) { - CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); return algorithm_state->local_actor_infos[actor_id].frontier_dependencies; } @@ -1741,7 +1739,7 @@ void set_actor_frontier( ActorID actor_id, const std::unordered_map &frontier_dependencies) { - CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); + RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); auto entry = algorithm_state->local_actor_infos[actor_id]; entry.frontier_dependencies = frontier_dependencies; for (auto frontier_dependency : entry.frontier_dependencies) { diff --git a/src/local_scheduler/local_scheduler_client.cc b/src/local_scheduler/local_scheduler_client.cc index 2ef4ddb55..203d6cd26 100644 --- a/src/local_scheduler/local_scheduler_client.cc +++ b/src/local_scheduler/local_scheduler_client.cc @@ -30,7 +30,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init( /* Register the process ID with the local scheduler. */ int success = write_message(result->conn, MessageType_RegisterClientRequest, fbb.GetSize(), fbb.GetBufferPointer()); - CHECKM(success == 0, "Unable to register worker with local scheduler"); + RAY_CHECK(success == 0) << "Unable to register worker with local scheduler"; /* Wait for a confirmation from the local scheduler. */ int64_t type; @@ -38,10 +38,10 @@ LocalSchedulerConnection *LocalSchedulerConnection_init( uint8_t *reply; read_message(result->conn, &type, &reply_size, &reply); if (type == DISCONNECT_CLIENT) { - LOG_DEBUG("Exiting because local scheduler closed connection."); + RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection."; exit(1); } - CHECK(type == MessageType_RegisterClientReply); + RAY_CHECK(type == MessageType_RegisterClientReply); /* Parse the reply object. */ auto reply_message = flatbuffers::GetRoot(reply); @@ -50,7 +50,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init( } /* If the worker is not an actor, there should not be any GPU IDs here. */ if (ActorID_equal(result->actor_id, ActorID::nil())) { - CHECK(reply_message->gpu_ids()->size() == 0); + RAY_CHECK(reply_message->gpu_ids()->size() == 0); } free(reply); @@ -111,10 +111,10 @@ TaskSpec *local_scheduler_get_task(LocalSchedulerConnection *conn, * scheduler gives this client a task. */ read_message(conn->conn, &type, &reply_size, &reply); if (type == DISCONNECT_CLIENT) { - LOG_WARN("Exiting because local scheduler closed connection."); + RAY_LOG(WARNING) << "Exiting because local scheduler closed connection."; exit(1); } - CHECK(type == MessageType_ExecuteTask); + RAY_CHECK(type == MessageType_ExecuteTask); /* Parse the flatbuffer object. */ auto reply_message = flatbuffers::GetRoot(reply); @@ -186,10 +186,10 @@ const std::vector local_scheduler_get_actor_frontier( std::vector reply; read_vector(conn->conn, &type, reply); if (type == DISCONNECT_CLIENT) { - LOG_DEBUG("Exiting because local scheduler closed connection."); + RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection."; exit(1); } - CHECK(type == MessageType_GetActorFrontierReply); + RAY_CHECK(type == MessageType_GetActorFrontierReply); return reply; } diff --git a/src/local_scheduler/test/local_scheduler_tests.cc b/src/local_scheduler/test/local_scheduler_tests.cc index 07733ed65..f9b144db0 100644 --- a/src/local_scheduler/test/local_scheduler_tests.cc +++ b/src/local_scheduler/test/local_scheduler_tests.cc @@ -91,7 +91,7 @@ LocalSchedulerMock *LocalSchedulerMock_init(int num_workers, connect_ipc_sock_retry(plasma_store_socket_name, 5, 100); std::string local_scheduler_socket_name = bind_ipc_sock_retry( local_scheduler_socket_name_format, &mock->local_scheduler_fd); - CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0); + RAY_CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0); /* Construct worker command */ std::stringstream worker_command_ss; @@ -411,7 +411,7 @@ TaskExecutionSpec *object_reconstruction_suppression_spec; void object_reconstruction_suppression_callback(ObjectID object_id, bool success, void *user_context) { - CHECK(success); + RAY_CHECK(success); /* Submit the task after adding the object to the object table. */ LocalSchedulerConnection *worker = (LocalSchedulerConnection *) user_context; local_scheduler_submit(worker, *object_reconstruction_suppression_spec); diff --git a/src/plasma/plasma_manager.cc b/src/plasma/plasma_manager.cc index 1ec5b4bed..fea58fc36 100644 --- a/src/plasma/plasma_manager.cc +++ b/src/plasma/plasma_manager.cc @@ -74,8 +74,8 @@ int handle_sigpipe(plasma::Status s, int fd) { } break; default: /* This code should be unreachable. */ - CHECK(0); - LOG_FATAL("Failed to write message to client on fd %d", fd); + RAY_CHECK(0); + RAY_LOG(FATAL) << "Failed to write message to client on fd " << fd; } return err; @@ -320,12 +320,12 @@ bool ClientConnection_request_finished(ClientConnection *client_conn) { std::unordered_map, UniqueIDHasher> & object_wait_requests_from_type(PlasmaManagerState *manager_state, int type) { /* We use different types of hash tables for different requests. */ + RAY_CHECK(type == plasma::PLASMA_QUERY_LOCAL || + type == plasma::PLASMA_QUERY_ANYWHERE); if (type == plasma::PLASMA_QUERY_LOCAL) { return manager_state->object_wait_requests_local; - } else if (type == plasma::PLASMA_QUERY_ANYWHERE) { - return manager_state->object_wait_requests_remote; } else { - LOG_FATAL("This code should be unreachable."); + return manager_state->object_wait_requests_remote; } } @@ -367,8 +367,8 @@ void remove_wait_request_for_object(PlasmaManagerState *manager_state, void remove_wait_request(PlasmaManagerState *manager_state, WaitRequest *wait_req) { if (wait_req->timer != -1) { - CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) == - AE_OK); + RAY_CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) == + AE_OK); } delete wait_req; } @@ -416,9 +416,9 @@ void update_object_wait_requests(PlasmaManagerState *manager_state, auto object_request = wait_req->object_requests.find(obj_id.to_plasma_id()); /* Check that we found the object. */ - CHECK(object_request != wait_req->object_requests.end()); + RAY_CHECK(object_request != wait_req->object_requests.end()); /* Check that the object found was not previously known to us. */ - CHECK(object_request->second.status == ObjectStatus_Nonexistent); + RAY_CHECK(object_request->second.status == ObjectStatus_Nonexistent); /* Update the found object's status to a known status. */ object_request->second.status = status; @@ -431,7 +431,7 @@ void update_object_wait_requests(PlasmaManagerState *manager_state, index += 1; } } - DCHECK(static_cast(index) == wait_requests.size()); + RAY_CHECK(static_cast(index) == wait_requests.size()); /* Remove the array of wait requests for this object, since no one should be * waiting for this object anymore. */ object_wait_requests.erase(object_wait_requests_it); @@ -491,7 +491,7 @@ PlasmaManagerState *PlasmaManagerState_init(const char *store_socket_name, RAY_CHECK_OK(state->gcs_client.context()->AttachToEventLoop(state->loop)); } else { state->db = NULL; - LOG_DEBUG("No db connection specified"); + RAY_LOG(DEBUG) << "No db connection specified"; } state->addr = manager_addr; state->port = manager_port; @@ -572,14 +572,14 @@ int write_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) { int err; if (r <= 0) { - LOG_ERROR("Write error"); + RAY_LOG(ERROR) << "Write error"; err = errno; } else { conn->cursor += r; - CHECK(conn->cursor <= buf->data_size + buf->metadata_size); + RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size); /* If we've finished writing this buffer, reset the cursor. */ if (conn->cursor == buf->data_size + buf->metadata_size) { - LOG_DEBUG("writing on channel %d finished", conn->fd); + RAY_LOG(DEBUG) << "writing on channel " << conn->fd << " finished"; ClientConnection_finish_request(conn); } err = 0; @@ -612,7 +612,7 @@ void send_queued_request(event_loop *loop, conn->fd); break; case MessageType_PlasmaDataReply: - LOG_DEBUG("Transferring object to manager"); + RAY_LOG(DEBUG) << "Transferring object to manager"; if (ClientConnection_request_finished(conn)) { /* If the cursor is not set, we haven't sent any requests for this object * yet, so send the initial data request. */ @@ -627,7 +627,7 @@ void send_queued_request(event_loop *loop, } break; default: - LOG_FATAL("Buffered request has unknown type."); + RAY_LOG(FATAL) << "Buffered request has unknown type."; } /* If the other side hung up, stop sending to this manager. */ @@ -658,7 +658,7 @@ void send_queued_request(event_loop *loop, int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) { ssize_t r, s; - CHECK(buf != NULL); + RAY_CHECK(buf != NULL); /* Try to read one buf_size at a time. */ s = buf->data_size + buf->metadata_size - conn->cursor; if (s > RayConfig::instance().buf_size()) { @@ -668,11 +668,11 @@ int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) { int err; if (r <= 0) { - LOG_ERROR("Read error"); + RAY_LOG(ERROR) << "Read error"; err = errno; } else { conn->cursor += r; - CHECK(conn->cursor <= buf->data_size + buf->metadata_size); + RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size); /* If the cursor is equal to the full object size, reset the cursor and * we're done. */ if (conn->cursor == buf->data_size + buf->metadata_size) { @@ -709,7 +709,7 @@ void process_data_chunk(event_loop *loop, /* If we're done receiving the object, seal the object and release it. The * release corresponds to the call to plasma_create that occurred in * process_data_request. */ - LOG_DEBUG("reading on channel %d finished", data_sock); + RAY_LOG(DEBUG) << "reading on channel " << data_sock << " finished"; /* The following seal also triggers notification of clients for fetch or * wait requests, see process_object_notification. */ ARROW_CHECK_OK(plasma_conn->Seal(buf->object_id.to_plasma_id())); @@ -805,9 +805,8 @@ void process_transfer_request(event_loop *loop, /* If the object wasn't locally available, exit immediately. If the object * later appears locally, the requesting plasma manager should request the * transfer again. */ - LOG_WARN( - "Unable to transfer object to requesting plasma manager, object not " - "local."); + RAY_LOG(WARNING) << "Unable to transfer object to requesting plasma " + << "manager, object not local."; return; } @@ -822,8 +821,8 @@ void process_transfer_request(event_loop *loop, } } - CHECK(object_buffer.metadata->data() == - object_buffer.data->data() + object_buffer.data_size); + RAY_CHECK(object_buffer.metadata->data() == + object_buffer.data->data() + object_buffer.data_size); PlasmaRequestBuffer *buf = new PlasmaRequestBuffer(); buf->type = MessageType_PlasmaDataReply; buf->object_id = obj_id; @@ -875,7 +874,7 @@ void process_data_request(event_loop *loop, * conn->transfer_queue. */ conn->transfer_queue.push_back(buf); } - CHECK(ClientConnection_request_finished(conn)); + RAY_CHECK(ClientConnection_request_finished(conn)); ClientConnection_start_request(conn); /* Switch to reading the data from this socket, instead of listening for @@ -912,10 +911,10 @@ void process_data_request(event_loop *loop, void request_transfer_from(PlasmaManagerState *manager_state, FetchRequest *fetch_req) { - CHECK(fetch_req->manager_vector.size() > 0); - CHECK(fetch_req->next_manager >= 0 && - static_cast(fetch_req->next_manager) < - fetch_req->manager_vector.size()); + RAY_CHECK(fetch_req->manager_vector.size() > 0); + RAY_CHECK(fetch_req->next_manager >= 0 && + static_cast(fetch_req->next_manager) < + fetch_req->manager_vector.size()); char addr[16]; int port; parse_ip_addr_port(fetch_req->manager_vector[fetch_req->next_manager].c_str(), @@ -931,8 +930,8 @@ void request_transfer_from(PlasmaManagerState *manager_state, &temp_addr[2], &temp_addr[3]); if (memcmp(temp_addr, manager_state->addr, 4) == 0 && port == manager_state->port) { - LOG_FATAL( - "This manager is attempting to request a transfer from itself."); + RAY_LOG(FATAL) << "This manager is attempting to request a transfer from " + << "itself."; } PlasmaRequestBuffer *transfer_request = new PlasmaRequestBuffer(); @@ -975,12 +974,12 @@ int fetch_timeout_handler(event_loop *loop, timer_id id, void *context) { if (is_receiving_or_received(manager_state, fetch_req->object_id)) { // Do nothing if the object transfer is in progress or if the object // has already been received. - LOG_DEBUG("fetch_timeout_handler: Object in progress or received. %s", - fetch_req->object_id.hex().c_str()); + RAY_LOG(DEBUG) << "fetch_timeout_handler: Object in progress or " + << "received. " << fetch_req->object_id; continue; } - LOG_DEBUG("fetch_timeout_handler: Object missing. %s", - fetch_req->object_id.hex().c_str()); + RAY_LOG(DEBUG) << "fetch_timeout_handler: Object missing. " + << fetch_req->object_id; request_transfer_from(manager_state, fetch_req); /* If we've tried all of the managers that we know about for this object, * add this object to the list to resend requests for. */ @@ -1018,13 +1017,13 @@ void request_transfer(ObjectID object_id, PlasmaManagerState *manager_state = (PlasmaManagerState *) context; /* This callback is called from object_table_subscribe, which guarantees that * the manager vector contains at least one element. */ - CHECK(manager_vector.size() >= 1); + RAY_CHECK(manager_vector.size() >= 1); auto it = manager_state->fetch_requests.find(object_id); if (is_object_local(manager_state, object_id)) { /* If the object is already here, then the fetch request should have been * removed. */ - CHECK(it == manager_state->fetch_requests.end()); + RAY_CHECK(it == manager_state->fetch_requests.end()); return; } FetchRequest *fetch_req = it->second; @@ -1033,7 +1032,7 @@ void request_transfer(ObjectID object_id, * TODO(rkn): We actually have to remove this check to handle the rare * scenario where the object is transferred here and then evicted before this * callback gets called. */ - CHECK(fetch_req != NULL); + RAY_CHECK(fetch_req != NULL); /* Update the manager vector. */ fetch_req->manager_vector = manager_vector; @@ -1055,7 +1054,7 @@ void call_request_transfer(ObjectID object_id, PlasmaManagerState *manager_state = (PlasmaManagerState *) context; /* Check that there isn't already a fetch request for this object. */ auto it = manager_state->fetch_requests.find(object_id); - CHECK(it == manager_state->fetch_requests.end()); + RAY_CHECK(it == manager_state->fetch_requests.end()); /* Create a fetch request. */ FetchRequest *fetch_req = create_fetch_request(manager_state, object_id); manager_state->fetch_requests[object_id] = fetch_req; @@ -1063,7 +1062,7 @@ void call_request_transfer(ObjectID object_id, } void fatal_table_callback(ObjectID id, void *user_context, void *user_data) { - CHECK(0); + RAY_CHECK(0); } /* This callback is used by both fetch and wait. Therefore, it may have to @@ -1143,7 +1142,7 @@ void process_wait_request(ClientConnection *client_conn, plasma::ObjectRequestMap &&object_requests, uint64_t timeout_ms, int num_ready_objects) { - CHECK(client_conn != NULL); + RAY_CHECK(client_conn != NULL); PlasmaManagerState *manager_state = client_conn->manager_state; int num_object_requests = object_requests.size(); @@ -1184,7 +1183,7 @@ void process_wait_request(ClientConnection *client_conn, num_object_ids_to_request += 1; } else { /* This code should be unreachable. */ - CHECK(0); + RAY_CHECK(0); } } @@ -1255,7 +1254,7 @@ void object_table_lookup_fail_callback(ObjectID object_id, void *user_data) { /* Fail for now. Later, we may want to send a ObjectStatus_Nonexistent to the * client. */ - CHECK(0); + RAY_CHECK(0); } void process_status_request(ClientConnection *client_conn, @@ -1299,7 +1298,7 @@ void process_delete_object_notification(PlasmaManagerState *state, void log_object_hash_mismatch_error_task_callback(Task *task, void *user_context) { - CHECK(task != NULL); + RAY_CHECK(task != NULL); PlasmaManagerState *state = (PlasmaManagerState *) user_context; TaskSpec *spec = Task_task_execution_spec(task)->Spec(); FunctionID function = TaskSpec_function(spec); @@ -1314,7 +1313,7 @@ void log_object_hash_mismatch_error_result_callback(ObjectID object_id, TaskID task_id, bool is_put, void *user_context) { - CHECK(!task_id.is_nil()); + RAY_CHECK(!task_id.is_nil()); PlasmaManagerState *state = (PlasmaManagerState *) user_context; /* Get the specification for the nondeterministic task. */ #if !RAY_USE_NEW_GCS @@ -1391,8 +1390,8 @@ void process_object_notification(event_loop *loop, uint8_t *notification = read_message_async(loop, client_sock); if (notification == NULL) { PlasmaManagerState_free(state); - LOG_FATAL( - "Lost connection to the plasma store, plasma manager is exiting!"); + RAY_LOG(FATAL) << "Lost connection to the plasma store, plasma manager is " + << "exiting!"; } auto object_info = flatbuffers::GetRoot(notification); /* Add object to locally available object. */ @@ -1437,7 +1436,7 @@ ClientConnection *ClientConnection_listen(event_loop *loop, ClientConnection *conn = ClientConnection_init(state, new_socket, client_key); event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, conn); - LOG_DEBUG("New client connection with fd %d", new_socket); + RAY_LOG(DEBUG) << "New client connection with fd " << new_socket; return conn; } @@ -1483,7 +1482,7 @@ void process_message(event_loop *loop, switch (type) { case MessageType_PlasmaDataRequest: { - LOG_DEBUG("Processing data request"); + RAY_LOG(DEBUG) << "Processing data request"; plasma::ObjectID object_id; char *address; int port; @@ -1493,7 +1492,7 @@ void process_message(event_loop *loop, free(address); } break; case MessageType_PlasmaDataReply: { - LOG_DEBUG("Processing data reply"); + RAY_LOG(DEBUG) << "Processing data reply"; plasma::ObjectID object_id; int64_t object_size; int64_t metadata_size; @@ -1503,7 +1502,7 @@ void process_message(event_loop *loop, metadata_size, conn); } break; case MessageType_PlasmaFetchRequest: { - LOG_DEBUG("Processing fetch remote"); + RAY_LOG(DEBUG) << "Processing fetch remote"; std::vector object_ids_to_fetch; /* TODO(pcm): process_fetch_requests allocates an array of num_objects * object_ids too so these should be shared in the future. */ @@ -1512,7 +1511,7 @@ void process_message(event_loop *loop, object_ids_to_fetch.data()); } break; case MessageType_PlasmaWaitRequest: { - LOG_DEBUG("Processing wait"); + RAY_LOG(DEBUG) << "Processing wait"; plasma::ObjectRequestMap object_requests; int64_t timeout_ms; int num_ready_objects; @@ -1522,18 +1521,18 @@ void process_message(event_loop *loop, num_ready_objects); } break; case MessageType_PlasmaStatusRequest: { - LOG_DEBUG("Processing status"); + RAY_LOG(DEBUG) << "Processing status"; plasma::ObjectID object_id; ARROW_CHECK_OK(plasma::ReadStatusRequest(data, length, &object_id, 1)); process_status_request(conn, object_id); } break; case DISCONNECT_CLIENT: { - LOG_DEBUG("Disconnecting client on fd %d", client_sock); + RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock; event_loop_remove_file(loop, client_sock); ClientConnection_free(conn); } break; default: - LOG_FATAL("invalid request %" PRId64, type); + RAY_LOG(FATAL) << "invalid request " << type; } free(data); @@ -1541,9 +1540,8 @@ void process_message(event_loop *loop, int64_t end_time = current_time_ms(); if (end_time - start_time > RayConfig::instance().max_time_for_handler_milliseconds()) { - LOG_WARN("process_message of type %" PRId64 " took %" PRId64 - " milliseconds.", - type, end_time - start_time); + RAY_LOG(WARNING) << "process_message of type " << type << " took " + << end_time - start_time << " milliseconds."; } } @@ -1552,12 +1550,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) { /* Check that the last heartbeat was not sent too long ago. */ int64_t current_time = current_time_ms(); - CHECK(current_time >= state->previous_heartbeat_time); + RAY_CHECK(current_time >= state->previous_heartbeat_time); if (current_time - state->previous_heartbeat_time > RayConfig::instance().num_heartbeats_timeout() * RayConfig::instance().heartbeat_timeout_milliseconds()) { - LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.", - current_time - state->previous_heartbeat_time); + RAY_LOG(FATAL) << "The last heartbeat was sent " + << current_time - state->previous_heartbeat_time + << " milliseconds ago."; } state->previous_heartbeat_time = current_time; @@ -1583,18 +1582,18 @@ void start_server(const char *store_socket_name, } int local_sock = bind_ipc_sock(manager_socket_name, false); - CHECKM(local_sock >= 0, "Unable to bind local manager socket"); + RAY_CHECK(local_sock >= 0) << "Unable to bind local manager socket"; g_manager_state = PlasmaManagerState_init( store_socket_name, manager_socket_name, master_addr, port, redis_primary_addr, redis_primary_port); - CHECK(g_manager_state); + RAY_CHECK(g_manager_state); - CHECK(listen(remote_sock, 128) != -1); - CHECK(listen(local_sock, 128) != -1); + RAY_CHECK(listen(remote_sock, 128) != -1); + RAY_CHECK(listen(local_sock, 128) != -1); - LOG_DEBUG("Started server connected to store %s, listening on port %d", - store_socket_name, port); + RAY_LOG(DEBUG) << "Started server connected to store " << store_socket_name + << ", listening on port " << port; event_loop_add_file(g_manager_state->loop, local_sock, EVENT_LOOP_READ, handle_new_client, g_manager_state); event_loop_add_file(g_manager_state->loop, remote_sock, EVENT_LOOP_READ, @@ -1619,7 +1618,7 @@ void start_server(const char *store_socket_name, /* Report "success" to valgrind. */ void signal_handler(int signal) { - LOG_DEBUG("Signal was %d", signal); + RAY_LOG(DEBUG) << "Signal was " << signal; if (signal == SIGTERM) { if (g_manager_state) { PlasmaManagerState_free(g_manager_state); @@ -1662,37 +1661,32 @@ int main(int argc, char *argv[]) { redis_primary_addr_port = optarg; break; default: - LOG_FATAL("unknown option %c", c); + RAY_LOG(FATAL) << "unknown option " << c; } } if (!store_socket_name) { - LOG_FATAL( - "please specify socket for connecting to the plasma store with -s " - "switch"); + RAY_LOG(FATAL) << "please specify socket for connecting to the plasma " + << "store with -s switch"; } if (!manager_socket_name) { - LOG_FATAL( - "please specify socket name of the manager's local socket with -m " - "switch"); + RAY_LOG(FATAL) << "please specify socket name of the manager's local " + << "socket with -m switch"; } if (!master_addr) { - LOG_FATAL( - "please specify ip address of the current host in the format " - "123.456.789.10 with -h switch"); + RAY_LOG(FATAL) << "please specify ip address of the current host in the " + << "format 123.456.789.10 with -h switch"; } if (port == -1) { - LOG_FATAL( - "please specify port the plasma manager shall listen to in the" - "format 12345 with -p switch"); + RAY_LOG(FATAL) << "please specify port the plasma manager shall listen to " + << "in the format 12345 with -p switch"; } char redis_primary_addr[16]; - int redis_primary_port; + int redis_primary_port = -1; if (!redis_primary_addr_port || parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr, &redis_primary_port) == -1) { - LOG_FATAL( - "specify the primary redis address like 127.0.0.1:6379 with the -r " - "switch"); + RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 " + << "with the -r switch"; } start_server(store_socket_name, manager_socket_name, master_addr, port, redis_primary_addr, redis_primary_port); diff --git a/src/plasma/plasma_protocol.cc b/src/plasma/plasma_protocol.cc index 912b01849..1f97d9398 100644 --- a/src/plasma/plasma_protocol.cc +++ b/src/plasma/plasma_protocol.cc @@ -22,8 +22,8 @@ Status PlasmaReceive(int sock, std::vector &buffer) { int64_t type; RETURN_NOT_OK(ReadMessage(sock, &type, buffer)); - ARROW_CHECK(type == message_type) << "type = " << type - << ", message_type = " << message_type; + RAY_CHECK(type == message_type) << "type = " << type + << ", message_type = " << message_type; return Status::OK(); } @@ -45,7 +45,7 @@ Status ReadCreateRequest(uint8_t *data, ObjectID *object_id, int64_t *data_size, int64_t *metadata_size) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *data_size = message->data_size(); *metadata_size = message->metadata_size(); @@ -72,7 +72,7 @@ Status SendCreateReply(int sock, Status ReadCreateReply(uint8_t *data, ObjectID *object_id, PlasmaObject *object) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); object->handle.store_fd = message->plasma_object()->segment_index(); @@ -99,10 +99,10 @@ Status SendSealRequest(int sock, ObjectID object_id, unsigned char *digest) { Status ReadSealRequest(uint8_t *data, ObjectID *object_id, unsigned char *digest) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); - ARROW_CHECK(message->digest()->size() == kDigestSize); + RAY_CHECK(message->digest()->size() == kDigestSize); memcpy(digest, message->digest()->data(), kDigestSize); return Status::OK(); } @@ -117,7 +117,7 @@ Status SendSealReply(int sock, ObjectID object_id, int error) { } Status ReadSealReply(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return plasma_error_status(message->error()); @@ -135,7 +135,7 @@ Status SendReleaseRequest(int sock, ObjectID object_id) { } Status ReadReleaseRequest(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return Status::OK(); @@ -151,7 +151,7 @@ Status SendReleaseReply(int sock, ObjectID object_id, int error) { } Status ReadReleaseReply(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return plasma_error_status(message->error()); @@ -169,7 +169,7 @@ Status SendDeleteRequest(int sock, ObjectID object_id) { } Status ReadDeleteRequest(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return Status::OK(); @@ -185,7 +185,7 @@ Status SendDeleteReply(int sock, ObjectID object_id, int error) { } Status ReadDeleteReply(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return plasma_error_status(message->error()); @@ -205,7 +205,7 @@ Status SendStatusRequest(int sock, ObjectID object_ids[], int64_t num_objects) { Status ReadStatusRequest(uint8_t *data, ObjectID object_ids[], int64_t num_objects) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); for (int64_t i = 0; i < num_objects; ++i) { object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str()); @@ -227,7 +227,7 @@ Status SendStatusReply(int sock, } int64_t ReadStatusReply_num_objects(uint8_t *data) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); return message->object_ids()->size(); } @@ -236,7 +236,7 @@ Status ReadStatusReply(uint8_t *data, ObjectID object_ids[], int object_status[], int64_t num_objects) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); for (int64_t i = 0; i < num_objects; ++i) { object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str()); @@ -259,7 +259,7 @@ Status SendContainsRequest(int sock, ObjectID object_id) { } Status ReadContainsRequest(uint8_t *data, ObjectID *object_id) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); return Status::OK(); @@ -275,7 +275,7 @@ Status SendContainsReply(int sock, ObjectID object_id, int has_object) { } Status ReadContainsReply(uint8_t *data, ObjectID *object_id, int *has_object) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); *has_object = message->has_object(); @@ -305,7 +305,7 @@ Status SendConnectReply(int sock, int64_t memory_capacity) { } Status ReadConnectReply(uint8_t *data, int64_t *memory_capacity) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *memory_capacity = message->memory_capacity(); return Status::OK(); @@ -322,7 +322,7 @@ Status SendEvictRequest(int sock, int64_t num_bytes) { } Status ReadEvictRequest(uint8_t *data, int64_t *num_bytes) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *num_bytes = message->num_bytes(); return Status::OK(); @@ -337,7 +337,7 @@ Status SendEvictReply(int sock, int64_t num_bytes) { } Status ReadEvictReply(uint8_t *data, int64_t &num_bytes) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); num_bytes = message->num_bytes(); return Status::OK(); @@ -360,7 +360,7 @@ Status SendGetRequest(int sock, Status ReadGetRequest(uint8_t *data, std::vector &object_ids, int64_t *timeout_ms) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); for (int64_t i = 0; i < message->object_ids()->size(); ++i) { auto object_id = message->object_ids()->Get(i)->str(); @@ -396,7 +396,7 @@ Status ReadGetReply(uint8_t *data, ObjectID object_ids[], PlasmaObject plasma_objects[], int64_t num_objects) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); for (int64_t i = 0; i < num_objects; ++i) { object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str()); @@ -425,7 +425,7 @@ Status SendFetchRequest(int sock, ObjectID object_ids[], int64_t num_objects) { } Status ReadFetchRequest(uint8_t *data, std::vector &object_ids) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); for (int64_t i = 0; i < message->object_ids()->size(); ++i) { object_ids.push_back( @@ -462,7 +462,7 @@ Status ReadWaitRequest(uint8_t *data, ObjectRequestMap &object_requests, int64_t *timeout_ms, int *num_ready_objects) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *num_ready_objects = message->num_ready_objects(); *timeout_ms = message->timeout(); @@ -502,7 +502,7 @@ Status SendWaitReply(int sock, Status ReadWaitReply(uint8_t *data, ObjectRequest object_requests[], int *num_ready_objects) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *num_ready_objects = message->num_ready_objects(); @@ -543,9 +543,9 @@ Status ReadDataRequest(uint8_t *data, ObjectID *object_id, char **address, int *port) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); - DCHECK(message->object_id()->size() == sizeof(ObjectID)); + RAY_DCHECK(message->object_id()->size() == sizeof(ObjectID)); *object_id = ObjectID::from_binary(message->object_id()->str()); *address = strdup(message->address()->c_str()); *port = message->port(); @@ -568,7 +568,7 @@ Status ReadDataReply(uint8_t *data, ObjectID *object_id, int64_t *object_size, int64_t *metadata_size) { - DCHECK(data); + RAY_DCHECK(data); auto message = flatbuffers::GetRoot(data); *object_id = ObjectID::from_binary(message->object_id()->str()); *object_size = (int64_t) message->object_size(); diff --git a/src/plasma/test/manager_tests.cc b/src/plasma/test/manager_tests.cc index 22633c0af..f256f6b8a 100644 --- a/src/plasma/test/manager_tests.cc +++ b/src/plasma/test/manager_tests.cc @@ -31,7 +31,7 @@ void wait_for_pollin(int fd) { poll_list[0].fd = fd; poll_list[0].events = POLLIN; int retval = poll(poll_list, (unsigned long) 1, -1); - CHECK(retval > 0); + RAY_CHECK(retval > 0); } int test_done_handler(event_loop *loop, timer_id id, void *context) { @@ -66,7 +66,7 @@ plasma_mock *init_plasma_mock(plasma_mock *remote_mock) { std::string manager_socket_name = bind_ipc_sock_retry( plasma_manager_socket_name_format, &mock->manager_local_fd); - CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0); + RAY_CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0); mock->state = PlasmaManagerState_init(plasma_store_socket_name, manager_socket_name.c_str(), @@ -252,7 +252,7 @@ TEST object_notifications_test(void) { int fd[2]; socketpair(AF_UNIX, SOCK_STREAM, 0, fd); int flags = fcntl(fd[1], F_GETFL, 0); - CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0); + RAY_CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0); ObjectID object_id = ObjectID::from_random(); ObjectInfoT info; diff --git a/src/ray/id.cc b/src/ray/id.cc index 1437b7e49..662ac2e06 100644 --- a/src/ray/id.cc +++ b/src/ray/id.cc @@ -78,4 +78,9 @@ bool UniqueID::operator==(const UniqueID &rhs) const { return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0; } +std::ostream &operator<<(std::ostream &os, const UniqueID &id) { + os << id.hex(); + return os; +} + } // namespace ray diff --git a/src/ray/id.h b/src/ray/id.h index 2e3cf5582..97761e04f 100644 --- a/src/ray/id.h +++ b/src/ray/id.h @@ -44,6 +44,8 @@ struct UniqueIDHasher { } }; +std::ostream &operator<<(std::ostream &os, const UniqueID &id); + typedef UniqueID TaskID; typedef UniqueID JobID; typedef UniqueID ObjectID; diff --git a/src/ray/util/logging.h b/src/ray/util/logging.h index aa65fc116..a73a742d4 100644 --- a/src/ray/util/logging.h +++ b/src/ray/util/logging.h @@ -34,12 +34,26 @@ namespace ray { << __FILE__ << __LINE__ \ << " Check failed: " #condition " " +#ifdef NDEBUG + +#define RAY_DCHECK(condition) \ + RAY_IGNORE_EXPR(condition) \ + while (false) \ + ::ray::internal::NullLog() + +#else + +#define RAY_DCHECK(condition) RAY_CHECK(condition) + +#endif // NDEBUG + namespace internal { class NullLog { public: template NullLog &operator<<(const T &t) { + RAY_IGNORE_EXPR(t); return *this; } };