Update logging and check macros. (#1627)

* Update logging and check macros.

* Fix linting.

* Fix RAY_DCHECK and unused variable.

* Fix linting
This commit is contained in:
Robert Nishihara
2018-02-28 15:13:00 -08:00
committed by Philipp Moritz
parent e7df293946
commit 0fcceef772
29 changed files with 721 additions and 774 deletions
+1 -63
View File
@@ -25,6 +25,7 @@ extern "C" {
#include "arrow/util/macros.h"
#include "plasma/common.h"
#include "ray/id.h"
#include "ray/util/logging.h"
#include "state/ray_config.h"
@@ -44,69 +45,6 @@ extern "C" {
#define RAY_COMMON_LOG_LEVEL RAY_COMMON_INFO
#endif
/**
* Macros to enable each level of Ray logging statements depending on the
* current logging level. */
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_DEBUG)
#define LOG_DEBUG(M, ...)
#else
#define LOG_DEBUG(M, ...) \
fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_INFO)
#define LOG_INFO(M, ...)
#else
#define LOG_INFO(M, ...) \
fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_WARNING)
#define LOG_WARN(M, ...)
#else
#define LOG_WARN(M, ...) \
fprintf(stderr, "[WARN] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_ERROR)
#define LOG_ERROR(M, ...)
#else
#define LOG_ERROR(M, ...) \
fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
#endif
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_FATAL)
#define LOG_FATAL(M, ...)
#elif defined(_EXECINFO_H) || !defined(_WIN32)
#define LOG_FATAL(M, ...) \
do { \
fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \
void *buffer[255]; \
const int calls = backtrace(buffer, sizeof(buffer) / sizeof(void *)); \
backtrace_symbols_fd(buffer, calls, 1); \
abort(); \
} while (0)
#else
#define LOG_FATAL(M, ...) \
do { \
fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \
exit(-1); \
} while (0)
#endif
/** Assertion definitions, with optional logging. */
#define CHECKM(COND, M, ...) \
if (!(COND)) { \
LOG_FATAL("Check failure: %s \n" M, #COND, ##__VA_ARGS__); \
}
#define CHECK(COND) CHECKM(COND, "")
#define RAY_DCHECK(COND) CHECK(COND)
/* These are exit codes for common errors that can occur in Ray components. */
#define EXIT_COULD_NOT_BIND_PORT -2
+1 -1
View File
@@ -9,7 +9,7 @@ flatbuffers::Offset<flatbuffers::String> to_flatbuf(
/* Convert a flatbuffer string into a ray::ObjectID. The size of the string is
 * checked against sizeof(ray::ObjectID) before its bytes are copied into the
 * returned ID. */
ray::ObjectID from_flatbuf(const flatbuffers::String &string) {
ray::ObjectID object_id;
CHECK(string.size() == sizeof(ray::ObjectID));
RAY_CHECK(string.size() == sizeof(ray::ObjectID));
memcpy(object_id.mutable_data(), string.data(), sizeof(ray::ObjectID));
return object_id;
}
+33 -30
View File
@@ -24,7 +24,7 @@ int bind_inet_sock(const int port, bool shall_listen) {
struct sockaddr_in name;
int socket_fd = socket(PF_INET, SOCK_STREAM, 0);
if (socket_fd < 0) {
LOG_ERROR("socket() failed for port %d.", port);
RAY_LOG(ERROR) << "socket() failed for port " << port;
return -1;
}
name.sin_family = AF_INET;
@@ -33,23 +33,23 @@ int bind_inet_sock(const int port, bool shall_listen) {
int on = 1;
/* TODO(pcm): http://stackoverflow.com/q/1150635 */
if (ioctl(socket_fd, FIONBIO, (char *) &on) < 0) {
LOG_ERROR("ioctl failed");
RAY_LOG(ERROR) << "ioctl failed";
close(socket_fd);
return -1;
}
int *const pon = (int *const) & on;
if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, pon, sizeof(on)) < 0) {
LOG_ERROR("setsockopt failed for port %d", port);
RAY_LOG(ERROR) << "setsockopt failed for port " << port;
close(socket_fd);
return -1;
}
if (bind(socket_fd, (struct sockaddr *) &name, sizeof(name)) < 0) {
LOG_ERROR("Bind failed for port %d", port);
RAY_LOG(ERROR) << "Bind failed for port " << port;
close(socket_fd);
return -1;
}
if (shall_listen && listen(socket_fd, 128) == -1) {
LOG_ERROR("Could not listen to socket %d", port);
RAY_LOG(ERROR) << "Could not listen to socket " << port;
close(socket_fd);
return -1;
}
@@ -60,14 +60,14 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
struct sockaddr_un socket_address;
int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (socket_fd < 0) {
LOG_ERROR("socket() failed for pathname %s.", socket_pathname);
RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname;
return -1;
}
/* Tell the system to allow the port to be reused. */
int on = 1;
if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on,
sizeof(on)) < 0) {
LOG_ERROR("setsockopt failed for pathname %s", socket_pathname);
RAY_LOG(ERROR) << "setsockopt failed for pathname " << socket_pathname;
close(socket_fd);
return -1;
}
@@ -76,7 +76,7 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
memset(&socket_address, 0, sizeof(socket_address));
socket_address.sun_family = AF_UNIX;
if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
LOG_ERROR("Socket pathname is too long.");
RAY_LOG(ERROR) << "Socket pathname is too long.";
close(socket_fd);
return -1;
}
@@ -85,12 +85,12 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
if (bind(socket_fd, (struct sockaddr *) &socket_address,
sizeof(socket_address)) != 0) {
LOG_ERROR("Bind failed for pathname %s.", socket_pathname);
RAY_LOG(ERROR) << "Bind failed for pathname " << socket_pathname;
close(socket_fd);
return -1;
}
if (shall_listen && listen(socket_fd, 128) == -1) {
LOG_ERROR("Could not listen to socket %s", socket_pathname);
RAY_LOG(ERROR) << "Could not listen to socket " << socket_pathname;
close(socket_fd);
return -1;
}
@@ -108,7 +108,7 @@ int connect_ipc_sock_retry(const char *socket_pathname,
timeout = RayConfig::instance().connect_timeout_milliseconds();
}
CHECK(socket_pathname);
RAY_CHECK(socket_pathname);
int fd = -1;
for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) {
fd = connect_ipc_sock(socket_pathname);
@@ -116,15 +116,15 @@ int connect_ipc_sock_retry(const char *socket_pathname,
break;
}
if (num_attempts == 0) {
LOG_ERROR("Connection to socket failed for pathname %s.",
socket_pathname);
RAY_LOG(ERROR) << "Connection to socket failed for pathname "
<< socket_pathname;
}
/* Sleep for timeout milliseconds. */
usleep(timeout * 1000);
}
/* If we could not connect to the socket, exit. */
if (fd == -1) {
LOG_FATAL("Could not connect to socket %s", socket_pathname);
RAY_LOG(FATAL) << "Could not connect to socket " << socket_pathname;
}
return fd;
}
@@ -135,14 +135,14 @@ int connect_ipc_sock(const char *socket_pathname) {
socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (socket_fd < 0) {
LOG_ERROR("socket() failed for pathname %s.", socket_pathname);
RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname;
return -1;
}
memset(&socket_address, 0, sizeof(socket_address));
socket_address.sun_family = AF_UNIX;
if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
LOG_ERROR("Socket pathname is too long.");
RAY_LOG(ERROR) << "Socket pathname is too long.";
return -1;
}
strncpy(socket_address.sun_path, socket_pathname,
@@ -169,7 +169,7 @@ int connect_inet_sock_retry(const char *ip_addr,
timeout = RayConfig::instance().connect_timeout_milliseconds();
}
CHECK(ip_addr);
RAY_CHECK(ip_addr);
int fd = -1;
for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) {
fd = connect_inet_sock(ip_addr, port);
@@ -177,15 +177,15 @@ int connect_inet_sock_retry(const char *ip_addr,
break;
}
if (num_attempts == 0) {
LOG_ERROR("Connection to socket failed for address %s:%d.", ip_addr,
port);
RAY_LOG(ERROR) << "Connection to socket failed for address " << ip_addr
<< ":" << port;
}
/* Sleep for timeout milliseconds. */
usleep(timeout * 1000);
}
/* If we could not connect to the socket, exit. */
if (fd == -1) {
LOG_FATAL("Could not connect to address %s:%d", ip_addr, port);
RAY_LOG(FATAL) << "Could not connect to address " << ip_addr << ":" << port;
}
return fd;
}
@@ -193,13 +193,14 @@ int connect_inet_sock_retry(const char *ip_addr,
int connect_inet_sock(const char *ip_addr, int port) {
int fd = socket(PF_INET, SOCK_STREAM, 0);
if (fd < 0) {
LOG_ERROR("socket() failed for address %s:%d.", ip_addr, port);
RAY_LOG(ERROR) << "socket() failed for address " << ip_addr << ":" << port;
return -1;
}
struct hostent *manager = gethostbyname(ip_addr); /* TODO(pcm): cache this */
if (!manager) {
LOG_ERROR("Failed to get hostname from address %s:%d.", ip_addr, port);
RAY_LOG(ERROR) << "Failed to get hostname from address " << ip_addr << ":"
<< port;
close(fd);
return -1;
}
@@ -219,7 +220,7 @@ int connect_inet_sock(const char *ip_addr, int port) {
int accept_client(int socket_fd) {
int client_fd = accept(socket_fd, NULL, NULL);
if (client_fd < 0) {
LOG_ERROR("Error reading from socket.");
RAY_LOG(ERROR) << "Error reading from socket.";
return -1;
}
return client_fd;
@@ -242,7 +243,7 @@ int write_bytes(int fd, uint8_t *cursor, size_t length) {
/* Encountered early EOF. */
return -1;
}
CHECK(nbytes > 0);
RAY_CHECK(nbytes > 0);
bytesleft -= nbytes;
offset += nbytes;
}
@@ -288,7 +289,7 @@ int read_bytes(int fd, uint8_t *cursor, size_t length) {
/* Encountered early EOF. */
return -1;
}
CHECK(nbytes > 0);
RAY_CHECK(nbytes > 0);
bytesleft -= nbytes;
offset += nbytes;
}
@@ -302,7 +303,7 @@ void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
if (closed) {
goto disconnected;
}
CHECK(version == RayConfig::instance().ray_protocol_version());
RAY_CHECK(version == RayConfig::instance().ray_protocol_version());
closed = read_bytes(fd, (uint8_t *) type, sizeof(*type));
if (closed) {
goto disconnected;
@@ -332,7 +333,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) {
int error = read_bytes(sock, (uint8_t *) &size, sizeof(int64_t));
if (error < 0) {
/* The other side has closed the socket. */
LOG_DEBUG("Socket has been closed, or some other error has occurred.");
RAY_LOG(DEBUG) << "Socket has been closed, or some other error has "
<< "occurred.";
if (loop != NULL) {
event_loop_remove_file(loop, sock);
}
@@ -343,7 +345,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) {
error = read_bytes(sock, message, size);
if (error < 0) {
/* The other side has closed the socket. */
LOG_DEBUG("Socket has been closed, or some other error has occurred.");
RAY_LOG(DEBUG) << "Socket has been closed, or some other error has "
<< "occurred.";
if (loop != NULL) {
event_loop_remove_file(loop, sock);
}
@@ -359,7 +362,7 @@ int64_t read_vector(int fd, int64_t *type, std::vector<uint8_t> &buffer) {
if (closed) {
goto disconnected;
}
CHECK(version == RayConfig::instance().ray_protocol_version());
RAY_CHECK(version == RayConfig::instance().ray_protocol_version());
int64_t length;
closed = read_bytes(fd, (uint8_t *) type, sizeof(*type));
if (closed) {
@@ -393,6 +396,6 @@ char *read_log_message(int fd) {
int64_t type;
int64_t length;
read_message(fd, &type, &length, &bytes);
CHECK(type == LOG_MESSAGE);
RAY_CHECK(type == LOG_MESSAGE);
return (char *) bytes;
}
+7 -7
View File
@@ -28,14 +28,14 @@ void init_pickle_module(void) {
#else
pickle_module = PyImport_ImportModuleNoBlock("cPickle");
#endif
CHECK(pickle_module != NULL);
CHECK(PyObject_HasAttrString(pickle_module, "loads"));
CHECK(PyObject_HasAttrString(pickle_module, "dumps"));
CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL"));
RAY_CHECK(pickle_module != NULL);
RAY_CHECK(PyObject_HasAttrString(pickle_module, "loads"));
RAY_CHECK(PyObject_HasAttrString(pickle_module, "dumps"));
RAY_CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL"));
pickle_loads = PyUnicode_FromString("loads");
pickle_dumps = PyUnicode_FromString("dumps");
pickle_protocol = PyObject_GetAttrString(pickle_module, "HIGHEST_PROTOCOL");
CHECK(pickle_protocol != NULL);
RAY_CHECK(pickle_protocol != NULL);
}
TaskBuilder *g_task_builder = NULL;
@@ -449,8 +449,8 @@ static PyObject *PyTask_arguments(PyObject *self) {
assert(count == 1);
PyList_SetItem(arg_list, i, PyObjectID_make(TaskSpec_arg_id(task, i, 0)));
} else {
CHECK(pickle_module != NULL);
CHECK(pickle_loads != NULL);
RAY_CHECK(pickle_module != NULL);
RAY_CHECK(pickle_loads != NULL);
PyObject *str =
PyBytes_FromStringAndSize((char *) TaskSpec_arg_val(task, i),
(Py_ssize_t) TaskSpec_arg_length(task, i));
+9 -7
View File
@@ -441,7 +441,8 @@ int TableAdd_RedisCommand(RedisModuleCtx *ctx,
/* See how many clients received this publish. */
long long num_clients = RedisModule_CallReplyInteger(reply);
CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients);
RAY_CHECK(num_clients <= 1) << "Published to " << num_clients
<< " clients.";
RedisModule_FreeString(ctx, publish_message);
RedisModule_FreeString(ctx, publish_topic);
@@ -473,7 +474,7 @@ int TableLookup_RedisCommand(RedisModuleCtx *ctx,
}
bool is_nil(const std::string &data) {
CHECK(data.size() == kUniqueIDSize);
RAY_CHECK(data.size() == kUniqueIDSize);
const uint8_t *d = reinterpret_cast<const uint8_t *>(data.data());
for (int i = 0; i < kUniqueIDSize; ++i) {
if (d[i] != 255) {
@@ -518,9 +519,9 @@ int TableTestAndUpdate_RedisCommand(RedisModuleCtx *ctx,
}
if (do_update) {
CHECK(data->mutate_scheduling_state(update->update_state()));
RAY_CHECK(data->mutate_scheduling_state(update->update_state()));
}
CHECK(data->mutate_updated(do_update));
RAY_CHECK(data->mutate_updated(do_update));
int result = RedisModule_ReplyWithStringBuffer(ctx, value_buf, value_len);
@@ -978,8 +979,8 @@ int ResultTableLookup_RedisCommand(RedisModuleCtx *ctx,
data_size_value = -1;
} else {
RedisModule_StringToLongLong(data_size, &data_size_value);
CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) ==
REDISMODULE_OK);
RAY_CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) ==
REDISMODULE_OK);
}
flatbuffers::Offset<flatbuffers::String> hash_str;
@@ -1091,7 +1092,8 @@ int TaskTableWrite(RedisModuleCtx *ctx,
/* See how many clients received this publish. */
long long num_clients = RedisModule_CallReplyInteger(reply);
CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients);
RAY_CHECK(num_clients <= 1) << "Published to " << num_clients
<< " clients.";
RedisModule_FreeString(ctx, publish_message);
RedisModule_FreeString(ctx, publish_topic);
+5 -6
View File
@@ -44,16 +44,15 @@ const std::vector<std::string> db_client_table_get_ip_addresses(
for (auto const &manager_id : manager_ids) {
DBClient client = redis_cache_get_db_client(db_handle, manager_id);
CHECK(!client.manager_address.empty());
RAY_CHECK(!client.manager_address.empty());
manager_vector.push_back(client.manager_address);
}
int64_t end_time = current_time_ms();
if (end_time - start_time > RayConfig::instance().max_time_for_loop()) {
LOG_WARN(
"calling redis_get_cached_db_client in a loop in with %zu manager IDs "
"took %" PRId64 " milliseconds.",
manager_ids.size(), end_time - start_time);
RAY_LOG(WARNING) << "calling redis_get_cached_db_client in a loop in with "
<< manager_ids.size() << " manager IDs took "
<< end_time - start_time << " milliseconds.";
}
return manager_vector;
@@ -71,7 +70,7 @@ void db_client_table_cache_init(DBHandle *db_handle) {
}
/* Return the DBClient for client_id by delegating to
 * redis_cache_get_db_client. The client ID is checked to be non-nil before
 * the lookup is performed. */
DBClient db_client_table_cache_get(DBHandle *db_handle, DBClientID client_id) {
CHECK(!client_id.is_nil());
RAY_CHECK(!client_id.is_nil());
return redis_cache_get_db_client(db_handle, client_id);
}
+2 -2
View File
@@ -14,7 +14,7 @@ void push_error(DBHandle *db_handle,
int error_index,
size_t data_length,
const unsigned char *data) {
CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX);
RAY_CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX);
/* Allocate a struct to hold the error information. */
ErrorInfo *info = (ErrorInfo *) malloc(sizeof(ErrorInfo) + data_length);
info->driver_id = driver_id;
@@ -22,7 +22,7 @@ void push_error(DBHandle *db_handle,
info->data_length = data_length;
memcpy(info->data, data, data_length);
/* Generate a random key to identify this error message. */
CHECK(sizeof(info->error_key) >= sizeof(UniqueID));
RAY_CHECK(sizeof(info->error_key) >= sizeof(UniqueID));
UniqueID error_key = UniqueID::from_random();
memcpy(info->error_key, error_key.data(), sizeof(info->error_key));
+6 -6
View File
@@ -6,7 +6,7 @@ void object_table_lookup(DBHandle *db_handle,
RetryInfo *retry,
object_table_lookup_done_callback done_callback,
void *user_context) {
CHECK(db_handle != NULL);
RAY_CHECK(db_handle != NULL);
init_table_callback(db_handle, object_id, __func__,
new CommonCallbackData(NULL), retry,
(table_done_callback) done_callback,
@@ -20,7 +20,7 @@ void object_table_add(DBHandle *db_handle,
RetryInfo *retry,
object_table_done_callback done_callback,
void *user_context) {
CHECK(db_handle != NULL);
RAY_CHECK(db_handle != NULL);
ObjectTableAddData *info =
(ObjectTableAddData *) malloc(sizeof(ObjectTableAddData));
@@ -38,7 +38,7 @@ void object_table_remove(DBHandle *db_handle,
RetryInfo *retry,
object_table_done_callback done_callback,
void *user_context) {
CHECK(db_handle != NULL);
RAY_CHECK(db_handle != NULL);
/* Copy the client ID, if one was provided. */
DBClientID *client_id_copy = NULL;
if (client_id != NULL) {
@@ -59,7 +59,7 @@ void object_table_subscribe_to_notifications(
RetryInfo *retry,
object_table_lookup_done_callback done_callback,
void *user_context) {
CHECK(db_handle != NULL);
RAY_CHECK(db_handle != NULL);
ObjectTableSubscribeData *sub_data =
(ObjectTableSubscribeData *) malloc(sizeof(ObjectTableSubscribeData));
sub_data->object_available_callback = object_available_callback;
@@ -76,8 +76,8 @@ void object_table_request_notifications(DBHandle *db_handle,
int num_object_ids,
ObjectID object_ids[],
RetryInfo *retry) {
CHECK(db_handle != NULL);
CHECK(num_object_ids > 0);
RAY_CHECK(db_handle != NULL);
RAY_CHECK(num_object_ids > 0);
ObjectTableRequestNotificationsData *data =
(ObjectTableRequestNotificationsData *) malloc(
sizeof(ObjectTableRequestNotificationsData) +
+140 -135
View File
@@ -35,17 +35,17 @@ extern "C" {
extern int usleep(useconds_t usec);
#endif
#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \
do { \
CONTEXT_TYPE *_context = (context); \
if (!_context) { \
LOG_FATAL("could not allocate redis context"); \
} \
if (_context->err) { \
LOG_ERROR(M, ##__VA_ARGS__); \
LOG_REDIS_ERROR(_context, ""); \
exit(-1); \
} \
/* Validate a Redis context of type CONTEXT_TYPE. A NULL context is logged at
 * FATAL level. A context whose err field is set is logged at ERROR level with
 * the message M, then passed to LOG_REDIS_ERROR, and the process exits with
 * status -1.
 * NOTE(review): the variadic arguments are accepted but never referenced in
 * the expansion, and M is streamed as-is, so printf-style placeholders in M
 * would not be substituted -- confirm against the call sites. */
#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \
do { \
CONTEXT_TYPE *_context = (context); \
if (!_context) { \
RAY_LOG(FATAL) << "could not allocate redis context"; \
} \
if (_context->err) { \
RAY_LOG(ERROR) << M; \
LOG_REDIS_ERROR(_context, ""); \
exit(-1); \
} \
} while (0)
/**
@@ -110,14 +110,14 @@ void get_redis_shards(redisContext *context,
num_attempts++;
continue;
}
CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(),
"No entry found for NumRedisShards");
CHECKM(reply->type == REDIS_REPLY_STRING,
"Expected string, found Redis type %d for NumRedisShards",
reply->type);
RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries())
<< "No entry found for NumRedisShards";
RAY_CHECK(reply->type == REDIS_REPLY_STRING)
<< "Expected string, found Redis type " << reply->type
<< " for NumRedisShards";
int num_redis_shards = atoi(reply->str);
CHECKM(num_redis_shards >= 1, "Expected at least one Redis shard, found %d.",
num_redis_shards);
RAY_CHECK(num_redis_shards >= 1) << "Expected at least one Redis shard, "
<< "found " << num_redis_shards;
freeReplyObject(reply);
/* Get the addresses of all of the Redis shards. */
@@ -137,18 +137,18 @@ void get_redis_shards(redisContext *context,
num_attempts++;
continue;
}
CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(),
"Expected %d Redis shard addresses, found %d", num_redis_shards,
(int) reply->elements);
RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries())
<< "Expected " << num_redis_shards << " Redis shard addresses, found "
<< reply->elements;
/* Parse the Redis shard addresses. */
char db_shard_address[16];
int db_shard_port;
for (size_t i = 0; i < reply->elements; ++i) {
/* Parse the shard addresses and ports. */
CHECK(reply->element[i]->type == REDIS_REPLY_STRING);
CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address,
&db_shard_port) == 0);
RAY_CHECK(reply->element[i]->type == REDIS_REPLY_STRING);
RAY_CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address,
&db_shard_port) == 0);
db_shards_addresses.push_back(std::string(db_shard_address));
db_shards_ports.push_back(db_shard_port);
}
@@ -174,7 +174,7 @@ void db_connect_shard(const std::string &db_address,
RayConfig::instance().redis_db_connect_retries()) {
break;
}
LOG_WARN("Failed to connect to Redis, retrying.");
RAY_LOG(WARNING) << "Failed to connect to Redis, retrying.";
/* Sleep for a little. */
usleep(RayConfig::instance().redis_db_connect_wait_milliseconds() * 1000);
sync_context = redisConnect(db_address.c_str(), db_port);
@@ -190,13 +190,13 @@ void db_connect_shard(const std::string &db_address,
* processes by hand), it is easier to do it multiple times. */
reply = (redisReply *) redisCommand(sync_context,
"CONFIG SET notify-keyspace-events Kl");
CHECKM(reply != NULL, "db_connect failed on CONFIG SET");
RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET";
freeReplyObject(reply);
/* Also configure Redis to not run in protected mode, so clients on other
* hosts can connect to it. */
reply =
(redisReply *) redisCommand(sync_context, "CONFIG SET protected-mode no");
CHECKM(reply != NULL, "db_connect failed on CONFIG SET");
RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET";
freeReplyObject(reply);
/* Construct the argument arrays for RAY.CONNECT. */
@@ -224,9 +224,9 @@ void db_connect_shard(const std::string &db_address,
/* Register this client with Redis. RAY.CONNECT is a custom Redis command that
* we've defined. */
reply = (redisReply *) redisCommandArgv(sync_context, argc, argv, argvlen);
CHECKM(reply != NULL, "db_connect failed on RAY.CONNECT");
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply != NULL) << "db_connect failed on RAY.CONNECT";
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
freeReplyObject(reply);
free(argv);
free(argvlen);
@@ -261,7 +261,7 @@ DBHandle *db_connect(const std::string &db_primary_address,
/* Check that the number of args is even. These args will be passed to the
* RAY.CONNECT Redis command, which takes arguments in pairs. */
if (args.size() % 2 != 0) {
LOG_FATAL("The number of extra args must be divisible by two.");
RAY_LOG(FATAL) << "The number of extra args must be divisible by two.";
}
/* Create a client ID for this client. */
@@ -288,7 +288,7 @@ DBHandle *db_connect(const std::string &db_primary_address,
std::vector<std::string> db_shards_addresses;
std::vector<int> db_shards_ports;
get_redis_shards(db->sync_context, db_shards_addresses, db_shards_ports);
CHECKM(db_shards_addresses.size() > 0, "No Redis shards found");
RAY_CHECK(db_shards_addresses.size() > 0) << "No Redis shards found";
/* Connect to the shards. */
for (size_t i = 0; i < db_shards_addresses.size(); ++i) {
db_connect_shard(db_shards_addresses[i], db_shards_ports[i], client,
@@ -309,7 +309,7 @@ void DBHandle_free(DBHandle *db) {
redisAsyncFree(db->subscribe_context);
/* Clean up the Redis shards. */
CHECK(db->contexts.size() == db->subscribe_contexts.size());
RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size());
for (size_t i = 0; i < db->contexts.size(); ++i) {
redisAsyncFree(db->contexts[i]);
redisAsyncFree(db->subscribe_contexts[i]);
@@ -326,8 +326,8 @@ void db_disconnect(DBHandle *db) {
redisReply *reply =
(redisReply *) redisCommand(db->sync_context, "RAY.DISCONNECT %b",
db->client.data(), sizeof(db->client));
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
freeReplyObject(reply);
DBHandle_free(db);
@@ -340,24 +340,24 @@ void db_attach(DBHandle *db, event_loop *loop, bool reattach) {
/* If the database is reattached in the tests, redis normally gives
* an error which we can safely ignore. */
if (!reattach) {
CHECKM(err == REDIS_OK, "failed to attach the event loop");
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
}
err = redisAeAttach(loop, db->subscribe_context);
if (!reattach) {
CHECKM(err == REDIS_OK, "failed to attach the event loop");
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
}
/* Attach other redis shards to the event loop. */
CHECK(db->contexts.size() == db->subscribe_contexts.size());
RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size());
for (size_t i = 0; i < db->contexts.size(); ++i) {
int err = redisAeAttach(loop, db->contexts[i]);
/* If the database is reattached in the tests, redis normally gives
* an error which we can safely ignore. */
if (!reattach) {
CHECKM(err == REDIS_OK, "failed to attach the event loop");
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
}
err = redisAeAttach(loop, db->subscribe_contexts[i]);
if (!reattach) {
CHECKM(err == REDIS_OK, "failed to attach the event loop");
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
}
}
}
@@ -377,13 +377,14 @@ void redis_object_table_add_callback(redisAsyncContext *c,
if (!success) {
/* If our object hash doesn't match the one recorded in the table, report
* the error back to the user and exit immediately. */
LOG_WARN(
"Found objects with different value but same object ID, most likely "
"because a nondeterministic task was executed twice, either for "
"reconstruction or for speculation.");
RAY_LOG(WARNING) << "Found objects with different value but same object "
<< "ID, most likely because a nondeterministic task was "
<< "executed twice, either for reconstruction or for "
<< "speculation.";
} else {
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
<< reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
}
/* Call the done callback if there is one. */
if (callback_data->done_callback != NULL) {
@@ -428,8 +429,8 @@ void redis_object_table_remove_callback(redisAsyncContext *c,
* condition with an object_table_add. */
return;
}
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
/* Call the done callback if there is one. */
if (callback_data->done_callback != NULL) {
object_table_done_callback done_callback =
@@ -464,7 +465,7 @@ void redis_object_table_remove(TableCallbackData *callback_data) {
}
void redis_object_table_lookup(TableCallbackData *callback_data) {
CHECK(callback_data);
RAY_CHECK(callback_data);
DBHandle *db = callback_data->db_handle;
ObjectID obj_id = callback_data->id;
@@ -486,9 +487,9 @@ void redis_result_table_add_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
/* Check that the command succeeded. */
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strncmp(reply->str, "OK", strlen("OK")) == 0, "reply->str is %s",
reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strncmp(reply->str, "OK", strlen("OK")) == 0) << "reply->str is "
<< reply->str;
/* Call the done callback if there is one. */
if (callback_data->done_callback) {
result_table_done_callback done_callback =
@@ -499,7 +500,7 @@ void redis_result_table_add_callback(redisAsyncContext *c,
}
void redis_result_table_add(TableCallbackData *callback_data) {
CHECK(callback_data);
RAY_CHECK(callback_data);
DBHandle *db = callback_data->db_handle;
ObjectID id = callback_data->id;
ResultTableAddInfo *info = (ResultTableAddInfo *) callback_data->data->Get();
@@ -522,10 +523,9 @@ void redis_result_table_add(TableCallbackData *callback_data) {
* task is NULL. This is used by both redis_result_table_lookup_callback and
* redis_task_table_get_task_callback. */
Task *parse_and_construct_task_from_redis_reply(redisReply *reply) {
Task *task;
Task *task = NULL;
if (reply->type == REDIS_REPLY_NIL) {
/* There is no task in the reply, so return NULL. */
task = NULL;
} else if (reply->type == REDIS_REPLY_STRING) {
/* The reply is a flatbuffer TaskReply object. Parse it and construct the
* task. */
@@ -540,7 +540,7 @@ Task *parse_and_construct_task_from_redis_reply(redisReply *reply) {
from_flatbuf(*message->local_scheduler_id()),
from_flatbuf(*execution_dependencies->execution_dependencies()));
} else {
LOG_FATAL("Unexpected reply type %d", reply->type);
RAY_LOG(FATAL) << "Unexpected reply type " << reply->type;
}
/* Return the task. If it is not NULL, then it must be freed by the caller. */
return task;
@@ -551,9 +551,9 @@ void redis_result_table_lookup_callback(redisAsyncContext *c,
void *privdata) {
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECKM(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING,
"Unexpected reply type %d in redis_result_table_lookup_callback",
reply->type);
RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING)
<< "Unexpected reply type " << reply->type << " in "
<< "redis_result_table_lookup_callback";
/* Parse the task from the reply. */
TaskID result_id = TaskID::nil();
bool is_put = false;
@@ -575,7 +575,7 @@ void redis_result_table_lookup_callback(redisAsyncContext *c,
}
void redis_result_table_lookup(TableCallbackData *callback_data) {
CHECK(callback_data);
RAY_CHECK(callback_data);
DBHandle *db = callback_data->db_handle;
ObjectID id = callback_data->id;
redisAsyncContext *context = get_redis_context(db, id);
@@ -594,8 +594,8 @@ DBClient redis_db_client_table_get(DBHandle *db,
redisReply *reply =
(redisReply *) redisCommand(db->sync_context, "HGETALL %s%b",
DB_CLIENT_PREFIX, client_id, client_id_len);
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements > 0);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements > 0);
DBClient db_client;
int num_fields = 0;
/* Parse the fields into a DBClient. */
@@ -620,7 +620,7 @@ DBClient redis_db_client_table_get(DBHandle *db,
freeReplyObject(reply);
/* The client ID, type, and whether it is deleted are all
* mandatory fields. Auxiliary address is optional. */
CHECK(num_fields >= 3);
RAY_CHECK(num_fields >= 3);
return db_client;
}
@@ -651,8 +651,8 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
void *privdata) {
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
LOG_DEBUG("Object table lookup callback");
CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY);
RAY_LOG(DEBUG) << "Object table lookup callback";
RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY);
object_table_lookup_done_callback done_callback =
(object_table_lookup_done_callback) callback_data->done_callback;
@@ -671,7 +671,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
std::vector<DBClientID> manager_ids;
for (size_t j = 0; j < reply->elements; ++j) {
CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
RAY_CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
DBClientID manager_id;
memcpy(manager_id.mutable_data(), reply->element[j]->str,
sizeof(manager_id));
@@ -682,7 +682,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
done_callback(obj_id, false, manager_ids, callback_data->user_context);
}
} else {
LOG_FATAL("Unexpected reply type from object table lookup.");
RAY_LOG(FATAL) << "Unexpected reply type from object table lookup.";
}
/* Clean up timer and callback. */
@@ -708,11 +708,11 @@ void object_table_redis_subscribe_to_notifications_callback(
* - reply->element[2]->str is the contents of the message.
*/
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements == 3);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements == 3);
redisReply *message_type = reply->element[0];
LOG_DEBUG("Object table subscribe to notifications callback, message %s",
message_type->str);
/* The trailing space after "message" keeps the message type from running
 * into the prefix in the emitted log line. */
RAY_LOG(DEBUG) << "Object table subscribe to notifications callback, message "
               << message_type->str;
if (strcmp(message_type->str, "message") == 0) {
/* We received an object notification. Parse the payload. */
@@ -752,8 +752,8 @@ void object_table_redis_subscribe_to_notifications_callback(
* destroy the callback data. */
remove_timer_callback(db->loop, callback_data);
} else {
LOG_FATAL(
"Unexpected reply type from object table subscribe to notifications.");
RAY_LOG(FATAL) << "Unexpected reply type from object table subscribe to "
<< "notifications.";
}
}
@@ -770,8 +770,8 @@ void redis_object_table_subscribe_to_notifications(
* as the channel name so this channel is specific to this client.
* TODO(rkn):
* The channel name should probably be the client ID with some prefix. */
CHECKM(callback_data->data->Get() != NULL,
"Object table subscribe data passed as NULL.");
RAY_CHECK(callback_data->data->Get() != NULL)
<< "Object table subscribe data passed as NULL.";
if (((ObjectTableSubscribeData *) (callback_data->data->Get()))
->subscribe_all) {
/* Subscribe to the object broadcast channel. */
@@ -802,9 +802,9 @@ void redis_object_table_request_notifications_callback(redisAsyncContext *c,
/* Do some minimal checking. */
redisReply *reply = (redisReply *) r;
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
RAY_CHECK(callback_data->done_callback == NULL);
/* Clean up the timer and callback. */
destroy_timer_callback(db->loop, callback_data);
}
@@ -876,7 +876,7 @@ void redis_task_table_get_task_callback(redisAsyncContext *c,
void redis_task_table_get_task(TableCallbackData *callback_data) {
DBHandle *db = callback_data->db_handle;
CHECK(callback_data->data->Get() == NULL);
RAY_CHECK(callback_data->data->Get() == NULL);
TaskID task_id = callback_data->id;
redisAsyncContext *context = get_redis_context(db, task_id);
@@ -902,15 +902,16 @@ void redis_task_table_add_task_callback(redisAsyncContext *c,
// db_client table before retrying the add.
if (reply->type == REDIS_REPLY_ERROR &&
strcmp(reply->str, "No subscribers received message.") == 0) {
LOG_WARN("No subscribers received the task_table_add message.");
RAY_LOG(WARNING) << "No subscribers received the task_table_add message.";
if (callback_data->retry.fail_callback != NULL) {
callback_data->retry.fail_callback(callback_data->id,
callback_data->user_context,
callback_data->data->Get());
}
} else {
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
<< reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
/* Call the done callback if there is one. */
if (callback_data->done_callback != NULL) {
task_table_done_callback done_callback =
@@ -926,7 +927,7 @@ void redis_task_table_add_task_callback(redisAsyncContext *c,
void redis_task_table_add_task(TableCallbackData *callback_data) {
DBHandle *db = callback_data->db_handle;
Task *task = (Task *) callback_data->data->Get();
CHECKM(task != NULL, "NULL task passed to redis_task_table_add_task.");
RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_add_task.";
TaskID task_id = Task_task_id(task);
DBClientID local_scheduler_id = Task_local_scheduler(task);
@@ -967,15 +968,17 @@ void redis_task_table_update_callback(redisAsyncContext *c,
// alive in the db_client table.
if (reply->type == REDIS_REPLY_ERROR &&
strcmp(reply->str, "No subscribers received message.") == 0) {
LOG_WARN("No subscribers received the task_table_update message.");
RAY_LOG(WARNING) << "No subscribers received the task_table_update "
<< "message.";
if (callback_data->retry.fail_callback != NULL) {
callback_data->retry.fail_callback(callback_data->id,
callback_data->user_context,
callback_data->data->Get());
}
} else {
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
<< reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
/* Call the done callback if there is one. */
if (callback_data->done_callback != NULL) {
@@ -992,7 +995,7 @@ void redis_task_table_update_callback(redisAsyncContext *c,
void redis_task_table_update(TableCallbackData *callback_data) {
DBHandle *db = callback_data->db_handle;
Task *task = (Task *) callback_data->data->Get();
CHECKM(task != NULL, "NULL task passed to redis_task_table_update.");
RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_update.";
TaskID task_id = Task_task_id(task);
redisAsyncContext *context = get_redis_context(db, task_id);
@@ -1030,7 +1033,7 @@ void redis_task_table_test_and_update_callback(redisAsyncContext *c,
* delayed when added to the task table if they are submitted to a local
* scheduler before it receives the notification that maps the actor to a
* local scheduler. */
LOG_ERROR("No task found during task_table_test_and_update");
RAY_LOG(ERROR) << "No task found during task_table_test_and_update";
return;
}
/* Determine whether the update happened. */
@@ -1091,11 +1094,11 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
/* The number of elements is 3 for a reply to SUBSCRIBE, and 4 for a reply to
* PSUBSCRIBE. */
CHECKM(reply->elements == 3 || reply->elements == 4, "reply->elements is %zu",
reply->elements);
RAY_CHECK(reply->elements == 3 || reply->elements == 4)
<< "reply->elements is " << reply->elements;
/* The first element is the message type and the last entry is the payload.
* The middle one or middle two elements describe the channel that was
* published on. */
@@ -1148,9 +1151,8 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c,
* subscription callback needs this data. */
remove_timer_callback(db->loop, callback_data);
} else {
LOG_FATAL(
"Unexpected reply type from task table subscribe. Message type is %s.",
message_type->str);
RAY_LOG(FATAL) << "Unexpected reply type from task table subscribe. "
<< "Message type is " << message_type->str;
}
}
@@ -1200,8 +1202,8 @@ void redis_db_client_table_remove_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
/* Call the done callback if there is one. */
db_client_table_done_callback done_callback =
@@ -1235,7 +1237,7 @@ void redis_db_client_table_scan(DBHandle *db,
return;
}
/* Get all the database client information. */
CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
for (size_t i = 0; i < reply->elements; ++i) {
/* Strip the database client table prefix. */
unsigned char *key = (unsigned char *) reply->element[i]->str;
@@ -1255,8 +1257,8 @@ void redis_db_client_table_subscribe_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements > 2);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements > 2);
/* First entry is message type, then possibly the regex we psubscribed to,
* then topic, then payload. */
redisReply *payload = reply->element[reply->elements - 1];
@@ -1323,11 +1325,11 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements == 3);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements == 3);
redisReply *message_type = reply->element[0];
LOG_DEBUG("Local scheduler table subscribe callback, message %s",
message_type->str);
RAY_LOG(DEBUG) << "Local scheduler table subscribe callback, message "
<< message_type->str;
if (strcmp(message_type->str, "message") == 0) {
/* Handle a local scheduler heartbeat. Parse the payload and call the
@@ -1362,13 +1364,13 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c,
}
} else if (strcmp(message_type->str, "subscribe") == 0) {
/* The reply for the initial SUBSCRIBE command. */
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(callback_data->done_callback == NULL);
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
* destroy the callback data. */
remove_timer_callback(db->loop, callback_data);
} else {
LOG_FATAL("Unexpected reply type from local scheduler subscribe.");
RAY_LOG(FATAL) << "Unexpected reply type from local scheduler subscribe.";
}
}
@@ -1389,10 +1391,10 @@ void redis_local_scheduler_table_send_info_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_INTEGER);
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(callback_data->done_callback == NULL);
/* Clean up the timer and callback. */
destroy_timer_callback(db->loop, callback_data);
}
@@ -1430,9 +1432,9 @@ void redis_local_scheduler_table_disconnect(DBHandle *db) {
redisReply *reply = (redisReply *) redisCommand(
db->sync_context, "PUBLISH local_schedulers %b", fbb.GetBufferPointer(),
(size_t) fbb.GetSize());
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECK(reply->type == REDIS_REPLY_INTEGER);
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
freeReplyObject(reply);
}
@@ -1442,10 +1444,11 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements == 3);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements == 3);
redisReply *message_type = reply->element[0];
LOG_DEBUG("Driver table subscribe callback, message %s", message_type->str);
RAY_LOG(DEBUG) << "Driver table subscribe callback, message "
<< message_type->str;
if (strcmp(message_type->str, "message") == 0) {
/* Handle a driver heartbeat. Parse the payload and call the subscribe
@@ -1463,13 +1466,13 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c,
}
} else if (strcmp(message_type->str, "subscribe") == 0) {
/* The reply for the initial SUBSCRIBE command. */
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(callback_data->done_callback == NULL);
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
* destroy the callback data. */
remove_timer_callback(db->loop, callback_data);
} else {
LOG_FATAL("Unexpected reply type from driver subscribe.");
RAY_LOG(FATAL) << "Unexpected reply type from driver subscribe.";
}
}
@@ -1490,13 +1493,13 @@ void redis_driver_table_send_driver_death_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_INTEGER);
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
/* At the very least, the local scheduler that publishes this message should
* also receive it. */
CHECK(reply->integer >= 1);
RAY_CHECK(reply->integer >= 1);
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(callback_data->done_callback == NULL);
/* Clean up the timer and callback. */
destroy_timer_callback(db->loop, callback_data);
}
@@ -1544,11 +1547,11 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
CHECK(reply->type == REDIS_REPLY_ARRAY);
CHECK(reply->elements == 3);
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
RAY_CHECK(reply->elements == 3);
redisReply *message_type = reply->element[0];
LOG_DEBUG("Local scheduler table subscribe callback, message %s",
message_type->str);
/* Name the actor notification table here so this debug line can be told
 * apart from the one emitted by the local scheduler subscribe callback. */
RAY_LOG(DEBUG) << "Actor notification table subscribe callback, message "
               << message_type->str;
if (strcmp(message_type->str, "message") == 0) {
/* Handle an actor notification message. Parse the payload and call the
@@ -1561,9 +1564,9 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
WorkerID driver_id;
DBClientID local_scheduler_id;
bool reconstruct;
CHECK(sizeof(actor_id) + sizeof(driver_id) + sizeof(local_scheduler_id) +
1 ==
payload->len);
RAY_CHECK(sizeof(actor_id) + sizeof(driver_id) +
sizeof(local_scheduler_id) + 1 ==
payload->len);
char *current_ptr = payload->str;
/* Parse the actor ID. */
memcpy(&actor_id, current_ptr, sizeof(actor_id));
@@ -1580,7 +1583,8 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
} else if (*current_ptr == '0') {
reconstruct = false;
} else {
LOG_FATAL("This code should be unreachable.");
reconstruct = false; // We set this value to avoid a compiler warning.
RAY_LOG(FATAL) << "This code should be unreachable.";
}
current_ptr += 1;
@@ -1590,13 +1594,14 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
}
} else if (strcmp(message_type->str, "subscribe") == 0) {
/* The reply for the initial SUBSCRIBE command. */
CHECK(callback_data->done_callback == NULL);
RAY_CHECK(callback_data->done_callback == NULL);
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
* destroy the callback data. */
remove_timer_callback(db->loop, callback_data);
} else {
LOG_FATAL("Unexpected reply type from actor notification subscribe.");
RAY_LOG(FATAL) << "Unexpected reply type from actor notification "
<< "subscribe.";
}
}
@@ -1627,7 +1632,7 @@ void redis_push_error_rpush_callback(redisAsyncContext *c,
REDIS_CALLBACK_HEADER(db, callback_data, r);
redisReply *reply = (redisReply *) r;
/* The reply should be the length of the errors list after our RPUSH. */
CHECK(reply->type == REDIS_REPLY_INTEGER);
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
destroy_timer_callback(db->loop, callback_data);
}
@@ -1638,8 +1643,8 @@ void redis_push_error_hmset_callback(redisAsyncContext *c,
redisReply *reply = (redisReply *) r;
/* Make sure we were able to add the error information. */
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
/* Add the error to this driver's list of errors. */
ErrorInfo *info = (ErrorInfo *) callback_data->data->Get();
@@ -1656,7 +1661,7 @@ void redis_push_error_hmset_callback(redisAsyncContext *c,
void redis_push_error(TableCallbackData *callback_data) {
DBHandle *db = callback_data->db_handle;
ErrorInfo *info = (ErrorInfo *) callback_data->data->Get();
CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0);
RAY_CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0);
/* Look up the error type. */
const char *error_type = error_types[info->error_index];
const char *error_message = error_messages[info->error_index];
@@ -1674,6 +1679,6 @@ void redis_push_error(TableCallbackData *callback_data) {
}
DBClientID get_db_client_id(DBHandle *db) {
CHECK(db != NULL);
RAY_CHECK(db != NULL);
return db->client;
}
+6 -4
View File
@@ -11,11 +11,13 @@
#include "hiredis/hiredis.h"
#include "hiredis/async.h"
#define LOG_REDIS_ERROR(context, M, ...) \
LOG_ERROR("Redis error %d %s; %s", context->err, context->errstr, M)
#define LOG_REDIS_ERROR(context, M, ...) \
RAY_LOG(ERROR) << "Redis error " << context->err << " " << context->errstr \
<< "; " << M
#define LOG_REDIS_DEBUG(context, M, ...) \
LOG_DEBUG("Redis error %d %s; %s", context->err, context->errstr, M)
/* Logs a Redis failure at DEBUG level: the context's error code and error
 * string, followed by the caller-supplied message M. Any further arguments
 * are accepted and ignored. The expansion deliberately ends without a
 * semicolon, like LOG_REDIS_ERROR, so the call site supplies its own
 * terminator and no stray empty statement is produced. The macro arguments
 * are parenthesized so that expressions can be passed safely. */
#define LOG_REDIS_DEBUG(context, M, ...)                      \
  RAY_LOG(DEBUG) << "Redis error " << (context)->err << " "  \
                 << (context)->errstr << "; " << (M)
struct DBHandle {
/** String that identifies this client type. */
+17 -17
View File
@@ -43,18 +43,18 @@ TableCallbackData *init_table_callback(DBHandle *db_handle,
table_done_callback done_callback,
table_retry_callback retry_callback,
void *user_context) {
CHECK(db_handle);
CHECK(db_handle->loop);
CHECK(data);
RAY_CHECK(db_handle);
RAY_CHECK(db_handle->loop);
RAY_CHECK(data);
/* If no retry info is provided, use the default retry info. */
if (retry == NULL) {
retry = (RetryInfo *) &default_retry;
}
CHECK(retry);
RAY_CHECK(retry);
/* Allocate and initialize callback data structure for object table */
TableCallbackData *callback_data =
(TableCallbackData *) malloc(sizeof(TableCallbackData));
CHECKM(callback_data != NULL, "Memory allocation error!")
RAY_CHECK(callback_data != NULL) << "Memory allocation error!";
callback_data->id = id;
callback_data->label = label;
callback_data->retry = *retry;
@@ -70,8 +70,8 @@ TableCallbackData *init_table_callback(DBHandle *db_handle,
callback_data->timer_id = callback_data_id++;
outstanding_callbacks_add(callback_data);
LOG_DEBUG("Initializing table command %s with timer ID %" PRId64,
callback_data->label, callback_data->timer_id);
RAY_LOG(DEBUG) << "Initializing table command " << callback_data->label
<< " with timer ID " << callback_data->timer_id;
callback_data->retry_callback(callback_data);
return callback_data;
@@ -92,12 +92,12 @@ void remove_timer_callback(event_loop *loop, TableCallbackData *callback_data) {
}
void destroy_table_callback(TableCallbackData *callback_data) {
CHECK(callback_data != NULL);
RAY_CHECK(callback_data != NULL);
if (callback_data->requests_info)
free(callback_data->requests_info);
CHECK(callback_data->data != NULL);
RAY_CHECK(callback_data->data != NULL);
delete callback_data->data;
callback_data->data = NULL;
@@ -110,20 +110,20 @@ void destroy_table_callback(TableCallbackData *callback_data) {
int64_t table_timeout_handler(event_loop *loop,
int64_t timer_id,
void *user_context) {
CHECK(loop != NULL);
CHECK(user_context != NULL);
RAY_CHECK(loop != NULL);
RAY_CHECK(user_context != NULL);
TableCallbackData *callback_data = (TableCallbackData *) user_context;
CHECK(callback_data->retry.num_retries >= 0 ||
callback_data->retry.num_retries == -1);
LOG_WARN("retrying operation %s, retry_count = %d", callback_data->label,
callback_data->retry.num_retries);
RAY_CHECK(callback_data->retry.num_retries >= 0 ||
callback_data->retry.num_retries == -1);
RAY_LOG(WARNING) << "retrying operation " << callback_data->label
<< ", retry_count = " << callback_data->retry.num_retries;
if (callback_data->retry.num_retries == 0) {
/* We didn't get a response from the database after exhausting all retries;
* let user know, cleanup the state, and remove the timer. */
LOG_WARN("Table command %s with timer ID %" PRId64 " failed",
callback_data->label, timer_id);
RAY_LOG(WARNING) << "Table command " << callback_data->label
<< " with timer ID " << timer_id << " failed";
if (callback_data->retry.fail_callback) {
callback_data->retry.fail_callback(callback_data->id,
callback_data->user_context,
+24 -24
View File
@@ -80,7 +80,7 @@ class TaskBuilder {
}
void SetRequiredResource(const std::string &resource_name, double value) {
CHECK(resource_map_.count(resource_name) == 0);
RAY_CHECK(resource_map_.count(resource_name) == 0);
resource_map_[resource_name] = value;
}
@@ -91,7 +91,7 @@ class TaskBuilder {
BYTE buff[DIGEST_SIZE];
sha256_final(&ctx, buff);
TaskID task_id;
CHECK(sizeof(task_id) <= DIGEST_SIZE);
RAY_CHECK(sizeof(task_id) <= DIGEST_SIZE);
memcpy(&task_id, buff, sizeof(task_id));
/* Add return object IDs. */
std::vector<flatbuffers::Offset<flatbuffers::String>> returns;
@@ -206,25 +206,25 @@ void TaskSpec_set_required_resource(TaskBuilder *builder,
/* Functions for reading tasks. */
TaskID TaskSpec_task_id(const TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->task_id());
}
FunctionID TaskSpec_function(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->function_id());
}
ActorID TaskSpec_actor_id(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->actor_id());
}
ActorID TaskSpec_actor_handle_id(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->actor_handle_id());
}
@@ -234,19 +234,19 @@ bool TaskSpec_is_actor_task(TaskSpec *spec) {
}
int64_t TaskSpec_actor_counter(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return std::abs(message->actor_counter());
}
bool TaskSpec_is_actor_checkpoint_method(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->is_actor_checkpoint_method();
}
ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) {
CHECK(TaskSpec_is_actor_task(spec));
RAY_CHECK(TaskSpec_is_actor_task(spec));
/* The last return value for actor tasks is the dummy object that
* represents that this task has completed execution. */
int64_t num_returns = TaskSpec_num_returns(spec);
@@ -254,25 +254,25 @@ ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) {
}
UniqueID TaskSpec_driver_id(const TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->driver_id());
}
TaskID TaskSpec_parent_task_id(const TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->parent_task_id());
}
int64_t TaskSpec_parent_counter(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->parent_counter();
}
int64_t TaskSpec_num_args(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->args()->size();
}
@@ -289,45 +289,45 @@ int64_t TaskSpec_num_args_by_ref(TaskSpec *spec) {
}
int TaskSpec_arg_id_count(TaskSpec *spec, int64_t arg_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
auto ids = message->args()->Get(arg_index)->object_ids();
return ids->size();
}
ObjectID TaskSpec_arg_id(TaskSpec *spec, int64_t arg_index, int64_t id_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(
*message->args()->Get(arg_index)->object_ids()->Get(id_index));
}
const uint8_t *TaskSpec_arg_val(TaskSpec *spec, int64_t arg_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return (uint8_t *) message->args()->Get(arg_index)->data()->c_str();
}
int64_t TaskSpec_arg_length(TaskSpec *spec, int64_t arg_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->args()->Get(arg_index)->data()->size();
}
int64_t TaskSpec_num_returns(TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->returns()->size();
}
bool TaskSpec_arg_by_ref(TaskSpec *spec, int64_t arg_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return message->args()->Get(arg_index)->object_ids()->size() != 0;
}
ObjectID TaskSpec_return(TaskSpec *spec, int64_t return_index) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return from_flatbuf(*message->returns()->Get(return_index));
}
@@ -336,7 +336,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec,
const std::string &resource_name) {
// This is a bit ugly. However it shouldn't be much of a performance issue
// because there shouldn't be many distinct resources in a single task spec.
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
for (size_t i = 0; i < message->required_resources()->size(); i++) {
const ResourcePair *resource_pair = message->required_resources()->Get(i);
@@ -349,7 +349,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec,
const std::unordered_map<std::string, double> TaskSpec_get_required_resources(
const TaskSpec *spec) {
CHECK(spec);
RAY_CHECK(spec);
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
return map_from_flatbuf(*message->required_resources());
}
@@ -446,7 +446,7 @@ int TaskExecutionSpec::DependencyIdCount(int64_t dependency_index) const {
} else {
/* Index into the execution dependencies. */
dependency_index -= num_args;
CHECK((size_t) dependency_index < execution_dependencies_.size());
RAY_CHECK((size_t) dependency_index < execution_dependencies_.size());
/* All elements in the execution dependency list have exactly one ID. */
return 1;
}
@@ -465,7 +465,7 @@ ObjectID TaskExecutionSpec::DependencyId(int64_t dependency_index,
} else {
/* Index into the execution dependencies. */
dependency_index -= num_args;
CHECK((size_t) dependency_index < execution_dependencies_.size());
RAY_CHECK((size_t) dependency_index < execution_dependencies_.size());
return execution_dependencies_[dependency_index];
}
}
+9 -9
View File
@@ -42,13 +42,13 @@ void lookup_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_ids,
void *user_context) {
DBHandle *db = (DBHandle *) user_context;
CHECK(manager_ids.size() == 2);
RAY_CHECK(manager_ids.size() == 2);
const std::vector<std::string> managers =
db_client_table_get_ip_addresses(db, manager_ids);
CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1,
&received_port1) == 0);
CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2,
&received_port2) == 0);
RAY_CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1,
&received_port1) == 0);
RAY_CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2,
&received_port2) == 0);
}
/* Entry added to database successfully. */
@@ -57,7 +57,7 @@ void add_done_callback(ObjectID object_id, bool success, void *user_context) {}
/* Test if we got a timeout callback if we couldn't connect database. */
void timeout_callback(ObjectID object_id, void *context, void *user_data) {
user_context *uc = (user_context *) context;
CHECK(uc->test_number == TEST_NUMBER)
RAY_CHECK(uc->test_number == TEST_NUMBER);
}
int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
@@ -136,9 +136,9 @@ int64_t task_table_delayed_add_task(event_loop *loop,
void task_table_test_callback(Task *callback_task, void *user_data) {
task_table_test_callback_called = 1;
CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED);
CHECK(Task_size(callback_task) == Task_size(task_table_test_task));
CHECK(Task_equals(callback_task, task_table_test_task));
RAY_CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED);
RAY_CHECK(Task_size(callback_task) == Task_size(task_table_test_task));
RAY_CHECK(Task_equals(callback_task, task_table_test_task));
event_loop *loop = (event_loop *) user_data;
event_loop_stop(loop);
}
+37 -36
View File
@@ -38,13 +38,13 @@ void new_object_done_callback(ObjectID object_id,
bool is_put,
void *user_context) {
new_object_succeeded = 1;
CHECK(object_id == new_object_id);
CHECK(task_id == new_object_task_id);
RAY_CHECK(object_id == new_object_id);
RAY_CHECK(task_id == new_object_task_id);
event_loop_stop(g_loop);
}
void new_object_lookup_callback(ObjectID object_id, void *user_context) {
CHECK(object_id == new_object_id);
RAY_CHECK(object_id == new_object_id);
RetryInfo retry = {
.num_retries = 5,
.timeout = 100,
@@ -109,7 +109,7 @@ void new_object_no_task_callback(ObjectID object_id,
bool is_put,
void *user_context) {
new_object_succeeded = 1;
CHECK(task_id.is_nil());
RAY_CHECK(task_id.is_nil());
event_loop_stop(g_loop);
}
@@ -150,12 +150,12 @@ void lookup_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_vector,
void *context) {
/* The done callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void lookup_fail_callback(UniqueID id, void *user_context, void *user_data) {
lookup_failed = 1;
CHECK(user_context == (void *) lookup_timeout_context);
RAY_CHECK(user_context == (void *) lookup_timeout_context);
event_loop_stop(g_loop);
}
@@ -189,12 +189,12 @@ int add_failed = 0;
void add_done_callback(ObjectID object_id, bool success, void *user_context) {
/* The done callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void add_fail_callback(UniqueID id, void *user_context, void *user_data) {
add_failed = 1;
CHECK(user_context == (void *) add_timeout_context);
RAY_CHECK(user_context == (void *) add_timeout_context);
event_loop_stop(g_loop);
}
@@ -230,7 +230,7 @@ void subscribe_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_vector,
void *user_context) {
/* The done callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) {
@@ -277,7 +277,7 @@ int64_t reconnect_context_callback(event_loop *loop,
db->sync_context = redisConnect("127.0.0.1", 6379);
/* Re-attach the database to the event loop (the file descriptor changed). */
db_attach(db, loop, true);
LOG_DEBUG("Reconnected to Redis");
RAY_LOG(DEBUG) << "Reconnected to Redis";
return EVENT_LOOP_TIMER_DONE;
}
@@ -297,7 +297,7 @@ void lookup_retry_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
/* === Test add retry === */
@@ -312,15 +312,15 @@ void add_lookup_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_ids,
void *context) {
DBHandle *db = (DBHandle *) context;
CHECK(manager_ids.size() == 1);
RAY_CHECK(manager_ids.size() == 1);
const std::vector<std::string> managers =
db_client_table_get_ip_addresses(db, manager_ids);
CHECK(managers.at(0) == "127.0.0.1:11235");
RAY_CHECK(managers.at(0) == "127.0.0.1:11235");
lookup_retry_succeeded = 1;
}
void add_lookup_callback(ObjectID object_id, bool success, void *user_context) {
CHECK(success);
RAY_CHECK(success);
DBHandle *db = (DBHandle *) user_context;
RetryInfo retry = {
.num_retries = 5,
@@ -366,15 +366,15 @@ void add_remove_lookup_done_callback(
bool never_created,
const std::vector<DBClientID> &manager_vector,
void *context) {
CHECK(context == (void *) lookup_retry_context);
CHECK(manager_vector.size() == 0);
RAY_CHECK(context == (void *) lookup_retry_context);
RAY_CHECK(manager_vector.size() == 0);
lookup_retry_succeeded = 1;
}
void add_remove_lookup_callback(ObjectID object_id,
bool success,
void *user_context) {
CHECK(success);
RAY_CHECK(success);
DBHandle *db = (DBHandle *) user_context;
RetryInfo retry = {
.num_retries = 5,
@@ -387,7 +387,7 @@ void add_remove_lookup_callback(ObjectID object_id,
}
void add_remove_callback(ObjectID object_id, bool success, void *user_context) {
CHECK(success);
RAY_CHECK(success);
DBHandle *db = (DBHandle *) user_context;
RetryInfo retry = {
.num_retries = 5,
@@ -433,7 +433,7 @@ int lookup_late_failed = 0;
void lookup_late_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
CHECK(user_context == (void *) lookup_late_context);
RAY_CHECK(user_context == (void *) lookup_late_context);
lookup_late_failed = 1;
}
@@ -442,7 +442,7 @@ void lookup_late_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_vector,
void *context) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST lookup_late_test(void) {
@@ -478,7 +478,7 @@ const char *add_late_context = "add_late";
int add_late_failed = 0;
void add_late_fail_callback(UniqueID id, void *user_context, void *user_data) {
CHECK(user_context == (void *) add_late_context);
RAY_CHECK(user_context == (void *) add_late_context);
add_late_failed = 1;
}
@@ -486,7 +486,7 @@ void add_late_done_callback(ObjectID object_id,
bool success,
void *user_context) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST add_late_test(void) {
@@ -522,7 +522,7 @@ int subscribe_late_failed = 0;
void subscribe_late_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
CHECK(user_context == (void *) subscribe_late_context);
RAY_CHECK(user_context == (void *) subscribe_late_context);
subscribe_late_failed = 1;
}
@@ -531,7 +531,7 @@ void subscribe_late_done_callback(ObjectID object_id,
const std::vector<DBClientID> &manager_vector,
void *user_context) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST subscribe_late_test(void) {
@@ -573,7 +573,7 @@ void subscribe_success_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
void subscribe_success_done_callback(
@@ -594,9 +594,9 @@ void subscribe_success_object_available_callback(
int64_t data_size,
const std::vector<DBClientID> &manager_vector,
void *user_context) {
CHECK(user_context == (void *) subscribe_success_context);
CHECK(object_id == subscribe_id);
CHECK(manager_vector.size() == 1);
RAY_CHECK(user_context == (void *) subscribe_success_context);
RAY_CHECK(object_id == subscribe_id);
RAY_CHECK(manager_vector.size() == 1);
subscribe_success_succeeded = 1;
}
@@ -656,15 +656,15 @@ void subscribe_object_present_object_available_callback(
void *user_context) {
subscribe_object_present_context_t *ctx =
(subscribe_object_present_context_t *) user_context;
CHECK(ctx->data_size == data_size);
CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0);
RAY_CHECK(ctx->data_size == data_size);
RAY_CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0);
subscribe_object_present_succeeded = 1;
CHECK(manager_vector.size() == 1);
RAY_CHECK(manager_vector.size() == 1);
}
void fatal_fail_callback(UniqueID id, void *user_context, void *user_data) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST subscribe_object_present_test(void) {
@@ -723,7 +723,7 @@ void subscribe_object_not_present_object_available_callback(
const std::vector<DBClientID> &manager_vector,
void *user_context) {
/* This should not be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST subscribe_object_not_present_test(void) {
@@ -773,11 +773,12 @@ void subscribe_object_available_later_object_available_callback(
void *user_context) {
subscribe_object_present_context_t *myctx =
(subscribe_object_present_context_t *) user_context;
CHECK(myctx->data_size == data_size);
CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) == 0);
RAY_CHECK(myctx->data_size == data_size);
RAY_CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) ==
0);
/* Make sure the callback is only called once. */
subscribe_object_available_later_succeeded += 1;
CHECK(manager_vector.size() == 1);
RAY_CHECK(manager_vector.size() == 1);
}
TEST subscribe_object_available_later_test(void) {
+6 -6
View File
@@ -47,10 +47,10 @@ void async_redis_socket_test_callback(redisAsyncContext *ac,
redisReply *reply =
(redisReply *) redisCommand(context, test_get_format, test_key);
redisFree(context);
CHECK(reply != NULL);
RAY_CHECK(reply != NULL);
if (strcmp(reply->str, test_value)) {
freeReplyObject(reply);
CHECK(0);
RAY_CHECK(0);
}
freeReplyObject(reply);
}
@@ -97,7 +97,7 @@ void redis_accept_callback(event_loop *loop,
void *context,
int events) {
int accept_fd = accept_client(socket_fd);
CHECK(accept_fd >= 0);
RAY_CHECK(accept_fd >= 0);
connections.push_back(accept_fd);
event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, redis_read_callback,
context);
@@ -155,8 +155,8 @@ void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) {
redisContext *context = redisConnect("127.0.0.1", 6379);
redisReply *reply = (redisReply *) redisCommand(context, "KEYS %s", "log:*");
redisFree(context);
CHECK(reply != NULL);
CHECK(reply->elements > 0);
RAY_CHECK(reply != NULL);
RAY_CHECK(reply->elements > 0);
freeReplyObject(reply);
}
@@ -176,7 +176,7 @@ void logging_accept_callback(event_loop *loop,
void *context,
int events) {
int accept_fd = accept_client(socket_fd);
CHECK(accept_fd >= 0);
RAY_CHECK(accept_fd >= 0);
connections.push_back(accept_fd);
event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, logging_read_callback,
context);
+18 -18
View File
@@ -27,13 +27,13 @@ void lookup_nil_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void lookup_nil_success_callback(Task *task, void *context) {
lookup_nil_success = 1;
CHECK(task == NULL);
CHECK(context == (void *) lookup_nil_context);
RAY_CHECK(task == NULL);
RAY_CHECK(context == (void *) lookup_nil_context);
event_loop_stop(g_loop);
}
@@ -70,18 +70,18 @@ void add_lookup_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void lookup_success_callback(Task *task, void *context) {
lookup_success = 1;
CHECK(Task_equals(task, add_lookup_task));
RAY_CHECK(Task_equals(task, add_lookup_task));
event_loop_stop(g_loop);
}
void add_success_callback(TaskID task_id, void *context) {
add_success = 1;
CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task)));
RAY_CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task)));
DBHandle *db = (DBHandle *) context;
RetryInfo retry = {
@@ -137,12 +137,12 @@ int subscribe_failed = 0;
void subscribe_done_callback(TaskID task_id, void *user_context) {
/* The done callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) {
subscribe_failed = 1;
CHECK(user_context == (void *) subscribe_timeout_context);
RAY_CHECK(user_context == (void *) subscribe_timeout_context);
event_loop_stop(g_loop);
}
@@ -180,12 +180,12 @@ int publish_failed = 0;
void publish_done_callback(TaskID task_id, void *user_context) {
/* The done callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
void publish_fail_callback(UniqueID id, void *user_context, void *user_data) {
publish_failed = 1;
CHECK(user_context == (void *) publish_timeout_context);
RAY_CHECK(user_context == (void *) publish_timeout_context);
event_loop_stop(g_loop);
}
@@ -249,7 +249,7 @@ const char *subscribe_retry_context = "subscribe_retry";
int subscribe_retry_succeeded = 0;
void subscribe_retry_done_callback(ObjectID object_id, void *user_context) {
CHECK(user_context == (void *) subscribe_retry_context);
RAY_CHECK(user_context == (void *) subscribe_retry_context);
subscribe_retry_succeeded = 1;
}
@@ -257,7 +257,7 @@ void subscribe_retry_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST subscribe_retry_test(void) {
@@ -299,7 +299,7 @@ const char *publish_retry_context = "publish_retry";
int publish_retry_succeeded = 0;
void publish_retry_done_callback(ObjectID object_id, void *user_context) {
CHECK(user_context == (void *) publish_retry_context);
RAY_CHECK(user_context == (void *) publish_retry_context);
publish_retry_succeeded = 1;
}
@@ -307,7 +307,7 @@ void publish_retry_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST publish_retry_test(void) {
@@ -355,13 +355,13 @@ int subscribe_late_failed = 0;
void subscribe_late_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
CHECK(user_context == (void *) subscribe_late_context);
RAY_CHECK(user_context == (void *) subscribe_late_context);
subscribe_late_failed = 1;
}
void subscribe_late_done_callback(TaskID task_id, void *user_context) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST subscribe_late_test(void) {
@@ -400,13 +400,13 @@ int publish_late_failed = 0;
void publish_late_fail_callback(UniqueID id,
void *user_context,
void *user_data) {
CHECK(user_context == (void *) publish_late_context);
RAY_CHECK(user_context == (void *) publish_late_context);
publish_late_failed = 1;
}
void publish_late_done_callback(TaskID task_id, void *user_context) {
/* This function should never be called. */
CHECK(0);
RAY_CHECK(0);
}
TEST publish_late_test(void) {
+2 -1
View File
@@ -22,7 +22,8 @@ static inline std::string bind_ipc_sock_retry(const char *socket_name_format,
int *fd) {
std::string socket_name;
for (int num_retries = 0; num_retries < 5; ++num_retries) {
LOG_INFO("trying to find plasma socket (attempt %d)", num_retries);
RAY_LOG(INFO) << "trying to find plasma socket (attempt " << num_retries
<< ")";
size_t size = std::snprintf(nullptr, 0, socket_name_format, rand()) + 1;
char socket_name_c_str[size];
std::snprintf(socket_name_c_str, size, socket_name_format, rand());
+37 -42
View File
@@ -31,7 +31,7 @@ void assign_task_to_local_scheduler_retry(UniqueID id,
void *user_data) {
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
Task *task = (Task *) user_data;
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
// If the local scheduler has died since we requested the task assignment, do
// not retry again.
@@ -68,13 +68,13 @@ void assign_task_to_local_scheduler_retry(UniqueID id,
void assign_task_to_local_scheduler(GlobalSchedulerState *state,
Task *task,
DBClientID local_scheduler_id) {
std::string id_string = local_scheduler_id.hex();
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
LOG_DEBUG("assigning task to local_scheduler_id = %s", id_string.c_str());
RAY_LOG(DEBUG) << "assigning task to local_scheduler_id = "
<< local_scheduler_id;
Task_set_state(task, TASK_STATUS_SCHEDULED);
Task_set_local_scheduler(task, local_scheduler_id);
id_string = Task_task_id(task).hex();
LOG_DEBUG("Issuing a task table update for task = %s", id_string.c_str());
RAY_LOG(DEBUG) << "Issuing a task table update for task = "
<< Task_task_id(task);
#if !RAY_USE_NEW_GCS
auto retryInfo = RetryInfo{
@@ -99,7 +99,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
/* The value -1 indicates that the size of the object is not known yet. */
obj_info_entry.data_size = -1;
}
CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
state->scheduler_object_info_table[return_id].object_locations.push_back(
state->local_scheduler_plasma_map[local_scheduler_id]);
}
@@ -108,7 +108,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
* instead of db_client_id objects. */
/* Update the local scheduler info. */
auto it = state->local_schedulers.find(local_scheduler_id);
CHECK(it != state->local_schedulers.end());
RAY_CHECK(it != state->local_schedulers.end());
LocalScheduler &local_scheduler = it->second;
local_scheduler.num_tasks_sent += 1;
@@ -119,8 +119,9 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
double resource_quantity = resource_pair.second;
// The local scheduler must have this resource because otherwise we wouldn't
// be assigning the task to this local scheduler.
CHECK(local_scheduler.info.dynamic_resources.count(resource_name) == 1 ||
resource_quantity == 0);
RAY_CHECK(local_scheduler.info.dynamic_resources.count(resource_name) ==
1 ||
resource_quantity == 0);
// Subtract task's resource from the cached dynamic resource capacity for
// this local scheduler. This will be overwritten on the next heartbeat.
local_scheduler.info.dynamic_resources[resource_name] =
@@ -161,9 +162,8 @@ void GlobalSchedulerState_free(GlobalSchedulerState *state) {
/* Free the array of unschedulable tasks. */
int64_t num_pending_tasks = state->pending_tasks.size();
if (num_pending_tasks > 0) {
LOG_WARN("There are %" PRId64
" remaining tasks in the pending tasks array.",
num_pending_tasks);
RAY_LOG(WARNING) << "There are " << num_pending_tasks
<< " remaining tasks in the pending tasks array.";
}
for (int i = 0; i < num_pending_tasks; ++i) {
Task *pending_task = state->pending_tasks[i];
@@ -195,7 +195,7 @@ void signal_handler(int signal) {
void process_task_waiting(Task *waiting_task, void *user_context) {
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
LOG_DEBUG("Task waiting callback is called.");
RAY_LOG(DEBUG) << "Task waiting callback is called.";
bool successfully_assigned =
handle_task_waiting(state, state->policy_state, waiting_task);
/* If the task was not successfully submitted to a local scheduler, add the
@@ -238,7 +238,7 @@ remove_local_scheduler(
GlobalSchedulerState *state,
std::unordered_map<DBClientID, LocalScheduler, UniqueIDHasher>::iterator
it) {
CHECK(it != state->local_schedulers.end());
RAY_CHECK(it != state->local_schedulers.end());
DBClientID local_scheduler_id = it->first;
it = state->local_schedulers.erase(it);
@@ -264,8 +264,8 @@ remove_local_scheduler(
*/
void process_new_db_client(DBClient *db_client, void *user_context) {
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
std::string id_string = db_client->id.hex();
LOG_DEBUG("db client table callback for db client = %s", id_string.c_str());
RAY_LOG(DEBUG) << "db client table callback for db client = "
<< db_client->id;
if (strncmp(db_client->client_type.c_str(), "local_scheduler",
strlen("local_scheduler")) == 0) {
bool local_scheduler_present =
@@ -304,15 +304,14 @@ void object_table_subscribe_callback(ObjectID object_id,
void *user_context) {
/* Extract global scheduler state from the callback context. */
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
std::string id_string = object_id.hex();
LOG_DEBUG("object table subscribe callback for OBJECT = %s",
id_string.c_str());
RAY_LOG(DEBUG) << "object table subscribe callback for OBJECT = "
<< object_id;
const std::vector<std::string> managers =
db_client_table_get_ip_addresses(state->db, manager_ids);
LOG_DEBUG("\tManagers<%lu>:", managers.size());
RAY_LOG(DEBUG) << "\tManagers<" << managers.size() << ">:";
for (size_t i = 0; i < managers.size(); i++) {
LOG_DEBUG("\t\t%s", managers[i].c_str());
RAY_LOG(DEBUG) << "\t\t" << managers[i];
}
if (state->scheduler_object_info_table.find(object_id) ==
@@ -322,12 +321,11 @@ void object_table_subscribe_callback(ObjectID object_id,
state->scheduler_object_info_table[object_id];
obj_info_entry.data_size = data_size;
id_string = object_id.hex();
LOG_DEBUG("New object added to object_info_table with id = %s",
id_string.c_str());
LOG_DEBUG("\tmanager locations:");
RAY_LOG(DEBUG) << "New object added to object_info_table with id = "
<< object_id;
RAY_LOG(DEBUG) << "\tmanager locations:";
for (size_t i = 0; i < managers.size(); i++) {
LOG_DEBUG("\t\t%s", managers[i].c_str());
RAY_LOG(DEBUG) << "\t\t" << managers[i];
}
}
@@ -347,12 +345,10 @@ void local_scheduler_table_handler(DBClientID client_id,
/* Extract global scheduler state from the callback context. */
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
ARROW_UNUSED(state);
std::string id_string = client_id.hex();
LOG_DEBUG("Local scheduler heartbeat from db_client_id %s",
id_string.c_str());
LOG_DEBUG(
"total workers = %d, task queue length = %d, available workers = %d",
info.total_num_workers, info.task_queue_length, info.available_workers);
RAY_LOG(DEBUG) << "Local scheduler heartbeat from db_client_id " << client_id;
RAY_LOG(DEBUG) << "total workers = " << info.total_num_workers
<< ", task queue length = " << info.task_queue_length
<< ", available workers = " << info.available_workers;
/* Update the local scheduler info struct. */
auto it = state->local_schedulers.find(client_id);
@@ -371,7 +367,8 @@ void local_scheduler_table_handler(DBClientID client_id,
local_scheduler.info = info;
}
} else {
LOG_WARN("client_id didn't match any cached local scheduler entries");
RAY_LOG(WARNING) << "client_id didn't match any cached local scheduler "
<< "entries";
}
}
@@ -408,8 +405,8 @@ int heartbeat_timeout_handler(event_loop *loop, timer_id id, void *context) {
while (it != state->local_schedulers.end()) {
if (it->second.num_heartbeats_missed >=
RayConfig::instance().num_heartbeats_timeout()) {
LOG_WARN(
"Missed too many heartbeats from local scheduler, marking as dead.");
RAY_LOG(WARNING) << "Missed too many heartbeats from local scheduler, "
<< "marking as dead.";
/* Notify others by updating the global state. */
db_client_table_remove(state->db, it->second.id, NULL, NULL, NULL);
/* Remove the scheduler from the local state. The call to
@@ -484,22 +481,20 @@ int main(int argc, char *argv[]) {
node_ip_address = optarg;
break;
default:
LOG_ERROR("unknown option %c", c);
exit(-1);
RAY_LOG(FATAL) << "unknown option " << c;
}
}
char redis_primary_addr[16];
int redis_primary_port;
int redis_primary_port = -1;
if (!redis_primary_addr_port ||
parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
&redis_primary_port) == -1) {
LOG_FATAL(
"specify the primary redis address like 127.0.0.1:6379 with the -r "
"switch");
RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 "
<< "with the -r switch";
}
if (!node_ip_address) {
LOG_FATAL("specify the node IP address with the -h switch");
RAY_LOG(FATAL) << "specify the node IP address with the -h switch";
}
start_server(node_ip_address, redis_primary_addr, redis_primary_port);
}
@@ -55,7 +55,7 @@ int64_t locally_available_data_size(const GlobalSchedulerState *state,
* local scheduler. */
int64_t task_data_size = 0;
CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
const std::string &plasma_manager =
state->local_scheduler_plasma_map.at(local_scheduler_id);
@@ -120,8 +120,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
GlobalSchedulerPolicyState *policy_state,
Task *task) {
TaskSpec *task_spec = Task_task_execution_spec(task)->Spec();
CHECKM(task_spec != NULL,
"task wait handler encounted a task with NULL spec");
RAY_CHECK(task_spec != NULL)
<< "task wait handler encounted a task with NULL spec";
std::vector<DBClientID> feasible_nodes;
@@ -136,10 +136,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
}
if (feasible_nodes.size() == 0) {
std::string id_string = Task_task_id(task).hex();
LOG_ERROR(
"Infeasible task. No nodes satisfy hard constraints for task = %s",
id_string.c_str());
RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for "
<< "task = " << Task_task_id(task);
return false;
}
@@ -148,8 +146,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
std::uniform_int_distribution<> dis(0, feasible_nodes.size() - 1);
DBClientID local_scheduler_id =
feasible_nodes[dis(policy_state->getRandomGenerator())];
CHECKM(!local_scheduler_id.is_nil(),
"Task is feasible, but doesn't have a local scheduler assigned.");
RAY_CHECK(!local_scheduler_id.is_nil())
<< "Task is feasible, but doesn't have a local scheduler assigned.";
// A local scheduler ID was found, so assign the task.
assign_task_to_local_scheduler(state, task, local_scheduler_id);
return true;
@@ -161,15 +159,15 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
TaskSpec *task_spec = Task_task_execution_spec(task)->Spec();
int64_t curtime = current_time_ms();
CHECKM(task_spec != NULL,
"task wait handler encounted a task with NULL spec");
RAY_CHECK(task_spec != NULL)
<< "task wait handler encounted a task with NULL spec";
// For tasks already seen by the global scheduler (spillback > 1),
// adjust scheduled task counts for the source local scheduler.
if (task->execution_spec->SpillbackCount() > 1) {
auto it = state->local_schedulers.find(task->local_scheduler_id);
// Task's previous local scheduler must be present and known.
CHECK(it != state->local_schedulers.end());
RAY_CHECK(it != state->local_schedulers.end());
LocalScheduler &src_local_scheduler = it->second;
src_local_scheduler.num_recent_tasks_sent -= 1;
}
@@ -178,12 +176,11 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
// Go through all the nodes, calculate the score for each, pick max score.
double best_local_scheduler_score = INT32_MIN;
CHECKM(best_local_scheduler_score < 0,
"We might have a floating point underflow");
std::string id_string_fromlocalsched = task->local_scheduler_id.hex();
LOG_INFO("ct[%" PRId64 "] task from %s spillback %d", curtime,
id_string_fromlocalsched.c_str(),
task->execution_spec->SpillbackCount());
RAY_CHECK(best_local_scheduler_score < 0)
<< "We might have a floating point underflow";
RAY_LOG(INFO) << "ct[" << curtime << "] task from "
<< task->local_scheduler_id << " spillback "
<< task->execution_spec->SpillbackCount();
// The best node to send this task.
DBClientID best_local_scheduler_id = DBClientID::nil();
@@ -200,14 +197,13 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
if (task->local_scheduler_id == scheduler->id) {
continue;
}
std::string id_string = scheduler->id.hex();
task_feasible = true;
// This node satisfies the hard capacity constraint. Calculate its score.
double score = -1 * calculate_cost_pending(state, scheduler, task_spec);
LOG_INFO("ct[%" PRId64 "][%s][q%d][w%d]: score %f bestscore %f\n", curtime,
id_string.c_str(), scheduler->info.task_queue_length,
scheduler->info.available_workers, score,
best_local_scheduler_score);
RAY_LOG(INFO) << "ct[" << curtime << "][" << scheduler->id << "][q"
<< scheduler->info.task_queue_length << "][w"
<< scheduler->info.available_workers << "]: score " << score
<< " bestscore " << best_local_scheduler_score;
if (score >= best_local_scheduler_score) {
best_local_scheduler_score = score;
best_local_scheduler_id = scheduler->id;
@@ -215,16 +211,14 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
}
if (!task_feasible) {
std::string id_string = Task_task_id(task).hex();
LOG_ERROR(
"Infeasible task. No nodes satisfy hard constraints for task = %s",
id_string.c_str());
RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for "
<< "task = " << Task_task_id(task);
// TODO(atumanov): propagate this error to the task's driver and/or
// cache the task in case new local schedulers satisfy it in the future.
return false;
}
CHECKM(!best_local_scheduler_id.is_nil(),
"Task is feasible, but doesn't have a local scheduler assigned.");
RAY_CHECK(!best_local_scheduler_id.is_nil())
<< "Task is feasible, but doesn't have a local scheduler assigned.";
// A local scheduler ID was found, so assign the task.
assign_task_to_local_scheduler(state, task, best_local_scheduler_id);
return true;
+105 -111
View File
@@ -73,12 +73,12 @@ void kill_worker(LocalSchedulerState *state,
bool suppress_warning) {
/* Erase the local scheduler's reference to the worker. */
auto it = std::find(state->workers.begin(), state->workers.end(), worker);
CHECK(it != state->workers.end());
RAY_CHECK(it != state->workers.end());
state->workers.erase(it);
/* Make sure that we removed the worker. */
it = std::find(state->workers.begin(), state->workers.end(), worker);
CHECK(it == state->workers.end());
RAY_CHECK(it == state->workers.end());
/* Release any resources held by the worker. It's important to do this before
* calling handle_worker_removed and handle_actor_worker_disconnect because
@@ -121,7 +121,7 @@ void kill_worker(LocalSchedulerState *state,
force_kill_worker, (void *) worker);
free_worker = false;
}
LOG_DEBUG("Killed worker with pid %d", worker->pid);
RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid;
}
/* If this worker is still running a task and we aren't cleaning up, push an
@@ -149,7 +149,7 @@ void kill_worker(LocalSchedulerState *state,
}
}
LOG_DEBUG("Killed worker with pid %d", worker->pid);
RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid;
if (free_worker) {
/* Clean up the client socket after killing the worker so that the worker
* can't receive the SIGPIPE before exiting. */
@@ -173,7 +173,8 @@ void LocalSchedulerState_free(LocalSchedulerState *state) {
for (auto const &worker_pid : state->child_pids) {
kill(worker_pid, SIGKILL);
waitpid(worker_pid, NULL, 0);
LOG_INFO("Killed worker pid %d which hadn't started yet.", worker_pid);
RAY_LOG(INFO) << "Killed worker pid " << worker_pid
<< " which hadn't started yet.";
}
/* Kill any registered workers. */
@@ -236,19 +237,19 @@ void start_worker(LocalSchedulerState *state,
bool reconstruct) {
/* Non-actors can't be started in reconstruct mode. */
if (actor_id.is_nil()) {
CHECK(!reconstruct);
RAY_CHECK(!reconstruct);
}
/* We can't start a worker if we don't have the path to the worker script. */
if (state->config.start_worker_command == NULL) {
LOG_DEBUG(
"No valid command to start worker provided. Cannot start worker.");
RAY_LOG(DEBUG) << "No valid command to start worker provided. Cannot start "
<< "worker.";
return;
}
/* Launch the process to create the worker. */
pid_t pid = fork();
if (pid != 0) {
state->child_pids.push_back(pid);
LOG_DEBUG("Started worker with pid %d", pid);
RAY_LOG(DEBUG) << "Started worker with pid " << pid;
return;
}
@@ -279,7 +280,7 @@ void start_worker(LocalSchedulerState *state,
execvp(command_vector[0], (char *const *) command_vector.data());
LocalSchedulerState_free(state);
LOG_FATAL("Failed to start worker");
RAY_LOG(FATAL) << "Failed to start worker";
}
/**
@@ -320,7 +321,7 @@ const char **parse_command(const char *command) {
}
free(command_copy);
CHECK(num_args == i);
RAY_CHECK(num_args == i);
return command_args;
}
@@ -345,9 +346,8 @@ LocalSchedulerState *LocalSchedulerState_init(
state->config.start_worker_command = NULL;
}
if (start_worker_command == NULL) {
LOG_WARN(
"No valid command to start a worker provided, local scheduler will not "
"start any workers.");
RAY_LOG(WARNING) << "No valid command to start a worker provided, local "
<< "scheduler will not start any workers.";
}
state->config.global_scheduler_exists = global_scheduler_exists;
@@ -443,14 +443,14 @@ void resource_sanity_checks(LocalSchedulerState *state,
const std::string resource_name = resource_pair.first;
double resource_quantity = resource_pair.second;
CHECK(state->dynamic_resources[resource_name] <=
state->static_resources[resource_name]);
RAY_CHECK(state->dynamic_resources[resource_name] <=
state->static_resources[resource_name]);
if (resource_name != std::string("CPU")) {
CHECK(state->dynamic_resources[resource_name] >= 0);
RAY_CHECK(state->dynamic_resources[resource_name] >= 0);
}
CHECK(resource_quantity >= 0);
CHECK(resource_quantity <= state->static_resources[resource_name]);
RAY_CHECK(resource_quantity >= 0);
RAY_CHECK(resource_quantity <= state->static_resources[resource_name]);
}
}
@@ -468,8 +468,8 @@ void acquire_resources(
if (resource_name == std::string("GPU")) {
if (resource_quantity != 0) {
// Make sure that the worker isn't using any GPUs already.
CHECK(worker->gpus_in_use.size() == 0);
CHECK(state->available_gpus.size() >= resource_quantity);
RAY_CHECK(worker->gpus_in_use.size() == 0);
RAY_CHECK(state->available_gpus.size() >= resource_quantity);
// Reserve GPUs for the worker.
for (int i = 0; i < resource_quantity; i++) {
worker->gpus_in_use.push_back(state->available_gpus.back());
@@ -480,11 +480,11 @@ void acquire_resources(
// Do bookkeeping for general resource types.
if (resource_name != std::string("CPU")) {
CHECK(state->dynamic_resources[resource_name] >= resource_quantity);
RAY_CHECK(state->dynamic_resources[resource_name] >= resource_quantity);
}
state->dynamic_resources[resource_name] -= resource_quantity;
if (resource_name == std::string("CPU")) {
CHECK(worker->resources_in_use[resource_name] == 0);
RAY_CHECK(worker->resources_in_use[resource_name] == 0);
}
worker->resources_in_use[resource_name] += resource_quantity;
}
@@ -504,7 +504,7 @@ void release_resources(
// Do some special handling for GPU resources.
if (resource_name == std::string("GPU")) {
if (resource_quantity != 0) {
CHECK(resource_quantity == worker->gpus_in_use.size());
RAY_CHECK(resource_quantity == worker->gpus_in_use.size());
// Move the GPU IDs the worker was using back to the local scheduler.
for (auto const &gpu_id : worker->gpus_in_use) {
state->available_gpus.push_back(gpu_id);
@@ -515,7 +515,7 @@ void release_resources(
// Do bookkeeping for general resources types.
if (resource_name == std::string("CPU")) {
CHECK(resource_quantity == worker->resources_in_use[resource_name]);
RAY_CHECK(resource_quantity == worker->resources_in_use[resource_name]);
}
state->dynamic_resources[resource_name] += resource_quantity;
worker->resources_in_use[resource_name] -= resource_quantity;
@@ -542,14 +542,14 @@ void assign_task_to_worker(LocalSchedulerState *state,
// non-CPU resources (in particular, GPUs) should already have been acquired
// by the actor worker.
if (!worker->actor_id.is_nil()) {
CHECK(required_resources.size() == 1);
CHECK(required_resources.count("CPU") == 1);
RAY_CHECK(required_resources.size() == 1);
RAY_CHECK(required_resources.count("CPU") == 1);
}
CHECK(worker->actor_id == TaskSpec_actor_id(spec));
RAY_CHECK(worker->actor_id == TaskSpec_actor_id(spec));
/* Make sure the driver for this task is still alive. */
WorkerID driver_id = TaskSpec_driver_id(spec);
CHECK(is_driver_alive(state, driver_id));
RAY_CHECK(is_driver_alive(state, driver_id));
/* Construct a flatbuffer object to send to the worker. */
flatbuffers::FlatBufferBuilder fbb;
@@ -563,12 +563,10 @@ void assign_task_to_worker(LocalSchedulerState *state,
if (errno == EPIPE || errno == EBADF) {
/* Something went wrong, so kill the worker. */
kill_worker(state, worker, false, false);
LOG_WARN(
"Failed to give task to worker on fd %d. The client may have hung "
"up.",
worker->sock);
RAY_LOG(WARNING) << "Failed to give task to worker on fd " << worker->sock
<< ". The client may have hung up.";
} else {
LOG_FATAL("Failed to give task to client on fd %d.", worker->sock);
RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock;
}
}
@@ -596,15 +594,15 @@ void finish_task(LocalSchedulerState *state, LocalSchedulerClient *worker) {
if (worker->task_in_progress != NULL) {
TaskSpec *spec = Task_task_execution_spec(worker->task_in_progress)->Spec();
/* Return dynamic resources back for the task in progress. */
CHECK(worker->resources_in_use["CPU"] ==
TaskSpec_get_required_resource(spec, "CPU"));
RAY_CHECK(worker->resources_in_use["CPU"] ==
TaskSpec_get_required_resource(spec, "CPU"));
if (worker->actor_id.is_nil()) {
CHECK(worker->gpus_in_use.size() ==
TaskSpec_get_required_resource(spec, "GPU"));
RAY_CHECK(worker->gpus_in_use.size() ==
TaskSpec_get_required_resource(spec, "GPU"));
release_resources(state, worker, worker->resources_in_use);
} else {
// Actor tasks should only specify CPU requirements.
CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
std::unordered_map<std::string, double> cpu_resources;
cpu_resources["CPU"] = worker->resources_in_use["CPU"];
std::unordered_map<std::string, double> resources_to_release =
@@ -641,8 +639,8 @@ void process_plasma_notification(event_loop *loop,
if (!notification) {
/* The store has closed the socket. */
LocalSchedulerState_free(state);
LOG_FATAL(
"Lost connection to the plasma store, local scheduler is exiting!");
RAY_LOG(FATAL) << "Lost connection to the plasma store, local scheduler is "
<< "exiting!";
}
auto object_info = flatbuffers::GetRoot<ObjectInfo>(notification);
ObjectID object_id = from_flatbuf(*object_info->object_id());
@@ -773,8 +771,8 @@ void reconstruct_evicted_result_lookup_callback(ObjectID reconstruct_object_id,
TaskID task_id,
bool is_put,
void *user_context) {
CHECKM(!task_id.is_nil(),
"No task information found for object during reconstruction");
RAY_CHECK(!task_id.is_nil())
<< "No task information found for object during reconstruction";
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
task_table_test_and_update_callback done_callback;
@@ -820,9 +818,8 @@ void reconstruct_failed_result_lookup_callback(ObjectID reconstruct_object_id,
* after this lookup returns, possibly due to concurrent clients. In most
* cases, this is okay because the initial execution is probably still
* pending, so for now, we log a warning and suppress reconstruction. */
LOG_WARN(
"No task information found for object during reconstruction (no object "
"entry yet)");
RAY_LOG(WARNING) << "No task information found for object during "
<< "reconstruction (no object entry yet)";
return;
}
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
@@ -852,7 +849,7 @@ void reconstruct_object_lookup_callback(
bool never_created,
const std::vector<DBClientID> &manager_ids,
void *user_context) {
LOG_DEBUG("Manager count was %lu", manager_ids.size());
RAY_LOG(DEBUG) << "Manager count was " << manager_ids.size();
/* Only continue reconstruction if we find that the object doesn't exist on
* any nodes. NOTE: This codepath is not responsible for checking if the
* object table entry is up-to-date. */
@@ -887,14 +884,14 @@ void reconstruct_object_lookup_callback(
/* Start reconstruction of an object unless it is already available locally.
 *
 * Emits a debug log line, returns early when object_locally_available()
 * reports the object for state->algorithm_state, and otherwise asserts that
 * state->db is non-NULL and calls object_table_lookup() with
 * reconstruct_object_lookup_callback and `state` as the trailing arguments.
 *
 * state: The local scheduler state; its algorithm_state and db fields are
 *        read here.
 * reconstruct_object_id: The ID of the object to reconstruct.
 *
 * NOTE(review): each logging/check statement below appears twice, once in an
 * older form (LOG_DEBUG / CHECK) and once in a newer form (RAY_LOG /
 * RAY_CHECK). This looks like both sides of a diff hunk rendered together --
 * confirm before treating this text as compilable source. */
void reconstruct_object(LocalSchedulerState *state,
ObjectID reconstruct_object_id) {
LOG_DEBUG("Starting reconstruction");
RAY_LOG(DEBUG) << "Starting reconstruction";
/* If the object is locally available, no need to reconstruct. */
if (object_locally_available(state->algorithm_state, reconstruct_object_id)) {
return;
}
/* Determine if reconstruction is necessary by checking if the object exists
* on a node. */
CHECK(state->db != NULL);
RAY_CHECK(state->db != NULL);
object_table_lookup(state->db, reconstruct_object_id, NULL,
reconstruct_object_lookup_callback, (void *) state);
}
@@ -912,13 +909,12 @@ void send_client_register_reply(LocalSchedulerState *state,
if (errno == EPIPE || errno == EBADF || errno == ECONNRESET) {
/* Something went wrong, so kill the worker. */
kill_worker(state, worker, false, false);
LOG_WARN(
"Failed to give send register client reply to worker on fd %d. The "
"client may have hung up.",
worker->sock);
RAY_LOG(WARNING) << "Failed to give send register client reply to worker "
<< "on fd " << worker->sock
<< ". The client may have hung up.";
} else {
LOG_FATAL("Failed to send register client reply to client on fd %d.",
worker->sock);
RAY_LOG(FATAL) << "Failed to send register client reply to client on fd "
<< worker->sock;
}
}
}
@@ -927,10 +923,10 @@ void handle_client_register(LocalSchedulerState *state,
LocalSchedulerClient *worker,
const RegisterClientRequest *message) {
/* Make sure this worker hasn't already registered. */
CHECK(!worker->registered);
RAY_CHECK(!worker->registered);
worker->registered = true;
worker->is_worker = message->is_worker();
CHECK(worker->client_id.is_nil());
RAY_CHECK(worker->client_id.is_nil());
worker->client_id = from_flatbuf(*message->client_id());
/* Register the worker or driver. */
@@ -942,11 +938,11 @@ void handle_client_register(LocalSchedulerState *state,
if (!actor_id.is_nil()) {
/* Make sure that the local scheduler is aware that it is responsible for
* this actor. */
CHECK(state->actor_mapping.count(actor_id) == 1);
CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
/* Update the worker struct with this actor ID. */
CHECK(worker->actor_id.is_nil());
RAY_CHECK(worker->actor_id.is_nil());
worker->actor_id = actor_id;
/* Let the scheduling algorithm process the presence of this new
* worker. */
@@ -965,16 +961,16 @@ void handle_client_register(LocalSchedulerState *state,
/* TODO(rkn): This means that an actor wants to register but that there
* aren't enough GPUs for it. We should queue this request, and reply to
* the actor when GPUs become available. */
LOG_WARN(
"Attempting to create an actor but there aren't enough available "
"GPUs. We'll start the worker anyway without any GPUs, but this is "
"incorrect behavior.");
RAY_LOG(WARNING) << "Attempting to create an actor but there aren't "
<< "enough available GPUs. We'll start the worker "
<< "anyway without any GPUs, but this is incorrect "
<< "behavior.";
}
}
/* Register worker process id with the scheduler. */
/* Determine if this worker is one of our child processes. */
LOG_DEBUG("PID is %d", worker->pid);
RAY_LOG(DEBUG) << "PID is " << worker->pid;
auto it = std::find(state->child_pids.begin(), state->child_pids.end(),
worker->pid);
if (it != state->child_pids.end()) {
@@ -983,7 +979,7 @@ void handle_client_register(LocalSchedulerState *state,
* cleanup. */
worker->is_child = true;
state->child_pids.erase(it);
LOG_DEBUG("Found matching child pid %d", worker->pid);
RAY_LOG(DEBUG) << "Found matching child pid " << worker->pid;
}
/* If the worker is an actor that corresponds to a driver that has been
@@ -1018,16 +1014,17 @@ void handle_driver_removed_callback(WorkerID driver_id, void *user_context) {
if (!actor_id.is_nil()) {
/* This is an actor. */
CHECK(state->actor_mapping.count(actor_id) == 1);
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
if (state->actor_mapping[actor_id].driver_id == driver_id) {
/* This actor was created by the removed driver, so kill the actor. */
LOG_DEBUG("Killing an actor for a removed driver.");
RAY_LOG(DEBUG) << "Killing an actor for a removed driver.";
kill_worker(state, *it, false, true);
}
} else if (task != NULL) {
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
if (TaskSpec_driver_id(spec) == driver_id) {
LOG_DEBUG("Killing a worker executing a task for a removed driver.");
RAY_LOG(DEBUG) << "Killing a worker executing a task for a removed "
<< "driver.";
kill_worker(state, *it, false, true);
}
}
@@ -1081,13 +1078,10 @@ void handle_get_actor_frontier(LocalSchedulerState *state,
if (errno == EPIPE || errno == EBADF) {
/* Something went wrong, so kill the worker. */
kill_worker(state, worker, false, false);
LOG_WARN(
"Failed to return actor frontier to worker on fd %d. The client may "
"have hung "
"up.",
worker->sock);
RAY_LOG(WARNING) << "Failed to return actor frontier to worker on fd "
<< worker->sock << ". The client may have hung up.";
} else {
LOG_FATAL("Failed to give task to client on fd %d.", worker->sock);
RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock;
}
}
}
@@ -1124,7 +1118,7 @@ void process_message(event_loop *loop,
read_vector(client_sock, &type, state->input_buffer);
uint8_t *input = state->input_buffer.data();
LOG_DEBUG("New event of type %" PRId64, type);
RAY_LOG(DEBUG) << "New event of type " << type;
switch (type) {
case MessageType_SubmitTask: {
@@ -1159,7 +1153,7 @@ void process_message(event_loop *loop,
} break;
case MessageType_DisconnectClient: {
finish_task(state, worker);
CHECK(!worker->disconnected);
RAY_CHECK(!worker->disconnected);
worker->disconnected = true;
/* If the disconnected worker was not an actor, start a new worker to make
* sure there are enough workers in the pool. */
@@ -1217,7 +1211,7 @@ void process_message(event_loop *loop,
reconstruct_object(state, from_flatbuf(*message->object_id()));
} break;
case DISCONNECT_CLIENT: {
LOG_DEBUG("Disconnecting client on fd %d", client_sock);
RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock;
handle_client_disconnect(state, worker);
} break;
case MessageType_NotifyUnblocked: {
@@ -1225,7 +1219,7 @@ void process_message(event_loop *loop,
if (worker->task_in_progress != NULL) {
/* If the worker was executing a task (i.e. non-driver), update its
* state to not blocked. */
CHECK(worker->is_blocked);
RAY_CHECK(worker->is_blocked);
worker->is_blocked = false;
/* Lease back the CPU resources that the blocked worker needs (note that
* it never released its GPU resources). TODO(swang): Leasing back the
@@ -1264,16 +1258,15 @@ void process_message(event_loop *loop,
} break;
default:
/* This code should be unreachable. */
CHECK(0);
RAY_CHECK(0);
}
/* Print a warning if this method took too long. */
int64_t end_time = current_time_ms();
if (end_time - start_time >
RayConfig::instance().max_time_for_handler_milliseconds()) {
LOG_WARN("process_message of type %" PRId64 " took %" PRId64
" milliseconds.",
type, end_time - start_time);
RAY_LOG(WARNING) << "process_message of type " << type << " took "
<< end_time - start_time << " milliseconds.";
}
}
@@ -1302,7 +1295,7 @@ void new_client_connection(event_loop *loop,
state->workers.push_back(worker);
event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message,
worker);
LOG_DEBUG("new connection with fd %d", new_socket);
RAY_LOG(DEBUG) << "new connection with fd " << new_socket;
}
/* We need this code so we can clean up when we get a SIGTERM signal. */
@@ -1310,7 +1303,7 @@ void new_client_connection(event_loop *loop,
LocalSchedulerState *g_state = NULL;
void signal_handler(int signal) {
LOG_DEBUG("Signal was %d", signal);
RAY_LOG(DEBUG) << "Signal was " << signal;
if (signal == SIGTERM) {
/* NOTE(swang): This call removes the SIGTERM handler to ensure that we
* free the local scheduler state at most once. If another SIGTERM is
@@ -1338,7 +1331,7 @@ void handle_task_scheduled_callback(Task *original_task,
* scheduling algorithm. */
WorkerID driver_id = TaskSpec_driver_id(spec);
if (!is_driver_alive(state, driver_id)) {
LOG_DEBUG("Ignoring scheduled task for removed driver.");
RAY_LOG(DEBUG) << "Ignoring scheduled task for removed driver.";
return;
}
@@ -1382,14 +1375,14 @@ void handle_actor_creation_callback(ActorID actor_id,
* TODO(rkn): We will need to remove this check to handle the case where the
* corresponding publish is retried and the case in which a task that
* creates an actor is resubmitted due to fault tolerance. */
CHECK(state->actor_mapping.count(actor_id) == 0);
RAY_CHECK(state->actor_mapping.count(actor_id) == 0);
} else {
/* In this case, the actor already exists. Check that the driver hasn't
* changed but that the local scheduler has. */
auto it = state->actor_mapping.find(actor_id);
CHECK(it != state->actor_mapping.end());
CHECK(it->second.driver_id == driver_id);
CHECK(!(it->second.local_scheduler_id == local_scheduler_id));
RAY_CHECK(it != state->actor_mapping.end());
RAY_CHECK(it->second.driver_id == driver_id);
RAY_CHECK(!(it->second.local_scheduler_id == local_scheduler_id));
/* If the actor was previously assigned to this local scheduler, kill the
* actor. */
if (it->second.local_scheduler_id == get_db_client_id(state->db)) {
@@ -1428,12 +1421,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) {
/* Check that the last heartbeat was not sent too long ago. */
int64_t current_time = current_time_ms();
CHECK(current_time >= state->previous_heartbeat_time);
RAY_CHECK(current_time >= state->previous_heartbeat_time);
if (current_time - state->previous_heartbeat_time >
RayConfig::instance().num_heartbeats_timeout() *
RayConfig::instance().heartbeat_timeout_milliseconds()) {
LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.",
current_time - state->previous_heartbeat_time);
RAY_LOG(FATAL) << "The last heartbeat was sent "
<< current_time - state->previous_heartbeat_time
<< " milliseconds ago.";
}
state->previous_heartbeat_time = current_time;
@@ -1579,11 +1573,12 @@ int main(int argc, char *argv[]) {
num_workers_str = optarg;
break;
default:
LOG_FATAL("unknown option %c", c);
RAY_LOG(FATAL) << "unknown option " << c;
}
}
if (!static_resource_list) {
LOG_FATAL("please specify a static resource list with the -c switch");
RAY_LOG(FATAL) << "please specify a static resource list with the -c "
<< "switch";
}
// Parse the resource list.
std::istringstream resource_string(static_resource_list);
@@ -1591,27 +1586,28 @@ int main(int argc, char *argv[]) {
std::string resource_quantity;
while (std::getline(resource_string, resource_name, ',')) {
CHECK(std::getline(resource_string, resource_quantity, ','));
RAY_CHECK(std::getline(resource_string, resource_quantity, ','));
// TODO(rkn): The line below could throw an exception. What should we do
// about this?
static_resource_conf[resource_name] = std::stod(resource_quantity);
}
if (!scheduler_socket_name) {
LOG_FATAL("please specify socket for incoming connections with -s switch");
RAY_LOG(FATAL) << "please specify socket for incoming connections with "
<< "-s switch";
}
if (!plasma_store_socket_name) {
LOG_FATAL(
"please specify socket for connecting to Plasma store with -p switch");
RAY_LOG(FATAL) << "please specify socket for connecting to Plasma store "
<< "with -p switch";
}
if (!node_ip_address) {
LOG_FATAL("please specify the node IP address with -h switch");
RAY_LOG(FATAL) << "please specify the node IP address with -h switch";
}
int num_workers = 0;
if (num_workers_str) {
num_workers = strtol(num_workers_str, NULL, 10);
if (num_workers < 0) {
LOG_FATAL("Number of workers must be nonnegative");
RAY_LOG(FATAL) << "Number of workers must be nonnegative";
}
}
@@ -1621,9 +1617,9 @@ int main(int argc, char *argv[]) {
/* Start the local scheduler without connecting to Redis. In this case, all
* submitted tasks will be queued and scheduled locally. */
if (plasma_manager_socket_name) {
LOG_FATAL(
"if a plasma manager socket name is provided with the -m switch, "
"then a redis address must be provided with the -r switch");
RAY_LOG(FATAL) << "if a plasma manager socket name is provided with the "
<< "-m switch, then a redis address must be provided with "
<< "the -r switch";
}
} else {
char redis_primary_addr[16];
@@ -1631,14 +1627,12 @@ int main(int argc, char *argv[]) {
/* Parse the primary Redis address into an IP address and a port. */
if (parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
&redis_primary_port) == -1) {
LOG_FATAL(
"if a redis address is provided with the -r switch, it should be "
"formatted like 127.0.0.1:6379");
RAY_LOG(FATAL) << "if a redis address is provided with the -r switch, it "
<< "should be formatted like 127.0.0.1:6379";
}
if (!plasma_manager_socket_name) {
LOG_FATAL(
"please specify socket for connecting to Plasma manager with -m "
"switch");
RAY_LOG(FATAL) << "please specify socket for connecting to Plasma "
<< "manager with -m switch";
}
redis_addr = redis_primary_addr;
redis_port = redis_primary_port;
@@ -224,25 +224,23 @@ void create_actor(SchedulingAlgorithmState *algorithm_state,
entry.task_queue = new std::list<TaskExecutionSpec>();
entry.worker = worker;
entry.worker_available = false;
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0)
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0);
algorithm_state->local_actor_infos[actor_id] = entry;
/* Log some useful information about the actor that we created. */
std::string id_string = actor_id.hex();
LOG_DEBUG("Creating actor with ID %s.", id_string.c_str());
RAY_LOG(DEBUG) << "Creating actor with ID " << actor_id;
}
void remove_actor(SchedulingAlgorithmState *algorithm_state, ActorID actor_id) {
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
LocalActorInfo &entry =
algorithm_state->local_actor_infos.find(actor_id)->second;
/* Log some useful information about the actor that we're removing. */
std::string id_string = actor_id.hex();
size_t count = entry.task_queue->size();
if (count > 0) {
LOG_WARN("Removing actor with ID %s and %lld remaining tasks.",
id_string.c_str(), (long long) count);
RAY_LOG(WARNING) << "Removing actor with ID " << actor_id << " and "
<< count << " remaining tasks.";
}
entry.task_queue->clear();
@@ -266,7 +264,7 @@ bool dispatch_actor_task(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
ActorID actor_id) {
/* Make sure this worker actually is an actor. */
CHECK(!actor_id.is_nil());
RAY_CHECK(!actor_id.is_nil());
/* Return if this actor doesn't have any pending tasks. */
if (algorithm_state->actors_with_pending_tasks.find(actor_id) ==
algorithm_state->actors_with_pending_tasks.end()) {
@@ -278,16 +276,16 @@ bool dispatch_actor_task(LocalSchedulerState *state,
* scheduler. This should be rare. */
return false;
}
CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
/* Get the local actor entry for this actor. */
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
LocalActorInfo &entry =
algorithm_state->local_actor_infos.find(actor_id)->second;
/* There should be some queued tasks for this actor. */
CHECK(!entry.task_queue->empty());
RAY_CHECK(!entry.task_queue->empty());
/* If the worker is not available, we cannot assign a task to it. */
if (!entry.worker_available) {
return false;
@@ -304,7 +302,7 @@ bool dispatch_actor_task(LocalSchedulerState *state,
}
/* If there are not enough resources available, we cannot assign the task. */
CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
if (!check_dynamic_resources(state, TaskSpec_get_required_resources(spec))) {
return false;
}
@@ -428,7 +426,7 @@ void insert_actor_task_queue(LocalSchedulerState *state,
* not been created yet. The correct worker struct will be inserted when the
* actor worker connects to the local scheduler. */
create_actor(algorithm_state, actor_id, NULL);
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
}
LocalActorInfo &entry =
algorithm_state->local_actor_infos.find(actor_id)->second;
@@ -437,7 +435,7 @@ void insert_actor_task_queue(LocalSchedulerState *state,
}
/* Extend the frontier to include the new handle. */
if (entry.frontier_dependencies.count(task_handle_id) == 0) {
CHECK(task_entry.ExecutionDependencies().size() == 1);
RAY_CHECK(task_entry.ExecutionDependencies().size() == 1);
entry.frontier_dependencies[task_handle_id] =
task_entry.ExecutionDependencies()[1];
}
@@ -448,9 +446,9 @@ void insert_actor_task_queue(LocalSchedulerState *state,
* check will fail if the fault-tolerance mechanism resubmits a task on an
* actor. */
if (task_counter < entry.task_counters[task_handle_id]) {
LOG_INFO(
"A task that has already been executed has been resubmitted, so we "
"are ignoring it. This should only happen during reconstruction.");
RAY_LOG(INFO) << "A task that has already been executed has been "
<< "resubmitted, so we are ignoring it. This should only "
<< "happen during reconstruction.";
return;
}
@@ -466,9 +464,8 @@ void insert_actor_task_queue(LocalSchedulerState *state,
}
/* A duplicate task submitted by the same handle. */
if (task_counter == TaskSpec_actor_counter(pending_task_spec)) {
LOG_INFO(
"A task was resubmitted, so we are ignoring it. This should only "
"happen during reconstruction.");
RAY_LOG(INFO) << "A task was resubmitted, so we are ignoring it. This "
<< "should only happen during reconstruction.";
return;
}
/* We found a task with the same handle ID and a greater task counter. */
@@ -501,7 +498,7 @@ void queue_actor_task(LocalSchedulerState *state,
bool from_global_scheduler) {
TaskSpec *spec = execution_spec.Spec();
ActorID actor_id = TaskSpec_actor_id(spec);
DCHECK(!actor_id.is_nil());
RAY_CHECK(!actor_id.is_nil());
/* Update the task table. */
if (state->db != NULL) {
@@ -567,10 +564,9 @@ void fetch_missing_dependency(
/* TODO(swang): Local scheduler should also exit even if there are no
* pending fetches. This could be done by subscribing to the db_client
* table, or pinging the plasma manager in the heartbeat handler. */
LOG_FATAL(
"Lost connection to the plasma manager, local scheduler is "
"exiting. Error: %s",
arrow_status.ToString().c_str());
RAY_LOG(FATAL) << "Lost connection to the plasma manager, local "
<< "scheduler is exiting. Error: "
<< arrow_status.ToString();
}
}
/* Create an entry and add it to the list of active fetch requests to
@@ -617,7 +613,7 @@ void fetch_missing_dependencies(
}
}
}
CHECK(num_missing_dependencies > 0);
RAY_CHECK(num_missing_dependencies > 0);
}
/**
@@ -700,7 +696,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
LocalSchedulerState *state = (LocalSchedulerState *) context;
/* Only try the fetches if we are connected to the object store manager. */
if (state->plasma_conn->get_manager_fd() == -1) {
LOG_INFO("Local scheduler is not connected to a object store manager");
RAY_LOG(INFO)
<< "Local scheduler is not connected to a object store manager";
return RayConfig::instance().local_scheduler_fetch_timeout_milliseconds();
}
@@ -728,10 +725,9 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
reinterpret_cast<plasma::ObjectID *>(&object_ids[j]));
if (!arrow_status.ok()) {
LocalSchedulerState_free(state);
LOG_FATAL(
"Lost connection to the plasma manager, local scheduler is exiting. "
"Error: %s",
arrow_status.ToString().c_str());
RAY_LOG(FATAL) << "Lost connection to the plasma manager, local "
<< "scheduler is exiting. Error: "
<< arrow_status.ToString();
}
}
@@ -739,8 +735,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
int64_t end_time = current_time_ms();
if (end_time - start_time >
RayConfig::instance().max_time_for_handler_milliseconds()) {
LOG_WARN("fetch_object_timeout_handler took %" PRId64 " milliseconds.",
end_time - start_time);
RAY_LOG(WARNING) << "fetch_object_timeout_handler took "
<< end_time - start_time << " milliseconds.";
}
/* Wait at least local_scheduler_fetch_timeout_milliseconds before running
@@ -795,9 +791,8 @@ int reconstruct_object_timeout_handler(event_loop *loop,
int64_t end_time = current_time_ms();
if (end_time - start_time >
RayConfig::instance().max_time_for_handler_milliseconds()) {
LOG_WARN("reconstruct_object_timeout_handler took %" PRId64
" milliseconds.",
end_time - start_time);
RAY_LOG(WARNING) << "reconstruct_object_timeout_handler took "
<< end_time - start_time << " milliseconds.";
}
return RayConfig::instance()
@@ -879,7 +874,7 @@ void dispatch_tasks(LocalSchedulerState *state,
}
/* Dispatch this task to an available worker and dequeue the task. */
LOG_DEBUG("Dispatching task");
RAY_LOG(DEBUG) << "Dispatching task";
/* Get the last available worker in the available worker queue. */
LocalSchedulerClient *worker = algorithm_state->available_workers.back();
/* Tell the available worker to execute the task. */
@@ -1004,15 +999,15 @@ void queue_waiting_task(LocalSchedulerState *state,
if (task_counter != task_counters.end() &&
TaskSpec_actor_counter(spec) < task_counter->second) {
/* If the task to queue has a lower task counter, do not queue it. */
LOG_INFO(
"A task that has already been executed has been resubmitted, so we "
"are ignoring it. This should only happen during reconstruction.");
RAY_LOG(INFO) << "A task that has already been executed has been "
<< "resubmitted, so we are ignoring it. This should only "
<< "happen during reconstruction.";
return;
}
}
}
LOG_DEBUG("Queueing task in waiting queue");
RAY_LOG(DEBUG) << "Queueing task in waiting queue";
auto it = queue_task(state, algorithm_state->waiting_task_queue,
execution_spec, from_global_scheduler);
fetch_missing_dependencies(state, algorithm_state, it);
@@ -1033,7 +1028,7 @@ void queue_dispatch_task(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
TaskExecutionSpec &execution_spec,
bool from_global_scheduler) {
LOG_DEBUG("Queueing task in dispatch queue");
RAY_LOG(DEBUG) << "Queueing task in dispatch queue";
TaskSpec *spec = execution_spec.Spec();
if (TaskSpec_is_actor_task(spec)) {
queue_actor_task(state, algorithm_state, execution_spec,
@@ -1076,14 +1071,14 @@ void give_task_to_local_scheduler_retry(UniqueID id,
void *user_data) {
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
Task *task = (Task *) user_data;
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
TaskExecutionSpec *execution_spec = Task_task_execution_spec(task);
TaskSpec *spec = execution_spec->Spec();
CHECK(TaskSpec_is_actor_task(spec));
RAY_CHECK(TaskSpec_is_actor_task(spec));
ActorID actor_id = TaskSpec_actor_id(spec);
CHECK(state->actor_mapping.count(actor_id) == 1);
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
if (state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db)) {
@@ -1113,11 +1108,11 @@ void give_task_to_local_scheduler(LocalSchedulerState *state,
TaskExecutionSpec &execution_spec,
DBClientID local_scheduler_id) {
if (local_scheduler_id == get_db_client_id(state->db)) {
LOG_WARN("Local scheduler is trying to assign a task to itself.");
RAY_LOG(WARNING) << "Local scheduler is trying to assign a task to itself.";
}
CHECK(state->db != NULL);
RAY_CHECK(state->db != NULL);
/* Assign the task to the relevant local scheduler. */
DCHECK(state->config.global_scheduler_exists);
RAY_CHECK(state->config.global_scheduler_exists);
Task *task =
Task_alloc(execution_spec, TASK_STATUS_SCHEDULED, local_scheduler_id);
#if !RAY_USE_NEW_GCS
@@ -1139,11 +1134,11 @@ void give_task_to_global_scheduler_retry(UniqueID id,
void *user_data) {
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
Task *task = (Task *) user_data;
CHECK(Task_state(task) == TASK_STATUS_WAITING);
RAY_CHECK(Task_state(task) == TASK_STATUS_WAITING);
TaskExecutionSpec *execution_spec = Task_task_execution_spec(task);
TaskSpec *spec = execution_spec->Spec();
CHECK(!TaskSpec_is_actor_task(spec));
RAY_CHECK(!TaskSpec_is_actor_task(spec));
give_task_to_global_scheduler(state, state->algorithm_state, *execution_spec);
}
@@ -1165,11 +1160,11 @@ void give_task_to_global_scheduler(LocalSchedulerState *state,
return;
}
/* Pass on the task to the global scheduler. */
DCHECK(state->config.global_scheduler_exists);
RAY_CHECK(state->config.global_scheduler_exists);
Task *task = Task_alloc(execution_spec, TASK_STATUS_WAITING,
get_db_client_id(state->db));
#if !RAY_USE_NEW_GCS
DCHECK(state->db != NULL);
RAY_CHECK(state->db != NULL);
auto retryInfo = RetryInfo{
.num_retries = 0, // This value is unused.
.timeout = 0, // This value is unused.
@@ -1225,7 +1220,7 @@ void handle_actor_task_submitted(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
TaskExecutionSpec &execution_spec) {
TaskSpec *task_spec = execution_spec.Spec();
CHECK(TaskSpec_is_actor_task(task_spec));
RAY_CHECK(TaskSpec_is_actor_task(task_spec));
ActorID actor_id = TaskSpec_actor_id(task_spec);
if (state->actor_mapping.count(actor_id) == 0) {
@@ -1284,8 +1279,8 @@ void handle_task_scheduled(LocalSchedulerState *state,
/* This callback handles tasks that were assigned to this local scheduler by
* the global scheduler, so we can safely assert that there is a connection to
* the database. */
DCHECK(state->db != NULL);
DCHECK(state->config.global_scheduler_exists);
RAY_CHECK(state->db != NULL);
RAY_CHECK(state->config.global_scheduler_exists);
/* Push the task to the appropriate queue. */
queue_task_locally(state, algorithm_state, execution_spec, true);
dispatch_tasks(state, algorithm_state);
@@ -1298,24 +1293,24 @@ void handle_actor_task_scheduled(LocalSchedulerState *state,
/* This callback handles tasks that were assigned to this local scheduler by
* the global scheduler or by other workers, so we can safely assert that
* there is a connection to the database. */
DCHECK(state->db != NULL);
DCHECK(state->config.global_scheduler_exists);
RAY_CHECK(state->db != NULL);
RAY_CHECK(state->config.global_scheduler_exists);
/* Check that the task is meant to run on an actor that this local scheduler
* is responsible for. */
DCHECK(TaskSpec_is_actor_task(spec));
RAY_CHECK(TaskSpec_is_actor_task(spec));
ActorID actor_id = TaskSpec_actor_id(spec);
if (state->actor_mapping.count(actor_id) == 1) {
DCHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
get_db_client_id(state->db));
} else {
/* This means that an actor has been assigned to this local scheduler, and a
* task for that actor has been received by this local scheduler, but this
* local scheduler has not yet processed the notification about the actor
* creation. This may be possible though should be very uncommon. If it does
* happen, it's ok. */
LOG_INFO(
"handle_actor_task_scheduled called on local scheduler but the "
"corresponding actor_map_entry is not present. This should be rare.");
RAY_LOG(INFO) << "handle_actor_task_scheduled called on local scheduler "
<< "but the corresponding actor_map_entry is not present. "
<< "This should be rare.";
}
/* Push the task to the appropriate queue. */
queue_task_locally(state, algorithm_state, execution_spec, true);
@@ -1325,19 +1320,19 @@ void handle_actor_task_scheduled(LocalSchedulerState *state,
void handle_worker_available(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
LocalSchedulerClient *worker) {
CHECK(worker->task_in_progress == NULL);
RAY_CHECK(worker->task_in_progress == NULL);
/* Check that the worker isn't in the pool of available workers. */
DCHECK(!worker_in_vector(algorithm_state->available_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker));
/* Check that the worker isn't in the list of blocked workers. */
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
/* If the worker was executing a task, it must have finished, so remove it
* from the list of executing workers. If the worker is connecting for the
* first time, it will not be in the list of executing workers. */
remove_worker_from_vector(algorithm_state->executing_workers, worker);
/* Double check that we successfully removed the worker. */
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
/* Add worker to the list of available workers. */
algorithm_state->available_workers.push_back(worker);
@@ -1350,7 +1345,7 @@ void handle_worker_removed(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
LocalSchedulerClient *worker) {
/* Make sure this is not an actor. */
CHECK(worker->actor_id.is_nil());
RAY_CHECK(worker->actor_id.is_nil());
/* Make sure that we remove the worker at most once. */
int num_times_removed = 0;
@@ -1360,24 +1355,24 @@ void handle_worker_removed(LocalSchedulerState *state,
remove_worker_from_vector(algorithm_state->available_workers, worker);
num_times_removed += removed_from_available;
/* Double check that we actually removed the worker. */
DCHECK(!worker_in_vector(algorithm_state->available_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker));
/* Remove the worker from executing workers, if it's there. */
bool removed_from_executing =
remove_worker_from_vector(algorithm_state->executing_workers, worker);
num_times_removed += removed_from_executing;
/* Double check that we actually removed the worker. */
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
/* Remove the worker from blocked workers, if it's there. */
bool removed_from_blocked =
remove_worker_from_vector(algorithm_state->blocked_workers, worker);
num_times_removed += removed_from_blocked;
/* Double check that we actually removed the worker. */
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
/* Make sure we removed the worker at most once. */
CHECK(num_times_removed <= 1);
RAY_CHECK(num_times_removed <= 1);
/* Attempt to dispatch some tasks because some resources may have freed up. */
dispatch_all_tasks(state, algorithm_state);
@@ -1400,7 +1395,7 @@ void handle_actor_worker_disconnect(LocalSchedulerState *state,
state->removed_actors.insert(worker->actor_id);
CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0);
LocalActorInfo &entry =
algorithm_state->local_actor_infos.find(worker->actor_id)->second;
for (auto &task : *entry.task_queue) {
@@ -1421,13 +1416,13 @@ void handle_actor_worker_available(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
LocalSchedulerClient *worker) {
ActorID actor_id = worker->actor_id;
CHECK(!actor_id.is_nil());
RAY_CHECK(!actor_id.is_nil());
/* Get the actor info for this worker. */
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
LocalActorInfo &entry =
algorithm_state->local_actor_infos.find(actor_id)->second;
CHECK(worker == entry.worker);
CHECK(!entry.worker_available);
RAY_CHECK(worker == entry.worker);
RAY_CHECK(!entry.worker_available);
/* If an actor task was assigned, mark returned dummy object as locally
* available. This is not added to the object table, so the update will be
* invisible to other nodes. */
@@ -1446,10 +1441,11 @@ void handle_worker_blocked(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
LocalSchedulerClient *worker) {
/* Find the worker in the list of executing workers. */
CHECK(remove_worker_from_vector(algorithm_state->executing_workers, worker));
RAY_CHECK(
remove_worker_from_vector(algorithm_state->executing_workers, worker));
/* Check that the worker isn't in the list of blocked workers. */
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
/* Add the worker to the list of blocked workers. */
algorithm_state->blocked_workers.push_back(worker);
@@ -1471,10 +1467,11 @@ void handle_worker_unblocked(LocalSchedulerState *state,
SchedulingAlgorithmState *algorithm_state,
LocalSchedulerClient *worker) {
/* Find the worker in the list of blocked workers. */
CHECK(remove_worker_from_vector(algorithm_state->blocked_workers, worker));
RAY_CHECK(
remove_worker_from_vector(algorithm_state->blocked_workers, worker));
/* Check that the worker isn't in the list of executing workers. */
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
/* Add the worker to the list of executing workers. */
algorithm_state->executing_workers.push_back(worker);
@@ -1499,7 +1496,7 @@ void handle_object_available(LocalSchedulerState *state,
}
/* Add the entry to the set of locally available objects. */
CHECK(algorithm_state->local_objects.count(object_id) == 0);
RAY_CHECK(algorithm_state->local_objects.count(object_id) == 0);
algorithm_state->local_objects[object_id] = entry;
if (!entry.dependent_tasks.empty()) {
@@ -1530,7 +1527,7 @@ void handle_object_removed(LocalSchedulerState *state,
/* Remove the object from the set of locally available objects. */
SchedulingAlgorithmState *algorithm_state = state->algorithm_state;
CHECK(algorithm_state->local_objects.count(removed_object_id) == 1);
RAY_CHECK(algorithm_state->local_objects.count(removed_object_id) == 1);
algorithm_state->local_objects.erase(removed_object_id);
/* Track queued tasks that were dependent on this object.
@@ -1544,7 +1541,7 @@ void handle_object_removed(LocalSchedulerState *state,
it != algorithm_state->dispatch_task_queue->end();) {
if (it->DependsOn(removed_object_id)) {
/* This task was dependent on the removed object. */
LOG_DEBUG("Moved task from dispatch queue back to waiting queue");
RAY_LOG(DEBUG) << "Moved task from dispatch queue back to waiting queue";
algorithm_state->waiting_task_queue->push_back(std::move(*it));
/* Remove the task from the dispatch queue, but do not free the task
* spec. */
@@ -1563,7 +1560,8 @@ void handle_object_removed(LocalSchedulerState *state,
queue_it != actor_info.task_queue->end();) {
if (queue_it->DependsOn(removed_object_id)) {
/* This task was dependent on the removed object. */
LOG_DEBUG("Moved task from actor dispatch queue back to waiting queue");
RAY_LOG(DEBUG) << "Moved task from actor dispatch queue back to "
<< "waiting queue";
algorithm_state->waiting_task_queue->push_back(std::move(*queue_it));
/* Remove the task from the dispatch queue, but do not free the task
* spec. */
@@ -1667,16 +1665,16 @@ int num_dispatch_tasks(SchedulingAlgorithmState *algorithm_state) {
/* Emit one debug-level log line reporting the sizes of the available,
 * executing and blocked worker vectors, prefixed with the caller's message.
 *
 * message: text printed before the counts.
 * algorithm_state: scheduling state whose worker vectors are measured.
 *
 * NOTE(review): this text comes from a rendered diff, so both the printf-style
 * LOG_DEBUG call and its stream-style RAY_LOG(DEBUG) replacement appear here;
 * presumably only one of them exists in the actual file -- confirm against the
 * commit before relying on this span. */
void print_worker_info(const char *message,
SchedulingAlgorithmState *algorithm_state) {
LOG_DEBUG("%s: %lu available, %lu executing, %lu blocked", message,
algorithm_state->available_workers.size(),
algorithm_state->executing_workers.size(),
algorithm_state->blocked_workers.size());
RAY_LOG(DEBUG) << message << ": " << algorithm_state->available_workers.size()
<< " available, " << algorithm_state->executing_workers.size()
<< " executing, " << algorithm_state->blocked_workers.size()
<< " blocked";
}
/* Return, by value, the task_counters map stored in the local_actor_infos
 * entry for actor_id.
 *
 * algorithm_state: scheduling state holding local_actor_infos.
 * actor_id: key of the actor whose counters are requested.
 *
 * The check requires that local_actor_infos already has an entry for
 * actor_id, so the operator[] lookup below is not expected to insert a new
 * element.
 *
 * NOTE(review): this text comes from a rendered diff; the CHECK line and the
 * RAY_CHECK line are presumably the before/after forms of a single statement
 * -- confirm against the commit. */
std::unordered_map<ActorHandleID, int64_t, UniqueIDHasher>
get_actor_task_counters(SchedulingAlgorithmState *algorithm_state,
ActorID actor_id) {
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
return algorithm_state->local_actor_infos[actor_id].task_counters;
}
@@ -1685,7 +1683,7 @@ void set_actor_task_counters(
ActorID actor_id,
const std::unordered_map<ActorHandleID, int64_t, UniqueIDHasher>
&task_counters) {
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
/* Overwrite the current task counters for the actor. This is necessary
* during reconstruction when resuming from a checkpoint so that we can
* resume the task frontier at the time that the checkpoint was saved. */
@@ -1731,7 +1729,7 @@ void set_actor_task_counters(
/* Return, by value, the frontier_dependencies map stored in the
 * local_actor_infos entry for actor_id.
 *
 * algorithm_state: scheduling state holding local_actor_infos.
 * actor_id: key of the actor whose frontier is requested.
 *
 * The check requires that local_actor_infos already has an entry for
 * actor_id, so the operator[] lookup below is not expected to insert a new
 * element.
 *
 * NOTE(review): this text comes from a rendered diff; the CHECK line and the
 * RAY_CHECK line are presumably the before/after forms of a single statement
 * -- confirm against the commit. */
std::unordered_map<ActorHandleID, ObjectID, UniqueIDHasher> get_actor_frontier(
SchedulingAlgorithmState *algorithm_state,
ActorID actor_id) {
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
return algorithm_state->local_actor_infos[actor_id].frontier_dependencies;
}
@@ -1741,7 +1739,7 @@ void set_actor_frontier(
ActorID actor_id,
const std::unordered_map<ActorHandleID, ObjectID, UniqueIDHasher>
&frontier_dependencies) {
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
auto entry = algorithm_state->local_actor_infos[actor_id];
entry.frontier_dependencies = frontier_dependencies;
for (auto frontier_dependency : entry.frontier_dependencies) {
@@ -30,7 +30,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
/* Register the process ID with the local scheduler. */
int success = write_message(result->conn, MessageType_RegisterClientRequest,
fbb.GetSize(), fbb.GetBufferPointer());
CHECKM(success == 0, "Unable to register worker with local scheduler");
RAY_CHECK(success == 0) << "Unable to register worker with local scheduler";
/* Wait for a confirmation from the local scheduler. */
int64_t type;
@@ -38,10 +38,10 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
uint8_t *reply;
read_message(result->conn, &type, &reply_size, &reply);
if (type == DISCONNECT_CLIENT) {
LOG_DEBUG("Exiting because local scheduler closed connection.");
RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection.";
exit(1);
}
CHECK(type == MessageType_RegisterClientReply);
RAY_CHECK(type == MessageType_RegisterClientReply);
/* Parse the reply object. */
auto reply_message = flatbuffers::GetRoot<RegisterClientReply>(reply);
@@ -50,7 +50,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
}
/* If the worker is not an actor, there should not be any GPU IDs here. */
if (ActorID_equal(result->actor_id, ActorID::nil())) {
CHECK(reply_message->gpu_ids()->size() == 0);
RAY_CHECK(reply_message->gpu_ids()->size() == 0);
}
free(reply);
@@ -111,10 +111,10 @@ TaskSpec *local_scheduler_get_task(LocalSchedulerConnection *conn,
* scheduler gives this client a task. */
read_message(conn->conn, &type, &reply_size, &reply);
if (type == DISCONNECT_CLIENT) {
LOG_WARN("Exiting because local scheduler closed connection.");
RAY_LOG(WARNING) << "Exiting because local scheduler closed connection.";
exit(1);
}
CHECK(type == MessageType_ExecuteTask);
RAY_CHECK(type == MessageType_ExecuteTask);
/* Parse the flatbuffer object. */
auto reply_message = flatbuffers::GetRoot<GetTaskReply>(reply);
@@ -186,10 +186,10 @@ const std::vector<uint8_t> local_scheduler_get_actor_frontier(
std::vector<uint8_t> reply;
read_vector(conn->conn, &type, reply);
if (type == DISCONNECT_CLIENT) {
LOG_DEBUG("Exiting because local scheduler closed connection.");
RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection.";
exit(1);
}
CHECK(type == MessageType_GetActorFrontierReply);
RAY_CHECK(type == MessageType_GetActorFrontierReply);
return reply;
}
@@ -91,7 +91,7 @@ LocalSchedulerMock *LocalSchedulerMock_init(int num_workers,
connect_ipc_sock_retry(plasma_store_socket_name, 5, 100);
std::string local_scheduler_socket_name = bind_ipc_sock_retry(
local_scheduler_socket_name_format, &mock->local_scheduler_fd);
CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0);
RAY_CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0);
/* Construct worker command */
std::stringstream worker_command_ss;
@@ -411,7 +411,7 @@ TaskExecutionSpec *object_reconstruction_suppression_spec;
void object_reconstruction_suppression_callback(ObjectID object_id,
bool success,
void *user_context) {
CHECK(success);
RAY_CHECK(success);
/* Submit the task after adding the object to the object table. */
LocalSchedulerConnection *worker = (LocalSchedulerConnection *) user_context;
local_scheduler_submit(worker, *object_reconstruction_suppression_spec);
+80 -86
View File
@@ -74,8 +74,8 @@ int handle_sigpipe(plasma::Status s, int fd) {
} break;
default:
/* This code should be unreachable. */
CHECK(0);
LOG_FATAL("Failed to write message to client on fd %d", fd);
RAY_CHECK(0);
RAY_LOG(FATAL) << "Failed to write message to client on fd " << fd;
}
return err;
@@ -320,12 +320,12 @@ bool ClientConnection_request_finished(ClientConnection *client_conn) {
/* Select the wait-request table that corresponds to a plasma query type and
 * return a reference to it.
 *
 * manager_state: owner of the two tables.
 * type: plasma::PLASMA_QUERY_LOCAL selects object_wait_requests_local;
 *       plasma::PLASMA_QUERY_ANYWHERE selects object_wait_requests_remote.
 *
 * NOTE(review): this text comes from a rendered diff, so lines from two
 * versions are interleaved. It looks like the leading RAY_CHECK on the two
 * accepted types belongs to the newer version, while the `else if` /
 * LOG_FATAL("This code should be unreachable.") fallback belongs to the older
 * one -- confirm against the commit before editing this span. */
std::unordered_map<ObjectID, std::vector<WaitRequest *>, UniqueIDHasher> &
object_wait_requests_from_type(PlasmaManagerState *manager_state, int type) {
/* We use different types of hash tables for different requests. */
RAY_CHECK(type == plasma::PLASMA_QUERY_LOCAL ||
type == plasma::PLASMA_QUERY_ANYWHERE);
if (type == plasma::PLASMA_QUERY_LOCAL) {
return manager_state->object_wait_requests_local;
} else if (type == plasma::PLASMA_QUERY_ANYWHERE) {
return manager_state->object_wait_requests_remote;
} else {
LOG_FATAL("This code should be unreachable.");
return manager_state->object_wait_requests_remote;
}
}
@@ -367,8 +367,8 @@ void remove_wait_request_for_object(PlasmaManagerState *manager_state,
/* Tear down a wait request: remove its timer from the manager's event loop
 * when one is set, then delete the request object.
 *
 * manager_state: provides the event loop the timer was registered on.
 * wait_req: request to destroy; it must not be used after this call because
 *           it is deleted here.
 *
 * NOTE(review): this text comes from a rendered diff; the CHECK(...) and
 * RAY_CHECK(...) statements are presumably the before/after forms of a single
 * timer-removal check -- confirm against the commit. */
void remove_wait_request(PlasmaManagerState *manager_state,
WaitRequest *wait_req) {
/* A timer value of -1 is treated as "no timer registered". */
if (wait_req->timer != -1) {
CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) ==
AE_OK);
RAY_CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) ==
AE_OK);
}
delete wait_req;
}
@@ -416,9 +416,9 @@ void update_object_wait_requests(PlasmaManagerState *manager_state,
auto object_request =
wait_req->object_requests.find(obj_id.to_plasma_id());
/* Check that we found the object. */
CHECK(object_request != wait_req->object_requests.end());
RAY_CHECK(object_request != wait_req->object_requests.end());
/* Check that the object found was not previously known to us. */
CHECK(object_request->second.status == ObjectStatus_Nonexistent);
RAY_CHECK(object_request->second.status == ObjectStatus_Nonexistent);
/* Update the found object's status to a known status. */
object_request->second.status = status;
@@ -431,7 +431,7 @@ void update_object_wait_requests(PlasmaManagerState *manager_state,
index += 1;
}
}
DCHECK(static_cast<size_t>(index) == wait_requests.size());
RAY_CHECK(static_cast<size_t>(index) == wait_requests.size());
/* Remove the array of wait requests for this object, since no one should be
* waiting for this object anymore. */
object_wait_requests.erase(object_wait_requests_it);
@@ -491,7 +491,7 @@ PlasmaManagerState *PlasmaManagerState_init(const char *store_socket_name,
RAY_CHECK_OK(state->gcs_client.context()->AttachToEventLoop(state->loop));
} else {
state->db = NULL;
LOG_DEBUG("No db connection specified");
RAY_LOG(DEBUG) << "No db connection specified";
}
state->addr = manager_addr;
state->port = manager_port;
@@ -572,14 +572,14 @@ int write_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
int err;
if (r <= 0) {
LOG_ERROR("Write error");
RAY_LOG(ERROR) << "Write error";
err = errno;
} else {
conn->cursor += r;
CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
/* If we've finished writing this buffer, reset the cursor. */
if (conn->cursor == buf->data_size + buf->metadata_size) {
LOG_DEBUG("writing on channel %d finished", conn->fd);
RAY_LOG(DEBUG) << "writing on channel " << conn->fd << " finished";
ClientConnection_finish_request(conn);
}
err = 0;
@@ -612,7 +612,7 @@ void send_queued_request(event_loop *loop,
conn->fd);
break;
case MessageType_PlasmaDataReply:
LOG_DEBUG("Transferring object to manager");
RAY_LOG(DEBUG) << "Transferring object to manager";
if (ClientConnection_request_finished(conn)) {
/* If the cursor is not set, we haven't sent any requests for this object
* yet, so send the initial data request. */
@@ -627,7 +627,7 @@ void send_queued_request(event_loop *loop,
}
break;
default:
LOG_FATAL("Buffered request has unknown type.");
RAY_LOG(FATAL) << "Buffered request has unknown type.";
}
/* If the other side hung up, stop sending to this manager. */
@@ -658,7 +658,7 @@ void send_queued_request(event_loop *loop,
int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
ssize_t r, s;
CHECK(buf != NULL);
RAY_CHECK(buf != NULL);
/* Try to read one buf_size at a time. */
s = buf->data_size + buf->metadata_size - conn->cursor;
if (s > RayConfig::instance().buf_size()) {
@@ -668,11 +668,11 @@ int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
int err;
if (r <= 0) {
LOG_ERROR("Read error");
RAY_LOG(ERROR) << "Read error";
err = errno;
} else {
conn->cursor += r;
CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
/* If the cursor is equal to the full object size, reset the cursor and
* we're done. */
if (conn->cursor == buf->data_size + buf->metadata_size) {
@@ -709,7 +709,7 @@ void process_data_chunk(event_loop *loop,
/* If we're done receiving the object, seal the object and release it. The
* release corresponds to the call to plasma_create that occurred in
* process_data_request. */
LOG_DEBUG("reading on channel %d finished", data_sock);
RAY_LOG(DEBUG) << "reading on channel " << data_sock << " finished";
/* The following seal also triggers notification of clients for fetch or
* wait requests, see process_object_notification. */
ARROW_CHECK_OK(plasma_conn->Seal(buf->object_id.to_plasma_id()));
@@ -805,9 +805,8 @@ void process_transfer_request(event_loop *loop,
/* If the object wasn't locally available, exit immediately. If the object
* later appears locally, the requesting plasma manager should request the
* transfer again. */
LOG_WARN(
"Unable to transfer object to requesting plasma manager, object not "
"local.");
RAY_LOG(WARNING) << "Unable to transfer object to requesting plasma "
<< "manager, object not local.";
return;
}
@@ -822,8 +821,8 @@ void process_transfer_request(event_loop *loop,
}
}
CHECK(object_buffer.metadata->data() ==
object_buffer.data->data() + object_buffer.data_size);
RAY_CHECK(object_buffer.metadata->data() ==
object_buffer.data->data() + object_buffer.data_size);
PlasmaRequestBuffer *buf = new PlasmaRequestBuffer();
buf->type = MessageType_PlasmaDataReply;
buf->object_id = obj_id;
@@ -875,7 +874,7 @@ void process_data_request(event_loop *loop,
* conn->transfer_queue. */
conn->transfer_queue.push_back(buf);
}
CHECK(ClientConnection_request_finished(conn));
RAY_CHECK(ClientConnection_request_finished(conn));
ClientConnection_start_request(conn);
/* Switch to reading the data from this socket, instead of listening for
@@ -912,10 +911,10 @@ void process_data_request(event_loop *loop,
void request_transfer_from(PlasmaManagerState *manager_state,
FetchRequest *fetch_req) {
CHECK(fetch_req->manager_vector.size() > 0);
CHECK(fetch_req->next_manager >= 0 &&
static_cast<size_t>(fetch_req->next_manager) <
fetch_req->manager_vector.size());
RAY_CHECK(fetch_req->manager_vector.size() > 0);
RAY_CHECK(fetch_req->next_manager >= 0 &&
static_cast<size_t>(fetch_req->next_manager) <
fetch_req->manager_vector.size());
char addr[16];
int port;
parse_ip_addr_port(fetch_req->manager_vector[fetch_req->next_manager].c_str(),
@@ -931,8 +930,8 @@ void request_transfer_from(PlasmaManagerState *manager_state,
&temp_addr[2], &temp_addr[3]);
if (memcmp(temp_addr, manager_state->addr, 4) == 0 &&
port == manager_state->port) {
LOG_FATAL(
"This manager is attempting to request a transfer from itself.");
RAY_LOG(FATAL) << "This manager is attempting to request a transfer from "
<< "itself.";
}
PlasmaRequestBuffer *transfer_request = new PlasmaRequestBuffer();
@@ -975,12 +974,12 @@ int fetch_timeout_handler(event_loop *loop, timer_id id, void *context) {
if (is_receiving_or_received(manager_state, fetch_req->object_id)) {
// Do nothing if the object transfer is in progress or if the object
// has already been received.
LOG_DEBUG("fetch_timeout_handler: Object in progress or received. %s",
fetch_req->object_id.hex().c_str());
RAY_LOG(DEBUG) << "fetch_timeout_handler: Object in progress or "
<< "received. " << fetch_req->object_id;
continue;
}
LOG_DEBUG("fetch_timeout_handler: Object missing. %s",
fetch_req->object_id.hex().c_str());
RAY_LOG(DEBUG) << "fetch_timeout_handler: Object missing. "
<< fetch_req->object_id;
request_transfer_from(manager_state, fetch_req);
/* If we've tried all of the managers that we know about for this object,
* add this object to the list to resend requests for. */
@@ -1018,13 +1017,13 @@ void request_transfer(ObjectID object_id,
PlasmaManagerState *manager_state = (PlasmaManagerState *) context;
/* This callback is called from object_table_subscribe, which guarantees that
* the manager vector contains at least one element. */
CHECK(manager_vector.size() >= 1);
RAY_CHECK(manager_vector.size() >= 1);
auto it = manager_state->fetch_requests.find(object_id);
if (is_object_local(manager_state, object_id)) {
/* If the object is already here, then the fetch request should have been
* removed. */
CHECK(it == manager_state->fetch_requests.end());
RAY_CHECK(it == manager_state->fetch_requests.end());
return;
}
FetchRequest *fetch_req = it->second;
@@ -1033,7 +1032,7 @@ void request_transfer(ObjectID object_id,
* TODO(rkn): We actually have to remove this check to handle the rare
* scenario where the object is transferred here and then evicted before this
* callback gets called. */
CHECK(fetch_req != NULL);
RAY_CHECK(fetch_req != NULL);
/* Update the manager vector. */
fetch_req->manager_vector = manager_vector;
@@ -1055,7 +1054,7 @@ void call_request_transfer(ObjectID object_id,
PlasmaManagerState *manager_state = (PlasmaManagerState *) context;
/* Check that there isn't already a fetch request for this object. */
auto it = manager_state->fetch_requests.find(object_id);
CHECK(it == manager_state->fetch_requests.end());
RAY_CHECK(it == manager_state->fetch_requests.end());
/* Create a fetch request. */
FetchRequest *fetch_req = create_fetch_request(manager_state, object_id);
manager_state->fetch_requests[object_id] = fetch_req;
@@ -1063,7 +1062,7 @@ void call_request_transfer(ObjectID object_id,
}
/* Callback that unconditionally fails a check; none of its arguments are
 * read.
 *
 * NOTE(review): this text comes from a rendered diff; CHECK(0) and
 * RAY_CHECK(0) are presumably the before/after forms of the same statement
 * -- confirm against the commit. */
void fatal_table_callback(ObjectID id, void *user_context, void *user_data) {
CHECK(0);
RAY_CHECK(0);
}
/* This callback is used by both fetch and wait. Therefore, it may have to
@@ -1143,7 +1142,7 @@ void process_wait_request(ClientConnection *client_conn,
plasma::ObjectRequestMap &&object_requests,
uint64_t timeout_ms,
int num_ready_objects) {
CHECK(client_conn != NULL);
RAY_CHECK(client_conn != NULL);
PlasmaManagerState *manager_state = client_conn->manager_state;
int num_object_requests = object_requests.size();
@@ -1184,7 +1183,7 @@ void process_wait_request(ClientConnection *client_conn,
num_object_ids_to_request += 1;
} else {
/* This code should be unreachable. */
CHECK(0);
RAY_CHECK(0);
}
}
@@ -1255,7 +1254,7 @@ void object_table_lookup_fail_callback(ObjectID object_id,
void *user_data) {
/* Fail for now. Later, we may want to send a ObjectStatus_Nonexistent to the
* client. */
CHECK(0);
RAY_CHECK(0);
}
void process_status_request(ClientConnection *client_conn,
@@ -1299,7 +1298,7 @@ void process_delete_object_notification(PlasmaManagerState *state,
void log_object_hash_mismatch_error_task_callback(Task *task,
void *user_context) {
CHECK(task != NULL);
RAY_CHECK(task != NULL);
PlasmaManagerState *state = (PlasmaManagerState *) user_context;
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
FunctionID function = TaskSpec_function(spec);
@@ -1314,7 +1313,7 @@ void log_object_hash_mismatch_error_result_callback(ObjectID object_id,
TaskID task_id,
bool is_put,
void *user_context) {
CHECK(!task_id.is_nil());
RAY_CHECK(!task_id.is_nil());
PlasmaManagerState *state = (PlasmaManagerState *) user_context;
/* Get the specification for the nondeterministic task. */
#if !RAY_USE_NEW_GCS
@@ -1391,8 +1390,8 @@ void process_object_notification(event_loop *loop,
uint8_t *notification = read_message_async(loop, client_sock);
if (notification == NULL) {
PlasmaManagerState_free(state);
LOG_FATAL(
"Lost connection to the plasma store, plasma manager is exiting!");
RAY_LOG(FATAL) << "Lost connection to the plasma store, plasma manager is "
<< "exiting!";
}
auto object_info = flatbuffers::GetRoot<ObjectInfo>(notification);
/* Add object to locally available object. */
@@ -1437,7 +1436,7 @@ ClientConnection *ClientConnection_listen(event_loop *loop,
ClientConnection *conn = ClientConnection_init(state, new_socket, client_key);
event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, conn);
LOG_DEBUG("New client connection with fd %d", new_socket);
RAY_LOG(DEBUG) << "New client connection with fd " << new_socket;
return conn;
}
@@ -1483,7 +1482,7 @@ void process_message(event_loop *loop,
switch (type) {
case MessageType_PlasmaDataRequest: {
LOG_DEBUG("Processing data request");
RAY_LOG(DEBUG) << "Processing data request";
plasma::ObjectID object_id;
char *address;
int port;
@@ -1493,7 +1492,7 @@ void process_message(event_loop *loop,
free(address);
} break;
case MessageType_PlasmaDataReply: {
LOG_DEBUG("Processing data reply");
RAY_LOG(DEBUG) << "Processing data reply";
plasma::ObjectID object_id;
int64_t object_size;
int64_t metadata_size;
@@ -1503,7 +1502,7 @@ void process_message(event_loop *loop,
metadata_size, conn);
} break;
case MessageType_PlasmaFetchRequest: {
LOG_DEBUG("Processing fetch remote");
RAY_LOG(DEBUG) << "Processing fetch remote";
std::vector<plasma::ObjectID> object_ids_to_fetch;
/* TODO(pcm): process_fetch_requests allocates an array of num_objects
* object_ids too so these should be shared in the future. */
@@ -1512,7 +1511,7 @@ void process_message(event_loop *loop,
object_ids_to_fetch.data());
} break;
case MessageType_PlasmaWaitRequest: {
LOG_DEBUG("Processing wait");
RAY_LOG(DEBUG) << "Processing wait";
plasma::ObjectRequestMap object_requests;
int64_t timeout_ms;
int num_ready_objects;
@@ -1522,18 +1521,18 @@ void process_message(event_loop *loop,
num_ready_objects);
} break;
case MessageType_PlasmaStatusRequest: {
LOG_DEBUG("Processing status");
RAY_LOG(DEBUG) << "Processing status";
plasma::ObjectID object_id;
ARROW_CHECK_OK(plasma::ReadStatusRequest(data, length, &object_id, 1));
process_status_request(conn, object_id);
} break;
case DISCONNECT_CLIENT: {
LOG_DEBUG("Disconnecting client on fd %d", client_sock);
RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock;
event_loop_remove_file(loop, client_sock);
ClientConnection_free(conn);
} break;
default:
LOG_FATAL("invalid request %" PRId64, type);
RAY_LOG(FATAL) << "invalid request " << type;
}
free(data);
@@ -1541,9 +1540,8 @@ void process_message(event_loop *loop,
int64_t end_time = current_time_ms();
if (end_time - start_time >
RayConfig::instance().max_time_for_handler_milliseconds()) {
LOG_WARN("process_message of type %" PRId64 " took %" PRId64
" milliseconds.",
type, end_time - start_time);
RAY_LOG(WARNING) << "process_message of type " << type << " took "
<< end_time - start_time << " milliseconds.";
}
}
@@ -1552,12 +1550,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) {
/* Check that the last heartbeat was not sent too long ago. */
int64_t current_time = current_time_ms();
CHECK(current_time >= state->previous_heartbeat_time);
RAY_CHECK(current_time >= state->previous_heartbeat_time);
if (current_time - state->previous_heartbeat_time >
RayConfig::instance().num_heartbeats_timeout() *
RayConfig::instance().heartbeat_timeout_milliseconds()) {
LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.",
current_time - state->previous_heartbeat_time);
RAY_LOG(FATAL) << "The last heartbeat was sent "
<< current_time - state->previous_heartbeat_time
<< " milliseconds ago.";
}
state->previous_heartbeat_time = current_time;
@@ -1583,18 +1582,18 @@ void start_server(const char *store_socket_name,
}
int local_sock = bind_ipc_sock(manager_socket_name, false);
CHECKM(local_sock >= 0, "Unable to bind local manager socket");
RAY_CHECK(local_sock >= 0) << "Unable to bind local manager socket";
g_manager_state = PlasmaManagerState_init(
store_socket_name, manager_socket_name, master_addr, port,
redis_primary_addr, redis_primary_port);
CHECK(g_manager_state);
RAY_CHECK(g_manager_state);
CHECK(listen(remote_sock, 128) != -1);
CHECK(listen(local_sock, 128) != -1);
RAY_CHECK(listen(remote_sock, 128) != -1);
RAY_CHECK(listen(local_sock, 128) != -1);
LOG_DEBUG("Started server connected to store %s, listening on port %d",
store_socket_name, port);
RAY_LOG(DEBUG) << "Started server connected to store " << store_socket_name
<< ", listening on port " << port;
event_loop_add_file(g_manager_state->loop, local_sock, EVENT_LOOP_READ,
handle_new_client, g_manager_state);
event_loop_add_file(g_manager_state->loop, remote_sock, EVENT_LOOP_READ,
@@ -1619,7 +1618,7 @@ void start_server(const char *store_socket_name,
/* Report "success" to valgrind. */
void signal_handler(int signal) {
LOG_DEBUG("Signal was %d", signal);
RAY_LOG(DEBUG) << "Signal was " << signal;
if (signal == SIGTERM) {
if (g_manager_state) {
PlasmaManagerState_free(g_manager_state);
@@ -1662,37 +1661,32 @@ int main(int argc, char *argv[]) {
redis_primary_addr_port = optarg;
break;
default:
LOG_FATAL("unknown option %c", c);
RAY_LOG(FATAL) << "unknown option " << c;
}
}
if (!store_socket_name) {
LOG_FATAL(
"please specify socket for connecting to the plasma store with -s "
"switch");
RAY_LOG(FATAL) << "please specify socket for connecting to the plasma "
<< "store with -s switch";
}
if (!manager_socket_name) {
LOG_FATAL(
"please specify socket name of the manager's local socket with -m "
"switch");
RAY_LOG(FATAL) << "please specify socket name of the manager's local "
<< "socket with -m switch";
}
if (!master_addr) {
LOG_FATAL(
"please specify ip address of the current host in the format "
"123.456.789.10 with -h switch");
RAY_LOG(FATAL) << "please specify ip address of the current host in the "
<< "format 123.456.789.10 with -h switch";
}
if (port == -1) {
LOG_FATAL(
"please specify port the plasma manager shall listen to in the"
"format 12345 with -p switch");
RAY_LOG(FATAL) << "please specify port the plasma manager shall listen to "
<< "in the format 12345 with -p switch";
}
char redis_primary_addr[16];
int redis_primary_port;
int redis_primary_port = -1;
if (!redis_primary_addr_port ||
parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
&redis_primary_port) == -1) {
LOG_FATAL(
"specify the primary redis address like 127.0.0.1:6379 with the -r "
"switch");
RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 "
<< "with the -r switch";
}
start_server(store_socket_name, manager_socket_name, master_addr, port,
redis_primary_addr, redis_primary_port);
+27 -27
View File
@@ -22,8 +22,8 @@ Status PlasmaReceive(int sock,
std::vector<uint8_t> &buffer) {
int64_t type;
RETURN_NOT_OK(ReadMessage(sock, &type, buffer));
ARROW_CHECK(type == message_type) << "type = " << type
<< ", message_type = " << message_type;
RAY_CHECK(type == message_type) << "type = " << type
<< ", message_type = " << message_type;
return Status::OK();
}
@@ -45,7 +45,7 @@ Status ReadCreateRequest(uint8_t *data,
ObjectID *object_id,
int64_t *data_size,
int64_t *metadata_size) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaCreateRequest>(data);
*data_size = message->data_size();
*metadata_size = message->metadata_size();
@@ -72,7 +72,7 @@ Status SendCreateReply(int sock,
Status ReadCreateReply(uint8_t *data,
ObjectID *object_id,
PlasmaObject *object) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaCreateReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
object->handle.store_fd = message->plasma_object()->segment_index();
@@ -99,10 +99,10 @@ Status SendSealRequest(int sock, ObjectID object_id, unsigned char *digest) {
Status ReadSealRequest(uint8_t *data,
ObjectID *object_id,
unsigned char *digest) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaSealRequest>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
ARROW_CHECK(message->digest()->size() == kDigestSize);
RAY_CHECK(message->digest()->size() == kDigestSize);
memcpy(digest, message->digest()->data(), kDigestSize);
return Status::OK();
}
@@ -117,7 +117,7 @@ Status SendSealReply(int sock, ObjectID object_id, int error) {
}
Status ReadSealReply(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaSealReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return plasma_error_status(message->error());
@@ -135,7 +135,7 @@ Status SendReleaseRequest(int sock, ObjectID object_id) {
}
Status ReadReleaseRequest(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaReleaseRequest>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return Status::OK();
@@ -151,7 +151,7 @@ Status SendReleaseReply(int sock, ObjectID object_id, int error) {
}
Status ReadReleaseReply(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaReleaseReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return plasma_error_status(message->error());
@@ -169,7 +169,7 @@ Status SendDeleteRequest(int sock, ObjectID object_id) {
}
Status ReadDeleteRequest(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaReleaseReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return Status::OK();
@@ -185,7 +185,7 @@ Status SendDeleteReply(int sock, ObjectID object_id, int error) {
}
Status ReadDeleteReply(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaDeleteReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return plasma_error_status(message->error());
@@ -205,7 +205,7 @@ Status SendStatusRequest(int sock, ObjectID object_ids[], int64_t num_objects) {
Status ReadStatusRequest(uint8_t *data,
ObjectID object_ids[],
int64_t num_objects) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaStatusRequest>(data);
for (int64_t i = 0; i < num_objects; ++i) {
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
@@ -227,7 +227,7 @@ Status SendStatusReply(int sock,
}
int64_t ReadStatusReply_num_objects(uint8_t *data) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaStatusReply>(data);
return message->object_ids()->size();
}
@@ -236,7 +236,7 @@ Status ReadStatusReply(uint8_t *data,
ObjectID object_ids[],
int object_status[],
int64_t num_objects) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaStatusReply>(data);
for (int64_t i = 0; i < num_objects; ++i) {
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
@@ -259,7 +259,7 @@ Status SendContainsRequest(int sock, ObjectID object_id) {
}
Status ReadContainsRequest(uint8_t *data, ObjectID *object_id) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaContainsRequest>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
return Status::OK();
@@ -275,7 +275,7 @@ Status SendContainsReply(int sock, ObjectID object_id, int has_object) {
}
Status ReadContainsReply(uint8_t *data, ObjectID *object_id, int *has_object) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaContainsReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
*has_object = message->has_object();
@@ -305,7 +305,7 @@ Status SendConnectReply(int sock, int64_t memory_capacity) {
}
Status ReadConnectReply(uint8_t *data, int64_t *memory_capacity) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaConnectReply>(data);
*memory_capacity = message->memory_capacity();
return Status::OK();
@@ -322,7 +322,7 @@ Status SendEvictRequest(int sock, int64_t num_bytes) {
}
Status ReadEvictRequest(uint8_t *data, int64_t *num_bytes) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaEvictRequest>(data);
*num_bytes = message->num_bytes();
return Status::OK();
@@ -337,7 +337,7 @@ Status SendEvictReply(int sock, int64_t num_bytes) {
}
Status ReadEvictReply(uint8_t *data, int64_t &num_bytes) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaEvictReply>(data);
num_bytes = message->num_bytes();
return Status::OK();
@@ -360,7 +360,7 @@ Status SendGetRequest(int sock,
Status ReadGetRequest(uint8_t *data,
std::vector<ObjectID> &object_ids,
int64_t *timeout_ms) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaGetRequest>(data);
for (int64_t i = 0; i < message->object_ids()->size(); ++i) {
auto object_id = message->object_ids()->Get(i)->str();
@@ -396,7 +396,7 @@ Status ReadGetReply(uint8_t *data,
ObjectID object_ids[],
PlasmaObject plasma_objects[],
int64_t num_objects) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaGetReply>(data);
for (int64_t i = 0; i < num_objects; ++i) {
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
@@ -425,7 +425,7 @@ Status SendFetchRequest(int sock, ObjectID object_ids[], int64_t num_objects) {
}
Status ReadFetchRequest(uint8_t *data, std::vector<ObjectID> &object_ids) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaFetchRequest>(data);
for (int64_t i = 0; i < message->object_ids()->size(); ++i) {
object_ids.push_back(
@@ -462,7 +462,7 @@ Status ReadWaitRequest(uint8_t *data,
ObjectRequestMap &object_requests,
int64_t *timeout_ms,
int *num_ready_objects) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaWaitRequest>(data);
*num_ready_objects = message->num_ready_objects();
*timeout_ms = message->timeout();
@@ -502,7 +502,7 @@ Status SendWaitReply(int sock,
Status ReadWaitReply(uint8_t *data,
ObjectRequest object_requests[],
int *num_ready_objects) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaWaitReply>(data);
*num_ready_objects = message->num_ready_objects();
@@ -543,9 +543,9 @@ Status ReadDataRequest(uint8_t *data,
ObjectID *object_id,
char **address,
int *port) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaDataRequest>(data);
DCHECK(message->object_id()->size() == sizeof(ObjectID));
RAY_DCHECK(message->object_id()->size() == sizeof(ObjectID));
*object_id = ObjectID::from_binary(message->object_id()->str());
*address = strdup(message->address()->c_str());
*port = message->port();
@@ -568,7 +568,7 @@ Status ReadDataReply(uint8_t *data,
ObjectID *object_id,
int64_t *object_size,
int64_t *metadata_size) {
DCHECK(data);
RAY_DCHECK(data);
auto message = flatbuffers::GetRoot<PlasmaDataReply>(data);
*object_id = ObjectID::from_binary(message->object_id()->str());
*object_size = (int64_t) message->object_size();
+3 -3
View File
@@ -31,7 +31,7 @@ void wait_for_pollin(int fd) {
poll_list[0].fd = fd;
poll_list[0].events = POLLIN;
int retval = poll(poll_list, (unsigned long) 1, -1);
CHECK(retval > 0);
RAY_CHECK(retval > 0);
}
int test_done_handler(event_loop *loop, timer_id id, void *context) {
@@ -66,7 +66,7 @@ plasma_mock *init_plasma_mock(plasma_mock *remote_mock) {
std::string manager_socket_name = bind_ipc_sock_retry(
plasma_manager_socket_name_format, &mock->manager_local_fd);
CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0);
RAY_CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0);
mock->state = PlasmaManagerState_init(plasma_store_socket_name,
manager_socket_name.c_str(),
@@ -252,7 +252,7 @@ TEST object_notifications_test(void) {
int fd[2];
socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
int flags = fcntl(fd[1], F_GETFL, 0);
CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
RAY_CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
ObjectID object_id = ObjectID::from_random();
ObjectInfoT info;
+5
View File
@@ -78,4 +78,9 @@ bool UniqueID::operator==(const UniqueID &rhs) const {
return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0;
}
// Stream insertion for UniqueID: writes the value returned by id.hex() to
// the stream and hands the stream back so insertions can be chained.
std::ostream &operator<<(std::ostream &os, const UniqueID &id) {
  return os << id.hex();
}
} // namespace ray
+2
View File
@@ -44,6 +44,8 @@ struct UniqueIDHasher {
}
};
// Stream insertion operator for UniqueID, so IDs can be written directly to a
// std::ostream. Returns the stream that was passed in (by reference).
std::ostream &operator<<(std::ostream &os, const UniqueID &id);
typedef UniqueID TaskID;
typedef UniqueID JobID;
typedef UniqueID ObjectID;
+14
View File
@@ -34,12 +34,26 @@ namespace ray {
<< __FILE__ << __LINE__ \
<< " Check failed: " #condition " "
// RAY_DCHECK(condition): debug-only variant of RAY_CHECK.
//
// - Without NDEBUG it is exactly RAY_CHECK(condition).
// - With NDEBUG it expands to RAY_IGNORE_EXPR(condition) followed by
//   `while (false) ::ray::internal::NullLog()`. Anything streamed after the
//   macro (`RAY_DCHECK(x) << "msg"`) therefore becomes the body of a loop that
//   never runs, so the message operands are not evaluated at runtime.
//
// NOTE(review): RAY_IGNORE_EXPR is defined outside this excerpt. Whether the
// condition itself is still evaluated under NDEBUG, and whether the macro
// supplies the statement terminator needed before `while`, depends on that
// definition -- confirm there.
#ifdef NDEBUG
#define RAY_DCHECK(condition) \
RAY_IGNORE_EXPR(condition) \
while (false) \
::ray::internal::NullLog()
#else
#define RAY_DCHECK(condition) RAY_CHECK(condition)
#endif // NDEBUG
namespace internal {
// Stand-in log object whose operator<< accepts a value of any type, passes it
// to RAY_IGNORE_EXPR, and returns the same NullLog so that a chain of
// insertions (`NullLog() << a << b`) compiles.
class NullLog {
 public:
  // Accepts `value` by const reference and hands it to RAY_IGNORE_EXPR
  // (defined elsewhere; presumably it only marks the argument as used --
  // confirm). Returns *this for chaining.
  template <typename Value>
  NullLog &operator<<(const Value &value) {
    RAY_IGNORE_EXPR(value);
    return *this;
  }
};