mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 11:37:51 +08:00
Update logging and check macros. (#1627)
* Update logging and check macros. * Fix linting. * Fix RAY_DCHECK and unused variable. * Fix linting
This commit is contained in:
committed by
Philipp Moritz
parent
e7df293946
commit
0fcceef772
+1
-63
@@ -25,6 +25,7 @@ extern "C" {
|
||||
#include "arrow/util/macros.h"
|
||||
#include "plasma/common.h"
|
||||
#include "ray/id.h"
|
||||
#include "ray/util/logging.h"
|
||||
|
||||
#include "state/ray_config.h"
|
||||
|
||||
@@ -44,69 +45,6 @@ extern "C" {
|
||||
#define RAY_COMMON_LOG_LEVEL RAY_COMMON_INFO
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Macros to enable each level of Ray logging statements depending on the
|
||||
* current logging level. */
|
||||
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_DEBUG)
|
||||
#define LOG_DEBUG(M, ...)
|
||||
#else
|
||||
#define LOG_DEBUG(M, ...) \
|
||||
fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_INFO)
|
||||
#define LOG_INFO(M, ...)
|
||||
#else
|
||||
#define LOG_INFO(M, ...) \
|
||||
fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_WARNING)
|
||||
#define LOG_WARN(M, ...)
|
||||
#else
|
||||
#define LOG_WARN(M, ...) \
|
||||
fprintf(stderr, "[WARN] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_ERROR)
|
||||
#define LOG_ERROR(M, ...)
|
||||
#else
|
||||
#define LOG_ERROR(M, ...) \
|
||||
fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
|
||||
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_FATAL)
|
||||
#define LOG_FATAL(M, ...)
|
||||
#elif defined(_EXECINFO_H) || !defined(_WIN32)
|
||||
#define LOG_FATAL(M, ...) \
|
||||
do { \
|
||||
fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
|
||||
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \
|
||||
void *buffer[255]; \
|
||||
const int calls = backtrace(buffer, sizeof(buffer) / sizeof(void *)); \
|
||||
backtrace_symbols_fd(buffer, calls, 1); \
|
||||
abort(); \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_FATAL(M, ...) \
|
||||
do { \
|
||||
fprintf(stderr, "[FATAL] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
|
||||
errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__); \
|
||||
exit(-1); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/** Assertion definitions, with optional logging. */
|
||||
#define CHECKM(COND, M, ...) \
|
||||
if (!(COND)) { \
|
||||
LOG_FATAL("Check failure: %s \n" M, #COND, ##__VA_ARGS__); \
|
||||
}
|
||||
|
||||
#define CHECK(COND) CHECKM(COND, "")
|
||||
|
||||
#define RAY_DCHECK(COND) CHECK(COND)
|
||||
|
||||
/* These are exit codes for common errors that can occur in Ray components. */
|
||||
#define EXIT_COULD_NOT_BIND_PORT -2
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ flatbuffers::Offset<flatbuffers::String> to_flatbuf(
|
||||
|
||||
ray::ObjectID from_flatbuf(const flatbuffers::String &string) {
|
||||
ray::ObjectID object_id;
|
||||
CHECK(string.size() == sizeof(ray::ObjectID));
|
||||
RAY_CHECK(string.size() == sizeof(ray::ObjectID));
|
||||
memcpy(object_id.mutable_data(), string.data(), sizeof(ray::ObjectID));
|
||||
return object_id;
|
||||
}
|
||||
|
||||
+33
-30
@@ -24,7 +24,7 @@ int bind_inet_sock(const int port, bool shall_listen) {
|
||||
struct sockaddr_in name;
|
||||
int socket_fd = socket(PF_INET, SOCK_STREAM, 0);
|
||||
if (socket_fd < 0) {
|
||||
LOG_ERROR("socket() failed for port %d.", port);
|
||||
RAY_LOG(ERROR) << "socket() failed for port " << port;
|
||||
return -1;
|
||||
}
|
||||
name.sin_family = AF_INET;
|
||||
@@ -33,23 +33,23 @@ int bind_inet_sock(const int port, bool shall_listen) {
|
||||
int on = 1;
|
||||
/* TODO(pcm): http://stackoverflow.com/q/1150635 */
|
||||
if (ioctl(socket_fd, FIONBIO, (char *) &on) < 0) {
|
||||
LOG_ERROR("ioctl failed");
|
||||
RAY_LOG(ERROR) << "ioctl failed";
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
int *const pon = (int *const) & on;
|
||||
if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, pon, sizeof(on)) < 0) {
|
||||
LOG_ERROR("setsockopt failed for port %d", port);
|
||||
RAY_LOG(ERROR) << "setsockopt failed for port " << port;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
if (bind(socket_fd, (struct sockaddr *) &name, sizeof(name)) < 0) {
|
||||
LOG_ERROR("Bind failed for port %d", port);
|
||||
RAY_LOG(ERROR) << "Bind failed for port " << port;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
if (shall_listen && listen(socket_fd, 128) == -1) {
|
||||
LOG_ERROR("Could not listen to socket %d", port);
|
||||
RAY_LOG(ERROR) << "Could not listen to socket " << port;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
@@ -60,14 +60,14 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
|
||||
struct sockaddr_un socket_address;
|
||||
int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (socket_fd < 0) {
|
||||
LOG_ERROR("socket() failed for pathname %s.", socket_pathname);
|
||||
RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname;
|
||||
return -1;
|
||||
}
|
||||
/* Tell the system to allow the port to be reused. */
|
||||
int on = 1;
|
||||
if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on,
|
||||
sizeof(on)) < 0) {
|
||||
LOG_ERROR("setsockopt failed for pathname %s", socket_pathname);
|
||||
RAY_LOG(ERROR) << "setsockopt failed for pathname " << socket_pathname;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
@@ -76,7 +76,7 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
|
||||
memset(&socket_address, 0, sizeof(socket_address));
|
||||
socket_address.sun_family = AF_UNIX;
|
||||
if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
|
||||
LOG_ERROR("Socket pathname is too long.");
|
||||
RAY_LOG(ERROR) << "Socket pathname is too long.";
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
@@ -85,12 +85,12 @@ int bind_ipc_sock(const char *socket_pathname, bool shall_listen) {
|
||||
|
||||
if (bind(socket_fd, (struct sockaddr *) &socket_address,
|
||||
sizeof(socket_address)) != 0) {
|
||||
LOG_ERROR("Bind failed for pathname %s.", socket_pathname);
|
||||
RAY_LOG(ERROR) << "Bind failed for pathname " << socket_pathname;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
if (shall_listen && listen(socket_fd, 128) == -1) {
|
||||
LOG_ERROR("Could not listen to socket %s", socket_pathname);
|
||||
RAY_LOG(ERROR) << "Could not listen to socket " << socket_pathname;
|
||||
close(socket_fd);
|
||||
return -1;
|
||||
}
|
||||
@@ -108,7 +108,7 @@ int connect_ipc_sock_retry(const char *socket_pathname,
|
||||
timeout = RayConfig::instance().connect_timeout_milliseconds();
|
||||
}
|
||||
|
||||
CHECK(socket_pathname);
|
||||
RAY_CHECK(socket_pathname);
|
||||
int fd = -1;
|
||||
for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) {
|
||||
fd = connect_ipc_sock(socket_pathname);
|
||||
@@ -116,15 +116,15 @@ int connect_ipc_sock_retry(const char *socket_pathname,
|
||||
break;
|
||||
}
|
||||
if (num_attempts == 0) {
|
||||
LOG_ERROR("Connection to socket failed for pathname %s.",
|
||||
socket_pathname);
|
||||
RAY_LOG(ERROR) << "Connection to socket failed for pathname "
|
||||
<< socket_pathname;
|
||||
}
|
||||
/* Sleep for timeout milliseconds. */
|
||||
usleep(timeout * 1000);
|
||||
}
|
||||
/* If we could not connect to the socket, exit. */
|
||||
if (fd == -1) {
|
||||
LOG_FATAL("Could not connect to socket %s", socket_pathname);
|
||||
RAY_LOG(FATAL) << "Could not connect to socket " << socket_pathname;
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
@@ -135,14 +135,14 @@ int connect_ipc_sock(const char *socket_pathname) {
|
||||
|
||||
socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (socket_fd < 0) {
|
||||
LOG_ERROR("socket() failed for pathname %s.", socket_pathname);
|
||||
RAY_LOG(ERROR) << "socket() failed for pathname " << socket_pathname;
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset(&socket_address, 0, sizeof(socket_address));
|
||||
socket_address.sun_family = AF_UNIX;
|
||||
if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
|
||||
LOG_ERROR("Socket pathname is too long.");
|
||||
RAY_LOG(ERROR) << "Socket pathname is too long.";
|
||||
return -1;
|
||||
}
|
||||
strncpy(socket_address.sun_path, socket_pathname,
|
||||
@@ -169,7 +169,7 @@ int connect_inet_sock_retry(const char *ip_addr,
|
||||
timeout = RayConfig::instance().connect_timeout_milliseconds();
|
||||
}
|
||||
|
||||
CHECK(ip_addr);
|
||||
RAY_CHECK(ip_addr);
|
||||
int fd = -1;
|
||||
for (int num_attempts = 0; num_attempts < num_retries; ++num_attempts) {
|
||||
fd = connect_inet_sock(ip_addr, port);
|
||||
@@ -177,15 +177,15 @@ int connect_inet_sock_retry(const char *ip_addr,
|
||||
break;
|
||||
}
|
||||
if (num_attempts == 0) {
|
||||
LOG_ERROR("Connection to socket failed for address %s:%d.", ip_addr,
|
||||
port);
|
||||
RAY_LOG(ERROR) << "Connection to socket failed for address " << ip_addr
|
||||
<< ":" << port;
|
||||
}
|
||||
/* Sleep for timeout milliseconds. */
|
||||
usleep(timeout * 1000);
|
||||
}
|
||||
/* If we could not connect to the socket, exit. */
|
||||
if (fd == -1) {
|
||||
LOG_FATAL("Could not connect to address %s:%d", ip_addr, port);
|
||||
RAY_LOG(FATAL) << "Could not connect to address " << ip_addr << ":" << port;
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
@@ -193,13 +193,14 @@ int connect_inet_sock_retry(const char *ip_addr,
|
||||
int connect_inet_sock(const char *ip_addr, int port) {
|
||||
int fd = socket(PF_INET, SOCK_STREAM, 0);
|
||||
if (fd < 0) {
|
||||
LOG_ERROR("socket() failed for address %s:%d.", ip_addr, port);
|
||||
RAY_LOG(ERROR) << "socket() failed for address " << ip_addr << ":" << port;
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct hostent *manager = gethostbyname(ip_addr); /* TODO(pcm): cache this */
|
||||
if (!manager) {
|
||||
LOG_ERROR("Failed to get hostname from address %s:%d.", ip_addr, port);
|
||||
RAY_LOG(ERROR) << "Failed to get hostname from address " << ip_addr << ":"
|
||||
<< port;
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
@@ -219,7 +220,7 @@ int connect_inet_sock(const char *ip_addr, int port) {
|
||||
int accept_client(int socket_fd) {
|
||||
int client_fd = accept(socket_fd, NULL, NULL);
|
||||
if (client_fd < 0) {
|
||||
LOG_ERROR("Error reading from socket.");
|
||||
RAY_LOG(ERROR) << "Error reading from socket.";
|
||||
return -1;
|
||||
}
|
||||
return client_fd;
|
||||
@@ -242,7 +243,7 @@ int write_bytes(int fd, uint8_t *cursor, size_t length) {
|
||||
/* Encountered early EOF. */
|
||||
return -1;
|
||||
}
|
||||
CHECK(nbytes > 0);
|
||||
RAY_CHECK(nbytes > 0);
|
||||
bytesleft -= nbytes;
|
||||
offset += nbytes;
|
||||
}
|
||||
@@ -288,7 +289,7 @@ int read_bytes(int fd, uint8_t *cursor, size_t length) {
|
||||
/* Encountered early EOF. */
|
||||
return -1;
|
||||
}
|
||||
CHECK(nbytes > 0);
|
||||
RAY_CHECK(nbytes > 0);
|
||||
bytesleft -= nbytes;
|
||||
offset += nbytes;
|
||||
}
|
||||
@@ -302,7 +303,7 @@ void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
|
||||
if (closed) {
|
||||
goto disconnected;
|
||||
}
|
||||
CHECK(version == RayConfig::instance().ray_protocol_version());
|
||||
RAY_CHECK(version == RayConfig::instance().ray_protocol_version());
|
||||
closed = read_bytes(fd, (uint8_t *) type, sizeof(*type));
|
||||
if (closed) {
|
||||
goto disconnected;
|
||||
@@ -332,7 +333,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) {
|
||||
int error = read_bytes(sock, (uint8_t *) &size, sizeof(int64_t));
|
||||
if (error < 0) {
|
||||
/* The other side has closed the socket. */
|
||||
LOG_DEBUG("Socket has been closed, or some other error has occurred.");
|
||||
RAY_LOG(DEBUG) << "Socket has been closed, or some other error has "
|
||||
<< "occurred.";
|
||||
if (loop != NULL) {
|
||||
event_loop_remove_file(loop, sock);
|
||||
}
|
||||
@@ -343,7 +345,8 @@ uint8_t *read_message_async(event_loop *loop, int sock) {
|
||||
error = read_bytes(sock, message, size);
|
||||
if (error < 0) {
|
||||
/* The other side has closed the socket. */
|
||||
LOG_DEBUG("Socket has been closed, or some other error has occurred.");
|
||||
RAY_LOG(DEBUG) << "Socket has been closed, or some other error has "
|
||||
<< "occurred.";
|
||||
if (loop != NULL) {
|
||||
event_loop_remove_file(loop, sock);
|
||||
}
|
||||
@@ -359,7 +362,7 @@ int64_t read_vector(int fd, int64_t *type, std::vector<uint8_t> &buffer) {
|
||||
if (closed) {
|
||||
goto disconnected;
|
||||
}
|
||||
CHECK(version == RayConfig::instance().ray_protocol_version());
|
||||
RAY_CHECK(version == RayConfig::instance().ray_protocol_version());
|
||||
int64_t length;
|
||||
closed = read_bytes(fd, (uint8_t *) type, sizeof(*type));
|
||||
if (closed) {
|
||||
@@ -393,6 +396,6 @@ char *read_log_message(int fd) {
|
||||
int64_t type;
|
||||
int64_t length;
|
||||
read_message(fd, &type, &length, &bytes);
|
||||
CHECK(type == LOG_MESSAGE);
|
||||
RAY_CHECK(type == LOG_MESSAGE);
|
||||
return (char *) bytes;
|
||||
}
|
||||
|
||||
@@ -28,14 +28,14 @@ void init_pickle_module(void) {
|
||||
#else
|
||||
pickle_module = PyImport_ImportModuleNoBlock("cPickle");
|
||||
#endif
|
||||
CHECK(pickle_module != NULL);
|
||||
CHECK(PyObject_HasAttrString(pickle_module, "loads"));
|
||||
CHECK(PyObject_HasAttrString(pickle_module, "dumps"));
|
||||
CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL"));
|
||||
RAY_CHECK(pickle_module != NULL);
|
||||
RAY_CHECK(PyObject_HasAttrString(pickle_module, "loads"));
|
||||
RAY_CHECK(PyObject_HasAttrString(pickle_module, "dumps"));
|
||||
RAY_CHECK(PyObject_HasAttrString(pickle_module, "HIGHEST_PROTOCOL"));
|
||||
pickle_loads = PyUnicode_FromString("loads");
|
||||
pickle_dumps = PyUnicode_FromString("dumps");
|
||||
pickle_protocol = PyObject_GetAttrString(pickle_module, "HIGHEST_PROTOCOL");
|
||||
CHECK(pickle_protocol != NULL);
|
||||
RAY_CHECK(pickle_protocol != NULL);
|
||||
}
|
||||
|
||||
TaskBuilder *g_task_builder = NULL;
|
||||
@@ -449,8 +449,8 @@ static PyObject *PyTask_arguments(PyObject *self) {
|
||||
assert(count == 1);
|
||||
PyList_SetItem(arg_list, i, PyObjectID_make(TaskSpec_arg_id(task, i, 0)));
|
||||
} else {
|
||||
CHECK(pickle_module != NULL);
|
||||
CHECK(pickle_loads != NULL);
|
||||
RAY_CHECK(pickle_module != NULL);
|
||||
RAY_CHECK(pickle_loads != NULL);
|
||||
PyObject *str =
|
||||
PyBytes_FromStringAndSize((char *) TaskSpec_arg_val(task, i),
|
||||
(Py_ssize_t) TaskSpec_arg_length(task, i));
|
||||
|
||||
@@ -441,7 +441,8 @@ int TableAdd_RedisCommand(RedisModuleCtx *ctx,
|
||||
|
||||
/* See how many clients received this publish. */
|
||||
long long num_clients = RedisModule_CallReplyInteger(reply);
|
||||
CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients);
|
||||
RAY_CHECK(num_clients <= 1) << "Published to " << num_clients
|
||||
<< " clients.";
|
||||
|
||||
RedisModule_FreeString(ctx, publish_message);
|
||||
RedisModule_FreeString(ctx, publish_topic);
|
||||
@@ -473,7 +474,7 @@ int TableLookup_RedisCommand(RedisModuleCtx *ctx,
|
||||
}
|
||||
|
||||
bool is_nil(const std::string &data) {
|
||||
CHECK(data.size() == kUniqueIDSize);
|
||||
RAY_CHECK(data.size() == kUniqueIDSize);
|
||||
const uint8_t *d = reinterpret_cast<const uint8_t *>(data.data());
|
||||
for (int i = 0; i < kUniqueIDSize; ++i) {
|
||||
if (d[i] != 255) {
|
||||
@@ -518,9 +519,9 @@ int TableTestAndUpdate_RedisCommand(RedisModuleCtx *ctx,
|
||||
}
|
||||
|
||||
if (do_update) {
|
||||
CHECK(data->mutate_scheduling_state(update->update_state()));
|
||||
RAY_CHECK(data->mutate_scheduling_state(update->update_state()));
|
||||
}
|
||||
CHECK(data->mutate_updated(do_update));
|
||||
RAY_CHECK(data->mutate_updated(do_update));
|
||||
|
||||
int result = RedisModule_ReplyWithStringBuffer(ctx, value_buf, value_len);
|
||||
|
||||
@@ -978,8 +979,8 @@ int ResultTableLookup_RedisCommand(RedisModuleCtx *ctx,
|
||||
data_size_value = -1;
|
||||
} else {
|
||||
RedisModule_StringToLongLong(data_size, &data_size_value);
|
||||
CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) ==
|
||||
REDISMODULE_OK);
|
||||
RAY_CHECK(RedisModule_StringToLongLong(data_size, &data_size_value) ==
|
||||
REDISMODULE_OK);
|
||||
}
|
||||
|
||||
flatbuffers::Offset<flatbuffers::String> hash_str;
|
||||
@@ -1091,7 +1092,8 @@ int TaskTableWrite(RedisModuleCtx *ctx,
|
||||
|
||||
/* See how many clients received this publish. */
|
||||
long long num_clients = RedisModule_CallReplyInteger(reply);
|
||||
CHECKM(num_clients <= 1, "Published to %lld clients.", num_clients);
|
||||
RAY_CHECK(num_clients <= 1) << "Published to " << num_clients
|
||||
<< " clients.";
|
||||
|
||||
RedisModule_FreeString(ctx, publish_message);
|
||||
RedisModule_FreeString(ctx, publish_topic);
|
||||
|
||||
@@ -44,16 +44,15 @@ const std::vector<std::string> db_client_table_get_ip_addresses(
|
||||
|
||||
for (auto const &manager_id : manager_ids) {
|
||||
DBClient client = redis_cache_get_db_client(db_handle, manager_id);
|
||||
CHECK(!client.manager_address.empty());
|
||||
RAY_CHECK(!client.manager_address.empty());
|
||||
manager_vector.push_back(client.manager_address);
|
||||
}
|
||||
|
||||
int64_t end_time = current_time_ms();
|
||||
if (end_time - start_time > RayConfig::instance().max_time_for_loop()) {
|
||||
LOG_WARN(
|
||||
"calling redis_get_cached_db_client in a loop in with %zu manager IDs "
|
||||
"took %" PRId64 " milliseconds.",
|
||||
manager_ids.size(), end_time - start_time);
|
||||
RAY_LOG(WARNING) << "calling redis_get_cached_db_client in a loop in with "
|
||||
<< manager_ids.size() << " manager IDs took "
|
||||
<< end_time - start_time << " milliseconds.";
|
||||
}
|
||||
|
||||
return manager_vector;
|
||||
@@ -71,7 +70,7 @@ void db_client_table_cache_init(DBHandle *db_handle) {
|
||||
}
|
||||
|
||||
DBClient db_client_table_cache_get(DBHandle *db_handle, DBClientID client_id) {
|
||||
CHECK(!client_id.is_nil());
|
||||
RAY_CHECK(!client_id.is_nil());
|
||||
return redis_cache_get_db_client(db_handle, client_id);
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ void push_error(DBHandle *db_handle,
|
||||
int error_index,
|
||||
size_t data_length,
|
||||
const unsigned char *data) {
|
||||
CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX);
|
||||
RAY_CHECK(error_index >= 0 && error_index < MAX_ERROR_INDEX);
|
||||
/* Allocate a struct to hold the error information. */
|
||||
ErrorInfo *info = (ErrorInfo *) malloc(sizeof(ErrorInfo) + data_length);
|
||||
info->driver_id = driver_id;
|
||||
@@ -22,7 +22,7 @@ void push_error(DBHandle *db_handle,
|
||||
info->data_length = data_length;
|
||||
memcpy(info->data, data, data_length);
|
||||
/* Generate a random key to identify this error message. */
|
||||
CHECK(sizeof(info->error_key) >= sizeof(UniqueID));
|
||||
RAY_CHECK(sizeof(info->error_key) >= sizeof(UniqueID));
|
||||
UniqueID error_key = UniqueID::from_random();
|
||||
memcpy(info->error_key, error_key.data(), sizeof(info->error_key));
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ void object_table_lookup(DBHandle *db_handle,
|
||||
RetryInfo *retry,
|
||||
object_table_lookup_done_callback done_callback,
|
||||
void *user_context) {
|
||||
CHECK(db_handle != NULL);
|
||||
RAY_CHECK(db_handle != NULL);
|
||||
init_table_callback(db_handle, object_id, __func__,
|
||||
new CommonCallbackData(NULL), retry,
|
||||
(table_done_callback) done_callback,
|
||||
@@ -20,7 +20,7 @@ void object_table_add(DBHandle *db_handle,
|
||||
RetryInfo *retry,
|
||||
object_table_done_callback done_callback,
|
||||
void *user_context) {
|
||||
CHECK(db_handle != NULL);
|
||||
RAY_CHECK(db_handle != NULL);
|
||||
|
||||
ObjectTableAddData *info =
|
||||
(ObjectTableAddData *) malloc(sizeof(ObjectTableAddData));
|
||||
@@ -38,7 +38,7 @@ void object_table_remove(DBHandle *db_handle,
|
||||
RetryInfo *retry,
|
||||
object_table_done_callback done_callback,
|
||||
void *user_context) {
|
||||
CHECK(db_handle != NULL);
|
||||
RAY_CHECK(db_handle != NULL);
|
||||
/* Copy the client ID, if one was provided. */
|
||||
DBClientID *client_id_copy = NULL;
|
||||
if (client_id != NULL) {
|
||||
@@ -59,7 +59,7 @@ void object_table_subscribe_to_notifications(
|
||||
RetryInfo *retry,
|
||||
object_table_lookup_done_callback done_callback,
|
||||
void *user_context) {
|
||||
CHECK(db_handle != NULL);
|
||||
RAY_CHECK(db_handle != NULL);
|
||||
ObjectTableSubscribeData *sub_data =
|
||||
(ObjectTableSubscribeData *) malloc(sizeof(ObjectTableSubscribeData));
|
||||
sub_data->object_available_callback = object_available_callback;
|
||||
@@ -76,8 +76,8 @@ void object_table_request_notifications(DBHandle *db_handle,
|
||||
int num_object_ids,
|
||||
ObjectID object_ids[],
|
||||
RetryInfo *retry) {
|
||||
CHECK(db_handle != NULL);
|
||||
CHECK(num_object_ids > 0);
|
||||
RAY_CHECK(db_handle != NULL);
|
||||
RAY_CHECK(num_object_ids > 0);
|
||||
ObjectTableRequestNotificationsData *data =
|
||||
(ObjectTableRequestNotificationsData *) malloc(
|
||||
sizeof(ObjectTableRequestNotificationsData) +
|
||||
|
||||
+140
-135
@@ -35,17 +35,17 @@ extern "C" {
|
||||
extern int usleep(useconds_t usec);
|
||||
#endif
|
||||
|
||||
#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \
|
||||
do { \
|
||||
CONTEXT_TYPE *_context = (context); \
|
||||
if (!_context) { \
|
||||
LOG_FATAL("could not allocate redis context"); \
|
||||
} \
|
||||
if (_context->err) { \
|
||||
LOG_ERROR(M, ##__VA_ARGS__); \
|
||||
LOG_REDIS_ERROR(_context, ""); \
|
||||
exit(-1); \
|
||||
} \
|
||||
#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \
|
||||
do { \
|
||||
CONTEXT_TYPE *_context = (context); \
|
||||
if (!_context) { \
|
||||
RAY_LOG(FATAL) << "could not allocate redis context"; \
|
||||
} \
|
||||
if (_context->err) { \
|
||||
RAY_LOG(ERROR) << M; \
|
||||
LOG_REDIS_ERROR(_context, ""); \
|
||||
exit(-1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@@ -110,14 +110,14 @@ void get_redis_shards(redisContext *context,
|
||||
num_attempts++;
|
||||
continue;
|
||||
}
|
||||
CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(),
|
||||
"No entry found for NumRedisShards");
|
||||
CHECKM(reply->type == REDIS_REPLY_STRING,
|
||||
"Expected string, found Redis type %d for NumRedisShards",
|
||||
reply->type);
|
||||
RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries())
|
||||
<< "No entry found for NumRedisShards";
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_STRING)
|
||||
<< "Expected string, found Redis type " << reply->type
|
||||
<< " for NumRedisShards";
|
||||
int num_redis_shards = atoi(reply->str);
|
||||
CHECKM(num_redis_shards >= 1, "Expected at least one Redis shard, found %d.",
|
||||
num_redis_shards);
|
||||
RAY_CHECK(num_redis_shards >= 1) << "Expected at least one Redis shard, "
|
||||
<< "found " << num_redis_shards;
|
||||
freeReplyObject(reply);
|
||||
|
||||
/* Get the addresses of all of the Redis shards. */
|
||||
@@ -137,18 +137,18 @@ void get_redis_shards(redisContext *context,
|
||||
num_attempts++;
|
||||
continue;
|
||||
}
|
||||
CHECKM(num_attempts < RayConfig::instance().redis_db_connect_retries(),
|
||||
"Expected %d Redis shard addresses, found %d", num_redis_shards,
|
||||
(int) reply->elements);
|
||||
RAY_CHECK(num_attempts < RayConfig::instance().redis_db_connect_retries())
|
||||
<< "Expected " << num_redis_shards << " Redis shard addresses, found "
|
||||
<< reply->elements;
|
||||
|
||||
/* Parse the Redis shard addresses. */
|
||||
char db_shard_address[16];
|
||||
int db_shard_port;
|
||||
for (size_t i = 0; i < reply->elements; ++i) {
|
||||
/* Parse the shard addresses and ports. */
|
||||
CHECK(reply->element[i]->type == REDIS_REPLY_STRING);
|
||||
CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address,
|
||||
&db_shard_port) == 0);
|
||||
RAY_CHECK(reply->element[i]->type == REDIS_REPLY_STRING);
|
||||
RAY_CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address,
|
||||
&db_shard_port) == 0);
|
||||
db_shards_addresses.push_back(std::string(db_shard_address));
|
||||
db_shards_ports.push_back(db_shard_port);
|
||||
}
|
||||
@@ -174,7 +174,7 @@ void db_connect_shard(const std::string &db_address,
|
||||
RayConfig::instance().redis_db_connect_retries()) {
|
||||
break;
|
||||
}
|
||||
LOG_WARN("Failed to connect to Redis, retrying.");
|
||||
RAY_LOG(WARNING) << "Failed to connect to Redis, retrying.";
|
||||
/* Sleep for a little. */
|
||||
usleep(RayConfig::instance().redis_db_connect_wait_milliseconds() * 1000);
|
||||
sync_context = redisConnect(db_address.c_str(), db_port);
|
||||
@@ -190,13 +190,13 @@ void db_connect_shard(const std::string &db_address,
|
||||
* processes by hand), it is easier to do it multiple times. */
|
||||
reply = (redisReply *) redisCommand(sync_context,
|
||||
"CONFIG SET notify-keyspace-events Kl");
|
||||
CHECKM(reply != NULL, "db_connect failed on CONFIG SET");
|
||||
RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET";
|
||||
freeReplyObject(reply);
|
||||
/* Also configure Redis to not run in protected mode, so clients on other
|
||||
* hosts can connect to it. */
|
||||
reply =
|
||||
(redisReply *) redisCommand(sync_context, "CONFIG SET protected-mode no");
|
||||
CHECKM(reply != NULL, "db_connect failed on CONFIG SET");
|
||||
RAY_CHECK(reply != NULL) << "db_connect failed on CONFIG SET";
|
||||
freeReplyObject(reply);
|
||||
|
||||
/* Construct the argument arrays for RAY.CONNECT. */
|
||||
@@ -224,9 +224,9 @@ void db_connect_shard(const std::string &db_address,
|
||||
/* Register this client with Redis. RAY.CONNECT is a custom Redis command that
|
||||
* we've defined. */
|
||||
reply = (redisReply *) redisCommandArgv(sync_context, argc, argv, argvlen);
|
||||
CHECKM(reply != NULL, "db_connect failed on RAY.CONNECT");
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply != NULL) << "db_connect failed on RAY.CONNECT";
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
freeReplyObject(reply);
|
||||
free(argv);
|
||||
free(argvlen);
|
||||
@@ -261,7 +261,7 @@ DBHandle *db_connect(const std::string &db_primary_address,
|
||||
/* Check that the number of args is even. These args will be passed to the
|
||||
* RAY.CONNECT Redis command, which takes arguments in pairs. */
|
||||
if (args.size() % 2 != 0) {
|
||||
LOG_FATAL("The number of extra args must be divisible by two.");
|
||||
RAY_LOG(FATAL) << "The number of extra args must be divisible by two.";
|
||||
}
|
||||
|
||||
/* Create a client ID for this client. */
|
||||
@@ -288,7 +288,7 @@ DBHandle *db_connect(const std::string &db_primary_address,
|
||||
std::vector<std::string> db_shards_addresses;
|
||||
std::vector<int> db_shards_ports;
|
||||
get_redis_shards(db->sync_context, db_shards_addresses, db_shards_ports);
|
||||
CHECKM(db_shards_addresses.size() > 0, "No Redis shards found");
|
||||
RAY_CHECK(db_shards_addresses.size() > 0) << "No Redis shards found";
|
||||
/* Connect to the shards. */
|
||||
for (size_t i = 0; i < db_shards_addresses.size(); ++i) {
|
||||
db_connect_shard(db_shards_addresses[i], db_shards_ports[i], client,
|
||||
@@ -309,7 +309,7 @@ void DBHandle_free(DBHandle *db) {
|
||||
redisAsyncFree(db->subscribe_context);
|
||||
|
||||
/* Clean up the Redis shards. */
|
||||
CHECK(db->contexts.size() == db->subscribe_contexts.size());
|
||||
RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size());
|
||||
for (size_t i = 0; i < db->contexts.size(); ++i) {
|
||||
redisAsyncFree(db->contexts[i]);
|
||||
redisAsyncFree(db->subscribe_contexts[i]);
|
||||
@@ -326,8 +326,8 @@ void db_disconnect(DBHandle *db) {
|
||||
redisReply *reply =
|
||||
(redisReply *) redisCommand(db->sync_context, "RAY.DISCONNECT %b",
|
||||
db->client.data(), sizeof(db->client));
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
freeReplyObject(reply);
|
||||
|
||||
DBHandle_free(db);
|
||||
@@ -340,24 +340,24 @@ void db_attach(DBHandle *db, event_loop *loop, bool reattach) {
|
||||
/* If the database is reattached in the tests, redis normally gives
|
||||
* an error which we can safely ignore. */
|
||||
if (!reattach) {
|
||||
CHECKM(err == REDIS_OK, "failed to attach the event loop");
|
||||
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
|
||||
}
|
||||
err = redisAeAttach(loop, db->subscribe_context);
|
||||
if (!reattach) {
|
||||
CHECKM(err == REDIS_OK, "failed to attach the event loop");
|
||||
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
|
||||
}
|
||||
/* Attach other redis shards to the event loop. */
|
||||
CHECK(db->contexts.size() == db->subscribe_contexts.size());
|
||||
RAY_CHECK(db->contexts.size() == db->subscribe_contexts.size());
|
||||
for (size_t i = 0; i < db->contexts.size(); ++i) {
|
||||
int err = redisAeAttach(loop, db->contexts[i]);
|
||||
/* If the database is reattached in the tests, redis normally gives
|
||||
* an error which we can safely ignore. */
|
||||
if (!reattach) {
|
||||
CHECKM(err == REDIS_OK, "failed to attach the event loop");
|
||||
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
|
||||
}
|
||||
err = redisAeAttach(loop, db->subscribe_contexts[i]);
|
||||
if (!reattach) {
|
||||
CHECKM(err == REDIS_OK, "failed to attach the event loop");
|
||||
RAY_CHECK(err == REDIS_OK) << "failed to attach the event loop";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -377,13 +377,14 @@ void redis_object_table_add_callback(redisAsyncContext *c,
|
||||
if (!success) {
|
||||
/* If our object hash doesn't match the one recorded in the table, report
|
||||
* the error back to the user and exit immediately. */
|
||||
LOG_WARN(
|
||||
"Found objects with different value but same object ID, most likely "
|
||||
"because a nondeterministic task was executed twice, either for "
|
||||
"reconstruction or for speculation.");
|
||||
RAY_LOG(WARNING) << "Found objects with different value but same object "
|
||||
<< "ID, most likely because a nondeterministic task was "
|
||||
<< "executed twice, either for reconstruction or for "
|
||||
<< "speculation.";
|
||||
} else {
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
|
||||
<< reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
}
|
||||
/* Call the done callback if there is one. */
|
||||
if (callback_data->done_callback != NULL) {
|
||||
@@ -428,8 +429,8 @@ void redis_object_table_remove_callback(redisAsyncContext *c,
|
||||
* condition with an object_table_add. */
|
||||
return;
|
||||
}
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
/* Call the done callback if there is one. */
|
||||
if (callback_data->done_callback != NULL) {
|
||||
object_table_done_callback done_callback =
|
||||
@@ -464,7 +465,7 @@ void redis_object_table_remove(TableCallbackData *callback_data) {
|
||||
}
|
||||
|
||||
void redis_object_table_lookup(TableCallbackData *callback_data) {
|
||||
CHECK(callback_data);
|
||||
RAY_CHECK(callback_data);
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
|
||||
ObjectID obj_id = callback_data->id;
|
||||
@@ -486,9 +487,9 @@ void redis_result_table_add_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
/* Check that the command succeeded. */
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strncmp(reply->str, "OK", strlen("OK")) == 0, "reply->str is %s",
|
||||
reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strncmp(reply->str, "OK", strlen("OK")) == 0) << "reply->str is "
|
||||
<< reply->str;
|
||||
/* Call the done callback if there is one. */
|
||||
if (callback_data->done_callback) {
|
||||
result_table_done_callback done_callback =
|
||||
@@ -499,7 +500,7 @@ void redis_result_table_add_callback(redisAsyncContext *c,
|
||||
}
|
||||
|
||||
void redis_result_table_add(TableCallbackData *callback_data) {
|
||||
CHECK(callback_data);
|
||||
RAY_CHECK(callback_data);
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
ObjectID id = callback_data->id;
|
||||
ResultTableAddInfo *info = (ResultTableAddInfo *) callback_data->data->Get();
|
||||
@@ -522,10 +523,9 @@ void redis_result_table_add(TableCallbackData *callback_data) {
|
||||
* task is NULL. This is used by both redis_result_table_lookup_callback and
|
||||
* redis_task_table_get_task_callback. */
|
||||
Task *parse_and_construct_task_from_redis_reply(redisReply *reply) {
|
||||
Task *task;
|
||||
Task *task = NULL;
|
||||
if (reply->type == REDIS_REPLY_NIL) {
|
||||
/* There is no task in the reply, so return NULL. */
|
||||
task = NULL;
|
||||
} else if (reply->type == REDIS_REPLY_STRING) {
|
||||
/* The reply is a flatbuffer TaskReply object. Parse it and construct the
|
||||
* task. */
|
||||
@@ -540,7 +540,7 @@ Task *parse_and_construct_task_from_redis_reply(redisReply *reply) {
|
||||
from_flatbuf(*message->local_scheduler_id()),
|
||||
from_flatbuf(*execution_dependencies->execution_dependencies()));
|
||||
} else {
|
||||
LOG_FATAL("Unexpected reply type %d", reply->type);
|
||||
RAY_LOG(FATAL) << "Unexpected reply type " << reply->type;
|
||||
}
|
||||
/* Return the task. If it is not NULL, then it must be freed by the caller. */
|
||||
return task;
|
||||
@@ -551,9 +551,9 @@ void redis_result_table_lookup_callback(redisAsyncContext *c,
|
||||
void *privdata) {
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECKM(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING,
|
||||
"Unexpected reply type %d in redis_result_table_lookup_callback",
|
||||
reply->type);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_STRING)
|
||||
<< "Unexpected reply type " << reply->type << " in "
|
||||
<< "redis_result_table_lookup_callback";
|
||||
/* Parse the task from the reply. */
|
||||
TaskID result_id = TaskID::nil();
|
||||
bool is_put = false;
|
||||
@@ -575,7 +575,7 @@ void redis_result_table_lookup_callback(redisAsyncContext *c,
|
||||
}
|
||||
|
||||
void redis_result_table_lookup(TableCallbackData *callback_data) {
|
||||
CHECK(callback_data);
|
||||
RAY_CHECK(callback_data);
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
ObjectID id = callback_data->id;
|
||||
redisAsyncContext *context = get_redis_context(db, id);
|
||||
@@ -594,8 +594,8 @@ DBClient redis_db_client_table_get(DBHandle *db,
|
||||
redisReply *reply =
|
||||
(redisReply *) redisCommand(db->sync_context, "HGETALL %s%b",
|
||||
DB_CLIENT_PREFIX, client_id, client_id_len);
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements > 0);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements > 0);
|
||||
DBClient db_client;
|
||||
int num_fields = 0;
|
||||
/* Parse the fields into a DBClient. */
|
||||
@@ -620,7 +620,7 @@ DBClient redis_db_client_table_get(DBHandle *db,
|
||||
freeReplyObject(reply);
|
||||
/* The client ID, type, and whether it is deleted are all
|
||||
* mandatory fields. Auxiliary address is optional. */
|
||||
CHECK(num_fields >= 3);
|
||||
RAY_CHECK(num_fields >= 3);
|
||||
return db_client;
|
||||
}
|
||||
|
||||
@@ -651,8 +651,8 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
|
||||
void *privdata) {
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
LOG_DEBUG("Object table lookup callback");
|
||||
CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_LOG(DEBUG) << "Object table lookup callback";
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_NIL || reply->type == REDIS_REPLY_ARRAY);
|
||||
|
||||
object_table_lookup_done_callback done_callback =
|
||||
(object_table_lookup_done_callback) callback_data->done_callback;
|
||||
@@ -671,7 +671,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
|
||||
std::vector<DBClientID> manager_ids;
|
||||
|
||||
for (size_t j = 0; j < reply->elements; ++j) {
|
||||
CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
|
||||
RAY_CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
|
||||
DBClientID manager_id;
|
||||
memcpy(manager_id.mutable_data(), reply->element[j]->str,
|
||||
sizeof(manager_id));
|
||||
@@ -682,7 +682,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
|
||||
done_callback(obj_id, false, manager_ids, callback_data->user_context);
|
||||
}
|
||||
} else {
|
||||
LOG_FATAL("Unexpected reply type from object table lookup.");
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from object table lookup.";
|
||||
}
|
||||
|
||||
/* Clean up timer and callback. */
|
||||
@@ -708,11 +708,11 @@ void object_table_redis_subscribe_to_notifications_callback(
|
||||
* - reply->element[2]->str is the contents of the message.
|
||||
*/
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements == 3);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements == 3);
|
||||
redisReply *message_type = reply->element[0];
|
||||
LOG_DEBUG("Object table subscribe to notifications callback, message %s",
|
||||
message_type->str);
|
||||
RAY_LOG(DEBUG) << "Object table subscribe to notifications callback, message"
|
||||
<< message_type->str;
|
||||
|
||||
if (strcmp(message_type->str, "message") == 0) {
|
||||
/* We received an object notification. Parse the payload. */
|
||||
@@ -752,8 +752,8 @@ void object_table_redis_subscribe_to_notifications_callback(
|
||||
* destroy the callback data. */
|
||||
remove_timer_callback(db->loop, callback_data);
|
||||
} else {
|
||||
LOG_FATAL(
|
||||
"Unexpected reply type from object table subscribe to notifications.");
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from object table subscribe to "
|
||||
<< "notifications.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -770,8 +770,8 @@ void redis_object_table_subscribe_to_notifications(
|
||||
* as the channel name so this channel is specific to this client.
|
||||
* TODO(rkn):
|
||||
* The channel name should probably be the client ID with some prefix. */
|
||||
CHECKM(callback_data->data->Get() != NULL,
|
||||
"Object table subscribe data passed as NULL.");
|
||||
RAY_CHECK(callback_data->data->Get() != NULL)
|
||||
<< "Object table subscribe data passed as NULL.";
|
||||
if (((ObjectTableSubscribeData *) (callback_data->data->Get()))
|
||||
->subscribe_all) {
|
||||
/* Subscribe to the object broadcast channel. */
|
||||
@@ -802,9 +802,9 @@ void redis_object_table_request_notifications_callback(redisAsyncContext *c,
|
||||
|
||||
/* Do some minimal checking. */
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* Clean up the timer and callback. */
|
||||
destroy_timer_callback(db->loop, callback_data);
|
||||
}
|
||||
@@ -876,7 +876,7 @@ void redis_task_table_get_task_callback(redisAsyncContext *c,
|
||||
|
||||
void redis_task_table_get_task(TableCallbackData *callback_data) {
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
CHECK(callback_data->data->Get() == NULL);
|
||||
RAY_CHECK(callback_data->data->Get() == NULL);
|
||||
TaskID task_id = callback_data->id;
|
||||
|
||||
redisAsyncContext *context = get_redis_context(db, task_id);
|
||||
@@ -902,15 +902,16 @@ void redis_task_table_add_task_callback(redisAsyncContext *c,
|
||||
// db_client table before retrying the add.
|
||||
if (reply->type == REDIS_REPLY_ERROR &&
|
||||
strcmp(reply->str, "No subscribers received message.") == 0) {
|
||||
LOG_WARN("No subscribers received the task_table_add message.");
|
||||
RAY_LOG(WARNING) << "No subscribers received the task_table_add message.";
|
||||
if (callback_data->retry.fail_callback != NULL) {
|
||||
callback_data->retry.fail_callback(callback_data->id,
|
||||
callback_data->user_context,
|
||||
callback_data->data->Get());
|
||||
}
|
||||
} else {
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
|
||||
<< reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
/* Call the done callback if there is one. */
|
||||
if (callback_data->done_callback != NULL) {
|
||||
task_table_done_callback done_callback =
|
||||
@@ -926,7 +927,7 @@ void redis_task_table_add_task_callback(redisAsyncContext *c,
|
||||
void redis_task_table_add_task(TableCallbackData *callback_data) {
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
Task *task = (Task *) callback_data->data->Get();
|
||||
CHECKM(task != NULL, "NULL task passed to redis_task_table_add_task.");
|
||||
RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_add_task.";
|
||||
|
||||
TaskID task_id = Task_task_id(task);
|
||||
DBClientID local_scheduler_id = Task_local_scheduler(task);
|
||||
@@ -967,15 +968,17 @@ void redis_task_table_update_callback(redisAsyncContext *c,
|
||||
// alive in the db_client table.
|
||||
if (reply->type == REDIS_REPLY_ERROR &&
|
||||
strcmp(reply->str, "No subscribers received message.") == 0) {
|
||||
LOG_WARN("No subscribers received the task_table_update message.");
|
||||
RAY_LOG(WARNING) << "No subscribers received the task_table_update "
|
||||
<< "message.";
|
||||
if (callback_data->retry.fail_callback != NULL) {
|
||||
callback_data->retry.fail_callback(callback_data->id,
|
||||
callback_data->user_context,
|
||||
callback_data->data->Get());
|
||||
}
|
||||
} else {
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is "
|
||||
<< reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
|
||||
/* Call the done callback if there is one. */
|
||||
if (callback_data->done_callback != NULL) {
|
||||
@@ -992,7 +995,7 @@ void redis_task_table_update_callback(redisAsyncContext *c,
|
||||
void redis_task_table_update(TableCallbackData *callback_data) {
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
Task *task = (Task *) callback_data->data->Get();
|
||||
CHECKM(task != NULL, "NULL task passed to redis_task_table_update.");
|
||||
RAY_CHECK(task != NULL) << "NULL task passed to redis_task_table_update.";
|
||||
|
||||
TaskID task_id = Task_task_id(task);
|
||||
redisAsyncContext *context = get_redis_context(db, task_id);
|
||||
@@ -1030,7 +1033,7 @@ void redis_task_table_test_and_update_callback(redisAsyncContext *c,
|
||||
* delayed when added to the task table if they are submitted to a local
|
||||
* scheduler before it receives the notification that maps the actor to a
|
||||
* local scheduler. */
|
||||
LOG_ERROR("No task found during task_table_test_and_update");
|
||||
RAY_LOG(ERROR) << "No task found during task_table_test_and_update";
|
||||
return;
|
||||
}
|
||||
/* Determine whether the update happened. */
|
||||
@@ -1091,11 +1094,11 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
/* The number of elements is 3 for a reply to SUBSCRIBE, and 4 for a reply to
|
||||
* PSUBSCRIBE. */
|
||||
CHECKM(reply->elements == 3 || reply->elements == 4, "reply->elements is %zu",
|
||||
reply->elements);
|
||||
RAY_CHECK(reply->elements == 3 || reply->elements == 4)
|
||||
<< "reply->elements is " << reply->elements;
|
||||
/* The first element is the message type and the last entry is the payload.
|
||||
* The middle one or middle two elements describe the channel that was
|
||||
* published on. */
|
||||
@@ -1148,9 +1151,8 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c,
|
||||
* subscription callback needs this data. */
|
||||
remove_timer_callback(db->loop, callback_data);
|
||||
} else {
|
||||
LOG_FATAL(
|
||||
"Unexpected reply type from task table subscribe. Message type is %s.",
|
||||
message_type->str);
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from task table subscribe. "
|
||||
<< "Message type is " << message_type->str;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1200,8 +1202,8 @@ void redis_db_client_table_remove_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
|
||||
/* Call the done callback if there is one. */
|
||||
db_client_table_done_callback done_callback =
|
||||
@@ -1235,7 +1237,7 @@ void redis_db_client_table_scan(DBHandle *db,
|
||||
return;
|
||||
}
|
||||
/* Get all the database client information. */
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
for (size_t i = 0; i < reply->elements; ++i) {
|
||||
/* Strip the database client table prefix. */
|
||||
unsigned char *key = (unsigned char *) reply->element[i]->str;
|
||||
@@ -1255,8 +1257,8 @@ void redis_db_client_table_subscribe_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements > 2);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements > 2);
|
||||
/* First entry is message type, then possibly the regex we psubscribed to,
|
||||
* then topic, then payload. */
|
||||
redisReply *payload = reply->element[reply->elements - 1];
|
||||
@@ -1323,11 +1325,11 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements == 3);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements == 3);
|
||||
redisReply *message_type = reply->element[0];
|
||||
LOG_DEBUG("Local scheduler table subscribe callback, message %s",
|
||||
message_type->str);
|
||||
RAY_LOG(DEBUG) << "Local scheduler table subscribe callback, message "
|
||||
<< message_type->str;
|
||||
|
||||
if (strcmp(message_type->str, "message") == 0) {
|
||||
/* Handle a local scheduler heartbeat. Parse the payload and call the
|
||||
@@ -1362,13 +1364,13 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c,
|
||||
}
|
||||
} else if (strcmp(message_type->str, "subscribe") == 0) {
|
||||
/* The reply for the initial SUBSCRIBE command. */
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
|
||||
* destroy the callback data. */
|
||||
remove_timer_callback(db->loop, callback_data);
|
||||
|
||||
} else {
|
||||
LOG_FATAL("Unexpected reply type from local scheduler subscribe.");
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from local scheduler subscribe.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1389,10 +1391,10 @@ void redis_local_scheduler_table_send_info_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
|
||||
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* Clean up the timer and callback. */
|
||||
destroy_timer_callback(db->loop, callback_data);
|
||||
}
|
||||
@@ -1430,9 +1432,9 @@ void redis_local_scheduler_table_disconnect(DBHandle *db) {
|
||||
redisReply *reply = (redisReply *) redisCommand(
|
||||
db->sync_context, "PUBLISH local_schedulers %b", fbb.GetBufferPointer(),
|
||||
(size_t) fbb.GetSize());
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
|
||||
freeReplyObject(reply);
|
||||
}
|
||||
|
||||
@@ -1442,10 +1444,11 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements == 3);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements == 3);
|
||||
redisReply *message_type = reply->element[0];
|
||||
LOG_DEBUG("Driver table subscribe callback, message %s", message_type->str);
|
||||
RAY_LOG(DEBUG) << "Driver table subscribe callback, message "
|
||||
<< message_type->str;
|
||||
|
||||
if (strcmp(message_type->str, "message") == 0) {
|
||||
/* Handle a driver heartbeat. Parse the payload and call the subscribe
|
||||
@@ -1463,13 +1466,13 @@ void redis_driver_table_subscribe_callback(redisAsyncContext *c,
|
||||
}
|
||||
} else if (strcmp(message_type->str, "subscribe") == 0) {
|
||||
/* The reply for the initial SUBSCRIBE command. */
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
|
||||
* destroy the callback data. */
|
||||
remove_timer_callback(db->loop, callback_data);
|
||||
|
||||
} else {
|
||||
LOG_FATAL("Unexpected reply type from driver subscribe.");
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from driver subscribe.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1490,13 +1493,13 @@ void redis_driver_table_send_driver_death_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
LOG_DEBUG("%lld subscribers received this publish.\n", reply->integer);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
RAY_LOG(DEBUG) << reply->integer << " subscribers received this publish.";
|
||||
/* At the very least, the local scheduler that publishes this message should
|
||||
* also receive it. */
|
||||
CHECK(reply->integer >= 1);
|
||||
RAY_CHECK(reply->integer >= 1);
|
||||
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* Clean up the timer and callback. */
|
||||
destroy_timer_callback(db->loop, callback_data);
|
||||
}
|
||||
@@ -1544,11 +1547,11 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
|
||||
redisReply *reply = (redisReply *) r;
|
||||
CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
CHECK(reply->elements == 3);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_ARRAY);
|
||||
RAY_CHECK(reply->elements == 3);
|
||||
redisReply *message_type = reply->element[0];
|
||||
LOG_DEBUG("Local scheduler table subscribe callback, message %s",
|
||||
message_type->str);
|
||||
RAY_LOG(DEBUG) << "Local scheduler table subscribe callback, message "
|
||||
<< message_type->str;
|
||||
|
||||
if (strcmp(message_type->str, "message") == 0) {
|
||||
/* Handle an actor notification message. Parse the payload and call the
|
||||
@@ -1561,9 +1564,9 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
|
||||
WorkerID driver_id;
|
||||
DBClientID local_scheduler_id;
|
||||
bool reconstruct;
|
||||
CHECK(sizeof(actor_id) + sizeof(driver_id) + sizeof(local_scheduler_id) +
|
||||
1 ==
|
||||
payload->len);
|
||||
RAY_CHECK(sizeof(actor_id) + sizeof(driver_id) +
|
||||
sizeof(local_scheduler_id) + 1 ==
|
||||
payload->len);
|
||||
char *current_ptr = payload->str;
|
||||
/* Parse the actor ID. */
|
||||
memcpy(&actor_id, current_ptr, sizeof(actor_id));
|
||||
@@ -1580,7 +1583,8 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
|
||||
} else if (*current_ptr == '0') {
|
||||
reconstruct = false;
|
||||
} else {
|
||||
LOG_FATAL("This code should be unreachable.");
|
||||
reconstruct = false; // We set this value to avoid a compiler warning.
|
||||
RAY_LOG(FATAL) << "This code should be unreachable.";
|
||||
}
|
||||
current_ptr += 1;
|
||||
|
||||
@@ -1590,13 +1594,14 @@ void redis_actor_notification_table_subscribe_callback(redisAsyncContext *c,
|
||||
}
|
||||
} else if (strcmp(message_type->str, "subscribe") == 0) {
|
||||
/* The reply for the initial SUBSCRIBE command. */
|
||||
CHECK(callback_data->done_callback == NULL);
|
||||
RAY_CHECK(callback_data->done_callback == NULL);
|
||||
/* If the initial SUBSCRIBE was successful, clean up the timer, but don't
|
||||
* destroy the callback data. */
|
||||
remove_timer_callback(db->loop, callback_data);
|
||||
|
||||
} else {
|
||||
LOG_FATAL("Unexpected reply type from actor notification subscribe.");
|
||||
RAY_LOG(FATAL) << "Unexpected reply type from actor notification "
|
||||
<< "subscribe.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1627,7 +1632,7 @@ void redis_push_error_rpush_callback(redisAsyncContext *c,
|
||||
REDIS_CALLBACK_HEADER(db, callback_data, r);
|
||||
redisReply *reply = (redisReply *) r;
|
||||
/* The reply should be the length of the errors list after our RPUSH. */
|
||||
CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
RAY_CHECK(reply->type == REDIS_REPLY_INTEGER);
|
||||
destroy_timer_callback(db->loop, callback_data);
|
||||
}
|
||||
|
||||
@@ -1638,8 +1643,8 @@ void redis_push_error_hmset_callback(redisAsyncContext *c,
|
||||
redisReply *reply = (redisReply *) r;
|
||||
|
||||
/* Make sure we were able to add the error information. */
|
||||
CHECKM(reply->type != REDIS_REPLY_ERROR, "reply->str is %s", reply->str);
|
||||
CHECKM(strcmp(reply->str, "OK") == 0, "reply->str is %s", reply->str);
|
||||
RAY_CHECK(reply->type != REDIS_REPLY_ERROR) << "reply->str is " << reply->str;
|
||||
RAY_CHECK(strcmp(reply->str, "OK") == 0) << "reply->str is " << reply->str;
|
||||
|
||||
/* Add the error to this driver's list of errors. */
|
||||
ErrorInfo *info = (ErrorInfo *) callback_data->data->Get();
|
||||
@@ -1656,7 +1661,7 @@ void redis_push_error_hmset_callback(redisAsyncContext *c,
|
||||
void redis_push_error(TableCallbackData *callback_data) {
|
||||
DBHandle *db = callback_data->db_handle;
|
||||
ErrorInfo *info = (ErrorInfo *) callback_data->data->Get();
|
||||
CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0);
|
||||
RAY_CHECK(info->error_index < MAX_ERROR_INDEX && info->error_index >= 0);
|
||||
/* Look up the error type. */
|
||||
const char *error_type = error_types[info->error_index];
|
||||
const char *error_message = error_messages[info->error_index];
|
||||
@@ -1674,6 +1679,6 @@ void redis_push_error(TableCallbackData *callback_data) {
|
||||
}
|
||||
|
||||
DBClientID get_db_client_id(DBHandle *db) {
|
||||
CHECK(db != NULL);
|
||||
RAY_CHECK(db != NULL);
|
||||
return db->client;
|
||||
}
|
||||
|
||||
@@ -11,11 +11,13 @@
|
||||
#include "hiredis/hiredis.h"
|
||||
#include "hiredis/async.h"
|
||||
|
||||
#define LOG_REDIS_ERROR(context, M, ...) \
|
||||
LOG_ERROR("Redis error %d %s; %s", context->err, context->errstr, M)
|
||||
#define LOG_REDIS_ERROR(context, M, ...) \
|
||||
RAY_LOG(ERROR) << "Redis error " << context->err << " " << context->errstr \
|
||||
<< "; " << M
|
||||
|
||||
#define LOG_REDIS_DEBUG(context, M, ...) \
|
||||
LOG_DEBUG("Redis error %d %s; %s", context->err, context->errstr, M)
|
||||
#define LOG_REDIS_DEBUG(context, M, ...) \
|
||||
RAY_LOG(DEBUG) << "Redis error " << context->err << " " << context->errstr \
|
||||
<< "; " << M;
|
||||
|
||||
struct DBHandle {
|
||||
/** String that identifies this client type. */
|
||||
|
||||
+17
-17
@@ -43,18 +43,18 @@ TableCallbackData *init_table_callback(DBHandle *db_handle,
|
||||
table_done_callback done_callback,
|
||||
table_retry_callback retry_callback,
|
||||
void *user_context) {
|
||||
CHECK(db_handle);
|
||||
CHECK(db_handle->loop);
|
||||
CHECK(data);
|
||||
RAY_CHECK(db_handle);
|
||||
RAY_CHECK(db_handle->loop);
|
||||
RAY_CHECK(data);
|
||||
/* If no retry info is provided, use the default retry info. */
|
||||
if (retry == NULL) {
|
||||
retry = (RetryInfo *) &default_retry;
|
||||
}
|
||||
CHECK(retry);
|
||||
RAY_CHECK(retry);
|
||||
/* Allocate and initialize callback data structure for object table */
|
||||
TableCallbackData *callback_data =
|
||||
(TableCallbackData *) malloc(sizeof(TableCallbackData));
|
||||
CHECKM(callback_data != NULL, "Memory allocation error!")
|
||||
RAY_CHECK(callback_data != NULL) << "Memory allocation error!";
|
||||
callback_data->id = id;
|
||||
callback_data->label = label;
|
||||
callback_data->retry = *retry;
|
||||
@@ -70,8 +70,8 @@ TableCallbackData *init_table_callback(DBHandle *db_handle,
|
||||
callback_data->timer_id = callback_data_id++;
|
||||
outstanding_callbacks_add(callback_data);
|
||||
|
||||
LOG_DEBUG("Initializing table command %s with timer ID %" PRId64,
|
||||
callback_data->label, callback_data->timer_id);
|
||||
RAY_LOG(DEBUG) << "Initializing table command " << callback_data->label
|
||||
<< " with timer ID " << callback_data->timer_id;
|
||||
callback_data->retry_callback(callback_data);
|
||||
|
||||
return callback_data;
|
||||
@@ -92,12 +92,12 @@ void remove_timer_callback(event_loop *loop, TableCallbackData *callback_data) {
|
||||
}
|
||||
|
||||
void destroy_table_callback(TableCallbackData *callback_data) {
|
||||
CHECK(callback_data != NULL);
|
||||
RAY_CHECK(callback_data != NULL);
|
||||
|
||||
if (callback_data->requests_info)
|
||||
free(callback_data->requests_info);
|
||||
|
||||
CHECK(callback_data->data != NULL);
|
||||
RAY_CHECK(callback_data->data != NULL);
|
||||
delete callback_data->data;
|
||||
callback_data->data = NULL;
|
||||
|
||||
@@ -110,20 +110,20 @@ void destroy_table_callback(TableCallbackData *callback_data) {
|
||||
int64_t table_timeout_handler(event_loop *loop,
|
||||
int64_t timer_id,
|
||||
void *user_context) {
|
||||
CHECK(loop != NULL);
|
||||
CHECK(user_context != NULL);
|
||||
RAY_CHECK(loop != NULL);
|
||||
RAY_CHECK(user_context != NULL);
|
||||
TableCallbackData *callback_data = (TableCallbackData *) user_context;
|
||||
|
||||
CHECK(callback_data->retry.num_retries >= 0 ||
|
||||
callback_data->retry.num_retries == -1);
|
||||
LOG_WARN("retrying operation %s, retry_count = %d", callback_data->label,
|
||||
callback_data->retry.num_retries);
|
||||
RAY_CHECK(callback_data->retry.num_retries >= 0 ||
|
||||
callback_data->retry.num_retries == -1);
|
||||
RAY_LOG(WARNING) << "retrying operation " << callback_data->label
|
||||
<< ", retry_count = " << callback_data->retry.num_retries;
|
||||
|
||||
if (callback_data->retry.num_retries == 0) {
|
||||
/* We didn't get a response from the database after exhausting all retries;
|
||||
* let the user know, clean up the state, and remove the timer. */
|
||||
LOG_WARN("Table command %s with timer ID %" PRId64 " failed",
|
||||
callback_data->label, timer_id);
|
||||
RAY_LOG(WARNING) << "Table command " << callback_data->label
|
||||
<< " with timer ID " << timer_id << " failed";
|
||||
if (callback_data->retry.fail_callback) {
|
||||
callback_data->retry.fail_callback(callback_data->id,
|
||||
callback_data->user_context,
|
||||
|
||||
+24
-24
@@ -80,7 +80,7 @@ class TaskBuilder {
|
||||
}
|
||||
|
||||
void SetRequiredResource(const std::string &resource_name, double value) {
|
||||
CHECK(resource_map_.count(resource_name) == 0);
|
||||
RAY_CHECK(resource_map_.count(resource_name) == 0);
|
||||
resource_map_[resource_name] = value;
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ class TaskBuilder {
|
||||
BYTE buff[DIGEST_SIZE];
|
||||
sha256_final(&ctx, buff);
|
||||
TaskID task_id;
|
||||
CHECK(sizeof(task_id) <= DIGEST_SIZE);
|
||||
RAY_CHECK(sizeof(task_id) <= DIGEST_SIZE);
|
||||
memcpy(&task_id, buff, sizeof(task_id));
|
||||
/* Add return object IDs. */
|
||||
std::vector<flatbuffers::Offset<flatbuffers::String>> returns;
|
||||
@@ -206,25 +206,25 @@ void TaskSpec_set_required_resource(TaskBuilder *builder,
|
||||
/* Functions for reading tasks. */
|
||||
|
||||
TaskID TaskSpec_task_id(const TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->task_id());
|
||||
}
|
||||
|
||||
FunctionID TaskSpec_function(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->function_id());
|
||||
}
|
||||
|
||||
ActorID TaskSpec_actor_id(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->actor_id());
|
||||
}
|
||||
|
||||
ActorID TaskSpec_actor_handle_id(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->actor_handle_id());
|
||||
}
|
||||
@@ -234,19 +234,19 @@ bool TaskSpec_is_actor_task(TaskSpec *spec) {
|
||||
}
|
||||
|
||||
int64_t TaskSpec_actor_counter(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return std::abs(message->actor_counter());
|
||||
}
|
||||
|
||||
bool TaskSpec_is_actor_checkpoint_method(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->is_actor_checkpoint_method();
|
||||
}
|
||||
|
||||
ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) {
|
||||
CHECK(TaskSpec_is_actor_task(spec));
|
||||
RAY_CHECK(TaskSpec_is_actor_task(spec));
|
||||
/* The last return value for actor tasks is the dummy object that
|
||||
* represents that this task has completed execution. */
|
||||
int64_t num_returns = TaskSpec_num_returns(spec);
|
||||
@@ -254,25 +254,25 @@ ObjectID TaskSpec_actor_dummy_object(TaskSpec *spec) {
|
||||
}
|
||||
|
||||
UniqueID TaskSpec_driver_id(const TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->driver_id());
|
||||
}
|
||||
|
||||
TaskID TaskSpec_parent_task_id(const TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->parent_task_id());
|
||||
}
|
||||
|
||||
int64_t TaskSpec_parent_counter(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->parent_counter();
|
||||
}
|
||||
|
||||
int64_t TaskSpec_num_args(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->args()->size();
|
||||
}
|
||||
@@ -289,45 +289,45 @@ int64_t TaskSpec_num_args_by_ref(TaskSpec *spec) {
|
||||
}
|
||||
|
||||
int TaskSpec_arg_id_count(TaskSpec *spec, int64_t arg_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
auto ids = message->args()->Get(arg_index)->object_ids();
|
||||
return ids->size();
|
||||
}
|
||||
|
||||
ObjectID TaskSpec_arg_id(TaskSpec *spec, int64_t arg_index, int64_t id_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(
|
||||
*message->args()->Get(arg_index)->object_ids()->Get(id_index));
|
||||
}
|
||||
|
||||
const uint8_t *TaskSpec_arg_val(TaskSpec *spec, int64_t arg_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return (uint8_t *) message->args()->Get(arg_index)->data()->c_str();
|
||||
}
|
||||
|
||||
int64_t TaskSpec_arg_length(TaskSpec *spec, int64_t arg_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->args()->Get(arg_index)->data()->size();
|
||||
}
|
||||
|
||||
int64_t TaskSpec_num_returns(TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->returns()->size();
|
||||
}
|
||||
|
||||
bool TaskSpec_arg_by_ref(TaskSpec *spec, int64_t arg_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return message->args()->Get(arg_index)->object_ids()->size() != 0;
|
||||
}
|
||||
|
||||
ObjectID TaskSpec_return(TaskSpec *spec, int64_t return_index) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return from_flatbuf(*message->returns()->Get(return_index));
|
||||
}
|
||||
@@ -336,7 +336,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec,
|
||||
const std::string &resource_name) {
|
||||
// This is a bit ugly. However it shouldn't be much of a performance issue
|
||||
// because there shouldn't be many distinct resources in a single task spec.
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
for (size_t i = 0; i < message->required_resources()->size(); i++) {
|
||||
const ResourcePair *resource_pair = message->required_resources()->Get(i);
|
||||
@@ -349,7 +349,7 @@ double TaskSpec_get_required_resource(const TaskSpec *spec,
|
||||
|
||||
const std::unordered_map<std::string, double> TaskSpec_get_required_resources(
|
||||
const TaskSpec *spec) {
|
||||
CHECK(spec);
|
||||
RAY_CHECK(spec);
|
||||
auto message = flatbuffers::GetRoot<TaskInfo>(spec);
|
||||
return map_from_flatbuf(*message->required_resources());
|
||||
}
|
||||
@@ -446,7 +446,7 @@ int TaskExecutionSpec::DependencyIdCount(int64_t dependency_index) const {
|
||||
} else {
|
||||
/* Index into the execution dependencies. */
|
||||
dependency_index -= num_args;
|
||||
CHECK((size_t) dependency_index < execution_dependencies_.size());
|
||||
RAY_CHECK((size_t) dependency_index < execution_dependencies_.size());
|
||||
/* All elements in the execution dependency list have exactly one ID. */
|
||||
return 1;
|
||||
}
|
||||
@@ -465,7 +465,7 @@ ObjectID TaskExecutionSpec::DependencyId(int64_t dependency_index,
|
||||
} else {
|
||||
/* Index into the execution dependencies. */
|
||||
dependency_index -= num_args;
|
||||
CHECK((size_t) dependency_index < execution_dependencies_.size());
|
||||
RAY_CHECK((size_t) dependency_index < execution_dependencies_.size());
|
||||
return execution_dependencies_[dependency_index];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,13 +42,13 @@ void lookup_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_ids,
|
||||
void *user_context) {
|
||||
DBHandle *db = (DBHandle *) user_context;
|
||||
CHECK(manager_ids.size() == 2);
|
||||
RAY_CHECK(manager_ids.size() == 2);
|
||||
const std::vector<std::string> managers =
|
||||
db_client_table_get_ip_addresses(db, manager_ids);
|
||||
CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1,
|
||||
&received_port1) == 0);
|
||||
CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2,
|
||||
&received_port2) == 0);
|
||||
RAY_CHECK(parse_ip_addr_port(managers.at(0).c_str(), received_addr1,
|
||||
&received_port1) == 0);
|
||||
RAY_CHECK(parse_ip_addr_port(managers.at(1).c_str(), received_addr2,
|
||||
&received_port2) == 0);
|
||||
}
|
||||
|
||||
/* Entry added to database successfully. */
|
||||
@@ -57,7 +57,7 @@ void add_done_callback(ObjectID object_id, bool success, void *user_context) {}
|
||||
/* Test if we got a timeout callback if we couldn't connect database. */
|
||||
void timeout_callback(ObjectID object_id, void *context, void *user_data) {
|
||||
user_context *uc = (user_context *) context;
|
||||
CHECK(uc->test_number == TEST_NUMBER)
|
||||
RAY_CHECK(uc->test_number == TEST_NUMBER);
|
||||
}
|
||||
|
||||
int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
|
||||
@@ -136,9 +136,9 @@ int64_t task_table_delayed_add_task(event_loop *loop,
|
||||
|
||||
void task_table_test_callback(Task *callback_task, void *user_data) {
|
||||
task_table_test_callback_called = 1;
|
||||
CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED);
|
||||
CHECK(Task_size(callback_task) == Task_size(task_table_test_task));
|
||||
CHECK(Task_equals(callback_task, task_table_test_task));
|
||||
RAY_CHECK(Task_state(callback_task) == TASK_STATUS_SCHEDULED);
|
||||
RAY_CHECK(Task_size(callback_task) == Task_size(task_table_test_task));
|
||||
RAY_CHECK(Task_equals(callback_task, task_table_test_task));
|
||||
event_loop *loop = (event_loop *) user_data;
|
||||
event_loop_stop(loop);
|
||||
}
|
||||
|
||||
@@ -38,13 +38,13 @@ void new_object_done_callback(ObjectID object_id,
|
||||
bool is_put,
|
||||
void *user_context) {
|
||||
new_object_succeeded = 1;
|
||||
CHECK(object_id == new_object_id);
|
||||
CHECK(task_id == new_object_task_id);
|
||||
RAY_CHECK(object_id == new_object_id);
|
||||
RAY_CHECK(task_id == new_object_task_id);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
void new_object_lookup_callback(ObjectID object_id, void *user_context) {
|
||||
CHECK(object_id == new_object_id);
|
||||
RAY_CHECK(object_id == new_object_id);
|
||||
RetryInfo retry = {
|
||||
.num_retries = 5,
|
||||
.timeout = 100,
|
||||
@@ -109,7 +109,7 @@ void new_object_no_task_callback(ObjectID object_id,
|
||||
bool is_put,
|
||||
void *user_context) {
|
||||
new_object_succeeded = 1;
|
||||
CHECK(task_id.is_nil());
|
||||
RAY_CHECK(task_id.is_nil());
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -150,12 +150,12 @@ void lookup_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *context) {
|
||||
/* The done callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void lookup_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
lookup_failed = 1;
|
||||
CHECK(user_context == (void *) lookup_timeout_context);
|
||||
RAY_CHECK(user_context == (void *) lookup_timeout_context);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -189,12 +189,12 @@ int add_failed = 0;
|
||||
|
||||
void add_done_callback(ObjectID object_id, bool success, void *user_context) {
|
||||
/* The done callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void add_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
add_failed = 1;
|
||||
CHECK(user_context == (void *) add_timeout_context);
|
||||
RAY_CHECK(user_context == (void *) add_timeout_context);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -230,7 +230,7 @@ void subscribe_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *user_context) {
|
||||
/* The done callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
@@ -277,7 +277,7 @@ int64_t reconnect_context_callback(event_loop *loop,
|
||||
db->sync_context = redisConnect("127.0.0.1", 6379);
|
||||
/* Re-attach the database to the event loop (the file descriptor changed). */
|
||||
db_attach(db, loop, true);
|
||||
LOG_DEBUG("Reconnected to Redis");
|
||||
RAY_LOG(DEBUG) << "Reconnected to Redis";
|
||||
return EVENT_LOOP_TIMER_DONE;
|
||||
}
|
||||
|
||||
@@ -297,7 +297,7 @@ void lookup_retry_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* The fail callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
/* === Test add retry === */
|
||||
@@ -312,15 +312,15 @@ void add_lookup_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_ids,
|
||||
void *context) {
|
||||
DBHandle *db = (DBHandle *) context;
|
||||
CHECK(manager_ids.size() == 1);
|
||||
RAY_CHECK(manager_ids.size() == 1);
|
||||
const std::vector<std::string> managers =
|
||||
db_client_table_get_ip_addresses(db, manager_ids);
|
||||
CHECK(managers.at(0) == "127.0.0.1:11235");
|
||||
RAY_CHECK(managers.at(0) == "127.0.0.1:11235");
|
||||
lookup_retry_succeeded = 1;
|
||||
}
|
||||
|
||||
void add_lookup_callback(ObjectID object_id, bool success, void *user_context) {
|
||||
CHECK(success);
|
||||
RAY_CHECK(success);
|
||||
DBHandle *db = (DBHandle *) user_context;
|
||||
RetryInfo retry = {
|
||||
.num_retries = 5,
|
||||
@@ -366,15 +366,15 @@ void add_remove_lookup_done_callback(
|
||||
bool never_created,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *context) {
|
||||
CHECK(context == (void *) lookup_retry_context);
|
||||
CHECK(manager_vector.size() == 0);
|
||||
RAY_CHECK(context == (void *) lookup_retry_context);
|
||||
RAY_CHECK(manager_vector.size() == 0);
|
||||
lookup_retry_succeeded = 1;
|
||||
}
|
||||
|
||||
void add_remove_lookup_callback(ObjectID object_id,
|
||||
bool success,
|
||||
void *user_context) {
|
||||
CHECK(success);
|
||||
RAY_CHECK(success);
|
||||
DBHandle *db = (DBHandle *) user_context;
|
||||
RetryInfo retry = {
|
||||
.num_retries = 5,
|
||||
@@ -387,7 +387,7 @@ void add_remove_lookup_callback(ObjectID object_id,
|
||||
}
|
||||
|
||||
void add_remove_callback(ObjectID object_id, bool success, void *user_context) {
|
||||
CHECK(success);
|
||||
RAY_CHECK(success);
|
||||
DBHandle *db = (DBHandle *) user_context;
|
||||
RetryInfo retry = {
|
||||
.num_retries = 5,
|
||||
@@ -433,7 +433,7 @@ int lookup_late_failed = 0;
|
||||
void lookup_late_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
CHECK(user_context == (void *) lookup_late_context);
|
||||
RAY_CHECK(user_context == (void *) lookup_late_context);
|
||||
lookup_late_failed = 1;
|
||||
}
|
||||
|
||||
@@ -442,7 +442,7 @@ void lookup_late_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *context) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST lookup_late_test(void) {
|
||||
@@ -478,7 +478,7 @@ const char *add_late_context = "add_late";
|
||||
int add_late_failed = 0;
|
||||
|
||||
void add_late_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
CHECK(user_context == (void *) add_late_context);
|
||||
RAY_CHECK(user_context == (void *) add_late_context);
|
||||
add_late_failed = 1;
|
||||
}
|
||||
|
||||
@@ -486,7 +486,7 @@ void add_late_done_callback(ObjectID object_id,
|
||||
bool success,
|
||||
void *user_context) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST add_late_test(void) {
|
||||
@@ -522,7 +522,7 @@ int subscribe_late_failed = 0;
|
||||
void subscribe_late_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
CHECK(user_context == (void *) subscribe_late_context);
|
||||
RAY_CHECK(user_context == (void *) subscribe_late_context);
|
||||
subscribe_late_failed = 1;
|
||||
}
|
||||
|
||||
@@ -531,7 +531,7 @@ void subscribe_late_done_callback(ObjectID object_id,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *user_context) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST subscribe_late_test(void) {
|
||||
@@ -573,7 +573,7 @@ void subscribe_success_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void subscribe_success_done_callback(
|
||||
@@ -594,9 +594,9 @@ void subscribe_success_object_available_callback(
|
||||
int64_t data_size,
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *user_context) {
|
||||
CHECK(user_context == (void *) subscribe_success_context);
|
||||
CHECK(object_id == subscribe_id);
|
||||
CHECK(manager_vector.size() == 1);
|
||||
RAY_CHECK(user_context == (void *) subscribe_success_context);
|
||||
RAY_CHECK(object_id == subscribe_id);
|
||||
RAY_CHECK(manager_vector.size() == 1);
|
||||
subscribe_success_succeeded = 1;
|
||||
}
|
||||
|
||||
@@ -656,15 +656,15 @@ void subscribe_object_present_object_available_callback(
|
||||
void *user_context) {
|
||||
subscribe_object_present_context_t *ctx =
|
||||
(subscribe_object_present_context_t *) user_context;
|
||||
CHECK(ctx->data_size == data_size);
|
||||
CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0);
|
||||
RAY_CHECK(ctx->data_size == data_size);
|
||||
RAY_CHECK(strcmp(subscribe_object_present_str, ctx->teststr) == 0);
|
||||
subscribe_object_present_succeeded = 1;
|
||||
CHECK(manager_vector.size() == 1);
|
||||
RAY_CHECK(manager_vector.size() == 1);
|
||||
}
|
||||
|
||||
void fatal_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST subscribe_object_present_test(void) {
|
||||
@@ -723,7 +723,7 @@ void subscribe_object_not_present_object_available_callback(
|
||||
const std::vector<DBClientID> &manager_vector,
|
||||
void *user_context) {
|
||||
/* This should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST subscribe_object_not_present_test(void) {
|
||||
@@ -773,11 +773,12 @@ void subscribe_object_available_later_object_available_callback(
|
||||
void *user_context) {
|
||||
subscribe_object_present_context_t *myctx =
|
||||
(subscribe_object_present_context_t *) user_context;
|
||||
CHECK(myctx->data_size == data_size);
|
||||
CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) == 0);
|
||||
RAY_CHECK(myctx->data_size == data_size);
|
||||
RAY_CHECK(strcmp(myctx->teststr, subscribe_object_available_later_context) ==
|
||||
0);
|
||||
/* Make sure the callback is only called once. */
|
||||
subscribe_object_available_later_succeeded += 1;
|
||||
CHECK(manager_vector.size() == 1);
|
||||
RAY_CHECK(manager_vector.size() == 1);
|
||||
}
|
||||
|
||||
TEST subscribe_object_available_later_test(void) {
|
||||
|
||||
@@ -47,10 +47,10 @@ void async_redis_socket_test_callback(redisAsyncContext *ac,
|
||||
redisReply *reply =
|
||||
(redisReply *) redisCommand(context, test_get_format, test_key);
|
||||
redisFree(context);
|
||||
CHECK(reply != NULL);
|
||||
RAY_CHECK(reply != NULL);
|
||||
if (strcmp(reply->str, test_value)) {
|
||||
freeReplyObject(reply);
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
freeReplyObject(reply);
|
||||
}
|
||||
@@ -97,7 +97,7 @@ void redis_accept_callback(event_loop *loop,
|
||||
void *context,
|
||||
int events) {
|
||||
int accept_fd = accept_client(socket_fd);
|
||||
CHECK(accept_fd >= 0);
|
||||
RAY_CHECK(accept_fd >= 0);
|
||||
connections.push_back(accept_fd);
|
||||
event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, redis_read_callback,
|
||||
context);
|
||||
@@ -155,8 +155,8 @@ void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) {
|
||||
redisContext *context = redisConnect("127.0.0.1", 6379);
|
||||
redisReply *reply = (redisReply *) redisCommand(context, "KEYS %s", "log:*");
|
||||
redisFree(context);
|
||||
CHECK(reply != NULL);
|
||||
CHECK(reply->elements > 0);
|
||||
RAY_CHECK(reply != NULL);
|
||||
RAY_CHECK(reply->elements > 0);
|
||||
freeReplyObject(reply);
|
||||
}
|
||||
|
||||
@@ -176,7 +176,7 @@ void logging_accept_callback(event_loop *loop,
|
||||
void *context,
|
||||
int events) {
|
||||
int accept_fd = accept_client(socket_fd);
|
||||
CHECK(accept_fd >= 0);
|
||||
RAY_CHECK(accept_fd >= 0);
|
||||
connections.push_back(accept_fd);
|
||||
event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, logging_read_callback,
|
||||
context);
|
||||
|
||||
@@ -27,13 +27,13 @@ void lookup_nil_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* The fail callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void lookup_nil_success_callback(Task *task, void *context) {
|
||||
lookup_nil_success = 1;
|
||||
CHECK(task == NULL);
|
||||
CHECK(context == (void *) lookup_nil_context);
|
||||
RAY_CHECK(task == NULL);
|
||||
RAY_CHECK(context == (void *) lookup_nil_context);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -70,18 +70,18 @@ void add_lookup_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* The fail callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void lookup_success_callback(Task *task, void *context) {
|
||||
lookup_success = 1;
|
||||
CHECK(Task_equals(task, add_lookup_task));
|
||||
RAY_CHECK(Task_equals(task, add_lookup_task));
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
void add_success_callback(TaskID task_id, void *context) {
|
||||
add_success = 1;
|
||||
CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task)));
|
||||
RAY_CHECK(TaskID_equal(task_id, Task_task_id(add_lookup_task)));
|
||||
|
||||
DBHandle *db = (DBHandle *) context;
|
||||
RetryInfo retry = {
|
||||
@@ -137,12 +137,12 @@ int subscribe_failed = 0;
|
||||
|
||||
void subscribe_done_callback(TaskID task_id, void *user_context) {
|
||||
/* The done callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void subscribe_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
subscribe_failed = 1;
|
||||
CHECK(user_context == (void *) subscribe_timeout_context);
|
||||
RAY_CHECK(user_context == (void *) subscribe_timeout_context);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -180,12 +180,12 @@ int publish_failed = 0;
|
||||
|
||||
void publish_done_callback(TaskID task_id, void *user_context) {
|
||||
/* The done callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void publish_fail_callback(UniqueID id, void *user_context, void *user_data) {
|
||||
publish_failed = 1;
|
||||
CHECK(user_context == (void *) publish_timeout_context);
|
||||
RAY_CHECK(user_context == (void *) publish_timeout_context);
|
||||
event_loop_stop(g_loop);
|
||||
}
|
||||
|
||||
@@ -249,7 +249,7 @@ const char *subscribe_retry_context = "subscribe_retry";
|
||||
int subscribe_retry_succeeded = 0;
|
||||
|
||||
void subscribe_retry_done_callback(ObjectID object_id, void *user_context) {
|
||||
CHECK(user_context == (void *) subscribe_retry_context);
|
||||
RAY_CHECK(user_context == (void *) subscribe_retry_context);
|
||||
subscribe_retry_succeeded = 1;
|
||||
}
|
||||
|
||||
@@ -257,7 +257,7 @@ void subscribe_retry_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* The fail callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST subscribe_retry_test(void) {
|
||||
@@ -299,7 +299,7 @@ const char *publish_retry_context = "publish_retry";
|
||||
int publish_retry_succeeded = 0;
|
||||
|
||||
void publish_retry_done_callback(ObjectID object_id, void *user_context) {
|
||||
CHECK(user_context == (void *) publish_retry_context);
|
||||
RAY_CHECK(user_context == (void *) publish_retry_context);
|
||||
publish_retry_succeeded = 1;
|
||||
}
|
||||
|
||||
@@ -307,7 +307,7 @@ void publish_retry_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
/* The fail callback should not be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST publish_retry_test(void) {
|
||||
@@ -355,13 +355,13 @@ int subscribe_late_failed = 0;
|
||||
void subscribe_late_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
CHECK(user_context == (void *) subscribe_late_context);
|
||||
RAY_CHECK(user_context == (void *) subscribe_late_context);
|
||||
subscribe_late_failed = 1;
|
||||
}
|
||||
|
||||
void subscribe_late_done_callback(TaskID task_id, void *user_context) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST subscribe_late_test(void) {
|
||||
@@ -400,13 +400,13 @@ int publish_late_failed = 0;
|
||||
void publish_late_fail_callback(UniqueID id,
|
||||
void *user_context,
|
||||
void *user_data) {
|
||||
CHECK(user_context == (void *) publish_late_context);
|
||||
RAY_CHECK(user_context == (void *) publish_late_context);
|
||||
publish_late_failed = 1;
|
||||
}
|
||||
|
||||
void publish_late_done_callback(TaskID task_id, void *user_context) {
|
||||
/* This function should never be called. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
TEST publish_late_test(void) {
|
||||
|
||||
@@ -22,7 +22,8 @@ static inline std::string bind_ipc_sock_retry(const char *socket_name_format,
|
||||
int *fd) {
|
||||
std::string socket_name;
|
||||
for (int num_retries = 0; num_retries < 5; ++num_retries) {
|
||||
LOG_INFO("trying to find plasma socket (attempt %d)", num_retries);
|
||||
RAY_LOG(INFO) << "trying to find plasma socket (attempt " << num_retries
|
||||
<< ")";
|
||||
size_t size = std::snprintf(nullptr, 0, socket_name_format, rand()) + 1;
|
||||
char socket_name_c_str[size];
|
||||
std::snprintf(socket_name_c_str, size, socket_name_format, rand());
|
||||
|
||||
@@ -31,7 +31,7 @@ void assign_task_to_local_scheduler_retry(UniqueID id,
|
||||
void *user_data) {
|
||||
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
|
||||
Task *task = (Task *) user_data;
|
||||
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
||||
RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
||||
|
||||
// If the local scheduler has died since we requested the task assignment, do
|
||||
// not retry again.
|
||||
@@ -68,13 +68,13 @@ void assign_task_to_local_scheduler_retry(UniqueID id,
|
||||
void assign_task_to_local_scheduler(GlobalSchedulerState *state,
|
||||
Task *task,
|
||||
DBClientID local_scheduler_id) {
|
||||
std::string id_string = local_scheduler_id.hex();
|
||||
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
|
||||
LOG_DEBUG("assigning task to local_scheduler_id = %s", id_string.c_str());
|
||||
RAY_LOG(DEBUG) << "assigning task to local_scheduler_id = "
|
||||
<< local_scheduler_id;
|
||||
Task_set_state(task, TASK_STATUS_SCHEDULED);
|
||||
Task_set_local_scheduler(task, local_scheduler_id);
|
||||
id_string = Task_task_id(task).hex();
|
||||
LOG_DEBUG("Issuing a task table update for task = %s", id_string.c_str());
|
||||
RAY_LOG(DEBUG) << "Issuing a task table update for task = "
|
||||
<< Task_task_id(task);
|
||||
|
||||
#if !RAY_USE_NEW_GCS
|
||||
auto retryInfo = RetryInfo{
|
||||
@@ -99,7 +99,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
|
||||
/* The value -1 indicates that the size of the object is not known yet. */
|
||||
obj_info_entry.data_size = -1;
|
||||
}
|
||||
CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
|
||||
RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
|
||||
state->scheduler_object_info_table[return_id].object_locations.push_back(
|
||||
state->local_scheduler_plasma_map[local_scheduler_id]);
|
||||
}
|
||||
@@ -108,7 +108,7 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
|
||||
* instead of db_client_id objects. */
|
||||
/* Update the local scheduler info. */
|
||||
auto it = state->local_schedulers.find(local_scheduler_id);
|
||||
CHECK(it != state->local_schedulers.end());
|
||||
RAY_CHECK(it != state->local_schedulers.end());
|
||||
|
||||
LocalScheduler &local_scheduler = it->second;
|
||||
local_scheduler.num_tasks_sent += 1;
|
||||
@@ -119,8 +119,9 @@ void assign_task_to_local_scheduler(GlobalSchedulerState *state,
|
||||
double resource_quantity = resource_pair.second;
|
||||
// The local scheduler must have this resource because otherwise we wouldn't
|
||||
// be assigning the task to this local scheduler.
|
||||
CHECK(local_scheduler.info.dynamic_resources.count(resource_name) == 1 ||
|
||||
resource_quantity == 0);
|
||||
RAY_CHECK(local_scheduler.info.dynamic_resources.count(resource_name) ==
|
||||
1 ||
|
||||
resource_quantity == 0);
|
||||
// Subtract task's resource from the cached dynamic resource capacity for
|
||||
// this local scheduler. This will be overwritten on the next heartbeat.
|
||||
local_scheduler.info.dynamic_resources[resource_name] =
|
||||
@@ -161,9 +162,8 @@ void GlobalSchedulerState_free(GlobalSchedulerState *state) {
|
||||
/* Free the array of unschedulable tasks. */
|
||||
int64_t num_pending_tasks = state->pending_tasks.size();
|
||||
if (num_pending_tasks > 0) {
|
||||
LOG_WARN("There are %" PRId64
|
||||
" remaining tasks in the pending tasks array.",
|
||||
num_pending_tasks);
|
||||
RAY_LOG(WARNING) << "There are " << num_pending_tasks
|
||||
<< " remaining tasks in the pending tasks array.";
|
||||
}
|
||||
for (int i = 0; i < num_pending_tasks; ++i) {
|
||||
Task *pending_task = state->pending_tasks[i];
|
||||
@@ -195,7 +195,7 @@ void signal_handler(int signal) {
|
||||
|
||||
void process_task_waiting(Task *waiting_task, void *user_context) {
|
||||
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
|
||||
LOG_DEBUG("Task waiting callback is called.");
|
||||
RAY_LOG(DEBUG) << "Task waiting callback is called.";
|
||||
bool successfully_assigned =
|
||||
handle_task_waiting(state, state->policy_state, waiting_task);
|
||||
/* If the task was not successfully submitted to a local scheduler, add the
|
||||
@@ -238,7 +238,7 @@ remove_local_scheduler(
|
||||
GlobalSchedulerState *state,
|
||||
std::unordered_map<DBClientID, LocalScheduler, UniqueIDHasher>::iterator
|
||||
it) {
|
||||
CHECK(it != state->local_schedulers.end());
|
||||
RAY_CHECK(it != state->local_schedulers.end());
|
||||
DBClientID local_scheduler_id = it->first;
|
||||
it = state->local_schedulers.erase(it);
|
||||
|
||||
@@ -264,8 +264,8 @@ remove_local_scheduler(
|
||||
*/
|
||||
void process_new_db_client(DBClient *db_client, void *user_context) {
|
||||
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
|
||||
std::string id_string = db_client->id.hex();
|
||||
LOG_DEBUG("db client table callback for db client = %s", id_string.c_str());
|
||||
RAY_LOG(DEBUG) << "db client table callback for db client = "
|
||||
<< db_client->id;
|
||||
if (strncmp(db_client->client_type.c_str(), "local_scheduler",
|
||||
strlen("local_scheduler")) == 0) {
|
||||
bool local_scheduler_present =
|
||||
@@ -304,15 +304,14 @@ void object_table_subscribe_callback(ObjectID object_id,
|
||||
void *user_context) {
|
||||
/* Extract global scheduler state from the callback context. */
|
||||
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
|
||||
std::string id_string = object_id.hex();
|
||||
LOG_DEBUG("object table subscribe callback for OBJECT = %s",
|
||||
id_string.c_str());
|
||||
RAY_LOG(DEBUG) << "object table subscribe callback for OBJECT = "
|
||||
<< object_id;
|
||||
|
||||
const std::vector<std::string> managers =
|
||||
db_client_table_get_ip_addresses(state->db, manager_ids);
|
||||
LOG_DEBUG("\tManagers<%lu>:", managers.size());
|
||||
RAY_LOG(DEBUG) << "\tManagers<" << managers.size() << ">:";
|
||||
for (size_t i = 0; i < managers.size(); i++) {
|
||||
LOG_DEBUG("\t\t%s", managers[i].c_str());
|
||||
RAY_LOG(DEBUG) << "\t\t" << managers[i];
|
||||
}
|
||||
|
||||
if (state->scheduler_object_info_table.find(object_id) ==
|
||||
@@ -322,12 +321,11 @@ void object_table_subscribe_callback(ObjectID object_id,
|
||||
state->scheduler_object_info_table[object_id];
|
||||
obj_info_entry.data_size = data_size;
|
||||
|
||||
id_string = object_id.hex();
|
||||
LOG_DEBUG("New object added to object_info_table with id = %s",
|
||||
id_string.c_str());
|
||||
LOG_DEBUG("\tmanager locations:");
|
||||
RAY_LOG(DEBUG) << "New object added to object_info_table with id = "
|
||||
<< object_id;
|
||||
RAY_LOG(DEBUG) << "\tmanager locations:";
|
||||
for (size_t i = 0; i < managers.size(); i++) {
|
||||
LOG_DEBUG("\t\t%s", managers[i].c_str());
|
||||
RAY_LOG(DEBUG) << "\t\t" << managers[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,12 +345,10 @@ void local_scheduler_table_handler(DBClientID client_id,
|
||||
/* Extract global scheduler state from the callback context. */
|
||||
GlobalSchedulerState *state = (GlobalSchedulerState *) user_context;
|
||||
ARROW_UNUSED(state);
|
||||
std::string id_string = client_id.hex();
|
||||
LOG_DEBUG("Local scheduler heartbeat from db_client_id %s",
|
||||
id_string.c_str());
|
||||
LOG_DEBUG(
|
||||
"total workers = %d, task queue length = %d, available workers = %d",
|
||||
info.total_num_workers, info.task_queue_length, info.available_workers);
|
||||
RAY_LOG(DEBUG) << "Local scheduler heartbeat from db_client_id " << client_id;
|
||||
RAY_LOG(DEBUG) << "total workers = " << info.total_num_workers
|
||||
<< ", task queue length = " << info.task_queue_length
|
||||
<< ", available workers = " << info.available_workers;
|
||||
|
||||
/* Update the local scheduler info struct. */
|
||||
auto it = state->local_schedulers.find(client_id);
|
||||
@@ -371,7 +367,8 @@ void local_scheduler_table_handler(DBClientID client_id,
|
||||
local_scheduler.info = info;
|
||||
}
|
||||
} else {
|
||||
LOG_WARN("client_id didn't match any cached local scheduler entries");
|
||||
RAY_LOG(WARNING) << "client_id didn't match any cached local scheduler "
|
||||
<< "entries";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -408,8 +405,8 @@ int heartbeat_timeout_handler(event_loop *loop, timer_id id, void *context) {
|
||||
while (it != state->local_schedulers.end()) {
|
||||
if (it->second.num_heartbeats_missed >=
|
||||
RayConfig::instance().num_heartbeats_timeout()) {
|
||||
LOG_WARN(
|
||||
"Missed too many heartbeats from local scheduler, marking as dead.");
|
||||
RAY_LOG(WARNING) << "Missed too many heartbeats from local scheduler, "
|
||||
<< "marking as dead.";
|
||||
/* Notify others by updating the global state. */
|
||||
db_client_table_remove(state->db, it->second.id, NULL, NULL, NULL);
|
||||
/* Remove the scheduler from the local state. The call to
|
||||
@@ -484,22 +481,20 @@ int main(int argc, char *argv[]) {
|
||||
node_ip_address = optarg;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR("unknown option %c", c);
|
||||
exit(-1);
|
||||
RAY_LOG(FATAL) << "unknown option " << c;
|
||||
}
|
||||
}
|
||||
|
||||
char redis_primary_addr[16];
|
||||
int redis_primary_port;
|
||||
int redis_primary_port = -1;
|
||||
if (!redis_primary_addr_port ||
|
||||
parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
|
||||
&redis_primary_port) == -1) {
|
||||
LOG_FATAL(
|
||||
"specify the primary redis address like 127.0.0.1:6379 with the -r "
|
||||
"switch");
|
||||
RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 "
|
||||
<< "with the -r switch";
|
||||
}
|
||||
if (!node_ip_address) {
|
||||
LOG_FATAL("specify the node IP address with the -h switch");
|
||||
RAY_LOG(FATAL) << "specify the node IP address with the -h switch";
|
||||
}
|
||||
start_server(node_ip_address, redis_primary_addr, redis_primary_port);
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ int64_t locally_available_data_size(const GlobalSchedulerState *state,
|
||||
* local scheduler. */
|
||||
int64_t task_data_size = 0;
|
||||
|
||||
CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
|
||||
RAY_CHECK(state->local_scheduler_plasma_map.count(local_scheduler_id) == 1);
|
||||
|
||||
const std::string &plasma_manager =
|
||||
state->local_scheduler_plasma_map.at(local_scheduler_id);
|
||||
@@ -120,8 +120,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
|
||||
GlobalSchedulerPolicyState *policy_state,
|
||||
Task *task) {
|
||||
TaskSpec *task_spec = Task_task_execution_spec(task)->Spec();
|
||||
CHECKM(task_spec != NULL,
|
||||
"task wait handler encounted a task with NULL spec");
|
||||
RAY_CHECK(task_spec != NULL)
|
||||
<< "task wait handler encounted a task with NULL spec";
|
||||
|
||||
std::vector<DBClientID> feasible_nodes;
|
||||
|
||||
@@ -136,10 +136,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
|
||||
}
|
||||
|
||||
if (feasible_nodes.size() == 0) {
|
||||
std::string id_string = Task_task_id(task).hex();
|
||||
LOG_ERROR(
|
||||
"Infeasible task. No nodes satisfy hard constraints for task = %s",
|
||||
id_string.c_str());
|
||||
RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for "
|
||||
<< "task = " << Task_task_id(task);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -148,8 +146,8 @@ bool handle_task_waiting_random(GlobalSchedulerState *state,
|
||||
std::uniform_int_distribution<> dis(0, feasible_nodes.size() - 1);
|
||||
DBClientID local_scheduler_id =
|
||||
feasible_nodes[dis(policy_state->getRandomGenerator())];
|
||||
CHECKM(!local_scheduler_id.is_nil(),
|
||||
"Task is feasible, but doesn't have a local scheduler assigned.");
|
||||
RAY_CHECK(!local_scheduler_id.is_nil())
|
||||
<< "Task is feasible, but doesn't have a local scheduler assigned.";
|
||||
// A local scheduler ID was found, so assign the task.
|
||||
assign_task_to_local_scheduler(state, task, local_scheduler_id);
|
||||
return true;
|
||||
@@ -161,15 +159,15 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
|
||||
TaskSpec *task_spec = Task_task_execution_spec(task)->Spec();
|
||||
int64_t curtime = current_time_ms();
|
||||
|
||||
CHECKM(task_spec != NULL,
|
||||
"task wait handler encounted a task with NULL spec");
|
||||
RAY_CHECK(task_spec != NULL)
|
||||
<< "task wait handler encounted a task with NULL spec";
|
||||
|
||||
// For tasks already seen by the global scheduler (spillback > 1),
|
||||
// adjust scheduled task counts for the source local scheduler.
|
||||
if (task->execution_spec->SpillbackCount() > 1) {
|
||||
auto it = state->local_schedulers.find(task->local_scheduler_id);
|
||||
// Task's previous local scheduler must be present and known.
|
||||
CHECK(it != state->local_schedulers.end());
|
||||
RAY_CHECK(it != state->local_schedulers.end());
|
||||
LocalScheduler &src_local_scheduler = it->second;
|
||||
src_local_scheduler.num_recent_tasks_sent -= 1;
|
||||
}
|
||||
@@ -178,12 +176,11 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
|
||||
|
||||
// Go through all the nodes, calculate the score for each, pick max score.
|
||||
double best_local_scheduler_score = INT32_MIN;
|
||||
CHECKM(best_local_scheduler_score < 0,
|
||||
"We might have a floating point underflow");
|
||||
std::string id_string_fromlocalsched = task->local_scheduler_id.hex();
|
||||
LOG_INFO("ct[%" PRId64 "] task from %s spillback %d", curtime,
|
||||
id_string_fromlocalsched.c_str(),
|
||||
task->execution_spec->SpillbackCount());
|
||||
RAY_CHECK(best_local_scheduler_score < 0)
|
||||
<< "We might have a floating point underflow";
|
||||
RAY_LOG(INFO) << "ct[" << curtime << "] task from "
|
||||
<< task->local_scheduler_id << " spillback "
|
||||
<< task->execution_spec->SpillbackCount();
|
||||
|
||||
// The best node to send this task.
|
||||
DBClientID best_local_scheduler_id = DBClientID::nil();
|
||||
@@ -200,14 +197,13 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
|
||||
if (task->local_scheduler_id == scheduler->id) {
|
||||
continue;
|
||||
}
|
||||
std::string id_string = scheduler->id.hex();
|
||||
task_feasible = true;
|
||||
// This node satisfies the hard capacity constraint. Calculate its score.
|
||||
double score = -1 * calculate_cost_pending(state, scheduler, task_spec);
|
||||
LOG_INFO("ct[%" PRId64 "][%s][q%d][w%d]: score %f bestscore %f\n", curtime,
|
||||
id_string.c_str(), scheduler->info.task_queue_length,
|
||||
scheduler->info.available_workers, score,
|
||||
best_local_scheduler_score);
|
||||
RAY_LOG(INFO) << "ct[" << curtime << "][" << scheduler->id << "][q"
|
||||
<< scheduler->info.task_queue_length << "][w"
|
||||
<< scheduler->info.available_workers << "]: score " << score
|
||||
<< " bestscore " << best_local_scheduler_score;
|
||||
if (score >= best_local_scheduler_score) {
|
||||
best_local_scheduler_score = score;
|
||||
best_local_scheduler_id = scheduler->id;
|
||||
@@ -215,16 +211,14 @@ bool handle_task_waiting_cost(GlobalSchedulerState *state,
|
||||
}
|
||||
|
||||
if (!task_feasible) {
|
||||
std::string id_string = Task_task_id(task).hex();
|
||||
LOG_ERROR(
|
||||
"Infeasible task. No nodes satisfy hard constraints for task = %s",
|
||||
id_string.c_str());
|
||||
RAY_LOG(ERROR) << "Infeasible task. No nodes satisfy hard constraints for "
|
||||
<< "task = " << Task_task_id(task);
|
||||
// TODO(atumanov): propagate this error to the task's driver and/or
|
||||
// cache the task in case new local schedulers satisfy it in the future.
|
||||
return false;
|
||||
}
|
||||
CHECKM(!best_local_scheduler_id.is_nil(),
|
||||
"Task is feasible, but doesn't have a local scheduler assigned.");
|
||||
RAY_CHECK(!best_local_scheduler_id.is_nil())
|
||||
<< "Task is feasible, but doesn't have a local scheduler assigned.";
|
||||
// A local scheduler ID was found, so assign the task.
|
||||
assign_task_to_local_scheduler(state, task, best_local_scheduler_id);
|
||||
return true;
|
||||
|
||||
@@ -73,12 +73,12 @@ void kill_worker(LocalSchedulerState *state,
|
||||
bool suppress_warning) {
|
||||
/* Erase the local scheduler's reference to the worker. */
|
||||
auto it = std::find(state->workers.begin(), state->workers.end(), worker);
|
||||
CHECK(it != state->workers.end());
|
||||
RAY_CHECK(it != state->workers.end());
|
||||
state->workers.erase(it);
|
||||
|
||||
/* Make sure that we removed the worker. */
|
||||
it = std::find(state->workers.begin(), state->workers.end(), worker);
|
||||
CHECK(it == state->workers.end());
|
||||
RAY_CHECK(it == state->workers.end());
|
||||
|
||||
/* Release any resources held by the worker. It's important to do this before
|
||||
* calling handle_worker_removed and handle_actor_worker_disconnect because
|
||||
@@ -121,7 +121,7 @@ void kill_worker(LocalSchedulerState *state,
|
||||
force_kill_worker, (void *) worker);
|
||||
free_worker = false;
|
||||
}
|
||||
LOG_DEBUG("Killed worker with pid %d", worker->pid);
|
||||
RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid;
|
||||
}
|
||||
|
||||
/* If this worker is still running a task and we aren't cleaning up, push an
|
||||
@@ -149,7 +149,7 @@ void kill_worker(LocalSchedulerState *state,
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUG("Killed worker with pid %d", worker->pid);
|
||||
RAY_LOG(DEBUG) << "Killed worker with pid " << worker->pid;
|
||||
if (free_worker) {
|
||||
/* Clean up the client socket after killing the worker so that the worker
|
||||
* can't receive the SIGPIPE before exiting. */
|
||||
@@ -173,7 +173,8 @@ void LocalSchedulerState_free(LocalSchedulerState *state) {
|
||||
for (auto const &worker_pid : state->child_pids) {
|
||||
kill(worker_pid, SIGKILL);
|
||||
waitpid(worker_pid, NULL, 0);
|
||||
LOG_INFO("Killed worker pid %d which hadn't started yet.", worker_pid);
|
||||
RAY_LOG(INFO) << "Killed worker pid " << worker_pid
|
||||
<< " which hadn't started yet.";
|
||||
}
|
||||
|
||||
/* Kill any registered workers. */
|
||||
@@ -236,19 +237,19 @@ void start_worker(LocalSchedulerState *state,
|
||||
bool reconstruct) {
|
||||
/* Non-actors can't be started in reconstruct mode. */
|
||||
if (actor_id.is_nil()) {
|
||||
CHECK(!reconstruct);
|
||||
RAY_CHECK(!reconstruct);
|
||||
}
|
||||
/* We can't start a worker if we don't have the path to the worker script. */
|
||||
if (state->config.start_worker_command == NULL) {
|
||||
LOG_DEBUG(
|
||||
"No valid command to start worker provided. Cannot start worker.");
|
||||
RAY_LOG(DEBUG) << "No valid command to start worker provided. Cannot start "
|
||||
<< "worker.";
|
||||
return;
|
||||
}
|
||||
/* Launch the process to create the worker. */
|
||||
pid_t pid = fork();
|
||||
if (pid != 0) {
|
||||
state->child_pids.push_back(pid);
|
||||
LOG_DEBUG("Started worker with pid %d", pid);
|
||||
RAY_LOG(DEBUG) << "Started worker with pid " << pid;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -279,7 +280,7 @@ void start_worker(LocalSchedulerState *state,
|
||||
execvp(command_vector[0], (char *const *) command_vector.data());
|
||||
|
||||
LocalSchedulerState_free(state);
|
||||
LOG_FATAL("Failed to start worker");
|
||||
RAY_LOG(FATAL) << "Failed to start worker";
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -320,7 +321,7 @@ const char **parse_command(const char *command) {
|
||||
}
|
||||
free(command_copy);
|
||||
|
||||
CHECK(num_args == i);
|
||||
RAY_CHECK(num_args == i);
|
||||
return command_args;
|
||||
}
|
||||
|
||||
@@ -345,9 +346,8 @@ LocalSchedulerState *LocalSchedulerState_init(
|
||||
state->config.start_worker_command = NULL;
|
||||
}
|
||||
if (start_worker_command == NULL) {
|
||||
LOG_WARN(
|
||||
"No valid command to start a worker provided, local scheduler will not "
|
||||
"start any workers.");
|
||||
RAY_LOG(WARNING) << "No valid command to start a worker provided, local "
|
||||
<< "scheduler will not start any workers.";
|
||||
}
|
||||
state->config.global_scheduler_exists = global_scheduler_exists;
|
||||
|
||||
@@ -443,14 +443,14 @@ void resource_sanity_checks(LocalSchedulerState *state,
|
||||
const std::string resource_name = resource_pair.first;
|
||||
double resource_quantity = resource_pair.second;
|
||||
|
||||
CHECK(state->dynamic_resources[resource_name] <=
|
||||
state->static_resources[resource_name]);
|
||||
RAY_CHECK(state->dynamic_resources[resource_name] <=
|
||||
state->static_resources[resource_name]);
|
||||
if (resource_name != std::string("CPU")) {
|
||||
CHECK(state->dynamic_resources[resource_name] >= 0);
|
||||
RAY_CHECK(state->dynamic_resources[resource_name] >= 0);
|
||||
}
|
||||
|
||||
CHECK(resource_quantity >= 0);
|
||||
CHECK(resource_quantity <= state->static_resources[resource_name]);
|
||||
RAY_CHECK(resource_quantity >= 0);
|
||||
RAY_CHECK(resource_quantity <= state->static_resources[resource_name]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -468,8 +468,8 @@ void acquire_resources(
|
||||
if (resource_name == std::string("GPU")) {
|
||||
if (resource_quantity != 0) {
|
||||
// Make sure that the worker isn't using any GPUs already.
|
||||
CHECK(worker->gpus_in_use.size() == 0);
|
||||
CHECK(state->available_gpus.size() >= resource_quantity);
|
||||
RAY_CHECK(worker->gpus_in_use.size() == 0);
|
||||
RAY_CHECK(state->available_gpus.size() >= resource_quantity);
|
||||
// Reserve GPUs for the worker.
|
||||
for (int i = 0; i < resource_quantity; i++) {
|
||||
worker->gpus_in_use.push_back(state->available_gpus.back());
|
||||
@@ -480,11 +480,11 @@ void acquire_resources(
|
||||
|
||||
// Do bookkeeping for general resource types.
|
||||
if (resource_name != std::string("CPU")) {
|
||||
CHECK(state->dynamic_resources[resource_name] >= resource_quantity);
|
||||
RAY_CHECK(state->dynamic_resources[resource_name] >= resource_quantity);
|
||||
}
|
||||
state->dynamic_resources[resource_name] -= resource_quantity;
|
||||
if (resource_name == std::string("CPU")) {
|
||||
CHECK(worker->resources_in_use[resource_name] == 0);
|
||||
RAY_CHECK(worker->resources_in_use[resource_name] == 0);
|
||||
}
|
||||
worker->resources_in_use[resource_name] += resource_quantity;
|
||||
}
|
||||
@@ -504,7 +504,7 @@ void release_resources(
|
||||
// Do some special handling for GPU resources.
|
||||
if (resource_name == std::string("GPU")) {
|
||||
if (resource_quantity != 0) {
|
||||
CHECK(resource_quantity == worker->gpus_in_use.size());
|
||||
RAY_CHECK(resource_quantity == worker->gpus_in_use.size());
|
||||
// Move the GPU IDs the worker was using back to the local scheduler.
|
||||
for (auto const &gpu_id : worker->gpus_in_use) {
|
||||
state->available_gpus.push_back(gpu_id);
|
||||
@@ -515,7 +515,7 @@ void release_resources(
|
||||
|
||||
// Do bookkeeping for general resources types.
|
||||
if (resource_name == std::string("CPU")) {
|
||||
CHECK(resource_quantity == worker->resources_in_use[resource_name]);
|
||||
RAY_CHECK(resource_quantity == worker->resources_in_use[resource_name]);
|
||||
}
|
||||
state->dynamic_resources[resource_name] += resource_quantity;
|
||||
worker->resources_in_use[resource_name] -= resource_quantity;
|
||||
@@ -542,14 +542,14 @@ void assign_task_to_worker(LocalSchedulerState *state,
|
||||
// non-CPU resources (in particular, GPUs) should already have been acquired
|
||||
// by the actor worker.
|
||||
if (!worker->actor_id.is_nil()) {
|
||||
CHECK(required_resources.size() == 1);
|
||||
CHECK(required_resources.count("CPU") == 1);
|
||||
RAY_CHECK(required_resources.size() == 1);
|
||||
RAY_CHECK(required_resources.count("CPU") == 1);
|
||||
}
|
||||
|
||||
CHECK(worker->actor_id == TaskSpec_actor_id(spec));
|
||||
RAY_CHECK(worker->actor_id == TaskSpec_actor_id(spec));
|
||||
/* Make sure the driver for this task is still alive. */
|
||||
WorkerID driver_id = TaskSpec_driver_id(spec);
|
||||
CHECK(is_driver_alive(state, driver_id));
|
||||
RAY_CHECK(is_driver_alive(state, driver_id));
|
||||
|
||||
/* Construct a flatbuffer object to send to the worker. */
|
||||
flatbuffers::FlatBufferBuilder fbb;
|
||||
@@ -563,12 +563,10 @@ void assign_task_to_worker(LocalSchedulerState *state,
|
||||
if (errno == EPIPE || errno == EBADF) {
|
||||
/* Something went wrong, so kill the worker. */
|
||||
kill_worker(state, worker, false, false);
|
||||
LOG_WARN(
|
||||
"Failed to give task to worker on fd %d. The client may have hung "
|
||||
"up.",
|
||||
worker->sock);
|
||||
RAY_LOG(WARNING) << "Failed to give task to worker on fd " << worker->sock
|
||||
<< ". The client may have hung up.";
|
||||
} else {
|
||||
LOG_FATAL("Failed to give task to client on fd %d.", worker->sock);
|
||||
RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -596,15 +594,15 @@ void finish_task(LocalSchedulerState *state, LocalSchedulerClient *worker) {
|
||||
if (worker->task_in_progress != NULL) {
|
||||
TaskSpec *spec = Task_task_execution_spec(worker->task_in_progress)->Spec();
|
||||
/* Return dynamic resources back for the task in progress. */
|
||||
CHECK(worker->resources_in_use["CPU"] ==
|
||||
TaskSpec_get_required_resource(spec, "CPU"));
|
||||
RAY_CHECK(worker->resources_in_use["CPU"] ==
|
||||
TaskSpec_get_required_resource(spec, "CPU"));
|
||||
if (worker->actor_id.is_nil()) {
|
||||
CHECK(worker->gpus_in_use.size() ==
|
||||
TaskSpec_get_required_resource(spec, "GPU"));
|
||||
RAY_CHECK(worker->gpus_in_use.size() ==
|
||||
TaskSpec_get_required_resource(spec, "GPU"));
|
||||
release_resources(state, worker, worker->resources_in_use);
|
||||
} else {
|
||||
// Actor tasks should only specify CPU requirements.
|
||||
CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
|
||||
RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
|
||||
std::unordered_map<std::string, double> cpu_resources;
|
||||
cpu_resources["CPU"] = worker->resources_in_use["CPU"];
|
||||
std::unordered_map<std::string, double> resources_to_release =
|
||||
@@ -641,8 +639,8 @@ void process_plasma_notification(event_loop *loop,
|
||||
if (!notification) {
|
||||
/* The store has closed the socket. */
|
||||
LocalSchedulerState_free(state);
|
||||
LOG_FATAL(
|
||||
"Lost connection to the plasma store, local scheduler is exiting!");
|
||||
RAY_LOG(FATAL) << "Lost connection to the plasma store, local scheduler is "
|
||||
<< "exiting!";
|
||||
}
|
||||
auto object_info = flatbuffers::GetRoot<ObjectInfo>(notification);
|
||||
ObjectID object_id = from_flatbuf(*object_info->object_id());
|
||||
@@ -773,8 +771,8 @@ void reconstruct_evicted_result_lookup_callback(ObjectID reconstruct_object_id,
|
||||
TaskID task_id,
|
||||
bool is_put,
|
||||
void *user_context) {
|
||||
CHECKM(!task_id.is_nil(),
|
||||
"No task information found for object during reconstruction");
|
||||
RAY_CHECK(!task_id.is_nil())
|
||||
<< "No task information found for object during reconstruction";
|
||||
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
|
||||
|
||||
task_table_test_and_update_callback done_callback;
|
||||
@@ -820,9 +818,8 @@ void reconstruct_failed_result_lookup_callback(ObjectID reconstruct_object_id,
|
||||
* after this lookup returns, possibly due to concurrent clients. In most
|
||||
* cases, this is okay because the initial execution is probably still
|
||||
* pending, so for now, we log a warning and suppress reconstruction. */
|
||||
LOG_WARN(
|
||||
"No task information found for object during reconstruction (no object "
|
||||
"entry yet)");
|
||||
RAY_LOG(WARNING) << "No task information found for object during "
|
||||
<< "reconstruction (no object entry yet)";
|
||||
return;
|
||||
}
|
||||
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
|
||||
@@ -852,7 +849,7 @@ void reconstruct_object_lookup_callback(
|
||||
bool never_created,
|
||||
const std::vector<DBClientID> &manager_ids,
|
||||
void *user_context) {
|
||||
LOG_DEBUG("Manager count was %lu", manager_ids.size());
|
||||
RAY_LOG(DEBUG) << "Manager count was " << manager_ids.size();
|
||||
/* Only continue reconstruction if we find that the object doesn't exist on
|
||||
* any nodes. NOTE: This codepath is not responsible for checking if the
|
||||
* object table entry is up-to-date. */
|
||||
@@ -887,14 +884,14 @@ void reconstruct_object_lookup_callback(
|
||||
|
||||
void reconstruct_object(LocalSchedulerState *state,
|
||||
ObjectID reconstruct_object_id) {
|
||||
LOG_DEBUG("Starting reconstruction");
|
||||
RAY_LOG(DEBUG) << "Starting reconstruction";
|
||||
/* If the object is locally available, no need to reconstruct. */
|
||||
if (object_locally_available(state->algorithm_state, reconstruct_object_id)) {
|
||||
return;
|
||||
}
|
||||
/* Determine if reconstruction is necessary by checking if the object exists
|
||||
* on a node. */
|
||||
CHECK(state->db != NULL);
|
||||
RAY_CHECK(state->db != NULL);
|
||||
object_table_lookup(state->db, reconstruct_object_id, NULL,
|
||||
reconstruct_object_lookup_callback, (void *) state);
|
||||
}
|
||||
@@ -912,13 +909,12 @@ void send_client_register_reply(LocalSchedulerState *state,
|
||||
if (errno == EPIPE || errno == EBADF || errno == ECONNRESET) {
|
||||
/* Something went wrong, so kill the worker. */
|
||||
kill_worker(state, worker, false, false);
|
||||
LOG_WARN(
|
||||
"Failed to give send register client reply to worker on fd %d. The "
|
||||
"client may have hung up.",
|
||||
worker->sock);
|
||||
RAY_LOG(WARNING) << "Failed to give send register client reply to worker "
|
||||
<< "on fd " << worker->sock
|
||||
<< ". The client may have hung up.";
|
||||
} else {
|
||||
LOG_FATAL("Failed to send register client reply to client on fd %d.",
|
||||
worker->sock);
|
||||
RAY_LOG(FATAL) << "Failed to send register client reply to client on fd "
|
||||
<< worker->sock;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -927,10 +923,10 @@ void handle_client_register(LocalSchedulerState *state,
|
||||
LocalSchedulerClient *worker,
|
||||
const RegisterClientRequest *message) {
|
||||
/* Make sure this worker hasn't already registered. */
|
||||
CHECK(!worker->registered);
|
||||
RAY_CHECK(!worker->registered);
|
||||
worker->registered = true;
|
||||
worker->is_worker = message->is_worker();
|
||||
CHECK(worker->client_id.is_nil());
|
||||
RAY_CHECK(worker->client_id.is_nil());
|
||||
worker->client_id = from_flatbuf(*message->client_id());
|
||||
|
||||
/* Register the worker or driver. */
|
||||
@@ -942,11 +938,11 @@ void handle_client_register(LocalSchedulerState *state,
|
||||
if (!actor_id.is_nil()) {
|
||||
/* Make sure that the local scheduler is aware that it is responsible for
|
||||
* this actor. */
|
||||
CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
/* Update the worker struct with this actor ID. */
|
||||
CHECK(worker->actor_id.is_nil());
|
||||
RAY_CHECK(worker->actor_id.is_nil());
|
||||
worker->actor_id = actor_id;
|
||||
/* Let the scheduling algorithm process the presence of this new
|
||||
* worker. */
|
||||
@@ -965,16 +961,16 @@ void handle_client_register(LocalSchedulerState *state,
|
||||
/* TODO(rkn): This means that an actor wants to register but that there
|
||||
* aren't enough GPUs for it. We should queue this request, and reply to
|
||||
* the actor when GPUs become available. */
|
||||
LOG_WARN(
|
||||
"Attempting to create an actor but there aren't enough available "
|
||||
"GPUs. We'll start the worker anyway without any GPUs, but this is "
|
||||
"incorrect behavior.");
|
||||
RAY_LOG(WARNING) << "Attempting to create an actor but there aren't "
|
||||
<< "enough available GPUs. We'll start the worker "
|
||||
<< "anyway without any GPUs, but this is incorrect "
|
||||
<< "behavior.";
|
||||
}
|
||||
}
|
||||
|
||||
/* Register worker process id with the scheduler. */
|
||||
/* Determine if this worker is one of our child processes. */
|
||||
LOG_DEBUG("PID is %d", worker->pid);
|
||||
RAY_LOG(DEBUG) << "PID is " << worker->pid;
|
||||
auto it = std::find(state->child_pids.begin(), state->child_pids.end(),
|
||||
worker->pid);
|
||||
if (it != state->child_pids.end()) {
|
||||
@@ -983,7 +979,7 @@ void handle_client_register(LocalSchedulerState *state,
|
||||
* cleanup. */
|
||||
worker->is_child = true;
|
||||
state->child_pids.erase(it);
|
||||
LOG_DEBUG("Found matching child pid %d", worker->pid);
|
||||
RAY_LOG(DEBUG) << "Found matching child pid " << worker->pid;
|
||||
}
|
||||
|
||||
/* If the worker is an actor that corresponds to a driver that has been
|
||||
@@ -1018,16 +1014,17 @@ void handle_driver_removed_callback(WorkerID driver_id, void *user_context) {
|
||||
|
||||
if (!actor_id.is_nil()) {
|
||||
/* This is an actor. */
|
||||
CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
if (state->actor_mapping[actor_id].driver_id == driver_id) {
|
||||
/* This actor was created by the removed driver, so kill the actor. */
|
||||
LOG_DEBUG("Killing an actor for a removed driver.");
|
||||
RAY_LOG(DEBUG) << "Killing an actor for a removed driver.";
|
||||
kill_worker(state, *it, false, true);
|
||||
}
|
||||
} else if (task != NULL) {
|
||||
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
|
||||
if (TaskSpec_driver_id(spec) == driver_id) {
|
||||
LOG_DEBUG("Killing a worker executing a task for a removed driver.");
|
||||
RAY_LOG(DEBUG) << "Killing a worker executing a task for a removed "
|
||||
<< "driver.";
|
||||
kill_worker(state, *it, false, true);
|
||||
}
|
||||
}
|
||||
@@ -1081,13 +1078,10 @@ void handle_get_actor_frontier(LocalSchedulerState *state,
|
||||
if (errno == EPIPE || errno == EBADF) {
|
||||
/* Something went wrong, so kill the worker. */
|
||||
kill_worker(state, worker, false, false);
|
||||
LOG_WARN(
|
||||
"Failed to return actor frontier to worker on fd %d. The client may "
|
||||
"have hung "
|
||||
"up.",
|
||||
worker->sock);
|
||||
RAY_LOG(WARNING) << "Failed to return actor frontier to worker on fd "
|
||||
<< worker->sock << ". The client may have hung up.";
|
||||
} else {
|
||||
LOG_FATAL("Failed to give task to client on fd %d.", worker->sock);
|
||||
RAY_LOG(FATAL) << "Failed to give task to client on fd " << worker->sock;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1124,7 +1118,7 @@ void process_message(event_loop *loop,
|
||||
read_vector(client_sock, &type, state->input_buffer);
|
||||
uint8_t *input = state->input_buffer.data();
|
||||
|
||||
LOG_DEBUG("New event of type %" PRId64, type);
|
||||
RAY_LOG(DEBUG) << "New event of type " << type;
|
||||
|
||||
switch (type) {
|
||||
case MessageType_SubmitTask: {
|
||||
@@ -1159,7 +1153,7 @@ void process_message(event_loop *loop,
|
||||
} break;
|
||||
case MessageType_DisconnectClient: {
|
||||
finish_task(state, worker);
|
||||
CHECK(!worker->disconnected);
|
||||
RAY_CHECK(!worker->disconnected);
|
||||
worker->disconnected = true;
|
||||
/* If the disconnected worker was not an actor, start a new worker to make
|
||||
* sure there are enough workers in the pool. */
|
||||
@@ -1217,7 +1211,7 @@ void process_message(event_loop *loop,
|
||||
reconstruct_object(state, from_flatbuf(*message->object_id()));
|
||||
} break;
|
||||
case DISCONNECT_CLIENT: {
|
||||
LOG_DEBUG("Disconnecting client on fd %d", client_sock);
|
||||
RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock;
|
||||
handle_client_disconnect(state, worker);
|
||||
} break;
|
||||
case MessageType_NotifyUnblocked: {
|
||||
@@ -1225,7 +1219,7 @@ void process_message(event_loop *loop,
|
||||
if (worker->task_in_progress != NULL) {
|
||||
/* If the worker was executing a task (i.e. non-driver), update its
|
||||
* state to not blocked. */
|
||||
CHECK(worker->is_blocked);
|
||||
RAY_CHECK(worker->is_blocked);
|
||||
worker->is_blocked = false;
|
||||
/* Lease back the CPU resources that the blocked worker needs (note that
|
||||
* it never released its GPU resources). TODO(swang): Leasing back the
|
||||
@@ -1264,16 +1258,15 @@ void process_message(event_loop *loop,
|
||||
} break;
|
||||
default:
|
||||
/* This code should be unreachable. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
/* Print a warning if this method took too long. */
|
||||
int64_t end_time = current_time_ms();
|
||||
if (end_time - start_time >
|
||||
RayConfig::instance().max_time_for_handler_milliseconds()) {
|
||||
LOG_WARN("process_message of type %" PRId64 " took %" PRId64
|
||||
" milliseconds.",
|
||||
type, end_time - start_time);
|
||||
RAY_LOG(WARNING) << "process_message of type " << type << " took "
|
||||
<< end_time - start_time << " milliseconds.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1302,7 +1295,7 @@ void new_client_connection(event_loop *loop,
|
||||
state->workers.push_back(worker);
|
||||
event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message,
|
||||
worker);
|
||||
LOG_DEBUG("new connection with fd %d", new_socket);
|
||||
RAY_LOG(DEBUG) << "new connection with fd " << new_socket;
|
||||
}
|
||||
|
||||
/* We need this code so we can clean up when we get a SIGTERM signal. */
|
||||
@@ -1310,7 +1303,7 @@ void new_client_connection(event_loop *loop,
|
||||
LocalSchedulerState *g_state = NULL;
|
||||
|
||||
void signal_handler(int signal) {
|
||||
LOG_DEBUG("Signal was %d", signal);
|
||||
RAY_LOG(DEBUG) << "Signal was " << signal;
|
||||
if (signal == SIGTERM) {
|
||||
/* NOTE(swang): This call removes the SIGTERM handler to ensure that we
|
||||
* free the local scheduler state at most once. If another SIGTERM is
|
||||
@@ -1338,7 +1331,7 @@ void handle_task_scheduled_callback(Task *original_task,
|
||||
* scheduling algorithm. */
|
||||
WorkerID driver_id = TaskSpec_driver_id(spec);
|
||||
if (!is_driver_alive(state, driver_id)) {
|
||||
LOG_DEBUG("Ignoring scheduled task for removed driver.");
|
||||
RAY_LOG(DEBUG) << "Ignoring scheduled task for removed driver.";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1382,14 +1375,14 @@ void handle_actor_creation_callback(ActorID actor_id,
|
||||
* TODO(rkn): We will need to remove this check to handle the case where the
|
||||
* corresponding publish is retried and the case in which a task that
|
||||
* creates an actor is resubmitted due to fault tolerance. */
|
||||
CHECK(state->actor_mapping.count(actor_id) == 0);
|
||||
RAY_CHECK(state->actor_mapping.count(actor_id) == 0);
|
||||
} else {
|
||||
/* In this case, the actor already exists. Check that the driver hasn't
|
||||
* changed but that the local scheduler has. */
|
||||
auto it = state->actor_mapping.find(actor_id);
|
||||
CHECK(it != state->actor_mapping.end());
|
||||
CHECK(it->second.driver_id == driver_id);
|
||||
CHECK(!(it->second.local_scheduler_id == local_scheduler_id));
|
||||
RAY_CHECK(it != state->actor_mapping.end());
|
||||
RAY_CHECK(it->second.driver_id == driver_id);
|
||||
RAY_CHECK(!(it->second.local_scheduler_id == local_scheduler_id));
|
||||
/* If the actor was previously assigned to this local scheduler, kill the
|
||||
* actor. */
|
||||
if (it->second.local_scheduler_id == get_db_client_id(state->db)) {
|
||||
@@ -1428,12 +1421,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) {
|
||||
|
||||
/* Check that the last heartbeat was not sent too long ago. */
|
||||
int64_t current_time = current_time_ms();
|
||||
CHECK(current_time >= state->previous_heartbeat_time);
|
||||
RAY_CHECK(current_time >= state->previous_heartbeat_time);
|
||||
if (current_time - state->previous_heartbeat_time >
|
||||
RayConfig::instance().num_heartbeats_timeout() *
|
||||
RayConfig::instance().heartbeat_timeout_milliseconds()) {
|
||||
LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.",
|
||||
current_time - state->previous_heartbeat_time);
|
||||
RAY_LOG(FATAL) << "The last heartbeat was sent "
|
||||
<< current_time - state->previous_heartbeat_time
|
||||
<< " milliseconds ago.";
|
||||
}
|
||||
state->previous_heartbeat_time = current_time;
|
||||
|
||||
@@ -1579,11 +1573,12 @@ int main(int argc, char *argv[]) {
|
||||
num_workers_str = optarg;
|
||||
break;
|
||||
default:
|
||||
LOG_FATAL("unknown option %c", c);
|
||||
RAY_LOG(FATAL) << "unknown option " << c;
|
||||
}
|
||||
}
|
||||
if (!static_resource_list) {
|
||||
LOG_FATAL("please specify a static resource list with the -c switch");
|
||||
RAY_LOG(FATAL) << "please specify a static resource list with the -c "
|
||||
<< "switch";
|
||||
}
|
||||
// Parse the resource list.
|
||||
std::istringstream resource_string(static_resource_list);
|
||||
@@ -1591,27 +1586,28 @@ int main(int argc, char *argv[]) {
|
||||
std::string resource_quantity;
|
||||
|
||||
while (std::getline(resource_string, resource_name, ',')) {
|
||||
CHECK(std::getline(resource_string, resource_quantity, ','));
|
||||
RAY_CHECK(std::getline(resource_string, resource_quantity, ','));
|
||||
// TODO(rkn): The line below could throw an exception. What should we do
|
||||
// about this?
|
||||
static_resource_conf[resource_name] = std::stod(resource_quantity);
|
||||
}
|
||||
|
||||
if (!scheduler_socket_name) {
|
||||
LOG_FATAL("please specify socket for incoming connections with -s switch");
|
||||
RAY_LOG(FATAL) << "please specify socket for incoming connections with "
|
||||
<< "-s switch";
|
||||
}
|
||||
if (!plasma_store_socket_name) {
|
||||
LOG_FATAL(
|
||||
"please specify socket for connecting to Plasma store with -p switch");
|
||||
RAY_LOG(FATAL) << "please specify socket for connecting to Plasma store "
|
||||
<< "with -p switch";
|
||||
}
|
||||
if (!node_ip_address) {
|
||||
LOG_FATAL("please specify the node IP address with -h switch");
|
||||
RAY_LOG(FATAL) << "please specify the node IP address with -h switch";
|
||||
}
|
||||
int num_workers = 0;
|
||||
if (num_workers_str) {
|
||||
num_workers = strtol(num_workers_str, NULL, 10);
|
||||
if (num_workers < 0) {
|
||||
LOG_FATAL("Number of workers must be nonnegative");
|
||||
RAY_LOG(FATAL) << "Number of workers must be nonnegative";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1621,9 +1617,9 @@ int main(int argc, char *argv[]) {
|
||||
/* Start the local scheduler without connecting to Redis. In this case, all
|
||||
* submitted tasks will be queued and scheduled locally. */
|
||||
if (plasma_manager_socket_name) {
|
||||
LOG_FATAL(
|
||||
"if a plasma manager socket name is provided with the -m switch, "
|
||||
"then a redis address must be provided with the -r switch");
|
||||
RAY_LOG(FATAL) << "if a plasma manager socket name is provided with the "
|
||||
<< "-m switch, then a redis address must be provided with "
|
||||
<< "the -r switch";
|
||||
}
|
||||
} else {
|
||||
char redis_primary_addr[16];
|
||||
@@ -1631,14 +1627,12 @@ int main(int argc, char *argv[]) {
|
||||
/* Parse the primary Redis address into an IP address and a port. */
|
||||
if (parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
|
||||
&redis_primary_port) == -1) {
|
||||
LOG_FATAL(
|
||||
"if a redis address is provided with the -r switch, it should be "
|
||||
"formatted like 127.0.0.1:6379");
|
||||
RAY_LOG(FATAL) << "if a redis address is provided with the -r switch, it "
|
||||
<< "should be formatted like 127.0.0.1:6379";
|
||||
}
|
||||
if (!plasma_manager_socket_name) {
|
||||
LOG_FATAL(
|
||||
"please specify socket for connecting to Plasma manager with -m "
|
||||
"switch");
|
||||
RAY_LOG(FATAL) << "please specify socket for connecting to Plasma "
|
||||
<< "manager with -m switch";
|
||||
}
|
||||
redis_addr = redis_primary_addr;
|
||||
redis_port = redis_primary_port;
|
||||
|
||||
@@ -224,25 +224,23 @@ void create_actor(SchedulingAlgorithmState *algorithm_state,
|
||||
entry.task_queue = new std::list<TaskExecutionSpec>();
|
||||
entry.worker = worker;
|
||||
entry.worker_available = false;
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0)
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 0);
|
||||
algorithm_state->local_actor_infos[actor_id] = entry;
|
||||
|
||||
/* Log some useful information about the actor that we created. */
|
||||
std::string id_string = actor_id.hex();
|
||||
LOG_DEBUG("Creating actor with ID %s.", id_string.c_str());
|
||||
RAY_LOG(DEBUG) << "Creating actor with ID " << actor_id;
|
||||
}
|
||||
|
||||
void remove_actor(SchedulingAlgorithmState *algorithm_state, ActorID actor_id) {
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
LocalActorInfo &entry =
|
||||
algorithm_state->local_actor_infos.find(actor_id)->second;
|
||||
|
||||
/* Log some useful information about the actor that we're removing. */
|
||||
std::string id_string = actor_id.hex();
|
||||
size_t count = entry.task_queue->size();
|
||||
if (count > 0) {
|
||||
LOG_WARN("Removing actor with ID %s and %lld remaining tasks.",
|
||||
id_string.c_str(), (long long) count);
|
||||
RAY_LOG(WARNING) << "Removing actor with ID " << actor_id << " and "
|
||||
<< count << " remaining tasks.";
|
||||
}
|
||||
|
||||
entry.task_queue->clear();
|
||||
@@ -266,7 +264,7 @@ bool dispatch_actor_task(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
ActorID actor_id) {
|
||||
/* Make sure this worker actually is an actor. */
|
||||
CHECK(!actor_id.is_nil());
|
||||
RAY_CHECK(!actor_id.is_nil());
|
||||
/* Return if this actor doesn't have any pending tasks. */
|
||||
if (algorithm_state->actors_with_pending_tasks.find(actor_id) ==
|
||||
algorithm_state->actors_with_pending_tasks.end()) {
|
||||
@@ -278,16 +276,16 @@ bool dispatch_actor_task(LocalSchedulerState *state,
|
||||
* scheduler. This should be rare. */
|
||||
return false;
|
||||
}
|
||||
CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
|
||||
/* Get the local actor entry for this actor. */
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
LocalActorInfo &entry =
|
||||
algorithm_state->local_actor_infos.find(actor_id)->second;
|
||||
|
||||
/* There should be some queued tasks for this actor. */
|
||||
CHECK(!entry.task_queue->empty());
|
||||
RAY_CHECK(!entry.task_queue->empty());
|
||||
/* If the worker is not available, we cannot assign a task to it. */
|
||||
if (!entry.worker_available) {
|
||||
return false;
|
||||
@@ -304,7 +302,7 @@ bool dispatch_actor_task(LocalSchedulerState *state,
|
||||
}
|
||||
|
||||
/* If there are not enough resources available, we cannot assign the task. */
|
||||
CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
|
||||
RAY_CHECK(0 == TaskSpec_get_required_resource(spec, "GPU"));
|
||||
if (!check_dynamic_resources(state, TaskSpec_get_required_resources(spec))) {
|
||||
return false;
|
||||
}
|
||||
@@ -428,7 +426,7 @@ void insert_actor_task_queue(LocalSchedulerState *state,
|
||||
* not been created yet. The correct worker struct will be inserted when the
|
||||
* actor worker connects to the local scheduler. */
|
||||
create_actor(algorithm_state, actor_id, NULL);
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
}
|
||||
LocalActorInfo &entry =
|
||||
algorithm_state->local_actor_infos.find(actor_id)->second;
|
||||
@@ -437,7 +435,7 @@ void insert_actor_task_queue(LocalSchedulerState *state,
|
||||
}
|
||||
/* Extend the frontier to include the new handle. */
|
||||
if (entry.frontier_dependencies.count(task_handle_id) == 0) {
|
||||
CHECK(task_entry.ExecutionDependencies().size() == 1);
|
||||
RAY_CHECK(task_entry.ExecutionDependencies().size() == 1);
|
||||
entry.frontier_dependencies[task_handle_id] =
|
||||
task_entry.ExecutionDependencies()[0];  /* Sole element: size() == 1 is checked above. */
|
||||
}
|
||||
@@ -448,9 +446,9 @@ void insert_actor_task_queue(LocalSchedulerState *state,
|
||||
* check will fail if the fault-tolerance mechanism resubmits a task on an
|
||||
* actor. */
|
||||
if (task_counter < entry.task_counters[task_handle_id]) {
|
||||
LOG_INFO(
|
||||
"A task that has already been executed has been resubmitted, so we "
|
||||
"are ignoring it. This should only happen during reconstruction.");
|
||||
RAY_LOG(INFO) << "A task that has already been executed has been "
|
||||
<< "resubmitted, so we are ignoring it. This should only "
|
||||
<< "happen during reconstruction.";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -466,9 +464,8 @@ void insert_actor_task_queue(LocalSchedulerState *state,
|
||||
}
|
||||
/* A duplicate task submitted by the same handle. */
|
||||
if (task_counter == TaskSpec_actor_counter(pending_task_spec)) {
|
||||
LOG_INFO(
|
||||
"A task was resubmitted, so we are ignoring it. This should only "
|
||||
"happen during reconstruction.");
|
||||
RAY_LOG(INFO) << "A task was resubmitted, so we are ignoring it. This "
|
||||
<< "should only happen during reconstruction.";
|
||||
return;
|
||||
}
|
||||
/* We found a task with the same handle ID and a greater task counter. */
|
||||
@@ -501,7 +498,7 @@ void queue_actor_task(LocalSchedulerState *state,
|
||||
bool from_global_scheduler) {
|
||||
TaskSpec *spec = execution_spec.Spec();
|
||||
ActorID actor_id = TaskSpec_actor_id(spec);
|
||||
DCHECK(!actor_id.is_nil());
|
||||
RAY_CHECK(!actor_id.is_nil());
|
||||
|
||||
/* Update the task table. */
|
||||
if (state->db != NULL) {
|
||||
@@ -567,10 +564,9 @@ void fetch_missing_dependency(
|
||||
/* TODO(swang): Local scheduler should also exit even if there are no
|
||||
* pending fetches. This could be done by subscribing to the db_client
|
||||
* table, or pinging the plasma manager in the heartbeat handler. */
|
||||
LOG_FATAL(
|
||||
"Lost connection to the plasma manager, local scheduler is "
|
||||
"exiting. Error: %s",
|
||||
arrow_status.ToString().c_str());
|
||||
RAY_LOG(FATAL) << "Lost connection to the plasma manager, local "
|
||||
<< "scheduler is exiting. Error: "
|
||||
<< arrow_status.ToString();
|
||||
}
|
||||
}
|
||||
/* Create an entry and add it to the list of active fetch requests to
|
||||
@@ -617,7 +613,7 @@ void fetch_missing_dependencies(
|
||||
}
|
||||
}
|
||||
}
|
||||
CHECK(num_missing_dependencies > 0);
|
||||
RAY_CHECK(num_missing_dependencies > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -700,7 +696,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
|
||||
LocalSchedulerState *state = (LocalSchedulerState *) context;
|
||||
/* Only try the fetches if we are connected to the object store manager. */
|
||||
if (state->plasma_conn->get_manager_fd() == -1) {
|
||||
LOG_INFO("Local scheduler is not connected to a object store manager");
|
||||
RAY_LOG(INFO)
|
||||
<< "Local scheduler is not connected to a object store manager";
|
||||
return RayConfig::instance().local_scheduler_fetch_timeout_milliseconds();
|
||||
}
|
||||
|
||||
@@ -728,10 +725,9 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
|
||||
reinterpret_cast<plasma::ObjectID *>(&object_ids[j]));
|
||||
if (!arrow_status.ok()) {
|
||||
LocalSchedulerState_free(state);
|
||||
LOG_FATAL(
|
||||
"Lost connection to the plasma manager, local scheduler is exiting. "
|
||||
"Error: %s",
|
||||
arrow_status.ToString().c_str());
|
||||
RAY_LOG(FATAL) << "Lost connection to the plasma manager, local "
|
||||
<< "scheduler is exiting. Error: "
|
||||
<< arrow_status.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -739,8 +735,8 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
|
||||
int64_t end_time = current_time_ms();
|
||||
if (end_time - start_time >
|
||||
RayConfig::instance().max_time_for_handler_milliseconds()) {
|
||||
LOG_WARN("fetch_object_timeout_handler took %" PRId64 " milliseconds.",
|
||||
end_time - start_time);
|
||||
RAY_LOG(WARNING) << "fetch_object_timeout_handler took "
|
||||
<< end_time - start_time << " milliseconds.";
|
||||
}
|
||||
|
||||
/* Wait at least local_scheduler_fetch_timeout_milliseconds before running
|
||||
@@ -795,9 +791,8 @@ int reconstruct_object_timeout_handler(event_loop *loop,
|
||||
int64_t end_time = current_time_ms();
|
||||
if (end_time - start_time >
|
||||
RayConfig::instance().max_time_for_handler_milliseconds()) {
|
||||
LOG_WARN("reconstruct_object_timeout_handler took %" PRId64
|
||||
" milliseconds.",
|
||||
end_time - start_time);
|
||||
RAY_LOG(WARNING) << "reconstruct_object_timeout_handler took "
|
||||
<< end_time - start_time << " milliseconds.";
|
||||
}
|
||||
|
||||
return RayConfig::instance()
|
||||
@@ -879,7 +874,7 @@ void dispatch_tasks(LocalSchedulerState *state,
|
||||
}
|
||||
|
||||
/* Dispatch this task to an available worker and dequeue the task. */
|
||||
LOG_DEBUG("Dispatching task");
|
||||
RAY_LOG(DEBUG) << "Dispatching task";
|
||||
/* Get the last available worker in the available worker queue. */
|
||||
LocalSchedulerClient *worker = algorithm_state->available_workers.back();
|
||||
/* Tell the available worker to execute the task. */
|
||||
@@ -1004,15 +999,15 @@ void queue_waiting_task(LocalSchedulerState *state,
|
||||
if (task_counter != task_counters.end() &&
|
||||
TaskSpec_actor_counter(spec) < task_counter->second) {
|
||||
/* If the task to queue has a lower task counter, do not queue it. */
|
||||
LOG_INFO(
|
||||
"A task that has already been executed has been resubmitted, so we "
|
||||
"are ignoring it. This should only happen during reconstruction.");
|
||||
RAY_LOG(INFO) << "A task that has already been executed has been "
|
||||
<< "resubmitted, so we are ignoring it. This should only "
|
||||
<< "happen during reconstruction.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUG("Queueing task in waiting queue");
|
||||
RAY_LOG(DEBUG) << "Queueing task in waiting queue";
|
||||
auto it = queue_task(state, algorithm_state->waiting_task_queue,
|
||||
execution_spec, from_global_scheduler);
|
||||
fetch_missing_dependencies(state, algorithm_state, it);
|
||||
@@ -1033,7 +1028,7 @@ void queue_dispatch_task(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
TaskExecutionSpec &execution_spec,
|
||||
bool from_global_scheduler) {
|
||||
LOG_DEBUG("Queueing task in dispatch queue");
|
||||
RAY_LOG(DEBUG) << "Queueing task in dispatch queue";
|
||||
TaskSpec *spec = execution_spec.Spec();
|
||||
if (TaskSpec_is_actor_task(spec)) {
|
||||
queue_actor_task(state, algorithm_state, execution_spec,
|
||||
@@ -1076,14 +1071,14 @@ void give_task_to_local_scheduler_retry(UniqueID id,
|
||||
void *user_data) {
|
||||
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
|
||||
Task *task = (Task *) user_data;
|
||||
CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
||||
RAY_CHECK(Task_state(task) == TASK_STATUS_SCHEDULED);
|
||||
|
||||
TaskExecutionSpec *execution_spec = Task_task_execution_spec(task);
|
||||
TaskSpec *spec = execution_spec->Spec();
|
||||
CHECK(TaskSpec_is_actor_task(spec));
|
||||
RAY_CHECK(TaskSpec_is_actor_task(spec));
|
||||
|
||||
ActorID actor_id = TaskSpec_actor_id(spec);
|
||||
CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
RAY_CHECK(state->actor_mapping.count(actor_id) == 1);
|
||||
|
||||
if (state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db)) {
|
||||
@@ -1113,11 +1108,11 @@ void give_task_to_local_scheduler(LocalSchedulerState *state,
|
||||
TaskExecutionSpec &execution_spec,
|
||||
DBClientID local_scheduler_id) {
|
||||
if (local_scheduler_id == get_db_client_id(state->db)) {
|
||||
LOG_WARN("Local scheduler is trying to assign a task to itself.");
|
||||
RAY_LOG(WARNING) << "Local scheduler is trying to assign a task to itself.";
|
||||
}
|
||||
CHECK(state->db != NULL);
|
||||
RAY_CHECK(state->db != NULL);
|
||||
/* Assign the task to the relevant local scheduler. */
|
||||
DCHECK(state->config.global_scheduler_exists);
|
||||
RAY_CHECK(state->config.global_scheduler_exists);
|
||||
Task *task =
|
||||
Task_alloc(execution_spec, TASK_STATUS_SCHEDULED, local_scheduler_id);
|
||||
#if !RAY_USE_NEW_GCS
|
||||
@@ -1139,11 +1134,11 @@ void give_task_to_global_scheduler_retry(UniqueID id,
|
||||
void *user_data) {
|
||||
LocalSchedulerState *state = (LocalSchedulerState *) user_context;
|
||||
Task *task = (Task *) user_data;
|
||||
CHECK(Task_state(task) == TASK_STATUS_WAITING);
|
||||
RAY_CHECK(Task_state(task) == TASK_STATUS_WAITING);
|
||||
|
||||
TaskExecutionSpec *execution_spec = Task_task_execution_spec(task);
|
||||
TaskSpec *spec = execution_spec->Spec();
|
||||
CHECK(!TaskSpec_is_actor_task(spec));
|
||||
RAY_CHECK(!TaskSpec_is_actor_task(spec));
|
||||
|
||||
give_task_to_global_scheduler(state, state->algorithm_state, *execution_spec);
|
||||
}
|
||||
@@ -1165,11 +1160,11 @@ void give_task_to_global_scheduler(LocalSchedulerState *state,
|
||||
return;
|
||||
}
|
||||
/* Pass on the task to the global scheduler. */
|
||||
DCHECK(state->config.global_scheduler_exists);
|
||||
RAY_CHECK(state->config.global_scheduler_exists);
|
||||
Task *task = Task_alloc(execution_spec, TASK_STATUS_WAITING,
|
||||
get_db_client_id(state->db));
|
||||
#if !RAY_USE_NEW_GCS
|
||||
DCHECK(state->db != NULL);
|
||||
RAY_CHECK(state->db != NULL);
|
||||
auto retryInfo = RetryInfo{
|
||||
.num_retries = 0, // This value is unused.
|
||||
.timeout = 0, // This value is unused.
|
||||
@@ -1225,7 +1220,7 @@ void handle_actor_task_submitted(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
TaskExecutionSpec &execution_spec) {
|
||||
TaskSpec *task_spec = execution_spec.Spec();
|
||||
CHECK(TaskSpec_is_actor_task(task_spec));
|
||||
RAY_CHECK(TaskSpec_is_actor_task(task_spec));
|
||||
ActorID actor_id = TaskSpec_actor_id(task_spec);
|
||||
|
||||
if (state->actor_mapping.count(actor_id) == 0) {
|
||||
@@ -1284,8 +1279,8 @@ void handle_task_scheduled(LocalSchedulerState *state,
|
||||
/* This callback handles tasks that were assigned to this local scheduler by
|
||||
* the global scheduler, so we can safely assert that there is a connection to
|
||||
* the database. */
|
||||
DCHECK(state->db != NULL);
|
||||
DCHECK(state->config.global_scheduler_exists);
|
||||
RAY_CHECK(state->db != NULL);
|
||||
RAY_CHECK(state->config.global_scheduler_exists);
|
||||
/* Push the task to the appropriate queue. */
|
||||
queue_task_locally(state, algorithm_state, execution_spec, true);
|
||||
dispatch_tasks(state, algorithm_state);
|
||||
@@ -1298,24 +1293,24 @@ void handle_actor_task_scheduled(LocalSchedulerState *state,
|
||||
/* This callback handles tasks that were assigned to this local scheduler by
|
||||
* the global scheduler or by other workers, so we can safely assert that
|
||||
* there is a connection to the database. */
|
||||
DCHECK(state->db != NULL);
|
||||
DCHECK(state->config.global_scheduler_exists);
|
||||
RAY_CHECK(state->db != NULL);
|
||||
RAY_CHECK(state->config.global_scheduler_exists);
|
||||
/* Check that the task is meant to run on an actor that this local scheduler
|
||||
* is responsible for. */
|
||||
DCHECK(TaskSpec_is_actor_task(spec));
|
||||
RAY_CHECK(TaskSpec_is_actor_task(spec));
|
||||
ActorID actor_id = TaskSpec_actor_id(spec);
|
||||
if (state->actor_mapping.count(actor_id) == 1) {
|
||||
DCHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
RAY_CHECK(state->actor_mapping[actor_id].local_scheduler_id ==
|
||||
get_db_client_id(state->db));
|
||||
} else {
|
||||
/* This means that an actor has been assigned to this local scheduler, and a
|
||||
* task for that actor has been received by this local scheduler, but this
|
||||
* local scheduler has not yet processed the notification about the actor
|
||||
* creation. This may be possible though should be very uncommon. If it does
|
||||
* happen, it's ok. */
|
||||
LOG_INFO(
|
||||
"handle_actor_task_scheduled called on local scheduler but the "
|
||||
"corresponding actor_map_entry is not present. This should be rare.");
|
||||
RAY_LOG(INFO) << "handle_actor_task_scheduled called on local scheduler "
|
||||
<< "but the corresponding actor_map_entry is not present. "
|
||||
<< "This should be rare.";
|
||||
}
|
||||
/* Push the task to the appropriate queue. */
|
||||
queue_task_locally(state, algorithm_state, execution_spec, true);
|
||||
@@ -1325,19 +1320,19 @@ void handle_actor_task_scheduled(LocalSchedulerState *state,
|
||||
void handle_worker_available(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
LocalSchedulerClient *worker) {
|
||||
CHECK(worker->task_in_progress == NULL);
|
||||
RAY_CHECK(worker->task_in_progress == NULL);
|
||||
/* Check that the worker isn't in the pool of available workers. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->available_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker));
|
||||
|
||||
/* Check that the worker isn't in the list of blocked workers. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
|
||||
/* If the worker was executing a task, it must have finished, so remove it
|
||||
* from the list of executing workers. If the worker is connecting for the
|
||||
* first time, it will not be in the list of executing workers. */
|
||||
remove_worker_from_vector(algorithm_state->executing_workers, worker);
|
||||
/* Double check that we successfully removed the worker. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
|
||||
/* Add worker to the list of available workers. */
|
||||
algorithm_state->available_workers.push_back(worker);
|
||||
@@ -1350,7 +1345,7 @@ void handle_worker_removed(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
LocalSchedulerClient *worker) {
|
||||
/* Make sure this is not an actor. */
|
||||
CHECK(worker->actor_id.is_nil());
|
||||
RAY_CHECK(worker->actor_id.is_nil());
|
||||
|
||||
/* Make sure that we remove the worker at most once. */
|
||||
int num_times_removed = 0;
|
||||
@@ -1360,24 +1355,24 @@ void handle_worker_removed(LocalSchedulerState *state,
|
||||
remove_worker_from_vector(algorithm_state->available_workers, worker);
|
||||
num_times_removed += removed_from_available;
|
||||
/* Double check that we actually removed the worker. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->available_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->available_workers, worker));
|
||||
|
||||
/* Remove the worker from executing workers, if it's there. */
|
||||
bool removed_from_executing =
|
||||
remove_worker_from_vector(algorithm_state->executing_workers, worker);
|
||||
num_times_removed += removed_from_executing;
|
||||
/* Double check that we actually removed the worker. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
|
||||
/* Remove the worker from blocked workers, if it's there. */
|
||||
bool removed_from_blocked =
|
||||
remove_worker_from_vector(algorithm_state->blocked_workers, worker);
|
||||
num_times_removed += removed_from_blocked;
|
||||
/* Double check that we actually removed the worker. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
|
||||
/* Make sure we removed the worker at most once. */
|
||||
CHECK(num_times_removed <= 1);
|
||||
RAY_CHECK(num_times_removed <= 1);
|
||||
|
||||
/* Attempt to dispatch some tasks because some resources may have freed up. */
|
||||
dispatch_all_tasks(state, algorithm_state);
|
||||
@@ -1400,7 +1395,7 @@ void handle_actor_worker_disconnect(LocalSchedulerState *state,
|
||||
|
||||
state->removed_actors.insert(worker->actor_id);
|
||||
|
||||
CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(worker->actor_id) != 0);
|
||||
LocalActorInfo &entry =
|
||||
algorithm_state->local_actor_infos.find(worker->actor_id)->second;
|
||||
for (auto &task : *entry.task_queue) {
|
||||
@@ -1421,13 +1416,13 @@ void handle_actor_worker_available(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
LocalSchedulerClient *worker) {
|
||||
ActorID actor_id = worker->actor_id;
|
||||
CHECK(!actor_id.is_nil());
|
||||
RAY_CHECK(!actor_id.is_nil());
|
||||
/* Get the actor info for this worker. */
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) == 1);
|
||||
LocalActorInfo &entry =
|
||||
algorithm_state->local_actor_infos.find(actor_id)->second;
|
||||
CHECK(worker == entry.worker);
|
||||
CHECK(!entry.worker_available);
|
||||
RAY_CHECK(worker == entry.worker);
|
||||
RAY_CHECK(!entry.worker_available);
|
||||
/* If an actor task was assigned, mark returned dummy object as locally
|
||||
* available. This is not added to the object table, so the update will be
|
||||
* invisible to other nodes. */
|
||||
@@ -1446,10 +1441,11 @@ void handle_worker_blocked(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
LocalSchedulerClient *worker) {
|
||||
/* Find the worker in the list of executing workers. */
|
||||
CHECK(remove_worker_from_vector(algorithm_state->executing_workers, worker));
|
||||
RAY_CHECK(
|
||||
remove_worker_from_vector(algorithm_state->executing_workers, worker));
|
||||
|
||||
/* Check that the worker isn't in the list of blocked workers. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->blocked_workers, worker));
|
||||
|
||||
/* Add the worker to the list of blocked workers. */
|
||||
algorithm_state->blocked_workers.push_back(worker);
|
||||
@@ -1471,10 +1467,11 @@ void handle_worker_unblocked(LocalSchedulerState *state,
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
LocalSchedulerClient *worker) {
|
||||
/* Find the worker in the list of blocked workers. */
|
||||
CHECK(remove_worker_from_vector(algorithm_state->blocked_workers, worker));
|
||||
RAY_CHECK(
|
||||
remove_worker_from_vector(algorithm_state->blocked_workers, worker));
|
||||
|
||||
/* Check that the worker isn't in the list of executing workers. */
|
||||
DCHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
RAY_CHECK(!worker_in_vector(algorithm_state->executing_workers, worker));
|
||||
|
||||
/* Add the worker to the list of executing workers. */
|
||||
algorithm_state->executing_workers.push_back(worker);
|
||||
@@ -1499,7 +1496,7 @@ void handle_object_available(LocalSchedulerState *state,
|
||||
}
|
||||
|
||||
/* Add the entry to the set of locally available objects. */
|
||||
CHECK(algorithm_state->local_objects.count(object_id) == 0);
|
||||
RAY_CHECK(algorithm_state->local_objects.count(object_id) == 0);
|
||||
algorithm_state->local_objects[object_id] = entry;
|
||||
|
||||
if (!entry.dependent_tasks.empty()) {
|
||||
@@ -1530,7 +1527,7 @@ void handle_object_removed(LocalSchedulerState *state,
|
||||
/* Remove the object from the set of locally available objects. */
|
||||
SchedulingAlgorithmState *algorithm_state = state->algorithm_state;
|
||||
|
||||
CHECK(algorithm_state->local_objects.count(removed_object_id) == 1);
|
||||
RAY_CHECK(algorithm_state->local_objects.count(removed_object_id) == 1);
|
||||
algorithm_state->local_objects.erase(removed_object_id);
|
||||
|
||||
/* Track queued tasks that were dependent on this object.
|
||||
@@ -1544,7 +1541,7 @@ void handle_object_removed(LocalSchedulerState *state,
|
||||
it != algorithm_state->dispatch_task_queue->end();) {
|
||||
if (it->DependsOn(removed_object_id)) {
|
||||
/* This task was dependent on the removed object. */
|
||||
LOG_DEBUG("Moved task from dispatch queue back to waiting queue");
|
||||
RAY_LOG(DEBUG) << "Moved task from dispatch queue back to waiting queue";
|
||||
algorithm_state->waiting_task_queue->push_back(std::move(*it));
|
||||
/* Remove the task from the dispatch queue, but do not free the task
|
||||
* spec. */
|
||||
@@ -1563,7 +1560,8 @@ void handle_object_removed(LocalSchedulerState *state,
|
||||
queue_it != actor_info.task_queue->end();) {
|
||||
if (queue_it->DependsOn(removed_object_id)) {
|
||||
/* This task was dependent on the removed object. */
|
||||
LOG_DEBUG("Moved task from actor dispatch queue back to waiting queue");
|
||||
RAY_LOG(DEBUG) << "Moved task from actor dispatch queue back to "
|
||||
<< "waiting queue";
|
||||
algorithm_state->waiting_task_queue->push_back(std::move(*queue_it));
|
||||
/* Remove the task from the dispatch queue, but do not free the task
|
||||
* spec. */
|
||||
@@ -1667,16 +1665,16 @@ int num_dispatch_tasks(SchedulingAlgorithmState *algorithm_state) {
|
||||
|
||||
void print_worker_info(const char *message,
|
||||
SchedulingAlgorithmState *algorithm_state) {
|
||||
LOG_DEBUG("%s: %lu available, %lu executing, %lu blocked", message,
|
||||
algorithm_state->available_workers.size(),
|
||||
algorithm_state->executing_workers.size(),
|
||||
algorithm_state->blocked_workers.size());
|
||||
RAY_LOG(DEBUG) << message << ": " << algorithm_state->available_workers.size()
|
||||
<< " available, " << algorithm_state->executing_workers.size()
|
||||
<< " executing, " << algorithm_state->blocked_workers.size()
|
||||
<< " blocked";
|
||||
}
|
||||
|
||||
std::unordered_map<ActorHandleID, int64_t, UniqueIDHasher>
|
||||
get_actor_task_counters(SchedulingAlgorithmState *algorithm_state,
|
||||
ActorID actor_id) {
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
return algorithm_state->local_actor_infos[actor_id].task_counters;
|
||||
}
|
||||
|
||||
@@ -1685,7 +1683,7 @@ void set_actor_task_counters(
|
||||
ActorID actor_id,
|
||||
const std::unordered_map<ActorHandleID, int64_t, UniqueIDHasher>
|
||||
&task_counters) {
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
/* Overwrite the current task counters for the actor. This is necessary
|
||||
* during reconstruction when resuming from a checkpoint so that we can
|
||||
* resume the task frontier at the time that the checkpoint was saved. */
|
||||
@@ -1731,7 +1729,7 @@ void set_actor_task_counters(
|
||||
std::unordered_map<ActorHandleID, ObjectID, UniqueIDHasher> get_actor_frontier(
|
||||
SchedulingAlgorithmState *algorithm_state,
|
||||
ActorID actor_id) {
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
return algorithm_state->local_actor_infos[actor_id].frontier_dependencies;
|
||||
}
|
||||
|
||||
@@ -1741,7 +1739,7 @@ void set_actor_frontier(
|
||||
ActorID actor_id,
|
||||
const std::unordered_map<ActorHandleID, ObjectID, UniqueIDHasher>
|
||||
&frontier_dependencies) {
|
||||
CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0);
|
||||
auto entry = algorithm_state->local_actor_infos[actor_id];
|
||||
entry.frontier_dependencies = frontier_dependencies;
|
||||
for (auto frontier_dependency : entry.frontier_dependencies) {
|
||||
|
||||
@@ -30,7 +30,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
|
||||
/* Register the process ID with the local scheduler. */
|
||||
int success = write_message(result->conn, MessageType_RegisterClientRequest,
|
||||
fbb.GetSize(), fbb.GetBufferPointer());
|
||||
CHECKM(success == 0, "Unable to register worker with local scheduler");
|
||||
RAY_CHECK(success == 0) << "Unable to register worker with local scheduler";
|
||||
|
||||
/* Wait for a confirmation from the local scheduler. */
|
||||
int64_t type;
|
||||
@@ -38,10 +38,10 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
|
||||
uint8_t *reply;
|
||||
read_message(result->conn, &type, &reply_size, &reply);
|
||||
if (type == DISCONNECT_CLIENT) {
|
||||
LOG_DEBUG("Exiting because local scheduler closed connection.");
|
||||
RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection.";
|
||||
exit(1);
|
||||
}
|
||||
CHECK(type == MessageType_RegisterClientReply);
|
||||
RAY_CHECK(type == MessageType_RegisterClientReply);
|
||||
|
||||
/* Parse the reply object. */
|
||||
auto reply_message = flatbuffers::GetRoot<RegisterClientReply>(reply);
|
||||
@@ -50,7 +50,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(
|
||||
}
|
||||
/* If the worker is not an actor, there should not be any GPU IDs here. */
|
||||
if (ActorID_equal(result->actor_id, ActorID::nil())) {
|
||||
CHECK(reply_message->gpu_ids()->size() == 0);
|
||||
RAY_CHECK(reply_message->gpu_ids()->size() == 0);
|
||||
}
|
||||
|
||||
free(reply);
|
||||
@@ -111,10 +111,10 @@ TaskSpec *local_scheduler_get_task(LocalSchedulerConnection *conn,
|
||||
* scheduler gives this client a task. */
|
||||
read_message(conn->conn, &type, &reply_size, &reply);
|
||||
if (type == DISCONNECT_CLIENT) {
|
||||
LOG_WARN("Exiting because local scheduler closed connection.");
|
||||
RAY_LOG(WARNING) << "Exiting because local scheduler closed connection.";
|
||||
exit(1);
|
||||
}
|
||||
CHECK(type == MessageType_ExecuteTask);
|
||||
RAY_CHECK(type == MessageType_ExecuteTask);
|
||||
|
||||
/* Parse the flatbuffer object. */
|
||||
auto reply_message = flatbuffers::GetRoot<GetTaskReply>(reply);
|
||||
@@ -186,10 +186,10 @@ const std::vector<uint8_t> local_scheduler_get_actor_frontier(
|
||||
std::vector<uint8_t> reply;
|
||||
read_vector(conn->conn, &type, reply);
|
||||
if (type == DISCONNECT_CLIENT) {
|
||||
LOG_DEBUG("Exiting because local scheduler closed connection.");
|
||||
RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection.";
|
||||
exit(1);
|
||||
}
|
||||
CHECK(type == MessageType_GetActorFrontierReply);
|
||||
RAY_CHECK(type == MessageType_GetActorFrontierReply);
|
||||
return reply;
|
||||
}
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ LocalSchedulerMock *LocalSchedulerMock_init(int num_workers,
|
||||
connect_ipc_sock_retry(plasma_store_socket_name, 5, 100);
|
||||
std::string local_scheduler_socket_name = bind_ipc_sock_retry(
|
||||
local_scheduler_socket_name_format, &mock->local_scheduler_fd);
|
||||
CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0);
|
||||
RAY_CHECK(mock->plasma_store_fd >= 0 && mock->local_scheduler_fd >= 0);
|
||||
|
||||
/* Construct worker command */
|
||||
std::stringstream worker_command_ss;
|
||||
@@ -411,7 +411,7 @@ TaskExecutionSpec *object_reconstruction_suppression_spec;
|
||||
void object_reconstruction_suppression_callback(ObjectID object_id,
|
||||
bool success,
|
||||
void *user_context) {
|
||||
CHECK(success);
|
||||
RAY_CHECK(success);
|
||||
/* Submit the task after adding the object to the object table. */
|
||||
LocalSchedulerConnection *worker = (LocalSchedulerConnection *) user_context;
|
||||
local_scheduler_submit(worker, *object_reconstruction_suppression_spec);
|
||||
|
||||
@@ -74,8 +74,8 @@ int handle_sigpipe(plasma::Status s, int fd) {
|
||||
} break;
|
||||
default:
|
||||
/* This code should be unreachable. */
|
||||
CHECK(0);
|
||||
LOG_FATAL("Failed to write message to client on fd %d", fd);
|
||||
RAY_CHECK(0);
|
||||
RAY_LOG(FATAL) << "Failed to write message to client on fd " << fd;
|
||||
}
|
||||
|
||||
return err;
|
||||
@@ -320,12 +320,12 @@ bool ClientConnection_request_finished(ClientConnection *client_conn) {
|
||||
std::unordered_map<ObjectID, std::vector<WaitRequest *>, UniqueIDHasher> &
|
||||
object_wait_requests_from_type(PlasmaManagerState *manager_state, int type) {
|
||||
/* We use different types of hash tables for different requests. */
|
||||
RAY_CHECK(type == plasma::PLASMA_QUERY_LOCAL ||
|
||||
type == plasma::PLASMA_QUERY_ANYWHERE);
|
||||
if (type == plasma::PLASMA_QUERY_LOCAL) {
|
||||
return manager_state->object_wait_requests_local;
|
||||
} else if (type == plasma::PLASMA_QUERY_ANYWHERE) {
|
||||
return manager_state->object_wait_requests_remote;
|
||||
} else {
|
||||
LOG_FATAL("This code should be unreachable.");
|
||||
return manager_state->object_wait_requests_remote;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -367,8 +367,8 @@ void remove_wait_request_for_object(PlasmaManagerState *manager_state,
|
||||
void remove_wait_request(PlasmaManagerState *manager_state,
|
||||
WaitRequest *wait_req) {
|
||||
if (wait_req->timer != -1) {
|
||||
CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) ==
|
||||
AE_OK);
|
||||
RAY_CHECK(event_loop_remove_timer(manager_state->loop, wait_req->timer) ==
|
||||
AE_OK);
|
||||
}
|
||||
delete wait_req;
|
||||
}
|
||||
@@ -416,9 +416,9 @@ void update_object_wait_requests(PlasmaManagerState *manager_state,
|
||||
auto object_request =
|
||||
wait_req->object_requests.find(obj_id.to_plasma_id());
|
||||
/* Check that we found the object. */
|
||||
CHECK(object_request != wait_req->object_requests.end());
|
||||
RAY_CHECK(object_request != wait_req->object_requests.end());
|
||||
/* Check that the object found was not previously known to us. */
|
||||
CHECK(object_request->second.status == ObjectStatus_Nonexistent);
|
||||
RAY_CHECK(object_request->second.status == ObjectStatus_Nonexistent);
|
||||
/* Update the found object's status to a known status. */
|
||||
object_request->second.status = status;
|
||||
|
||||
@@ -431,7 +431,7 @@ void update_object_wait_requests(PlasmaManagerState *manager_state,
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
DCHECK(static_cast<size_t>(index) == wait_requests.size());
|
||||
RAY_CHECK(static_cast<size_t>(index) == wait_requests.size());
|
||||
/* Remove the array of wait requests for this object, since no one should be
|
||||
* waiting for this object anymore. */
|
||||
object_wait_requests.erase(object_wait_requests_it);
|
||||
@@ -491,7 +491,7 @@ PlasmaManagerState *PlasmaManagerState_init(const char *store_socket_name,
|
||||
RAY_CHECK_OK(state->gcs_client.context()->AttachToEventLoop(state->loop));
|
||||
} else {
|
||||
state->db = NULL;
|
||||
LOG_DEBUG("No db connection specified");
|
||||
RAY_LOG(DEBUG) << "No db connection specified";
|
||||
}
|
||||
state->addr = manager_addr;
|
||||
state->port = manager_port;
|
||||
@@ -572,14 +572,14 @@ int write_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
|
||||
|
||||
int err;
|
||||
if (r <= 0) {
|
||||
LOG_ERROR("Write error");
|
||||
RAY_LOG(ERROR) << "Write error";
|
||||
err = errno;
|
||||
} else {
|
||||
conn->cursor += r;
|
||||
CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
|
||||
RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
|
||||
/* If we've finished writing this buffer, reset the cursor. */
|
||||
if (conn->cursor == buf->data_size + buf->metadata_size) {
|
||||
LOG_DEBUG("writing on channel %d finished", conn->fd);
|
||||
RAY_LOG(DEBUG) << "writing on channel " << conn->fd << " finished";
|
||||
ClientConnection_finish_request(conn);
|
||||
}
|
||||
err = 0;
|
||||
@@ -612,7 +612,7 @@ void send_queued_request(event_loop *loop,
|
||||
conn->fd);
|
||||
break;
|
||||
case MessageType_PlasmaDataReply:
|
||||
LOG_DEBUG("Transferring object to manager");
|
||||
RAY_LOG(DEBUG) << "Transferring object to manager";
|
||||
if (ClientConnection_request_finished(conn)) {
|
||||
/* If the cursor is not set, we haven't sent any requests for this object
|
||||
* yet, so send the initial data request. */
|
||||
@@ -627,7 +627,7 @@ void send_queued_request(event_loop *loop,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LOG_FATAL("Buffered request has unknown type.");
|
||||
RAY_LOG(FATAL) << "Buffered request has unknown type.";
|
||||
}
|
||||
|
||||
/* If the other side hung up, stop sending to this manager. */
|
||||
@@ -658,7 +658,7 @@ void send_queued_request(event_loop *loop,
|
||||
|
||||
int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
|
||||
ssize_t r, s;
|
||||
CHECK(buf != NULL);
|
||||
RAY_CHECK(buf != NULL);
|
||||
/* Try to read one buf_size at a time. */
|
||||
s = buf->data_size + buf->metadata_size - conn->cursor;
|
||||
if (s > RayConfig::instance().buf_size()) {
|
||||
@@ -668,11 +668,11 @@ int read_object_chunk(ClientConnection *conn, PlasmaRequestBuffer *buf) {
|
||||
|
||||
int err;
|
||||
if (r <= 0) {
|
||||
LOG_ERROR("Read error");
|
||||
RAY_LOG(ERROR) << "Read error";
|
||||
err = errno;
|
||||
} else {
|
||||
conn->cursor += r;
|
||||
CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
|
||||
RAY_CHECK(conn->cursor <= buf->data_size + buf->metadata_size);
|
||||
/* If the cursor is equal to the full object size, reset the cursor and
|
||||
* we're done. */
|
||||
if (conn->cursor == buf->data_size + buf->metadata_size) {
|
||||
@@ -709,7 +709,7 @@ void process_data_chunk(event_loop *loop,
|
||||
/* If we're done receiving the object, seal the object and release it. The
|
||||
* release corresponds to the call to plasma_create that occurred in
|
||||
* process_data_request. */
|
||||
LOG_DEBUG("reading on channel %d finished", data_sock);
|
||||
RAY_LOG(DEBUG) << "reading on channel " << data_sock << " finished";
|
||||
/* The following seal also triggers notification of clients for fetch or
|
||||
* wait requests, see process_object_notification. */
|
||||
ARROW_CHECK_OK(plasma_conn->Seal(buf->object_id.to_plasma_id()));
|
||||
@@ -805,9 +805,8 @@ void process_transfer_request(event_loop *loop,
|
||||
/* If the object wasn't locally available, exit immediately. If the object
|
||||
* later appears locally, the requesting plasma manager should request the
|
||||
* transfer again. */
|
||||
LOG_WARN(
|
||||
"Unable to transfer object to requesting plasma manager, object not "
|
||||
"local.");
|
||||
RAY_LOG(WARNING) << "Unable to transfer object to requesting plasma "
|
||||
<< "manager, object not local.";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -822,8 +821,8 @@ void process_transfer_request(event_loop *loop,
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(object_buffer.metadata->data() ==
|
||||
object_buffer.data->data() + object_buffer.data_size);
|
||||
RAY_CHECK(object_buffer.metadata->data() ==
|
||||
object_buffer.data->data() + object_buffer.data_size);
|
||||
PlasmaRequestBuffer *buf = new PlasmaRequestBuffer();
|
||||
buf->type = MessageType_PlasmaDataReply;
|
||||
buf->object_id = obj_id;
|
||||
@@ -875,7 +874,7 @@ void process_data_request(event_loop *loop,
|
||||
* conn->transfer_queue. */
|
||||
conn->transfer_queue.push_back(buf);
|
||||
}
|
||||
CHECK(ClientConnection_request_finished(conn));
|
||||
RAY_CHECK(ClientConnection_request_finished(conn));
|
||||
ClientConnection_start_request(conn);
|
||||
|
||||
/* Switch to reading the data from this socket, instead of listening for
|
||||
@@ -912,10 +911,10 @@ void process_data_request(event_loop *loop,
|
||||
|
||||
void request_transfer_from(PlasmaManagerState *manager_state,
|
||||
FetchRequest *fetch_req) {
|
||||
CHECK(fetch_req->manager_vector.size() > 0);
|
||||
CHECK(fetch_req->next_manager >= 0 &&
|
||||
static_cast<size_t>(fetch_req->next_manager) <
|
||||
fetch_req->manager_vector.size());
|
||||
RAY_CHECK(fetch_req->manager_vector.size() > 0);
|
||||
RAY_CHECK(fetch_req->next_manager >= 0 &&
|
||||
static_cast<size_t>(fetch_req->next_manager) <
|
||||
fetch_req->manager_vector.size());
|
||||
char addr[16];
|
||||
int port;
|
||||
parse_ip_addr_port(fetch_req->manager_vector[fetch_req->next_manager].c_str(),
|
||||
@@ -931,8 +930,8 @@ void request_transfer_from(PlasmaManagerState *manager_state,
|
||||
&temp_addr[2], &temp_addr[3]);
|
||||
if (memcmp(temp_addr, manager_state->addr, 4) == 0 &&
|
||||
port == manager_state->port) {
|
||||
LOG_FATAL(
|
||||
"This manager is attempting to request a transfer from itself.");
|
||||
RAY_LOG(FATAL) << "This manager is attempting to request a transfer from "
|
||||
<< "itself.";
|
||||
}
|
||||
|
||||
PlasmaRequestBuffer *transfer_request = new PlasmaRequestBuffer();
|
||||
@@ -975,12 +974,12 @@ int fetch_timeout_handler(event_loop *loop, timer_id id, void *context) {
|
||||
if (is_receiving_or_received(manager_state, fetch_req->object_id)) {
|
||||
// Do nothing if the object transfer is in progress or if the object
|
||||
// has already been received.
|
||||
LOG_DEBUG("fetch_timeout_handler: Object in progress or received. %s",
|
||||
fetch_req->object_id.hex().c_str());
|
||||
RAY_LOG(DEBUG) << "fetch_timeout_handler: Object in progress or "
|
||||
<< "received. " << fetch_req->object_id;
|
||||
continue;
|
||||
}
|
||||
LOG_DEBUG("fetch_timeout_handler: Object missing. %s",
|
||||
fetch_req->object_id.hex().c_str());
|
||||
RAY_LOG(DEBUG) << "fetch_timeout_handler: Object missing. "
|
||||
<< fetch_req->object_id;
|
||||
request_transfer_from(manager_state, fetch_req);
|
||||
/* If we've tried all of the managers that we know about for this object,
|
||||
* add this object to the list to resend requests for. */
|
||||
@@ -1018,13 +1017,13 @@ void request_transfer(ObjectID object_id,
|
||||
PlasmaManagerState *manager_state = (PlasmaManagerState *) context;
|
||||
/* This callback is called from object_table_subscribe, which guarantees that
|
||||
* the manager vector contains at least one element. */
|
||||
CHECK(manager_vector.size() >= 1);
|
||||
RAY_CHECK(manager_vector.size() >= 1);
|
||||
auto it = manager_state->fetch_requests.find(object_id);
|
||||
|
||||
if (is_object_local(manager_state, object_id)) {
|
||||
/* If the object is already here, then the fetch request should have been
|
||||
* removed. */
|
||||
CHECK(it == manager_state->fetch_requests.end());
|
||||
RAY_CHECK(it == manager_state->fetch_requests.end());
|
||||
return;
|
||||
}
|
||||
FetchRequest *fetch_req = it->second;
|
||||
@@ -1033,7 +1032,7 @@ void request_transfer(ObjectID object_id,
|
||||
* TODO(rkn): We actually have to remove this check to handle the rare
|
||||
* scenario where the object is transferred here and then evicted before this
|
||||
* callback gets called. */
|
||||
CHECK(fetch_req != NULL);
|
||||
RAY_CHECK(fetch_req != NULL);
|
||||
|
||||
/* Update the manager vector. */
|
||||
fetch_req->manager_vector = manager_vector;
|
||||
@@ -1055,7 +1054,7 @@ void call_request_transfer(ObjectID object_id,
|
||||
PlasmaManagerState *manager_state = (PlasmaManagerState *) context;
|
||||
/* Check that there isn't already a fetch request for this object. */
|
||||
auto it = manager_state->fetch_requests.find(object_id);
|
||||
CHECK(it == manager_state->fetch_requests.end());
|
||||
RAY_CHECK(it == manager_state->fetch_requests.end());
|
||||
/* Create a fetch request. */
|
||||
FetchRequest *fetch_req = create_fetch_request(manager_state, object_id);
|
||||
manager_state->fetch_requests[object_id] = fetch_req;
|
||||
@@ -1063,7 +1062,7 @@ void call_request_transfer(ObjectID object_id,
|
||||
}
|
||||
|
||||
void fatal_table_callback(ObjectID id, void *user_context, void *user_data) {
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
/* This callback is used by both fetch and wait. Therefore, it may have to
|
||||
@@ -1143,7 +1142,7 @@ void process_wait_request(ClientConnection *client_conn,
|
||||
plasma::ObjectRequestMap &&object_requests,
|
||||
uint64_t timeout_ms,
|
||||
int num_ready_objects) {
|
||||
CHECK(client_conn != NULL);
|
||||
RAY_CHECK(client_conn != NULL);
|
||||
PlasmaManagerState *manager_state = client_conn->manager_state;
|
||||
int num_object_requests = object_requests.size();
|
||||
|
||||
@@ -1184,7 +1183,7 @@ void process_wait_request(ClientConnection *client_conn,
|
||||
num_object_ids_to_request += 1;
|
||||
} else {
|
||||
/* This code should be unreachable. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1255,7 +1254,7 @@ void object_table_lookup_fail_callback(ObjectID object_id,
|
||||
void *user_data) {
|
||||
/* Fail for now. Later, we may want to send a ObjectStatus_Nonexistent to the
|
||||
* client. */
|
||||
CHECK(0);
|
||||
RAY_CHECK(0);
|
||||
}
|
||||
|
||||
void process_status_request(ClientConnection *client_conn,
|
||||
@@ -1299,7 +1298,7 @@ void process_delete_object_notification(PlasmaManagerState *state,
|
||||
|
||||
void log_object_hash_mismatch_error_task_callback(Task *task,
|
||||
void *user_context) {
|
||||
CHECK(task != NULL);
|
||||
RAY_CHECK(task != NULL);
|
||||
PlasmaManagerState *state = (PlasmaManagerState *) user_context;
|
||||
TaskSpec *spec = Task_task_execution_spec(task)->Spec();
|
||||
FunctionID function = TaskSpec_function(spec);
|
||||
@@ -1314,7 +1313,7 @@ void log_object_hash_mismatch_error_result_callback(ObjectID object_id,
|
||||
TaskID task_id,
|
||||
bool is_put,
|
||||
void *user_context) {
|
||||
CHECK(!task_id.is_nil());
|
||||
RAY_CHECK(!task_id.is_nil());
|
||||
PlasmaManagerState *state = (PlasmaManagerState *) user_context;
|
||||
/* Get the specification for the nondeterministic task. */
|
||||
#if !RAY_USE_NEW_GCS
|
||||
@@ -1391,8 +1390,8 @@ void process_object_notification(event_loop *loop,
|
||||
uint8_t *notification = read_message_async(loop, client_sock);
|
||||
if (notification == NULL) {
|
||||
PlasmaManagerState_free(state);
|
||||
LOG_FATAL(
|
||||
"Lost connection to the plasma store, plasma manager is exiting!");
|
||||
RAY_LOG(FATAL) << "Lost connection to the plasma store, plasma manager is "
|
||||
<< "exiting!";
|
||||
}
|
||||
auto object_info = flatbuffers::GetRoot<ObjectInfo>(notification);
|
||||
/* Add object to locally available object. */
|
||||
@@ -1437,7 +1436,7 @@ ClientConnection *ClientConnection_listen(event_loop *loop,
|
||||
ClientConnection *conn = ClientConnection_init(state, new_socket, client_key);
|
||||
|
||||
event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, conn);
|
||||
LOG_DEBUG("New client connection with fd %d", new_socket);
|
||||
RAY_LOG(DEBUG) << "New client connection with fd " << new_socket;
|
||||
return conn;
|
||||
}
|
||||
|
||||
@@ -1483,7 +1482,7 @@ void process_message(event_loop *loop,
|
||||
|
||||
switch (type) {
|
||||
case MessageType_PlasmaDataRequest: {
|
||||
LOG_DEBUG("Processing data request");
|
||||
RAY_LOG(DEBUG) << "Processing data request";
|
||||
plasma::ObjectID object_id;
|
||||
char *address;
|
||||
int port;
|
||||
@@ -1493,7 +1492,7 @@ void process_message(event_loop *loop,
|
||||
free(address);
|
||||
} break;
|
||||
case MessageType_PlasmaDataReply: {
|
||||
LOG_DEBUG("Processing data reply");
|
||||
RAY_LOG(DEBUG) << "Processing data reply";
|
||||
plasma::ObjectID object_id;
|
||||
int64_t object_size;
|
||||
int64_t metadata_size;
|
||||
@@ -1503,7 +1502,7 @@ void process_message(event_loop *loop,
|
||||
metadata_size, conn);
|
||||
} break;
|
||||
case MessageType_PlasmaFetchRequest: {
|
||||
LOG_DEBUG("Processing fetch remote");
|
||||
RAY_LOG(DEBUG) << "Processing fetch remote";
|
||||
std::vector<plasma::ObjectID> object_ids_to_fetch;
|
||||
/* TODO(pcm): process_fetch_requests allocates an array of num_objects
|
||||
* object_ids too so these should be shared in the future. */
|
||||
@@ -1512,7 +1511,7 @@ void process_message(event_loop *loop,
|
||||
object_ids_to_fetch.data());
|
||||
} break;
|
||||
case MessageType_PlasmaWaitRequest: {
|
||||
LOG_DEBUG("Processing wait");
|
||||
RAY_LOG(DEBUG) << "Processing wait";
|
||||
plasma::ObjectRequestMap object_requests;
|
||||
int64_t timeout_ms;
|
||||
int num_ready_objects;
|
||||
@@ -1522,18 +1521,18 @@ void process_message(event_loop *loop,
|
||||
num_ready_objects);
|
||||
} break;
|
||||
case MessageType_PlasmaStatusRequest: {
|
||||
LOG_DEBUG("Processing status");
|
||||
RAY_LOG(DEBUG) << "Processing status";
|
||||
plasma::ObjectID object_id;
|
||||
ARROW_CHECK_OK(plasma::ReadStatusRequest(data, length, &object_id, 1));
|
||||
process_status_request(conn, object_id);
|
||||
} break;
|
||||
case DISCONNECT_CLIENT: {
|
||||
LOG_DEBUG("Disconnecting client on fd %d", client_sock);
|
||||
RAY_LOG(DEBUG) << "Disconnecting client on fd " << client_sock;
|
||||
event_loop_remove_file(loop, client_sock);
|
||||
ClientConnection_free(conn);
|
||||
} break;
|
||||
default:
|
||||
LOG_FATAL("invalid request %" PRId64, type);
|
||||
RAY_LOG(FATAL) << "invalid request " << type;
|
||||
}
|
||||
free(data);
|
||||
|
||||
@@ -1541,9 +1540,8 @@ void process_message(event_loop *loop,
|
||||
int64_t end_time = current_time_ms();
|
||||
if (end_time - start_time >
|
||||
RayConfig::instance().max_time_for_handler_milliseconds()) {
|
||||
LOG_WARN("process_message of type %" PRId64 " took %" PRId64
|
||||
" milliseconds.",
|
||||
type, end_time - start_time);
|
||||
RAY_LOG(WARNING) << "process_message of type " << type << " took "
|
||||
<< end_time - start_time << " milliseconds.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1552,12 +1550,13 @@ int heartbeat_handler(event_loop *loop, timer_id id, void *context) {
|
||||
|
||||
/* Check that the last heartbeat was not sent too long ago. */
|
||||
int64_t current_time = current_time_ms();
|
||||
CHECK(current_time >= state->previous_heartbeat_time);
|
||||
RAY_CHECK(current_time >= state->previous_heartbeat_time);
|
||||
if (current_time - state->previous_heartbeat_time >
|
||||
RayConfig::instance().num_heartbeats_timeout() *
|
||||
RayConfig::instance().heartbeat_timeout_milliseconds()) {
|
||||
LOG_FATAL("The last heartbeat was sent %" PRId64 " milliseconds ago.",
|
||||
current_time - state->previous_heartbeat_time);
|
||||
RAY_LOG(FATAL) << "The last heartbeat was sent "
|
||||
<< current_time - state->previous_heartbeat_time
|
||||
<< " milliseconds ago.";
|
||||
}
|
||||
state->previous_heartbeat_time = current_time;
|
||||
|
||||
@@ -1583,18 +1582,18 @@ void start_server(const char *store_socket_name,
|
||||
}
|
||||
|
||||
int local_sock = bind_ipc_sock(manager_socket_name, false);
|
||||
CHECKM(local_sock >= 0, "Unable to bind local manager socket");
|
||||
RAY_CHECK(local_sock >= 0) << "Unable to bind local manager socket";
|
||||
|
||||
g_manager_state = PlasmaManagerState_init(
|
||||
store_socket_name, manager_socket_name, master_addr, port,
|
||||
redis_primary_addr, redis_primary_port);
|
||||
CHECK(g_manager_state);
|
||||
RAY_CHECK(g_manager_state);
|
||||
|
||||
CHECK(listen(remote_sock, 128) != -1);
|
||||
CHECK(listen(local_sock, 128) != -1);
|
||||
RAY_CHECK(listen(remote_sock, 128) != -1);
|
||||
RAY_CHECK(listen(local_sock, 128) != -1);
|
||||
|
||||
LOG_DEBUG("Started server connected to store %s, listening on port %d",
|
||||
store_socket_name, port);
|
||||
RAY_LOG(DEBUG) << "Started server connected to store " << store_socket_name
|
||||
<< ", listening on port " << port;
|
||||
event_loop_add_file(g_manager_state->loop, local_sock, EVENT_LOOP_READ,
|
||||
handle_new_client, g_manager_state);
|
||||
event_loop_add_file(g_manager_state->loop, remote_sock, EVENT_LOOP_READ,
|
||||
@@ -1619,7 +1618,7 @@ void start_server(const char *store_socket_name,
|
||||
|
||||
/* Report "success" to valgrind. */
|
||||
void signal_handler(int signal) {
|
||||
LOG_DEBUG("Signal was %d", signal);
|
||||
RAY_LOG(DEBUG) << "Signal was " << signal;
|
||||
if (signal == SIGTERM) {
|
||||
if (g_manager_state) {
|
||||
PlasmaManagerState_free(g_manager_state);
|
||||
@@ -1662,37 +1661,32 @@ int main(int argc, char *argv[]) {
|
||||
redis_primary_addr_port = optarg;
|
||||
break;
|
||||
default:
|
||||
LOG_FATAL("unknown option %c", c);
|
||||
RAY_LOG(FATAL) << "unknown option " << c;
|
||||
}
|
||||
}
|
||||
if (!store_socket_name) {
|
||||
LOG_FATAL(
|
||||
"please specify socket for connecting to the plasma store with -s "
|
||||
"switch");
|
||||
RAY_LOG(FATAL) << "please specify socket for connecting to the plasma "
|
||||
<< "store with -s switch";
|
||||
}
|
||||
if (!manager_socket_name) {
|
||||
LOG_FATAL(
|
||||
"please specify socket name of the manager's local socket with -m "
|
||||
"switch");
|
||||
RAY_LOG(FATAL) << "please specify socket name of the manager's local "
|
||||
<< "socket with -m switch";
|
||||
}
|
||||
if (!master_addr) {
|
||||
LOG_FATAL(
|
||||
"please specify ip address of the current host in the format "
|
||||
"123.456.789.10 with -h switch");
|
||||
RAY_LOG(FATAL) << "please specify ip address of the current host in the "
|
||||
<< "format 123.456.789.10 with -h switch";
|
||||
}
|
||||
if (port == -1) {
|
||||
LOG_FATAL(
|
||||
"please specify port the plasma manager shall listen to in the"
|
||||
"format 12345 with -p switch");
|
||||
RAY_LOG(FATAL) << "please specify port the plasma manager shall listen to "
|
||||
<< "in the format 12345 with -p switch";
|
||||
}
|
||||
char redis_primary_addr[16];
|
||||
int redis_primary_port;
|
||||
int redis_primary_port = -1;
|
||||
if (!redis_primary_addr_port ||
|
||||
parse_ip_addr_port(redis_primary_addr_port, redis_primary_addr,
|
||||
&redis_primary_port) == -1) {
|
||||
LOG_FATAL(
|
||||
"specify the primary redis address like 127.0.0.1:6379 with the -r "
|
||||
"switch");
|
||||
RAY_LOG(FATAL) << "specify the primary redis address like 127.0.0.1:6379 "
|
||||
<< "with the -r switch";
|
||||
}
|
||||
start_server(store_socket_name, manager_socket_name, master_addr, port,
|
||||
redis_primary_addr, redis_primary_port);
|
||||
|
||||
@@ -22,8 +22,8 @@ Status PlasmaReceive(int sock,
|
||||
std::vector<uint8_t> &buffer) {
|
||||
int64_t type;
|
||||
RETURN_NOT_OK(ReadMessage(sock, &type, buffer));
|
||||
ARROW_CHECK(type == message_type) << "type = " << type
|
||||
<< ", message_type = " << message_type;
|
||||
RAY_CHECK(type == message_type) << "type = " << type
|
||||
<< ", message_type = " << message_type;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ Status ReadCreateRequest(uint8_t *data,
|
||||
ObjectID *object_id,
|
||||
int64_t *data_size,
|
||||
int64_t *metadata_size) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaCreateRequest>(data);
|
||||
*data_size = message->data_size();
|
||||
*metadata_size = message->metadata_size();
|
||||
@@ -72,7 +72,7 @@ Status SendCreateReply(int sock,
|
||||
Status ReadCreateReply(uint8_t *data,
|
||||
ObjectID *object_id,
|
||||
PlasmaObject *object) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaCreateReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
object->handle.store_fd = message->plasma_object()->segment_index();
|
||||
@@ -99,10 +99,10 @@ Status SendSealRequest(int sock, ObjectID object_id, unsigned char *digest) {
|
||||
Status ReadSealRequest(uint8_t *data,
|
||||
ObjectID *object_id,
|
||||
unsigned char *digest) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaSealRequest>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
ARROW_CHECK(message->digest()->size() == kDigestSize);
|
||||
RAY_CHECK(message->digest()->size() == kDigestSize);
|
||||
memcpy(digest, message->digest()->data(), kDigestSize);
|
||||
return Status::OK();
|
||||
}
|
||||
@@ -117,7 +117,7 @@ Status SendSealReply(int sock, ObjectID object_id, int error) {
|
||||
}
|
||||
|
||||
Status ReadSealReply(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaSealReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return plasma_error_status(message->error());
|
||||
@@ -135,7 +135,7 @@ Status SendReleaseRequest(int sock, ObjectID object_id) {
|
||||
}
|
||||
|
||||
Status ReadReleaseRequest(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaReleaseRequest>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return Status::OK();
|
||||
@@ -151,7 +151,7 @@ Status SendReleaseReply(int sock, ObjectID object_id, int error) {
|
||||
}
|
||||
|
||||
Status ReadReleaseReply(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaReleaseReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return plasma_error_status(message->error());
|
||||
@@ -169,7 +169,7 @@ Status SendDeleteRequest(int sock, ObjectID object_id) {
|
||||
}
|
||||
|
||||
Status ReadDeleteRequest(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaReleaseReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return Status::OK();
|
||||
@@ -185,7 +185,7 @@ Status SendDeleteReply(int sock, ObjectID object_id, int error) {
|
||||
}
|
||||
|
||||
Status ReadDeleteReply(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaDeleteReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return plasma_error_status(message->error());
|
||||
@@ -205,7 +205,7 @@ Status SendStatusRequest(int sock, ObjectID object_ids[], int64_t num_objects) {
|
||||
Status ReadStatusRequest(uint8_t *data,
|
||||
ObjectID object_ids[],
|
||||
int64_t num_objects) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaStatusRequest>(data);
|
||||
for (int64_t i = 0; i < num_objects; ++i) {
|
||||
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
|
||||
@@ -227,7 +227,7 @@ Status SendStatusReply(int sock,
|
||||
}
|
||||
|
||||
int64_t ReadStatusReply_num_objects(uint8_t *data) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaStatusReply>(data);
|
||||
return message->object_ids()->size();
|
||||
}
|
||||
@@ -236,7 +236,7 @@ Status ReadStatusReply(uint8_t *data,
|
||||
ObjectID object_ids[],
|
||||
int object_status[],
|
||||
int64_t num_objects) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaStatusReply>(data);
|
||||
for (int64_t i = 0; i < num_objects; ++i) {
|
||||
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
|
||||
@@ -259,7 +259,7 @@ Status SendContainsRequest(int sock, ObjectID object_id) {
|
||||
}
|
||||
|
||||
Status ReadContainsRequest(uint8_t *data, ObjectID *object_id) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaContainsRequest>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
return Status::OK();
|
||||
@@ -275,7 +275,7 @@ Status SendContainsReply(int sock, ObjectID object_id, int has_object) {
|
||||
}
|
||||
|
||||
Status ReadContainsReply(uint8_t *data, ObjectID *object_id, int *has_object) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaContainsReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
*has_object = message->has_object();
|
||||
@@ -305,7 +305,7 @@ Status SendConnectReply(int sock, int64_t memory_capacity) {
|
||||
}
|
||||
|
||||
Status ReadConnectReply(uint8_t *data, int64_t *memory_capacity) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaConnectReply>(data);
|
||||
*memory_capacity = message->memory_capacity();
|
||||
return Status::OK();
|
||||
@@ -322,7 +322,7 @@ Status SendEvictRequest(int sock, int64_t num_bytes) {
|
||||
}
|
||||
|
||||
Status ReadEvictRequest(uint8_t *data, int64_t *num_bytes) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaEvictRequest>(data);
|
||||
*num_bytes = message->num_bytes();
|
||||
return Status::OK();
|
||||
@@ -337,7 +337,7 @@ Status SendEvictReply(int sock, int64_t num_bytes) {
|
||||
}
|
||||
|
||||
Status ReadEvictReply(uint8_t *data, int64_t &num_bytes) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaEvictReply>(data);
|
||||
num_bytes = message->num_bytes();
|
||||
return Status::OK();
|
||||
@@ -360,7 +360,7 @@ Status SendGetRequest(int sock,
|
||||
Status ReadGetRequest(uint8_t *data,
|
||||
std::vector<ObjectID> &object_ids,
|
||||
int64_t *timeout_ms) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaGetRequest>(data);
|
||||
for (int64_t i = 0; i < message->object_ids()->size(); ++i) {
|
||||
auto object_id = message->object_ids()->Get(i)->str();
|
||||
@@ -396,7 +396,7 @@ Status ReadGetReply(uint8_t *data,
|
||||
ObjectID object_ids[],
|
||||
PlasmaObject plasma_objects[],
|
||||
int64_t num_objects) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaGetReply>(data);
|
||||
for (int64_t i = 0; i < num_objects; ++i) {
|
||||
object_ids[i] = ObjectID::from_binary(message->object_ids()->Get(i)->str());
|
||||
@@ -425,7 +425,7 @@ Status SendFetchRequest(int sock, ObjectID object_ids[], int64_t num_objects) {
|
||||
}
|
||||
|
||||
Status ReadFetchRequest(uint8_t *data, std::vector<ObjectID> &object_ids) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaFetchRequest>(data);
|
||||
for (int64_t i = 0; i < message->object_ids()->size(); ++i) {
|
||||
object_ids.push_back(
|
||||
@@ -462,7 +462,7 @@ Status ReadWaitRequest(uint8_t *data,
|
||||
ObjectRequestMap &object_requests,
|
||||
int64_t *timeout_ms,
|
||||
int *num_ready_objects) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaWaitRequest>(data);
|
||||
*num_ready_objects = message->num_ready_objects();
|
||||
*timeout_ms = message->timeout();
|
||||
@@ -502,7 +502,7 @@ Status SendWaitReply(int sock,
|
||||
Status ReadWaitReply(uint8_t *data,
|
||||
ObjectRequest object_requests[],
|
||||
int *num_ready_objects) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
|
||||
auto message = flatbuffers::GetRoot<PlasmaWaitReply>(data);
|
||||
*num_ready_objects = message->num_ready_objects();
|
||||
@@ -543,9 +543,9 @@ Status ReadDataRequest(uint8_t *data,
|
||||
ObjectID *object_id,
|
||||
char **address,
|
||||
int *port) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaDataRequest>(data);
|
||||
DCHECK(message->object_id()->size() == sizeof(ObjectID));
|
||||
RAY_DCHECK(message->object_id()->size() == sizeof(ObjectID));
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
*address = strdup(message->address()->c_str());
|
||||
*port = message->port();
|
||||
@@ -568,7 +568,7 @@ Status ReadDataReply(uint8_t *data,
|
||||
ObjectID *object_id,
|
||||
int64_t *object_size,
|
||||
int64_t *metadata_size) {
|
||||
DCHECK(data);
|
||||
RAY_DCHECK(data);
|
||||
auto message = flatbuffers::GetRoot<PlasmaDataReply>(data);
|
||||
*object_id = ObjectID::from_binary(message->object_id()->str());
|
||||
*object_size = (int64_t) message->object_size();
|
||||
|
||||
@@ -31,7 +31,7 @@ void wait_for_pollin(int fd) {
|
||||
poll_list[0].fd = fd;
|
||||
poll_list[0].events = POLLIN;
|
||||
int retval = poll(poll_list, (unsigned long) 1, -1);
|
||||
CHECK(retval > 0);
|
||||
RAY_CHECK(retval > 0);
|
||||
}
|
||||
|
||||
int test_done_handler(event_loop *loop, timer_id id, void *context) {
|
||||
@@ -66,7 +66,7 @@ plasma_mock *init_plasma_mock(plasma_mock *remote_mock) {
|
||||
std::string manager_socket_name = bind_ipc_sock_retry(
|
||||
plasma_manager_socket_name_format, &mock->manager_local_fd);
|
||||
|
||||
CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0);
|
||||
RAY_CHECK(mock->manager_local_fd >= 0 && mock->local_store >= 0);
|
||||
|
||||
mock->state = PlasmaManagerState_init(plasma_store_socket_name,
|
||||
manager_socket_name.c_str(),
|
||||
@@ -252,7 +252,7 @@ TEST object_notifications_test(void) {
|
||||
int fd[2];
|
||||
socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
|
||||
int flags = fcntl(fd[1], F_GETFL, 0);
|
||||
CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
|
||||
RAY_CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
|
||||
|
||||
ObjectID object_id = ObjectID::from_random();
|
||||
ObjectInfoT info;
|
||||
|
||||
@@ -78,4 +78,9 @@ bool UniqueID::operator==(const UniqueID &rhs) const {
|
||||
return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const UniqueID &id) {
|
||||
os << id.hex();
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace ray
|
||||
|
||||
@@ -44,6 +44,8 @@ struct UniqueIDHasher {
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const UniqueID &id);
|
||||
|
||||
typedef UniqueID TaskID;
|
||||
typedef UniqueID JobID;
|
||||
typedef UniqueID ObjectID;
|
||||
|
||||
@@ -34,12 +34,26 @@ namespace ray {
|
||||
<< __FILE__ << __LINE__ \
|
||||
<< " Check failed: " #condition " "
|
||||
|
||||
#ifdef NDEBUG
|
||||
|
||||
#define RAY_DCHECK(condition) \
|
||||
RAY_IGNORE_EXPR(condition) \
|
||||
while (false) \
|
||||
::ray::internal::NullLog()
|
||||
|
||||
#else
|
||||
|
||||
#define RAY_DCHECK(condition) RAY_CHECK(condition)
|
||||
|
||||
#endif // NDEBUG
|
||||
|
||||
namespace internal {
|
||||
|
||||
class NullLog {
|
||||
public:
|
||||
template <class T>
|
||||
NullLog &operator<<(const T &t) {
|
||||
RAY_IGNORE_EXPR(t);
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user