Various performance improvements (#24)

* switch from array to linked list for photon queue

* performance optimizations

* fix tests

* various fixes
This commit is contained in:
Philipp Moritz
2016-11-04 00:41:20 -07:00
committed by Robert Nishihara
parent 1e2b3ceac9
commit 90a2aa4bf7
8 changed files with 130 additions and 52 deletions
+3 -3
View File
@@ -9,9 +9,9 @@ import ray
parser = argparse.ArgumentParser(description="Parse addresses for the worker to connect to.")
parser.add_argument("--node-ip-address", required=True, type=str, help="the ip address of the worker's node")
parser.add_argument("--redis-port", required=True, type=int, help="the port to use for Redis")
parser.add_argument("--object-store-name", type=str, help="the object store's name")
parser.add_argument("--object-store-manager-name", type=str, help="the object store manager's name")
parser.add_argument("--local-scheduler-name", type=str, help="the local scheduler's name")
parser.add_argument("--object-store-name", required=True, type=str, help="the object store's name")
parser.add_argument("--object-store-manager-name", required=True, type=str, help="the object store manager's name")
parser.add_argument("--local-scheduler-name", required=True, type=str, help="the local scheduler's name")
if __name__ == "__main__":
args = parser.parse_args()
+26 -9
View File
@@ -3,6 +3,7 @@ from __future__ import print_function
import os
import sys
import time
import signal
import subprocess
import string
import random
@@ -16,6 +17,11 @@ import plasma
# mode.
all_processes = []
# True if processes are run in the valgrind profiler.
RUN_PHOTON_PROFILER = False
RUN_PLASMA_MANAGER_PROFILER = False
RUN_PLASMA_STORE_PROFILER = False
def address(host, port):
return host + ":" + str(port)
@@ -39,6 +45,9 @@ def cleanup():
for p in all_processes[::-1]:
if p.poll() is not None: # process has already terminated
continue
if RUN_PHOTON_PROFILER or RUN_PLASMA_MANAGER_PROFILER or RUN_PLASMA_STORE_PROFILER:
os.kill(p.pid, signal.SIGINT) # Give process signal to write profiler data.
time.sleep(0.1) # Wait for profiling data to be written.
p.kill()
time.sleep(0.05) # is this necessary?
if p.poll() is not None:
@@ -54,21 +63,25 @@ def cleanup():
print("Ray did not shut down properly.")
all_processes = []
def start_redis(port):
def start_redis(port, cleanup=True):
redis_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../common/thirdparty/redis-3.2.3/src/redis-server")
p = subprocess.Popen([redis_filepath, "--port", str(port), "--loglevel", "warning"])
if cleanup:
all_processes.append(p)
def start_local_scheduler(redis_address, plasma_store_name):
def start_local_scheduler(redis_address, plasma_store_name, cleanup=True):
local_scheduler_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../photon/build/photon_scheduler")
if RUN_PHOTON_PROFILER:
local_scheduler_prefix = ["valgrind", "--tool=callgrind", local_scheduler_filepath]
else:
local_scheduler_prefix = [local_scheduler_filepath]
local_scheduler_name = "/tmp/scheduler{}".format(random_name())
p = subprocess.Popen([local_scheduler_filepath, "-s", local_scheduler_name, "-r", redis_address, "-p", plasma_store_name])
p = subprocess.Popen(local_scheduler_prefix + ["-s", local_scheduler_name, "-r", redis_address, "-p", plasma_store_name])
if cleanup:
all_processes.append(p)
return local_scheduler_name
def start_objstore(node_ip_address, redis_address, cleanup):
def start_objstore(node_ip_address, redis_address, cleanup=True):
"""This method starts an object store process.
Args:
@@ -77,12 +90,16 @@ def start_objstore(node_ip_address, redis_address, cleanup):
this process will be killed by serices.cleanup() when the Python process
that imported services exits.
"""
plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../plasma/build/plasma_store")
plasma_store_filepath = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../plasma/build/plasma_store")
if RUN_PLASMA_STORE_PROFILER:
plasma_store_prefix = ["valgrind", "--tool=callgrind", plasma_store_filepath]
else:
plasma_store_prefix = [plasma_store_filepath]
store_name = "/tmp/ray_plasma_store{}".format(random_name())
p1 = subprocess.Popen([plasma_store_executable, "-s", store_name])
p1 = subprocess.Popen(plasma_store_prefix + ["-s", store_name])
manager_name = "/tmp/ray_plasma_manager{}".format(random_name())
p2, manager_port = plasma.start_plasma_manager(store_name, manager_name, redis_address)
p2, manager_port = plasma.start_plasma_manager(store_name, manager_name, redis_address, run_profiler=RUN_PLASMA_MANAGER_PROFILER)
if cleanup:
all_processes.append(p1)
@@ -131,13 +148,13 @@ def start_ray_local(node_ip_address="127.0.0.1", num_workers=0, worker_path=None
# Start Redis.
redis_port = new_port()
redis_address = address(node_ip_address, redis_port)
start_redis(redis_port)
start_redis(redis_port, cleanup=True)
time.sleep(0.1)
# Start Plasma.
object_store_name, object_store_manager_name, object_store_manager_port = start_objstore(node_ip_address, redis_address, cleanup=True)
# Start the local scheduler.
time.sleep(0.1)
local_scheduler_name = start_local_scheduler(redis_address, object_store_name)
local_scheduler_name = start_local_scheduler(redis_address, object_store_name, cleanup=True)
time.sleep(0.2)
# Aggregate the address information together.
address_info = {"node_ip_address": node_ip_address,
+5
View File
@@ -35,6 +35,11 @@
#define CHECK(COND) CHECKM(COND, "")
#define DCHECK(COND)
#ifdef RAY_COMMON_DEBUG
CHECK(COND)
#endif
/* These are exit codes for common errors that can occur in Ray components. */
#define EXIT_COULD_NOT_BIND_PORT -2
+48 -8
View File
@@ -266,16 +266,15 @@ int read_bytes(int fd, uint8_t *cursor, size_t length) {
*
* @param fd The file descriptor to read from. It can be non-blocking.
* @param type The type of the message that is read will be written at this
address. If there was an error while reading, this will be
DISCONNECT_CLIENT.
* address. If there was an error while reading, this will be
* DISCONNECT_CLIENT.
* @param length The size in bytes of the message that is read will be written
at this address. This size does not include the bytes used to encode
the type and length. If there was an error while reading, this will
be 0.
* at this address. This size does not include the bytes used to encode
* the type and length. If there was an error while reading, this will
* be 0.
* @param bytes The address at which to write the pointer to the bytes that are
read and allocated by this function. If there was an error while
reading, this will be NULL.
* read and allocated by this function. If there was an error while
* reading, this will be NULL.
* @return Void.
*/
void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
@@ -303,6 +302,47 @@ disconnected:
return;
}
/**
* Read a sequence of bytes written by write_message from a file descriptor.
* This does not allocate space for the message if the provided buffer is
* large enough and can therefore often avoid allocations.
*
* @note The caller must create and free the buffer.
*
* @param fd The file descriptor to read from. It can be non-blocking.
* @param type The type of the message that is read will be written at this
* address. If there was an error while reading, this will be
* DISCONNECT_CLIENT.
* @param buffer The array the message will be written to. If it is not
* large enough to hold the message, it will be enlarged by read_buffer.
* @return Number of bytes of the message that were read. This size does not
* include the bytes used to encode the type and length. If there was
* an error while reading, this will be 0.
*/
int64_t read_buffer(int fd, int64_t *type, UT_array *buffer) {
int64_t length;
int closed = read_bytes(fd, (uint8_t *) type, sizeof(int64_t));
if (closed) {
goto disconnected;
}
closed = read_bytes(fd, (uint8_t *) &length, sizeof(int64_t));
if (closed) {
goto disconnected;
}
if (length > utarray_len(buffer)) {
utarray_resize(buffer, length);
}
closed = read_bytes(fd, (uint8_t *) utarray_front(buffer), length);
if (closed) {
goto disconnected;
}
return length;
disconnected:
/* Handle the case in which the socket is closed. */
*type = DISCONNECT_CLIENT;
return 0;
}
/* Write a null-terminated string to a file descriptor. */
void write_log_message(int fd, char *message) {
/* Account for the \0 at the end of the string. */
+4 -1
View File
@@ -4,6 +4,8 @@
#include <stdbool.h>
#include <stdint.h>
#include "utarray.h"
enum common_message_type {
/** Disconnect a client. */
DISCONNECT_CLIENT,
@@ -21,10 +23,11 @@ int connect_ipc_sock(const char *socket_pathname);
int accept_client(int socket_fd);
/* Reading and writing data */
/* Reading and writing data. */
int write_message(int fd, int64_t type, int64_t length, uint8_t *bytes);
void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes);
int64_t read_buffer(int fd, int64_t *type, UT_array *buffer);
void write_log_message(int fd, char *message);
void write_formatted_log_message(int fd, const char *format, ...);
+27 -19
View File
@@ -2,11 +2,18 @@
#include <stdbool.h>
#include "utarray.h"
#include "utlist.h"
#include "state/task_log.h"
#include "photon.h"
#include "photon_scheduler.h"
typedef struct task_queue_entry {
task_instance *task;
struct task_queue_entry *prev;
struct task_queue_entry *next;
} task_queue_entry;
typedef struct {
/* Object id of this object. */
object_id object_id;
@@ -17,7 +24,7 @@ typedef struct {
/** Part of the photon state that is maintained by the scheduling algorithm. */
struct scheduler_state {
/** An array of pointers to tasks that are waiting to be scheduled. */
UT_array *task_queue;
task_queue_entry *task_queue;
/** An array of worker indices corresponding to clients that are
* waiting for tasks. */
UT_array *available_workers;
@@ -31,21 +38,21 @@ scheduler_state *make_scheduler_state(void) {
/* Initialize an empty hash map for the cache of local available objects. */
state->local_objects = NULL;
/* Initialize the local data structures used for queuing tasks and workers. */
utarray_new(state->task_queue, &task_ptr_icd);
state->task_queue = NULL;
utarray_new(state->available_workers, &ut_int_icd);
return state;
}
void free_scheduler_state(scheduler_state *s) {
for (int i = 0; i < utarray_len(s->task_queue); ++i) {
task_instance **instance =
(task_instance **) utarray_eltptr(s->task_queue, i);
free(*instance);
task_queue_entry *elt, *tmp1;
DL_FOREACH_SAFE(s->task_queue, elt, tmp1) {
DL_DELETE(s->task_queue, elt);
free(elt->task);
free(elt);
}
utarray_free(s->task_queue);
utarray_free(s->available_workers);
available_object *available_obj, *tmp;
HASH_ITER(handle, s->local_objects, available_obj, tmp) {
available_object *available_obj, *tmp2;
HASH_ITER(handle, s->local_objects, available_obj, tmp2) {
HASH_DELETE(handle, s->local_objects, available_obj);
free(available_obj);
}
@@ -90,14 +97,12 @@ bool can_run(scheduler_state *s, task_spec *task) {
int find_and_schedule_task_if_possible(scheduler_info *info,
scheduler_state *state,
int worker_index) {
task_queue_entry *elt, *tmp;
task_spec *spec;
int found_task_to_schedule = 0;
/* Find the first task whose dependencies are available locally. */
task_spec *spec;
task_instance **task;
int i = 0;
for (; i < utarray_len(state->task_queue); ++i) {
task = (task_instance **) utarray_eltptr(state->task_queue, i);
spec = task_instance_task_spec(*task);
DL_FOREACH_SAFE(state->task_queue, elt, tmp) {
spec = task_instance_task_spec(elt->task);
if (can_run(state, spec)) {
found_task_to_schedule = 1;
break;
@@ -108,8 +113,9 @@ int find_and_schedule_task_if_possible(scheduler_info *info,
* worker. */
assign_task_to_worker(info, spec, worker_index);
/* Update the task queue data structure and free the task. */
free(*task);
utarray_erase(state->task_queue, i, 1);
DL_DELETE(state->task_queue, elt);
free(elt->task);
free(elt);
}
return found_task_to_schedule;
}
@@ -138,7 +144,9 @@ void handle_task_submitted(scheduler_info *info,
} else {
/* Add the task to the task queue. This passes ownership of the task queue.
* And the task will be freed when it is assigned to a worker. */
utarray_push_back(s->task_queue, &instance);
task_queue_entry *elt = malloc(sizeof(task_queue_entry));
elt->task = instance;
DL_APPEND(s->task_queue, elt);
}
/* Submit the task to redis. */
/* TODO(swang): We should set these values in a config file somewhere. */
@@ -164,7 +172,7 @@ void handle_worker_available(scheduler_info *info,
if (!scheduled_task) {
for (int *p = (int *) utarray_front(state->available_workers); p != NULL;
p = (int *) utarray_next(state->available_workers, p)) {
CHECK(*p != worker_index);
DCHECK(*p != worker_index);
}
/* Add client_sock to a list of available workers. This struct will be freed
* when a task is assigned to this worker. */
+14 -11
View File
@@ -32,18 +32,23 @@ typedef struct {
} worker_index;
struct local_scheduler_state {
/* The local scheduler event loop. */
/** The local scheduler event loop. */
event_loop *loop;
/* The Plasma client. */
/** The Plasma client. */
plasma_connection *plasma_conn;
/* Association between client socket and worker index. */
/** Association between client socket and worker index. */
worker_index *worker_index;
/* Info that is exposed to the scheduling algorithm. */
/** Info that is exposed to the scheduling algorithm. */
scheduler_info *scheduler_info;
/* State for the scheduling algorithm. */
/** State for the scheduling algorithm. */
scheduler_state *scheduler_state;
/** Input buffer, used for reading input in process_message to avoid
* allocation for each call to process_message. */
UT_array *input_buffer;
};
UT_icd byte_icd = {sizeof(uint8_t), NULL, NULL, NULL};
local_scheduler_state *init_local_scheduler(event_loop *loop,
const char *redis_addr,
int redis_port,
@@ -68,6 +73,7 @@ local_scheduler_state *init_local_scheduler(event_loop *loop,
db_attach(state->scheduler_info->db, loop);
/* Add scheduler state. */
state->scheduler_state = make_scheduler_state();
utarray_new(state->input_buffer, &byte_icd);
return state;
};
@@ -82,6 +88,7 @@ void free_local_scheduler(local_scheduler_state *s) {
utarray_free(s->scheduler_info->workers);
free(s->scheduler_info);
free_scheduler_state(s->scheduler_state);
utarray_free(s->input_buffer);
event_loop_destroy(s->loop);
free(s);
}
@@ -109,17 +116,14 @@ void process_message(event_loop *loop, int client_sock, void *context,
int events) {
local_scheduler_state *s = context;
uint8_t *message;
int64_t type;
int64_t length;
read_message(client_sock, &type, &length, &message);
read_buffer(client_sock, &type, s->input_buffer);
LOG_DEBUG("New event of type %" PRId64, type);
switch (type) {
case SUBMIT_TASK: {
task_spec *spec = (task_spec *) message;
CHECK(task_size(spec) == length);
task_spec *spec = (task_spec *) utarray_front(s->input_buffer);
handle_task_submitted(s->scheduler_info, s->scheduler_state, spec);
} break;
case TASK_DONE: {
@@ -140,7 +144,6 @@ void process_message(event_loop *loop, int client_sock, void *context,
/* This code should be unreachable. */
CHECK(0);
}
free(message);
}
void new_client_connection(event_loop *loop, int listener_sock, void *context,
+3 -1
View File
@@ -290,7 +290,7 @@ class PlasmaClient(object):
break
return message_data
def start_plasma_manager(store_name, manager_name, redis_address, num_retries=5, use_valgrind=False):
def start_plasma_manager(store_name, manager_name, redis_address, num_retries=5, use_valgrind=False, run_profiler=False):
"""Start a plasma manager and return the ports it listens on.
Args:
@@ -324,6 +324,8 @@ def start_plasma_manager(store_name, manager_name, redis_address, num_retries=5,
"-r", redis_address]
if use_valgrind:
process = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--show-leak-kinds=all", "--error-exitcode=1"] + command)
elif run_profiler:
process = subprocess.Popen(["valgrind", "--tool=callgrind"] + command)
else:
process = subprocess.Popen(command)
# This sleep is critical. If the plasma_manager fails to start because the