From 68056dafb146053e4cdf2f11c2cf05d68a641914 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sat, 13 Aug 2016 15:20:19 -0700
Subject: [PATCH 01/91] Initial commit

---
 LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..8dada3eda
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

From 81bfd8c201c1c7e678580a6cddbb182aa21ba052 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sat, 13 Aug 2016 15:24:01 -0700
Subject: [PATCH 02/91] add readme

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 000000000..eedccd357
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Plasma
+
+Plasma is an experimental in-memory object manager. It is under development and
+not ready for general use.

From 97087b079177f6617d242d954fbccac208b369b5 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sat, 13 Aug 2016 17:11:11 -0700
Subject: [PATCH 03/91] initial version of object store

---
 .gitignore          |    2 +
 Makefile            |   14 +
 build/.gitkeep      |    0
 src/example.c       |   45 ++
 src/fling.c         |   73 +++
 src/fling.h         |   35 ++
 src/plasma.h        |   61 +++
 src/plasma_client.c |   84 ++++
 src/plasma_store.c  |  275 +++++++++++
 src/uthash.h        | 1074 +++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 1663 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 build/.gitkeep
 create mode 100644 src/example.c
 create mode 100644 src/fling.c
 create mode 100644 src/fling.h
 create mode 100644 src/plasma.h
 create mode 100644 src/plasma_client.c
 create mode 100644 src/plasma_store.c
 create mode 100644 src/uthash.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..faf17a085
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+build/*
+*~
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..32cb50a61
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+CC = gcc
+CFLAGS = -g -Wall
+BUILD = build
+
+all: $(BUILD)/plasma_store $(BUILD)/example
+
+clean:
+	rm $(BUILD)/*
+
+$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c
+	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_store.c src/fling.c -o $(BUILD)/plasma_store
+
+$(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c
+	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/example.c src/fling.c -o $(BUILD)/example
diff --git a/build/.gitkeep b/build/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/example.c b/src/example.c
new file mode 100644
index 000000000..20ecd7b64
--- /dev/null
+++ b/src/example.c
@@ -0,0 +1,45 @@
+// A simple example on how to use the plasma store
+// 
+// Can be called in the following way:
+// 
+// cd build
+// ./plasma_store -s /tmp/plasma_socket
+// ./example -s /tmp/plasma_socket -g
+// ./example -s /tmp/plasma_socket -c -f
+
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "plasma.h"
+
+int main(int argc, char *argv[]) {
+  int conn = -1;
+  int c;
+  plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+                   255, 255, 255, 255, 255, 255, 255, 255}};
+  while ((c = getopt(argc, argv, "s:cfg")) != -1) {
+    switch (c) {
+    case 's':
+      conn = plasma_store_connect(optarg);
+      break;
+    case 'c':
+      assert(conn != -1);
+      plasma_create(conn, id, 100);
+      break;
+    case 'f':
+      assert(conn != -1);
+      plasma_seal(conn, id);
+      break;
+    case 'g':
+      assert(conn != -1); // like -c and -f, -g needs a preceding -s
+      plasma_get(conn, id);
+      break;
+    default:
+      abort();
+    }
+  }
+  assert(conn != -1);
+  close(conn);
+}
diff --git a/src/fling.c b/src/fling.c
new file mode 100644
index 000000000..d614a051d
--- /dev/null
+++ b/src/fling.c
@@ -0,0 +1,73 @@
+#include "fling.h"
+
+void init_msg(struct msghdr *msg, struct iovec *iov,
+	      char *buf, size_t buf_len) {
+  iov->iov_base = buf;
+  iov->iov_len = 1;
+
+  msg->msg_iov = iov;
+  msg->msg_iovlen = 1;
+  msg->msg_control = buf;
+  msg->msg_controllen = buf_len;
+  msg->msg_name = NULL;
+  msg->msg_namelen = 0;
+}
+
+// Send "fd" as SCM_RIGHTS ancillary data over "conn", then send "payload".
+int send_fd(int conn, int fd, const char* payload, int size) {
+  struct msghdr msg;
+  struct iovec iov;
+  char buf[CMSG_SPACE(sizeof(int))];
+  init_msg(&msg, &iov, buf, sizeof(buf));
+
+  struct cmsghdr *header = CMSG_FIRSTHDR(&msg);
+  header->cmsg_level = SOL_SOCKET;
+  header->cmsg_type = SCM_RIGHTS;
+  header->cmsg_len = CMSG_LEN(sizeof(int));
+  *(int *)CMSG_DATA(header) = fd;
+
+  // Returns 0 only if both sends succeed; nonzero if either one fails.
+  return sendmsg(conn, &msg, 0) == -1 || send(conn, payload, size, 0) == -1;
+}
+
+int recv_fd(int conn, char* payload, int size) {
+  struct msghdr msg;
+  struct iovec iov;
+  char buf[CMSG_SPACE(sizeof(int))];
+  init_msg(&msg, &iov, buf, sizeof(buf));
+
+  if (recvmsg(conn, &msg, 0) == -1)
+    return -1;
+
+  int found_fd = -1;
+  int oh_noes = 0;
+  for (struct cmsghdr *header = CMSG_FIRSTHDR(&msg); header != NULL; header = CMSG_NXTHDR(&msg, header))
+    if (header->cmsg_level == SOL_SOCKET && header->cmsg_type == SCM_RIGHTS) {
+      int count = (header->cmsg_len - (CMSG_DATA(header) - (unsigned char *)header)) / sizeof(int);
+      for (int i = 0; i < count; ++i) {
+        int fd = ((int *)CMSG_DATA(header))[i];
+        if (found_fd == -1) {
+          found_fd = fd;
+        } else {
+          close(fd);
+          oh_noes = 1;
+        }
+      }
+    }
+
+  // More than one fd was sent: close them all and report a bad message.
+  if (oh_noes) {
+    close(found_fd);
+    errno = EBADMSG;
+    return -1;
+  }
+
+  if (recv(conn, payload, size, 0) < 0) {
+    int recv_errno = errno; // close() below may overwrite errno
+    if (found_fd != -1)
+      close(found_fd); // do not leak the descriptor we already received
+    errno = recv_errno;
+    return -1;
+  }
+  return found_fd;
+}
diff --git a/src/fling.h b/src/fling.h
new file mode 100644
index 000000000..f6dc8a268
--- /dev/null
+++ b/src/fling.h
@@ -0,0 +1,35 @@
+// FLING: Exchanging file descriptors over sockets
+//
+// This is a little library for sending file descriptors over a socket
+// between processes. The reason for doing that (as opposed to using
+// filenames to share the files) is so (a) no files remain in the
+// filesystem after all the processes terminate, (b) to make sure that
+// there are no name collisions and (c) to be able to control who has
+// access to the data.
+//
+// Most of the code is from https://github.com/sharvil/flingfd
+
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+// This is necessary for Mac OS X, see http://www.apuebook.com/faqs2e.html (10).
+#if !defined(CMSG_SPACE) && !defined(CMSG_LEN)
+  #define CMSG_SPACE(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(len))
+  #define CMSG_LEN(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (len))
+#endif
+
+void init_msg(struct msghdr *msg, struct iovec *iov,
+              char *buf, size_t buf_len);
+
+// Send a file descriptor "fd" and a payload "payload" of size "size"
+// over the socket "conn". Return 0 on success.
+int send_fd(int conn, int fd, const char* payload, int size);
+
+// Receive a file descriptor and a payload of size up to "size" from a
+// socket "conn". The payload will be written to "payload" and the file
+// descriptor will be returned. Returns -1 on failure.
+int recv_fd(int conn, char* payload, int size);
+
diff --git a/src/plasma.h b/src/plasma.h
new file mode 100644
index 000000000..8577aab19
--- /dev/null
+++ b/src/plasma.h
@@ -0,0 +1,61 @@
+#ifndef PLASMA_H
+#define PLASMA_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#define LOG_ERR(M, ...) \
+  fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", \
+    __FILE__, __LINE__, errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
+
+#define LOG_INFO(M, ...) \
+  fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+
+typedef struct {
+  int64_t size;
+  int64_t create_time;
+  int64_t construct_duration;
+} plasma_object_info;
+
+// Represents an object id hash, can hold a full SHA1 hash
+typedef struct {
+  unsigned char id[20];
+} plasma_id;
+
+enum plasma_request_type {
+  PLASMA_CREATE, // create a new object
+  PLASMA_GET, // get an object
+  PLASMA_SEAL // seal an object
+};
+
+typedef struct {
+  int type;
+  plasma_id object_id;
+  int64_t size;
+} plasma_request;
+
+enum plasma_reply_type {
+  PLASMA_OBJECT, // the file descriptor represents an object
+  PLASMA_FUTURE, // the file descriptor represents a future
+};
+
+typedef struct {
+  int type;
+  int64_t size;
+} plasma_reply;
+
+typedef struct {
+  plasma_id object_id;
+  void *data;
+  int64_t size;
+  int writable;
+} plasma_buffer;
+
+int plasma_store_connect(const char* socket_name);
+plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size);
+plasma_buffer plasma_get(int conn, plasma_id object_id);
+void plasma_seal(int fd, plasma_id object_id);
+
+#endif
diff --git a/src/plasma_client.c b/src/plasma_client.c
new file mode 100644
index 000000000..9a8359eb6
--- /dev/null
+++ b/src/plasma_client.c
@@ -0,0 +1,84 @@
+// PLASMA CLIENT: Client library for using the plasma store and manager
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <strings.h>
+#include <netinet/in.h>
+#include <netdb.h> 
+
+#include "plasma.h"
+#include "fling.h"
+
+// Write one request to "fd": exit on a write error, log a short write.
+void plasma_send(int fd, plasma_request *req) {
+  ssize_t written = write(fd, req, sizeof(plasma_request));
+  if (written == (ssize_t) sizeof(plasma_request))
+    return;
+  if (written < 0) {
+    LOG_ERR("write error");
+    exit(-1);
+  }
+  LOG_ERR("partial write");
+}
+
+plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size) {
+  plasma_request req = { PLASMA_CREATE, object_id, size };
+  plasma_send(conn, &req);
+  plasma_reply reply;
+  int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
+  assert(reply.type == PLASMA_OBJECT);
+  assert(reply.size == size);
+  void *data = mmap(NULL, reply.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (data == MAP_FAILED) {
+    LOG_ERR("mmap failed");
+    exit(-1);
+  }
+  plasma_buffer buffer = { object_id, data, size, 1 };
+  return buffer;
+}
+
+plasma_buffer plasma_get(int conn, plasma_id object_id) {
+  plasma_request req = { PLASMA_GET, object_id };
+  plasma_send(conn, &req);
+  plasma_reply reply;
+  // a PLASMA_FUTURE reply carries a socket that delivers the object later
+  int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
+  if (reply.type == PLASMA_FUTURE) {
+    int new_fd = recv_fd(fd, (char*)&reply, sizeof(plasma_reply));
+    close(fd);
+    fd = new_fd;
+  }
+  assert(reply.type == PLASMA_OBJECT);
+  // mmap fails unless flags contain MAP_SHARED or MAP_PRIVATE
+  void *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
+  return (plasma_buffer) { object_id, data, reply.size, 0 };
+}
+
+void plasma_seal(int fd, plasma_id object_id) {
+  plasma_request req = { PLASMA_SEAL, object_id };
+  plasma_send(fd, &req);
+}
+
+int plasma_store_connect(const char* socket_name) {
+  assert(socket_name);
+  struct sockaddr_un addr;
+  int fd;
+  if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+    LOG_ERR("socket error");
+    exit(-1);
+  }
+  memset(&addr, 0, sizeof(addr));
+  addr.sun_family = AF_UNIX;
+  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
+  if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
+    LOG_ERR("connect error");
+    exit(-1);
+  }
+  return fd;
+}
diff --git a/src/plasma_store.c b/src/plasma_store.c
new file mode 100644
index 000000000..528f8d52c
--- /dev/null
+++ b/src/plasma_store.c
@@ -0,0 +1,275 @@
+// PLASMA STORE: This is a simple object store server process
+//
+// It accepts incoming client connections on a unix domain socket
+// (name passed in via the -s option of the executable) and uses a
+// single thread to serve the clients. Each client establishes a
+// connection and can create objects, wait for objects and seal
+// objects through that connection.
+//
+// It keeps a hash table that maps object_ids (which are 20 byte long,
+// just enough to store a SHA1 hash) to memory mapped files.
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <getopt.h>
+#include <string.h>
+#include <limits.h>
+#include <poll.h>
+
+#include "uthash.h"
+#include "fling.h"
+#include "plasma.h"
+
+#define MAX_NUM_CLIENTS 2048
+
+typedef struct {
+  int num_clients; // number of clients connected
+  int client_id[MAX_NUM_CLIENTS]; // unique identifier for the clients
+  struct pollfd waiting[MAX_NUM_CLIENTS]; // data structure for polling
+} plasma_store_state;
+
+void init_state(plasma_store_state* s) {
+  memset(&s->waiting, 0, sizeof(s->waiting));
+  memset(&s->client_id, 0, sizeof(s->client_id));
+  s->num_clients = 0;
+}
+
+int add_client(plasma_store_state* s, int fd) {
+  static int curr_id = 0;
+  s->waiting[s->num_clients].fd = fd;
+  s->waiting[s->num_clients].events = POLLIN;
+  s->client_id[s->num_clients] = curr_id;
+  s->num_clients += 1;
+  return curr_id++;
+}
+
+void remove_client(plasma_store_state* s, int i) {
+  memcpy(&s->waiting[i], &s->waiting[s->num_clients-1], sizeof(struct pollfd));
+  memset(&s->waiting[s->num_clients-1], 0, sizeof(struct pollfd));
+  s->client_id[i] = s->client_id[s->num_clients-1];
+  s->client_id[s->num_clients-1] = 0;
+  s->num_clients -= 1;
+}
+
+typedef struct {
+  plasma_id object_id; // object id of this object
+  plasma_object_info info; // object info like size, creation time and owner
+  int fd; // memory mapped file containing the object
+  UT_hash_handle handle; // handle for the uthash table
+} object_table_entry;
+
+// objects that are still being written by their owner process
+object_table_entry* open_objects = NULL;
+
+// objects that have already been sealed by their owner process and
+// can now be shared with other processes
+object_table_entry* sealed_objects = NULL;
+
+typedef struct {
+  plasma_id object_id; // object id of this object
+  int num_waiting; // number of processes waiting for the object
+  int conn[MAX_NUM_CLIENTS]; // socket connections to waiting clients
+  UT_hash_handle handle; // handle for the uthash table
+} object_notify_entry;
+
+// objects that processes are waiting for
+object_notify_entry* objects_notify = NULL;
+
+// Create a buffer of "size" bytes backed by a temporary file. The file is
+// unlinked immediately so we do not leave traces in the system. Returns the
+// file descriptor, or -1 on failure (nothing is left open in that case).
+int create_buffer(int64_t size) {
+  // mkstemp overwrites the XXXXXX suffix, so it needs a writable array
+  char file_name[] = "/tmp/plasmaXXXXXX";
+  int fd = mkstemp(file_name);
+  if (fd < 0) {
+    return -1;
+  }
+  // the open descriptor keeps the file usable after its name is removed
+  if (unlink(file_name) != 0) {
+    LOG_ERR("unlink error");
+    close(fd);
+    return -1;
+  }
+  // give the still-empty file its final size
+  if (ftruncate(fd, (off_t) size) != 0) {
+    LOG_ERR("ftruncate error");
+    close(fd);
+    return -1;
+  }
+  return fd;
+}
+
+// create a new object buffer in the hash table
+void create_object(int conn, plasma_request* req) {
+  int fd = create_buffer(req->size);
+  if (fd < 0) {
+    LOG_ERR("could not create shared memory buffer");
+    exit(-1);
+  }
+  object_table_entry *entry = malloc(sizeof(object_table_entry));
+  memcpy(&entry->object_id, &req->object_id, 20);
+  entry->info.size = req->size;
+  // TODO(pcm): set the other fields
+  entry->fd = fd;
+  HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
+  plasma_reply reply = { PLASMA_OBJECT, req->size };
+  send_fd(conn, fd, (char*) &reply, sizeof(plasma_reply));
+}
+
+// send a sealed object to the client, or a future socket if not sealed yet
+void get_object(int conn, plasma_request* req) {
+  object_table_entry *entry;
+  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
+  if (entry) {
+    plasma_reply reply = { PLASMA_OBJECT, entry->info.size };
+    send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
+  } else {
+    LOG_INFO("object not in hash table of sealed objects");
+    int fd[2];
+    socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
+    // calloc so that num_waiting starts at 0 before it is used as an index
+    object_notify_entry *notify_entry = calloc(1, sizeof(object_notify_entry));
+    memcpy(&notify_entry->object_id, &req->object_id, sizeof(plasma_id));
+    notify_entry->conn[notify_entry->num_waiting++] = fd[0];
+    HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id), notify_entry);
+    plasma_reply reply = { PLASMA_FUTURE, -1 };
+    send_fd(conn, fd[1], (char*) &reply, sizeof(plasma_reply));
+  }
+}
+
+// seal an object: move it to the sealed table and notify waiting clients
+void seal_object(int conn, plasma_request* req) {
+  object_table_entry *entry;
+  HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
+  if (!entry) {
+    return; // TODO(pcm): return error
+  }
+  HASH_DELETE(handle, open_objects, entry);
+  HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
+  // inform processes that the object is ready now
+  object_notify_entry* notify_entry;
+  HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id), notify_entry);
+  if (!notify_entry) {
+    return;
+  }
+  plasma_reply reply = { PLASMA_OBJECT, entry->info.size };
+  for (int i = 0; i < notify_entry->num_waiting; ++i) {
+    send_fd(notify_entry->conn[i], entry->fd, (char*) &reply, sizeof(plasma_reply));
+    // the waiter holds the other end; our end is no longer needed
+    close(notify_entry->conn[i]);
+  }
+  HASH_DELETE(handle, objects_notify, notify_entry);
+  free(notify_entry);
+}
+
+void process_event(int conn, plasma_request* req) {
+  switch (req->type) {
+  case PLASMA_CREATE:
+    create_object(conn, req);
+    break;
+  case PLASMA_GET:
+    get_object(conn, req);
+    break;
+  case PLASMA_SEAL:
+    seal_object(conn, req);
+    break;
+  }
+}
+
+// Serve clients forever: accept connections and dispatch their requests.
+void event_loop(int socket) {
+  plasma_store_state state;
+  init_state(&state);
+  add_client(&state, socket);
+  plasma_request req;
+  while (1) {
+    if (poll(state.waiting, state.num_clients, -1) < 0) {
+      LOG_ERR("poll failed");
+      exit(-1);
+    }
+    for (int i = 0; i < state.num_clients; ++i) {
+      if (state.waiting[i].revents == 0)
+        continue;
+      if (state.waiting[i].fd == socket) {
+        while (1) {
+          // handle new incoming connections
+          int new_socket = accept(socket, NULL, NULL);
+          if (new_socket < 0) {
+            if (errno != EWOULDBLOCK) {
+              LOG_ERR("accept failed");
+              exit(-1);
+            }
+            break;
+          }
+          int client_id = add_client(&state, new_socket);
+          LOG_INFO("adding new client with id %d", client_id);
+        }
+      } else {
+        int r = read(state.waiting[i].fd, &req, sizeof(plasma_request));
+        if (r == -1) {
+          LOG_ERR("read error");
+        } else if (r == 0) {
+          LOG_INFO("client with id %d disconnected", state.client_id[i]);
+          close(state.waiting[i].fd);
+          remove_client(&state, i--); // recheck slot i: last client moved here
+        } else {
+          process_event(state.waiting[i].fd, &req);
+        }
+      }
+    }
+  }
+}
+
+void start_server(char* socket_name) {
+  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+  if (fd == -1) {
+    LOG_ERR("socket error");
+    exit(-1);
+  }
+  int on = 1;
+  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) < 0) {
+    LOG_ERR("setsockopt failed");
+    close(fd);
+    exit(-1);
+  }
+  // TODO(pcm): http://stackoverflow.com/q/1150635
+  if (ioctl(fd, FIONBIO, (char*) &on) < 0) {
+    LOG_ERR("ioctl failed");
+    close(fd);
+    exit(-1);
+  }
+  struct sockaddr_un addr = { .sun_family = AF_UNIX }; // other fields are zeroed
+  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
+  unlink(socket_name); // bind fails if a file already exists at this path
+  if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0 || listen(fd, 5) < 0) {
+    LOG_ERR("bind or listen failed");
+    exit(-1);
+  }
+  event_loop(fd);
+}
+
+int main(int argc, char* argv[]) {
+  char *socket_name = NULL;
+  int c;
+  while ((c = getopt(argc, argv, "s:")) != -1) {
+    switch (c) {
+    case 's':
+      socket_name = optarg;
+      break;
+    default:
+      exit(-1);
+    }
+  }
+  if (!socket_name) {
+    LOG_ERR("please specify socket for incoming connections with -s switch");
+    exit(-1);
+  }
+  LOG_INFO("starting server listening on %s", socket_name);
+  start_server(socket_name);
+}
diff --git a/src/uthash.h b/src/uthash.h
new file mode 100644
index 000000000..45d1f9fc1
--- /dev/null
+++ b/src/uthash.h
@@ -0,0 +1,1074 @@
+/*
+Copyright (c) 2003-2016, Troy D. Hanson     http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#define UTHASH_VERSION 2.0.1
+
+#include <string.h>   /* memcmp,strlen */
+#include <stddef.h>   /* ptrdiff_t */
+#include <stdlib.h>   /* exit() */
+
+/* DECLTYPE(x) expands to a cast to the type of x: (decltype(x)) for MSVC
+   2010 or newer in C++ mode, (__typeof(x)) for GNU, Sun and other compilers.
+   Where neither is used (older MSVC, MSVC in C mode, Borland, LCC, Watcom)
+   it expands to nothing and NO_DECLTYPE is defined so callers can adapt. */
+#if defined(_MSC_VER)   /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE      /* no typeof: store src through a char** alias of dst */
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  char **_da_dst = (char**)(&(dst));                                             \
+  *_da_dst = (char*)(src);                                                       \
+} while (0)
+#else                   /* cast src to the type of dst, then assign */
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  (dst) = DECLTYPE(dst)(src);                                                    \
+} while (0)
+#endif
+
+/* several of the hash functions use uint32_t, which is not defined before VS2010 */
+#if defined(_WIN32)
+#if defined(_MSC_VER) && _MSC_VER >= 1600
+#include <stdint.h>
+#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
+#include <stdint.h>
+#else                   /* fallback: declare the two types directly */
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+#elif defined(__GNUC__) && !defined(__VXWORKS__)
+#include <stdint.h>
+#else                   /* any other toolchain: same fallback typedefs */
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+
+#ifndef uthash_fatal    /* each hook below is only defined if the application has not already defined it */
+#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc); msg is unused by this default */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn; sz is unused by this default */
+#endif
+#ifndef uthash_strlen
+#define uthash_strlen(s) strlen(s)
+#endif
+#ifndef uthash_memcmp
+#define uthash_memcmp(a,b,n) memcmp(a,b,n)
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
+#endif
+
+/* table sizing constants (note 32 == 1 << 5) */
+#define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
+
+/* recover the element address from hash handle address hhp (subtracts tbl->hho) */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+/* locate the hash handle inside the element at address elp (adds tbl->hho) */
+#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho)))
+
+#define HASH_VALUE(keyptr,keylen,hashv)                                          \
+do {                      /* store HASH_FCN of (keyptr, keylen) into hashv */    \
+  HASH_FCN(keyptr, keylen, hashv);                                               \
+} while (0)
+
+#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out)                 \
+do {                      /* look up a key whose hash value is already known */  \
+  (out) = NULL;                          /* result when head is NULL/no match */ \
+  if (head) {                                                                    \
+    unsigned _hf_bkt;                                                            \
+    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt);                  \
+    if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { /* 0: skip the scan */  \
+      HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
+do {                      /* hash the key, then search by that hash value */     \
+  unsigned _hf_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen, _hf_hashv);                                         \
+  HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out);               \
+} while (0)
+
+#ifdef HASH_BLOOM         /* bloom filter of (1 << HASH_BLOOM) bits; macros below are fully parenthesized */
+#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL))
+#define HASH_BLOOM_MAKE(tbl)                                                     \
+do {                      /* allocate and zero the bit vector of tbl */          \
+  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
+  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
+  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
+  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
+  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl)                                                     \
+do {                      /* release the bit vector made by HASH_BLOOM_MAKE */   \
+  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) ((bv)[(idx)/8U] |= (1U << ((idx)%8U)))
+#define HASH_BLOOM_BITTEST(bv,idx) ((bv)[(idx)/8U] & (1U << ((idx)%8U)))
+
+#define HASH_BLOOM_ADD(tbl,hashv)       /* set the bit selected by the low bloom_nbits bits of hashv */ \
+  HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#define HASH_BLOOM_TEST(tbl,hashv)      /* non-zero when that bit is set */      \
+  HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#else                     /* no bloom filter: no-ops, and every test passes */
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#define HASH_BLOOM_BYTELEN 0U
+#endif
+
+#define HASH_MAKE_TABLE(hh,head)                                                 \
+do {                      /* allocate and zero the table and bucket array */     \
+  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
+                  sizeof(UT_hash_table));                                        \
+  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
+  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
+  (head)->hh.tbl->tail = &((head)->hh);          /* head's handle is the tail */ \
+  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
+  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
+  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); /* handle offset */ \
+  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
+  memset((head)->hh.tbl->buckets, 0,                                             \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
+  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \
+do {                      /* delete any item with add's key, then add in cmpfcn order */ \
+  (replaced) = NULL;                                                             \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) {                         /* caller still owns the old item */   \
+     HASH_DELETE(hh, head, replaced);                                            \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \
+do {                      /* delete any item with add's key, then append add */  \
+  (replaced) = NULL;                      /* stays NULL if nothing was replaced */ \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) {                                                                \
+     HASH_DELETE(hh, head, replaced);                                            \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \
+} while (0)
+
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
+do {                      /* hash add's key field, then replace by hash value */ \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \
+} while (0)
+
+#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn)    \
+do {                      /* as HASH_REPLACE, but add is inserted in cmpfcn order */ \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \
+} while (0)
+
+#define HASH_APPEND_LIST(hh, head, add)                                          \
+do {                      /* link add after the current tail; add becomes tail */ \
+  (add)->hh.next = NULL;                                                         \
+  (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);           \
+  (head)->hh.tbl->tail->next = (add);                                            \
+  (head)->hh.tbl->tail = &((add)->hh);                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \
+do {                      /* add with a known hash, keeping app order by cmpfcn */ \
+  unsigned _ha_bkt;                                                              \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (char*) (keyptr);       /* the key pointer is stored, not copied */ \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) {                          /* first item: it becomes the head */  \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    (head) = (add);                                                              \
+    HASH_MAKE_TABLE(hh, head);                                                   \
+  } else {                                                                       \
+    struct UT_hash_handle *_hs_iter = &(head)->hh;                               \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    do {                                  /* stop at first item with cmpfcn > 0 */ \
+      if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \
+        break;                                                                   \
+    } while ((_hs_iter = _hs_iter->next));                                       \
+    if (_hs_iter) {                       /* insert add before _hs_iter */       \
+      (add)->hh.next = _hs_iter;                                                 \
+      if (((add)->hh.prev = _hs_iter->prev)) {                                   \
+        HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add);              \
+      } else {                            /* no predecessor: add is the new head */ \
+        (head) = (add);                                                          \
+      }                                                                          \
+      _hs_iter->prev = (add);                                                    \
+    } else {                              /* ran off the end: append */          \
+      HASH_APPEND_LIST(hh, head, add);                                           \
+    }                                                                            \
+  }                                                                              \
+  (head)->hh.tbl->num_items++;                                                   \
+  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
+  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
+  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
+  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
+  HASH_FSCK(hh, head);                                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn)             \
+do {                      /* hash the key, then do the ordered add by hash value */ \
+  unsigned _hs_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen_in, _hs_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) /* key is the field inside add */
+
+#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn)                 \
+  HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) /* key is the field inside add */
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add)        \
+do {                      /* add with a known hash value, appended in app order */ \
+  unsigned _ha_bkt;                                                              \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (char*) (keyptr);       /* the key pointer is stored, not copied */ \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) {                          /* first item: it becomes the head */  \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    (head) = (add);                                                              \
+    HASH_MAKE_TABLE(hh, head);                                                   \
+  } else {                                /* share head's table and append */    \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    HASH_APPEND_LIST(hh, head, add);                                             \
+  }                                                                              \
+  (head)->hh.tbl->num_items++;                                                   \
+  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
+  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
+  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
+  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
+  HASH_FSCK(hh, head);                                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
+do {                      /* hash the key, then add by that hash value */        \
+  unsigned _ha_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen_in, _ha_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add);      \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add)            \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) /* key is the field inside add */
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
+  HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) /* key is the field inside add */
+
+#define HASH_TO_BKT(hashv,num_bkts,bkt)                                          \
+do {   /* mask, not modulo: assumes num_bkts is a power of two - TODO confirm */  \
+  bkt = ((hashv) & ((num_bkts) - 1U));                                           \
+} while (0)
+
+/* Delete "delptr" from the hash table and patch up the app-order
+ * doubly-linked list around it.
+ * The use of _hd_hh_del below deserves special explanation.
+ * The links used to be reached through (delptr), but that led to a bug
+ * when the same symbol was passed as both head and deletee, as in
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but once the head (users) is repointed below
+ * we can no longer refer to the deletee through the same symbol (users)
+ * for the rest of the patch-up. Solution: copy the deletee's handle
+ * pointer into scratch space first, and make the later references via
+ * that scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr)                                              \
+do {                                                                             \
+    struct UT_hash_handle *_hd_hh_del;                                           \
+    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
+        uthash_free((head)->hh.tbl->buckets,     /* sole item: free the table */ \
+                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
+        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
+        head = NULL;                                                             \
+    } else {                                                                     \
+        unsigned _hd_bkt;                                                        \
+        _hd_hh_del = &((delptr)->hh);            /* saved before head can move */ \
+        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
+            (head)->hh.tbl->tail =               /* deleting the tail: back up */ \
+                (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +               \
+                (head)->hh.tbl->hho);                                            \
+        }                                                                        \
+        if ((delptr)->hh.prev != NULL) {                                         \
+            ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +                  \
+                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
+        } else {                                 /* deleting the head item */    \
+            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
+        }                                                                        \
+        if (_hd_hh_del->next != NULL) {                                          \
+            ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next +                     \
+                    (head)->hh.tbl->hho))->prev =                                \
+                    _hd_hh_del->prev;                                            \
+        }                                                                        \
+        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
+        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
+        (head)->hh.tbl->num_items--;                                             \
+    }                                                                            \
+    HASH_FSCK(hh,head);                                                          \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_REPLACE/HASH_DEL; all pass "hh" as the handle field name */
+#define HASH_FIND_STR(head,findstr,out)                                          \
+    HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add)                                          \
+    HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out)                                          \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add)                                          \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out)                                          \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add)                                          \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr)                                                    \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * It is for uthash developers only and expands to nothing if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head)                                                       \
+do {                                                                             \
+    struct UT_hash_handle *_thh;                                                 \
+    if (head) {                                                                  \
+        unsigned _bkt_i;                                                         \
+        unsigned _count;                                                         \
+        char *_prev;                                                             \
+        _count = 0;                                                              \
+        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
+            unsigned _bkt_count = 0;             /* items seen in this chain */  \
+            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
+            _prev = NULL;                                                        \
+            while (_thh) {                       /* check hh_prev back-links */  \
+               if (_prev != (char*)(_thh->hh_prev)) {                            \
+                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
+                    _thh->hh_prev, _prev );                                      \
+               }                                                                 \
+               _bkt_count++;                                                     \
+               _prev = (char*)(_thh);                                            \
+               _thh = _thh->hh_next;                                             \
+            }                                                                    \
+            _count += _bkt_count;                                                \
+            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
+               HASH_OOPS("invalid bucket count %u, actual %u\n",                 \
+                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
+            }                                                                    \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) { /* total over all buckets */  \
+            HASH_OOPS("invalid hh item count %u, actual %u\n",                   \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+        /* traverse hh in app order; check next/prev integrity, count */         \
+        _count = 0;                                                              \
+        _prev = NULL;                                                            \
+        _thh =  &(head)->hh;                                                     \
+        while (_thh) {                                                           \
+           _count++;                                                             \
+           if (_prev !=(char*)(_thh->prev)) {                                    \
+              HASH_OOPS("invalid prev %p, actual %p\n",                          \
+                    _thh->prev, _prev );                                         \
+           }                                                                     \
+           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
+           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
+                                  (head)->hh.tbl->hho) : NULL );                 \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid app item count %u, actual %u\n",                  \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+    }                                                                            \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS=<fd>, each added key is written to that
+ * file descriptor, prefixed by its length, for use in tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
+do {                                                                             \
+    unsigned _klen = fieldlen;                                                   \
+    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));    /* length prefix */         \
+    write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); /* key bytes */      \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* default to the Jenkins hash (HASH_JEN) unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6. Note ((x<<5)+x) = x*33. */
+#define HASH_BER(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _hb_keylen=(unsigned)keylen;                                          \
+  const unsigned char *_hb_key=(const unsigned char*)(key);                      \
+  (hashv) = 0;                                                                   \
+  while (_hb_keylen-- != 0U) {            /* one round per key byte */           \
+      (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                         \
+  }                                                                              \
+} while (0)
+
+
+/* The SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _sx_i;                                                                \
+  const unsigned char *_hs_key=(const unsigned char*)(key);                      \
+  hashv = 0;                                                                     \
+  for(_sx_i=0; _sx_i < keylen; _sx_i++) {  /* shift-add-xor per key byte */      \
+      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
+  }                                                                              \
+} while (0)
+/* FNV-1a variation: xor in each key byte, then multiply */
+#define HASH_FNV(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _fn_i;                                                                \
+  const unsigned char *_hf_key=(const unsigned char*)(key);                      \
+  hashv = 2166136261U;                    /* starting value */                   \
+  for(_fn_i=0; _fn_i < keylen; _fn_i++) {                                        \
+      hashv = hashv ^ _hf_key[_fn_i];                                            \
+      hashv = hashv * 16777619U;          /* per-byte multiplier */              \
+  }                                                                              \
+} while (0)
+
+#define HASH_OAT(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _ho_i;                                                                \
+  const unsigned char *_ho_key=(const unsigned char*)(key);                      \
+  hashv = 0;                                                                     \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) { /* add a byte, then two mixing steps */ \
+      hashv += _ho_key[_ho_i];                                                   \
+      hashv += (hashv << 10);                                                    \
+      hashv ^= (hashv >> 6);                                                     \
+  }                                                                              \
+  hashv += (hashv << 3);                  /* final mixing after the last byte */ \
+  hashv ^= (hashv >> 11);                                                        \
+  hashv += (hashv << 15);                                                        \
+} while (0)
+
+#define HASH_JEN_MIX(a,b,c)                                                      \
+do { /* scramble a, b, c in place; each argument is assigned, so pass lvalues */ \
+  a -= b; a -= c; a ^= ( c >> 13 );                                              \
+  b -= c; b -= a; b ^= ( a << 8 );                                               \
+  c -= a; c -= b; c ^= ( b >> 13 );                                              \
+  a -= b; a -= c; a ^= ( c >> 12 );                                              \
+  b -= c; b -= a; b ^= ( a << 16 );                                              \
+  c -= a; c -= b; c ^= ( b >> 5 );                                               \
+  a -= b; a -= c; a ^= ( c >> 3 );                                               \
+  b -= c; b -= a; b ^= ( a << 10 );                                              \
+  c -= a; c -= b; c ^= ( b >> 15 );                                              \
+} while (0)
+
+#define HASH_JEN(key,keylen,hashv)                                               \
+do { /* hash the keylen bytes at key into hashv, stirring with HASH_JEN_MIX */   \
+  unsigned _hj_i,_hj_j,_hj_k;                                                    \
+  unsigned const char *_hj_key=(unsigned const char*)(key);                      \
+  hashv = 0xfeedbeefu;                                                           \
+  _hj_i = _hj_j = 0x9e3779b9u;                                                   \
+  _hj_k = (unsigned)(keylen);             /* bytes still to be consumed */       \
+  while (_hj_k >= 12U) {                  /* one full 12-byte group per pass */  \
+    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
+        + ( (unsigned)_hj_key[2] << 16 )                                         \
+        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
+    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
+        + ( (unsigned)_hj_key[6] << 16 )                                         \
+        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
+        + ( (unsigned)_hj_key[10] << 16 )                                        \
+        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
+                                                                                 \
+     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
+                                                                                 \
+     _hj_key += 12;                                                              \
+     _hj_k -= 12U;                                                               \
+  }                                                                              \
+  hashv += (unsigned)(keylen);                                                   \
+  switch ( _hj_k ) { /* 0..11 leftover bytes; entry case falls through to 1 */   \
+     case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */        \
+     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */        \
+     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */        \
+     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */        \
+     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */        \
+     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */        \
+     case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */        \
+     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */        \
+     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */        \
+     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */        \
+     case 1:  _hj_i += _hj_key[0];                                               \
+  }                                                                              \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
+} while (0)
+
+/* The Paul Hsieh hash function; get16bits(d) yields the 16-bit value at d */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif /* direct uint16_t load for the compilers/targets tested above */
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif /* otherwise build it from two bytes, d[0] being the low byte */
+#define HASH_SFH(key,keylen,hashv)                                               \
+do { /* hash the keylen bytes at key into hashv, reading them via get16bits */   \
+  unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
+  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen;                                \
+                                                                                 \
+  unsigned _sfh_rem = _sfh_len & 3U;      /* bytes left after 4-byte groups */   \
+  _sfh_len >>= 2;                         /* now the number of 4-byte groups */  \
+  hashv = 0xcafebabeu;                                                           \
+                                                                                 \
+  /* Main loop: each pass consumes 4 key bytes as two 16-bit reads */            \
+  for (;_sfh_len > 0U; _sfh_len--) {                                             \
+    hashv    += get16bits (_sfh_key);                                            \
+    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
+    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
+    _sfh_key += 2U*sizeof (uint16_t);                                            \
+    hashv    += hashv >> 11;                                                     \
+  }                                                                              \
+                                                                                 \
+  /* Handle end cases: 1, 2 or 3 trailing bytes (with 0 no case is taken) */     \
+  switch (_sfh_rem) {                                                            \
+    case 3: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 16;                                                \
+            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18;              \
+            hashv += hashv >> 11;                                                \
+            break;                                                               \
+    case 2: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 11;                                                \
+            hashv += hashv >> 17;                                                \
+            break;                                                               \
+    case 1: hashv += *_sfh_key;                                                  \
+            hashv ^= hashv << 10;                                                \
+            hashv += hashv >> 1;                                                 \
+  }                                                                              \
+                                                                                 \
+    /* Force "avalanching" of final 127 bits */                                  \
+    hashv ^= hashv << 3;                                                         \
+    hashv += hashv >> 5;                                                         \
+    hashv ^= hashv << 4;                                                         \
+    hashv += hashv >> 17;                                                        \
+    hashv ^= hashv << 25;                                                        \
+    hashv += hashv >> 6;                                                         \
+} while (0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPUs' (x86, x86_64) tolerance for unaligned reads.
+ * For other types of CPUs (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPUs where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
+ *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) ((p)[i]) /* these CPUs tolerate unaligned loads */
+#else /* non intel: assemble word i of p from aligned 32-bit loads only */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)(p) & 3UL) == 0UL)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)(p) & 3UL) == 1UL)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)(p) & 3UL) == 2UL)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)(p) & 3UL) == 3UL)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) /* p rounded down to a word */
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :                   \
+                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE((p)+(i)) :   \
+                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO((p)+(i)) :    \
+                                                      MUR_ONE_THREE((p)+(i)))))
+#endif /* (p)+(i) keeps p's misalignment, so the same splice fits every index */
+#define MUR_ROTL32(x,r) (((x) >> (32 - (r))) | ((x) << (r))) /* rotate left by r */
+#define MUR_FMIX(_h) \
+do { /* final mix of _h, in place: three xor-shifts around two multiplies */ \
+  _h = _h ^ (_h >> 16);   \
+  _h = _h * 0x85ebca6bu;  \
+  _h = _h ^ (_h >> 13);   \
+  _h = _h * 0xc2b2ae35u;  \
+  _h = _h ^ (_h >> 16);   \
+} while (0)
+
+#define HASH_MUR(key,keylen,hashv)                                     \
+do { /* 32-bit Murmur-style hash of the keylen bytes at key -> hashv */ \
+  const uint8_t *_mur_bytes = (const uint8_t*)(key);                   \
+  const int _mur_nwords = (int)(keylen) / 4;                           \
+  const uint8_t *_mur_rest = _mur_bytes + (_mur_nwords*4);             \
+  const uint32_t *_mur_end = (const uint32_t*)_mur_rest;               \
+  const uint32_t _mur_m1 = 0xcc9e2d51u;                                \
+  const uint32_t _mur_m2 = 0x1b873593u;                                \
+  uint32_t _mur_acc = 0xf88D5353u;                                     \
+  uint32_t _mur_w;                                                     \
+  int _mur_n = -_mur_nwords;                                           \
+  /* whole 4-byte words, indexed backwards from the end of the words */ \
+  while (_mur_n != 0) {                                                \
+    _mur_w = MUR_GETBLOCK(_mur_end,_mur_n) * _mur_m1;                  \
+    _mur_w = MUR_ROTL32(_mur_w,15);                                    \
+    _mur_w = _mur_w * _mur_m2;                                         \
+    _mur_acc = _mur_acc ^ _mur_w;                                      \
+    _mur_acc = MUR_ROTL32(_mur_acc,13);                                \
+    _mur_acc = (_mur_acc*5U) + 0xe6546b64u;                            \
+    _mur_n++;                                                          \
+  }                                                                    \
+  /* 0..3 trailing bytes; with none left the switch takes no case */   \
+  _mur_w = 0;                                                          \
+  switch((keylen) & 3U) {                                              \
+    case 3: _mur_w ^= (uint32_t)_mur_rest[2] << 16; /* FALLTHROUGH */  \
+    case 2: _mur_w ^= (uint32_t)_mur_rest[1] << 8;  /* FALLTHROUGH */  \
+    case 1: _mur_w ^= (uint32_t)_mur_rest[0];                          \
+            _mur_w = _mur_w * _mur_m1;                                 \
+            _mur_w = MUR_ROTL32(_mur_w,15);                            \
+            _mur_w = _mur_w * _mur_m2;                                 \
+            _mur_acc = _mur_acc ^ _mur_w;                              \
+  }                                                                    \
+  _mur_acc ^= (uint32_t)(keylen);                                      \
+  MUR_FMIX(_mur_acc);                                                  \
+  hashv = _mur_acc;                                                    \
+} while (0)
+#endif  /* HASH_USING_NO_STRICT_ALIASING */
+
+/* scan one bucket's chain for the item matching hashval, keylen_in and keyptr */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out)               \
+do {                                                                             \
+  if ((head).hh_head == NULL) {                                                  \
+    (out) = NULL;                                                                \
+  } else {                                                                       \
+    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head));                     \
+  }                                                                              \
+  while ((out) != NULL) {                                                        \
+    if (((out)->hh.hashv == (hashval)) && ((out)->hh.keylen == (keylen_in))      \
+        && (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0)) {             \
+      break; /* found: out is left pointing at the matching item */              \
+    }                                                                            \
+    /* no match here: advance along hh_next, or finish with out == NULL */       \
+    if ((out)->hh.hh_next == NULL) {                                             \
+      (out) = NULL;                                                              \
+    } else {                                                                     \
+      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next));                \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+/* push addhh onto the front of bucket head; expand the table if the chain is long */
+#define HASH_ADD_TO_BKT(head,addhh)                                              \
+do {                                                                             \
+ (head).count++;                 /* head may be any bucket lvalue, e.g. *p */    \
+ (addhh)->hh_next = (head).hh_head;                                              \
+ (addhh)->hh_prev = NULL;                                                        \
+ if ((head).hh_head != NULL) { (head).hh_head->hh_prev = (addhh); }              \
+ (head).hh_head = (addhh);                                                       \
+ if (((head).count >= (((head).expand_mult+1U) * HASH_BKT_CAPACITY_THRESH))      \
+     && ((addhh)->tbl->noexpand != 1U)) {                                        \
+       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
+ }                                                                               \
+} while (0)
+
+/* unlink hh_del from bucket head; do/while(0) makes the macro a single statement */
+#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
+do {                                                                             \
+    (head).count--;                                                              \
+    if ((head).hh_head == (hh_del)) {                                            \
+      (head).hh_head = (hh_del)->hh_next;  /* deleting the chain's first item */ \
+    }                                                                            \
+    if ((hh_del)->hh_prev != NULL) {                                             \
+      (hh_del)->hh_prev->hh_next = (hh_del)->hh_next;                            \
+    }                                                                            \
+    if ((hh_del)->hh_next != NULL) { (hh_del)->hh_next->hh_prev = (hh_del)->hh_prev; } \
+} while (0)
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ */
+#define HASH_EXPAND_BUCKETS(tbl)                                                 \
+do { /* double tbl's bucket array and rehome every item into the new array */    \
+    unsigned _he_bkt;                                                            \
+    unsigned _he_bkt_i;                                                          \
+    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
+    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
+    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
+             2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));            \
+    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
+    memset(_he_new_buckets, 0, /* NOTE(review): NULL here if uthash_fatal returns */ \
+            2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));             \
+    tbl->ideal_chain_maxlen =          /* ceil(num_items / doubled bucket count) */ \
+       (tbl->num_items >> (tbl->log2_num_buckets+1U)) +                          \
+       (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);        \
+    tbl->nonideal_items = 0;                                                     \
+    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
+    {                                                                            \
+        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
+        while (_he_thh != NULL) {                                                \
+           _he_hh_nxt = _he_thh->hh_next;  /* saved: hh_next is rewritten below */ \
+           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt);           \
+           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
+           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
+             tbl->nonideal_items++;        /* chain now longer than the ideal */ \
+             _he_newbkt->expand_mult = _he_newbkt->count /                       \
+                                        tbl->ideal_chain_maxlen;                 \
+           }                                                                     \
+           _he_thh->hh_prev = NULL;        /* push onto the new chain's front */ \
+           _he_thh->hh_next = _he_newbkt->hh_head;                               \
+           if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev =     \
+                _he_thh; }                                                       \
+           _he_newbkt->hh_head = _he_thh;                                        \
+           _he_thh = _he_hh_nxt;                                                 \
+        }                                                                        \
+    }                                                                            \
+    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+    tbl->num_buckets *= 2U;                                                      \
+    tbl->log2_num_buckets++;                                                     \
+    tbl->buckets = _he_new_buckets;                                              \
+    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
+        (tbl->ineff_expands+1U) : 0U;  /* counts consecutive poor expansions */  \
+    if (tbl->ineff_expands > 1U) {     /* two in a row: stop expanding */        \
+        tbl->noexpand=1;                                                         \
+        uthash_noexpand_fyi(tbl);                                                \
+    }                                                                            \
+    uthash_expand_fyi(tbl);                                                      \
+} while (0)
+
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* Note that HASH_SORT assumes the hash handle name to be hh.
+ * HASH_SRT was added to allow the hash handle name to be passed in. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn)                                                 \
+do { /* sort head's item list in place by cmpfcn, using repeated merge passes */ \
+  unsigned _hs_i;                                                                \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
+  if (head != NULL) {                                                            \
+      _hs_insize = 1;              /* run length merged per pass; doubles below */ \
+      _hs_looping = 1;                                                           \
+      _hs_list = &((head)->hh);                                                  \
+      while (_hs_looping != 0U) {                                                \
+          _hs_p = _hs_list;                                                      \
+          _hs_list = NULL;                                                       \
+          _hs_tail = NULL;                                                       \
+          _hs_nmerges = 0;                                                       \
+          while (_hs_p != NULL) {                                                \
+              _hs_nmerges++;                                                     \
+              _hs_q = _hs_p;       /* step q up to insize items beyond p */      \
+              _hs_psize = 0;                                                     \
+              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
+                  _hs_psize++;                                                   \
+                  _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?              \
+                          ((void*)((char*)(_hs_q->next) +                        \
+                          (head)->hh.tbl->hho)) : NULL);                         \
+                  if (! (_hs_q) ) { break; }                                     \
+              }                                                                  \
+              _hs_qsize = _hs_insize;                                            \
+              while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
+                  if (_hs_psize == 0U) {         /* p run used up: take from q */ \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) {           \
+                      _hs_e = _hs_p;             /* q run used up: take from p */ \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                                ((void*)((char*)(_hs_p->next) +                  \
+                                (head)->hh.tbl->hho)) : NULL);                   \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else if ((                                                   \
+                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+                             ) <= 0) {           /* p's item is not greater */   \
+                      _hs_e = _hs_p;                                             \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                               ((void*)((char*)(_hs_p->next) +                   \
+                               (head)->hh.tbl->hho)) : NULL);                    \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else {                                                       \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  }                                                              \
+                  if ( _hs_tail != NULL ) {      /* append e to the merged list */ \
+                      _hs_tail->next = ((_hs_e != NULL) ?                        \
+                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
+                  } else {                                                       \
+                      _hs_list = _hs_e;                                          \
+                  }                                                              \
+                  if (_hs_e != NULL) {                                           \
+                  _hs_e->prev = ((_hs_tail != NULL) ?                            \
+                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
+                  }                                                              \
+                  _hs_tail = _hs_e;                                              \
+              }                                                                  \
+              _hs_p = _hs_q;                                                     \
+          }                                                                      \
+          if (_hs_tail != NULL){                                                 \
+            _hs_tail->next = NULL;                                               \
+          }                                                                      \
+          if ( _hs_nmerges <= 1U ) {             /* at most one merge: finished */ \
+              _hs_looping=0;                                                     \
+              (head)->hh.tbl->tail = _hs_tail;                                   \
+              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
+          }                                                                      \
+          _hs_insize *= 2U;                                                      \
+      }                                                                          \
+      HASH_FSCK(hh,head);                                                        \
+ }                                                                               \
+} while (0)
+
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
+do { /* link each src item for which cond(item) is nonzero into dst via hh_dst */ \
+  unsigned _src_bkt, _dst_bkt;                                                   \
+  void *_last_elt=NULL, *_elt;                                                   \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); /* hh_dst offset */ \
+  if (src != NULL) {                                                             \
+    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
+      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
+          _src_hh != NULL;                                                       \
+          _src_hh = _src_hh->hh_next) {                                          \
+          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
+          if (cond(_elt)) {                                                      \
+            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
+            _dst_hh->key = _src_hh->key;       /* key pointer shared, not copied */ \
+            _dst_hh->keylen = _src_hh->keylen;                                   \
+            _dst_hh->hashv = _src_hh->hashv;   /* hash value reused as is */     \
+            _dst_hh->prev = _last_elt;         /* chain after the last selected */ \
+            _dst_hh->next = NULL;                                                \
+            if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; }             \
+            if (dst == NULL) {                 /* first item becomes dst itself */ \
+              DECLTYPE_ASSIGN(dst,_elt);                                         \
+              HASH_MAKE_TABLE(hh_dst,dst);     /* presumably sets up dst's table */ \
+            } else {                                                             \
+              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
+            }                                                                    \
+            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
+            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
+            (dst)->hh_dst.tbl->num_items++;                                      \
+            _last_elt = _elt;                                                    \
+            _last_elt_hh = _dst_hh;                                              \
+          }                                                                      \
+      }                                                                          \
+    }                                                                            \
+  }                                                                              \
+  HASH_FSCK(hh_dst,dst);                                                         \
+} while (0)
+
+#define HASH_CLEAR(hh,head)                                                      \
+do {                                                                             \
+  if ((head) != NULL) {                                                          \
+    uthash_free((head)->hh.tbl->buckets,                                         \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
+    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head)=NULL;                                                                 \
+  }                                                                              \
+} while (0)
+
+#define HASH_OVERHEAD(hh,head)                                                   \
+ (((head) != NULL) ? (                                                           \
+ (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
+          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
+           sizeof(UT_hash_table)                                   +             \
+           (HASH_BLOOM_BYTELEN))) : 0U)
+
+#ifdef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)(((head)!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#else
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((tmp)=DECLTYPE(el)(((head)!=NULL)?(head)->hh.next:NULL));    \
+  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#endif
+
+/* obtain a count of items in the hash; evaluates to 0U when head is NULL */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) (((head) != NULL)?((head)->hh.tbl->num_items):0U)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;
+   unsigned count;
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often.
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;
+   unsigned num_buckets, log2_num_buckets;
+   unsigned num_items;
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;
+   void *prev;                       /* prev element in app order      */
+   void *next;                       /* next element in app order      */
+   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
+   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
+   void *key;                        /* ptr to enclosing struct's key  */
+   unsigned keylen;                  /* enclosing struct's key len     */
+   unsigned hashv;                   /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */

From e5ecd497f384abb6bc5d82087844f9ca184f5173 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sun, 14 Aug 2016 01:49:58 -0700
Subject: [PATCH 04/91] minimal travis file

---
 .travis.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..41e3da95e
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+sudo: required
+
+language: generic
+
+matrix:
+  include:
+    - os: linux
+      dist: trusty
+    - os: osx
+      osx_image: xcode7
+
+install:
+  - make

From 7a18347b5cd3e6efdff1bf64677a86244d958486 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Mon, 15 Aug 2016 16:41:22 -0700
Subject: [PATCH 05/91] cleanup

---
 Makefile            |  5 ++++-
 src/plasma.h        |  8 ++++++-
 src/plasma_client.c | 17 +++++++++-----
 src/plasma_store.c  | 55 +++++++++++++++++++++++++--------------------
 4 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/Makefile b/Makefile
index 32cb50a61..85167f6cd 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
 CFLAGS = -g -Wall
 BUILD = build
 
-all: $(BUILD)/plasma_store $(BUILD)/example
+all: $(BUILD)/plasma_store $(BUILD)/plasma_client.so $(BUILD)/example
 
 clean:
 	rm $(BUILD)/*
@@ -10,5 +10,8 @@ clean:
 $(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c
 	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_store.c src/fling.c -o $(BUILD)/plasma_store
 
+$(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c
+	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/fling.c -fPIC -shared -o $(BUILD)/plasma_client.so
+
 $(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c
 	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/example.c src/fling.c -o $(BUILD)/example
diff --git a/src/plasma.h b/src/plasma.h
index 8577aab19..42ac2c6f5 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -27,13 +27,19 @@ typedef struct {
 enum plasma_request_type {
   PLASMA_CREATE, // create a new object
   PLASMA_GET, // get an object
-  PLASMA_SEAL // seal an object
+  PLASMA_SEAL, // seal an object
+  PLASMA_TRANSFER, // request transfer to another store
+  PLASMA_DATA, // header for sending data
+  PLASMA_REGISTER // register a plasma manager
 };
 
 typedef struct {
   int type;
+  int manager_id;
   plasma_id object_id;
   int64_t size;
+  uint8_t addr[4];
+  int port;
 } plasma_request;
 
 enum plasma_reply_type {
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 9a8359eb6..86b49125d 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -19,7 +19,7 @@ void plasma_send(int fd, plasma_request *req) {
   int req_count = sizeof(plasma_request);
   if (write(fd, req, req_count) != req_count) {
     if (req_count > 0) {
-      LOG_ERR("partial write");
+      LOG_ERR("partial write on fd %d", fd);
     } else {
       LOG_ERR("write error");
       exit(-1);
@@ -28,7 +28,8 @@ void plasma_send(int fd, plasma_request *req) {
 }
 
 plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size) {
-  plasma_request req = { PLASMA_CREATE, object_id, size };
+  LOG_INFO("called plasma_create on conn %d with size %" PRId64, conn, size);
+  plasma_request req = { .type = PLASMA_CREATE, .object_id = object_id, .size = size };
   plasma_send(conn, &req);
   plasma_reply reply;
   int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
@@ -44,7 +45,7 @@ plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size) {
 }
 
 plasma_buffer plasma_get(int conn, plasma_id object_id) {
-  plasma_request req = { PLASMA_GET, object_id };
+  plasma_request req = { .type = PLASMA_GET, .object_id = object_id };
   plasma_send(conn, &req);
   plasma_reply reply;
   // the following loop is run at most twice
@@ -55,13 +56,17 @@ plasma_buffer plasma_get(int conn, plasma_id object_id) {
     fd = new_fd;
   }
   assert(reply.type == PLASMA_OBJECT);
-  void *data = mmap(NULL, reply.size, PROT_READ, 0, fd, 0);
+  void *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
+  if (data  == MAP_FAILED) {
+    LOG_ERR("mmap failed");
+    exit(-1);
+  }
   plasma_buffer buffer = { object_id, data, reply.size, 0 };
   return buffer;
 }
 
 void plasma_seal(int fd, plasma_id object_id) {
-  plasma_request req = { PLASMA_SEAL, object_id };
+  plasma_request req = { .type = PLASMA_SEAL, .object_id = object_id };
   plasma_send(fd, &req);
 }
 
@@ -77,7 +82,7 @@ int plasma_store_connect(const char* socket_name) {
   addr.sun_family = AF_UNIX;
   strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
   if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
-    LOG_ERR("connect error");
+    LOG_ERR("could not connect to store %s", socket_name);
     exit(-1);
   }
   return fd;
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 528f8d52c..3260cde9e 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -48,6 +48,8 @@ int add_client(plasma_store_state* s, int fd) {
   return curr_id++;
 }
 
+// remove the client at index i by swapping it with the
+// client at index num_clients-1 and zeroing the latter out
 void remove_client(plasma_store_state* s, int i) {
   memcpy(&s->waiting[i], &s->waiting[s->num_clients-1], sizeof(struct pollfd));
   memset(&s->waiting[s->num_clients-1], 0, sizeof(struct pollfd));
@@ -107,6 +109,7 @@ int create_buffer(int64_t size) {
 
 // create a new object buffer in the hash table
 void create_object(int conn, plasma_request* req) {
+  LOG_INFO("creating object"); // TODO(pcm): add object_id here
   int fd = create_buffer(req->size);
   if (fd < 0) {
     LOG_ERR("could not create shared memory buffer");
@@ -145,6 +148,7 @@ void get_object(int conn, plasma_request* req) {
 
 // seal an object that has been created in the hash table
 void seal_object(int conn, plasma_request* req) {
+  LOG_INFO("sealing object"); // TODO(pcm): add object_id here
   object_table_entry *entry;
   HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
   if (!entry) {
@@ -179,6 +183,9 @@ void process_event(int conn, plasma_request* req) {
   case PLASMA_SEAL:
     seal_object(conn, req);
     break;
+  default:
+    LOG_ERR("invalid request %d", req->type);
+    exit(-1);
   }
 }
 
@@ -195,32 +202,32 @@ void event_loop(int socket) {
     }
     for (int i = 0; i < state.num_clients; ++i) {
       if (state.waiting[i].revents == 0)
-	continue;
+        continue;
       if (state.waiting[i].fd == socket) {
-	while (1) {
-	  // handle new incoming connections
-	  int new_socket = accept(socket, NULL, NULL);
-	  if (new_socket < 0) {
-	    if (errno != EWOULDBLOCK) {
-	      LOG_ERR("accept failed");
-	      exit(-1);
-	    }
-	    break;
-	  }
-	  int client_id = add_client(&state, new_socket);
-	  LOG_INFO("adding new client with id %d", client_id);
-	}
+        while (1) {
+          // handle new incoming connections
+          int new_socket = accept(socket, NULL, NULL);
+          if (new_socket < 0) {
+            if (errno != EWOULDBLOCK) {
+              LOG_ERR("accept failed");
+              exit(-1);
+            }
+            break;
+          }
+          int client_id = add_client(&state, new_socket);
+          LOG_INFO("adding new client with id %d", client_id);
+        }
       } else {
-	int r = read(state.waiting[i].fd, &req, sizeof(plasma_request));
-	if (r == -1) {
-	  LOG_ERR("read error");
-	  continue;
-	} else if (r == 0) {
-	  LOG_INFO("client with id %d disconnected", state.client_id[i]);
-	  remove_client(&state, i);
-	} else {
-	  process_event(state.waiting[i].fd, &req);
-	}
+        int r = read(state.waiting[i].fd, &req, sizeof(plasma_request));
+        if (r == -1) {
+          LOG_ERR("read error");
+          continue;
+        } else if (r == 0) {
+          LOG_INFO("client with id %d disconnected", state.client_id[i]);
+          remove_client(&state, i);
+        } else {
+          process_event(state.waiting[i].fd, &req);
+        }
       }
     }
   }

From 29f0489b6e9f6dbcbe20a6e19e77acf07b9c554a Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 16 Aug 2016 15:06:29 -0700
Subject: [PATCH 06/91] Remove directories as well with make clean.

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 85167f6cd..e303d65b5 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BUILD = build
 all: $(BUILD)/plasma_store $(BUILD)/plasma_client.so $(BUILD)/example
 
 clean:
-	rm $(BUILD)/*
+	rm -r $(BUILD)/*
 
 $(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c
 	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_store.c src/fling.c -o $(BUILD)/plasma_store

From 535f4403e49c9662f3058e23e4dd4b943ac729bf Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 16 Aug 2016 15:38:45 -0700
Subject: [PATCH 07/91] add python client

---
 lib/python/plasma.py | 70 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 lib/python/plasma.py

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
new file mode 100644
index 000000000..4ce8fdb5c
--- /dev/null
+++ b/lib/python/plasma.py
@@ -0,0 +1,70 @@
+import socket
+import ctypes
+
+Addr = ctypes.c_ubyte * 4
+
+ID = ctypes.c_ubyte * 20
+
+class PlasmaID(ctypes.Structure):
+  _fields_ = [("plasma_id", ID)]
+
+# these must be in sync with plasma_request_type in plasma.h
+PLASMA_CREATE = 0
+
+class PlasmaRequest(ctypes.Structure):
+  _fields_ = [("type", ctypes.c_int),
+              ("manager_id", ctypes.c_int),
+              ("object_id", PlasmaID),
+              ("size", ctypes.c_int64),
+              ("addr", Addr),
+              ("port", ctypes.c_int)]
+
+class PlasmaBuffer(ctypes.Structure):
+  _fields_ = [("plasma_id", PlasmaID),
+              ("data", ctypes.c_void_p),
+              ("size", ctypes.c_int64),
+              ("writable", ctypes.c_int)]
+
+def make_plasma_id(string):
+  if len(string) != 20:
+    raise Exception("PlasmaIDs must be 20 characters long")
+  object_id = map(ord, string)
+  return PlasmaID(plasma_id=ID(*object_id))
+
+class PlasmaClient(object):
+  def __init__(self, socket_name):
+    self.client = ctypes.cdll.LoadLibrary("../../build/plasma_client.so")
+
+    self.client.plasma_store_connect.restype = ctypes.c_int
+
+    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64]
+    self.client.plasma_create.restype = PlasmaBuffer
+    
+    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID]
+    self.client.plasma_get.restype = PlasmaBuffer
+
+    self.client.plasma_seal.argtypes = [ctypes.c_int, PlasmaID]
+    self.client.plasma_seal.restype = None
+
+    self.buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
+    self.buffer_from_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
+    self.buffer_from_memory.restype = ctypes.py_object
+
+    self.buffer_from_read_write_memory = ctypes.pythonapi.PyBuffer_FromReadWriteMemory
+    self.buffer_from_read_write_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
+    self.buffer_from_read_write_memory.restype = ctypes.py_object
+
+    self.sock = self.client.plasma_store_connect(socket_name)
+
+  def create(self, object_id, size):
+    buf = self.client.plasma_create(self.sock, make_plasma_id(object_id), size)
+    return self.buffer_from_read_write_memory(buf.data, buf.size)
+
+  def get(self, object_id):
+    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id))
+    return self.buffer_from_memory(buf.data, buf.size)
+    
+  def seal(self, object_id):
+    self.client.plasma_seal(self.sock, make_plasma_id(object_id))
+
+    

From 6db8d1c15acd4c458f731472f10237ed699a5b26 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 16 Aug 2016 15:49:26 -0700
Subject: [PATCH 08/91] fix error handling

---
 src/plasma_client.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/plasma_client.c b/src/plasma_client.c
index 86b49125d..cd60d659b 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -18,12 +18,8 @@
 void plasma_send(int fd, plasma_request *req) {
   int req_count = sizeof(plasma_request);
   if (write(fd, req, req_count) != req_count) {
-    if (req_count > 0) {
-      LOG_ERR("partial write on fd %d", fd);
-    } else {
-      LOG_ERR("write error");
-      exit(-1);
-    }
+    LOG_ERR("write error");
+    exit(-1);
   }
 }
 

From 1b66ac54ba7c7b2c1d7058116880d8323607056c Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 16 Aug 2016 16:52:16 -0700
Subject: [PATCH 09/91] Basic Python unit tests.

---
 .travis.yml          |  4 +++
 lib/python/plasma.py | 10 ++++----
 setup-env.sh         |  5 ++++
 test/test.py         | 61 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 setup-env.sh
 create mode 100644 test/test.py

diff --git a/.travis.yml b/.travis.yml
index 41e3da95e..3230e843f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,3 +11,7 @@ matrix:
 
 install:
   - make
+
+script:
+  - source setup-env.sh
+  - python test/test.py
diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 4ce8fdb5c..69361d63c 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -1,3 +1,4 @@
+import os
 import socket
 import ctypes
 
@@ -33,13 +34,14 @@ def make_plasma_id(string):
 
 class PlasmaClient(object):
   def __init__(self, socket_name):
-    self.client = ctypes.cdll.LoadLibrary("../../build/plasma_client.so")
+    plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
+    self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
 
     self.client.plasma_store_connect.restype = ctypes.c_int
 
     self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64]
     self.client.plasma_create.restype = PlasmaBuffer
-    
+
     self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID]
     self.client.plasma_get.restype = PlasmaBuffer
 
@@ -63,8 +65,6 @@ class PlasmaClient(object):
   def get(self, object_id):
     buf = self.client.plasma_get(self.sock, make_plasma_id(object_id))
     return self.buffer_from_memory(buf.data, buf.size)
-    
+
   def seal(self, object_id):
     self.client.plasma_seal(self.sock, make_plasma_id(object_id))
-
-    
diff --git a/setup-env.sh b/setup-env.sh
new file mode 100644
index 000000000..a1b2cb4b1
--- /dev/null
+++ b/setup-env.sh
@@ -0,0 +1,5 @@
+echo "Adding Plasma to PYTHONPATH" 1>&2
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+
+export PYTHONPATH="$ROOT_DIR/lib/python/:$PYTHONPATH"
diff --git a/test/test.py b/test/test.py
new file mode 100644
index 000000000..f70b9809f
--- /dev/null
+++ b/test/test.py
@@ -0,0 +1,61 @@
+import os
+import subprocess
+import sys
+import time
+import unittest
+
+import plasma
+
+class TestPlasmaAPI(unittest.TestCase):
+
+  def setUp(self):
+    # Start Plasma.
+    plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
+    self.p = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store"])
+    time.sleep(0.1)
+    # Connect to Plasma.
+    self.plasma_client = plasma.PlasmaClient("/tmp/store")
+
+  def tearDown(self):
+    # Kill the plasma store process.
+    self.p.kill()
+
+  def test_create(self):
+    # Create an object string.
+    object_id = "id" + 18 * "x"
+    # Create a new buffer and write to it.
+    length = 1000
+    memory_buffer = self.plasma_client.create(object_id, length)
+    for i in range(length):
+      memory_buffer[i] = chr(i % 256)
+    # Seal the object.
+    self.plasma_client.seal(object_id)
+    # Get the object.
+    memory_buffer = self.plasma_client.get(object_id)
+    for i in range(length):
+      self.assertEqual(memory_buffer[i], chr(i % 256))
+
+  def test_illegal_functionality(self):
+    # Create an object string.
+    object_id = "id" + 18 * "x"
+    # Create a new buffer and write to it.
+    length = 1000
+    memory_buffer = self.plasma_client.create(object_id, length)
+    # Make sure we cannot access memory out of bounds.
+    self.assertRaises(Exception, lambda : memory_buffer[length])
+    # Seal the object.
+    self.plasma_client.seal(object_id)
+    # This test is commented out because it currently fails.
+    # # Make sure the object is read only now.
+    # def illegal_assignment():
+    #   memory_buffer[0] = chr(0)
+    # self.assertRaises(Exception, illegal_assignment)
+    # Get the object.
+    memory_buffer = self.plasma_client.get(object_id)
+    # Make sure the object is read only.
+    def illegal_assignment():
+      memory_buffer[0] = chr(0)
+    self.assertRaises(Exception, illegal_assignment)
+
+if __name__ == "__main__":
+  unittest.main(verbosity=2)

From b6b17f3ac3526aaca124b08cee858c4f0a06ffe6 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 16 Aug 2016 17:42:45 -0700
Subject: [PATCH 10/91] Retry if plasma client fails to connect to plasma
 store.

---
 src/plasma_client.c | 15 +++++++++++++--
 test/test.py        |  2 --
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/plasma_client.c b/src/plasma_client.c
index 86b49125d..d58f194c7 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -10,7 +10,7 @@
 #include <sys/un.h>
 #include <strings.h>
 #include <netinet/in.h>
-#include <netdb.h> 
+#include <netdb.h>
 
 #include "plasma.h"
 #include "fling.h"
@@ -81,7 +81,18 @@ int plasma_store_connect(const char* socket_name) {
   memset(&addr, 0, sizeof(addr));
   addr.sun_family = AF_UNIX;
   strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
-  if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
+  // Try to connect to the Plasma store. If unsuccessful, retry several times.
+  int connected_successfully = 0;
+  for (int num_attempts = 0; num_attempts < 50; ++num_attempts) {
+    if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) {
+      connected_successfully = 1;
+      break;
+    }
+    // Sleep for 100 milliseconds.
+    usleep(100000);
+  }
+  // If we could not connect to the Plasma store, exit.
+  if (!connected_successfully) {
     LOG_ERR("could not connect to store %s", socket_name);
     exit(-1);
   }
diff --git a/test/test.py b/test/test.py
index f70b9809f..6d4bee003 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,7 +1,6 @@
 import os
 import subprocess
 import sys
-import time
 import unittest
 
 import plasma
@@ -12,7 +11,6 @@ class TestPlasmaAPI(unittest.TestCase):
     # Start Plasma.
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
     self.p = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store"])
-    time.sleep(0.1)
     # Connect to Plasma.
     self.plasma_client = plasma.PlasmaClient("/tmp/store")
 

From 23327a18e0f8ecb6de56ad646d058fbd2c420573 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 17 Aug 2016 12:54:34 -0700
Subject: [PATCH 11/91] add plasma manager

---
 Makefile             |   5 +-
 lib/python/plasma.py |  25 ++-
 src/plasma.h         |  10 ++
 src/plasma_manager.c | 392 +++++++++++++++++++++++++++++++++++++++++++
 src/plasma_store.c   |  53 +++---
 test/nameserver.py   |  33 ++++
 6 files changed, 495 insertions(+), 23 deletions(-)
 create mode 100644 src/plasma_manager.c
 create mode 100644 test/nameserver.py

diff --git a/Makefile b/Makefile
index e303d65b5..f2c7ff9a5 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
 CFLAGS = -g -Wall
 BUILD = build
 
-all: $(BUILD)/plasma_store $(BUILD)/plasma_client.so $(BUILD)/example
+all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
 
 clean:
 	rm -r $(BUILD)/*
@@ -10,6 +10,9 @@ clean:
 $(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c
 	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_store.c src/fling.c -o $(BUILD)/plasma_store
 
+$(BUILD)/plasma_manager: src/plasma_manager.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
+	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_manager.c src/plasma_client.c src/fling.c -o $(BUILD)/plasma_manager
+
 $(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c
 	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/fling.c -fPIC -shared -o $(BUILD)/plasma_client.so
 
diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 69361d63c..83de5f1b8 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -9,8 +9,13 @@ ID = ctypes.c_ubyte * 20
 class PlasmaID(ctypes.Structure):
   _fields_ = [("plasma_id", ID)]
 
-# these must be in sync with plasma_request_type in plasma.h
+# these must be in sync with plasma_request_type in plasma.h (can we have a test for that?)
 PLASMA_CREATE = 0
+PLASMA_GET = 1
+PLASMA_SEAL = 2
+PLASMA_TRANSFER = 3
+PLASMA_DATA = 4
+PLASMA_REGISTER = 5
 
 class PlasmaRequest(ctypes.Structure):
   _fields_ = [("type", ctypes.c_int),
@@ -32,6 +37,24 @@ def make_plasma_id(string):
   object_id = map(ord, string)
   return PlasmaID(plasma_id=ID(*object_id))
 
+class PlasmaManager(object):
+
+  def __init__(self, addr, port):
+    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    self.sock.connect((addr, port))
+
+  def register(self, manager_id, addr, port):
+    "Register another object manager."
+    req = PlasmaRequest(type=PLASMA_REGISTER, manager_id=manager_id,
+                        addr=Addr(*map(int, addr.split("."))), port=port)
+    self.sock.send(buffer(req)[:])
+
+  def transfer(self, manager_id, object_id):
+    "Transfer local object with id object_id to manager with id manager_id."
+    req = PlasmaRequest(type=PLASMA_TRANSFER, manager_id=manager_id,
+                        object_id=make_plasma_id(object_id))
+    self.sock.send(buffer(req)[:])
+
 class PlasmaClient(object):
   def __init__(self, socket_name):
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
diff --git a/src/plasma.h b/src/plasma.h
index 42ac2c6f5..2e4fbfdcd 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -6,6 +6,13 @@
 #include <errno.h>
 #include <string.h>
 
+#ifdef NDEBUG
+  #define LOG_DEBUG(M, ...)
+#else
+  #define LOG_DEBUG(M, ...) \
+    fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#endif
+
 #define LOG_ERR(M, ...) \
   fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", \
     __FILE__, __LINE__, errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
@@ -24,6 +31,7 @@ typedef struct {
   unsigned char id[20];
 } plasma_id;
 
+// these values must be in sync with the ones in plasma.py (can we have a test for that?)
 enum plasma_request_type {
   PLASMA_CREATE, // create a new object
   PLASMA_GET, // get an object
@@ -64,4 +72,6 @@ plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size);
 plasma_buffer plasma_get(int conn, plasma_id object_id);
 void plasma_seal(int fd, plasma_id object_id);
 
+void plasma_send(int fd, plasma_request *req);
+
 #endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
new file mode 100644
index 000000000..7ad2585b6
--- /dev/null
+++ b/src/plasma_manager.c
@@ -0,0 +1,392 @@
+// PLASMA MANAGER: Local to a node, connects to other managers to send and
+// receive objects from them
+//
+// The storage manager listens on its main listening port, and if a request for
+// transferring an object to another object store comes in, it ships the data
+// using a new connection to the target object manager. Also keeps a list of
+// other object managers.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <strings.h>
+#include <poll.h>
+#include <assert.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include "plasma.h"
+
+#define MAX_CONNECTIONS 2048
+#define MAX_NUM_MANAGERS 1024
+
+enum conn_type {
+  // Connection to send commands to the manager.
+  CONN_CONTROL,
+  // Connection to send data to another manager.
+  CONN_WRITE_DATA,
+  // Connection to receive data from another manager.
+  CONN_READ_DATA
+};
+
+typedef struct {
+  // Unique identifier for the connection.
+  int id;
+  // Of type conn_type.
+  int type;
+  // Socket of the plasma store that is accessed for reading or writing data for
+  // this connection.
+  int store_conn;
+  // Buffer this connection is reading from or writing to.
+  plasma_buffer buf;
+  // Current position in the buffer.
+  int64_t cursor;
+} conn_state;
+
+typedef struct {
+  // Address of the manager.
+  struct sockaddr_in name;
+  // Is this manager connected?
+  int connected;
+} manager_state;
+
+typedef struct {
+  // Name of the socket connecting to local plasma store.
+  const char* store_socket_name;
+  // Number of connections.
+  int num_conn;
+  // For the "poll" system call.
+  struct pollfd waiting[MAX_CONNECTIONS];
+  // Status of connections (both control and data).
+  conn_state conn[MAX_CONNECTIONS];
+  // Other plasma managers in the cluster.
+  manager_state managers[MAX_NUM_MANAGERS];
+} plasma_manager_state;
+
+void init_manager_state(plasma_manager_state *s, const char* store_socket_name) {
+  memset(&s->waiting, 0, sizeof(s->waiting));
+  memset(&s->conn, 0, sizeof(s->conn));
+  memset(&s->managers, 0, sizeof(s->managers));
+  s->num_conn = 0;
+  s->store_socket_name = store_socket_name;
+}
+
+#define h_addr h_addr_list[0]
+
+// Add name info for another plasma manager from the cluster.
+void add_manager(plasma_manager_state *s, int manager_id, char *ip_address, int port) {
+  assert(ip_address);
+  assert(s);
+  struct hostent *manager = gethostbyname(ip_address);
+  if (!manager) {
+    LOG_ERR("plasma manager %s not found", ip_address);
+    exit(-1);
+  }
+  s->managers[manager_id].connected = 1;
+  struct sockaddr_in *name = &s->managers[manager_id].name;
+  name->sin_family = AF_INET;
+  bcopy(manager->h_addr, &name->sin_addr.s_addr, manager->h_length);
+  name->sin_port = htons(port);
+}
+
+// Add connection for sending commands or data to another plasma manager
+// (returns the connection id).
+int add_conn(plasma_manager_state* s, int type, int fd, int events, plasma_buffer* buf) {
+  static int conn_id = 0;
+  s->waiting[s->num_conn].fd = fd;
+  s->waiting[s->num_conn].events = events;
+  s->conn[s->num_conn].id = conn_id;
+  s->conn[s->num_conn].type = type;
+  if (buf) {
+    s->conn[s->num_conn].buf = *buf;
+  }
+  s->conn[s->num_conn].cursor = 0;
+  s->num_conn += 1;
+  return conn_id++;
+}
+
+// Remove connection with index i by swapping it with the last element.
+void remove_conn(plasma_manager_state* s, int i) {
+  memcpy(&s->waiting[i], &s->waiting[s->num_conn-1], sizeof(struct pollfd));
+  memset(&s->waiting[s->num_conn-1], 0, sizeof(struct pollfd));
+  memcpy(&s->conn[i], &s->conn[s->num_conn-1], sizeof(conn_state));
+  memset(&s->conn[s->num_conn-1], 0, sizeof(conn_state));
+}
+
+#define BUFSIZE 4096
+
+// Start transferring data to another object store manager. This establishes
+// a connection to both the manager and the local object store and sends
+// the data header to the other object manager.
+void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
+  int manager_id = req->manager_id;
+  int c = plasma_store_connect(state->store_socket_name);
+  plasma_buffer buf = plasma_get(c, req->object_id);
+  
+  int fd = socket(PF_INET, SOCK_STREAM, 0);
+  if (fd < 0) {
+    LOG_ERR("could not create socket");
+    exit(-1);
+  }
+  int r = connect(fd, (struct sockaddr*) &state->managers[manager_id].name, sizeof(state->managers[manager_id].name));
+  if (r < 0) {
+    LOG_ERR("could not establish connection to manager with id %d", manager_id);
+    exit(-1);
+  }
+
+  add_conn(state, CONN_WRITE_DATA, fd, POLLOUT, &buf);
+
+  plasma_request manager_req = { .type = PLASMA_DATA, .object_id = req->object_id, .size = buf.size };
+  LOG_INFO("filedescriptor is %d", fd);
+  plasma_send(fd, &manager_req);
+}
+
+void setup_data_connection(int conn_idx, plasma_manager_state* state, plasma_request* req) {
+  int store_conn = plasma_store_connect(state->store_socket_name);
+  state->conn[conn_idx].type = CONN_READ_DATA;
+  state->conn[conn_idx].store_conn = store_conn;
+  state->conn[conn_idx].buf = plasma_create(store_conn, req->object_id, req->size);
+  state->conn[conn_idx].cursor = 0;
+}
+
+// Handle a command request that came in through a socket (transferring data,
+// registering object managers, accepting incoming data).
+void process_command(int conn_idx, plasma_manager_state* state, plasma_request* req) {
+  switch (req->type) {
+  case PLASMA_TRANSFER:
+    LOG_INFO("transfering object to manager with id %d", req->manager_id);
+    initiate_transfer(state, req);
+    break;
+  case PLASMA_REGISTER: {
+      // Bound the write by the actual array size so it can never overrun buff.
+      char buff[16];
+      snprintf(buff, sizeof(buff), "%d.%d.%d.%d",
+               req->addr[0], req->addr[1], req->addr[2], req->addr[3]);
+      if (req->manager_id < 0 || req->manager_id >= MAX_NUM_MANAGERS) {
+        LOG_ERR("manager_id %d out of bounds", req->manager_id);
+      } else {
+        add_manager(state, req->manager_id, buff, req->port);
+        LOG_INFO("registering %s:%d with id %d", buff, req->port, req->manager_id);
+      }
+    }
+    break;
+  case PLASMA_DATA:
+    LOG_INFO("starting to stream data");
+    setup_data_connection(conn_idx, state, req);
+    break;
+  default:
+    LOG_ERR("invalid request %d", req->type);
+    exit(-1);
+  }
+}
+
+// Handle data or command event incoming on socket with index i.
+void read_from_socket(plasma_manager_state* state, int i, plasma_request* req) {
+  ssize_t r, s;
+  switch (state->conn[i].type) {
+    case CONN_CONTROL:
+      r = read(state->waiting[i].fd, req, sizeof(plasma_request));
+      if (r == -1) {
+        LOG_ERR("read error");
+      } else if (r == 0) {
+        LOG_INFO("connection with id %d disconnected", state->conn[i].id);
+        remove_conn(state, i);
+      } else {
+        process_command(i, state, req);
+      }
+      break;
+    case CONN_READ_DATA:
+      LOG_DEBUG("polled CONN_READ_DATA");
+      // Read at most what still fits, so a peer cannot overrun the buffer.
+      s = state->conn[i].buf.size - state->conn[i].cursor;
+      if (s > BUFSIZE)
+        s = BUFSIZE;
+      r = read(state->waiting[i].fd, state->conn[i].buf.data + state->conn[i].cursor, s);
+      if (r == -1) {
+        LOG_ERR("read error");
+      } else if (r > 0) {
+        state->conn[i].cursor += r;
+      } else {
+        LOG_INFO("end of file");
+        close(state->waiting[i].fd);
+        state->waiting[i].fd = -1;  // poll() skips negative fds; 0 would be stdin
+        state->waiting[i].events = 0;
+        plasma_seal(state->conn[i].store_conn, state->conn[i].buf.object_id);
+      }
+      break;
+    case CONN_WRITE_DATA:
+      LOG_DEBUG("polled CONN_WRITE_DATA");
+      s = state->conn[i].buf.size - state->conn[i].cursor;
+      if (s > BUFSIZE)
+        s = BUFSIZE;
+      r = write(state->waiting[i].fd, state->conn[i].buf.data + state->conn[i].cursor, s);
+      if (r < 0 || (r == 0 && s > 0)) {
+        LOG_ERR("write error");
+        exit(-1);
+      } else if (r != s) {
+        LOG_DEBUG("partial write on fd %d", state->waiting[i].fd);
+      }
+      // Advance by what was actually sent so a short write is never resent.
+      state->conn[i].cursor += r;
+      if (r == 0) {
+        close(state->waiting[i].fd);
+        state->waiting[i].fd = -1;
+        state->waiting[i].events = 0;
+      }
+      break;
+    default:
+      LOG_ERR("invalid connection type");
+      exit(-1);
+  }
+}
+
+// Main event loop of the plasma manager.
+void event_loop(int sock, plasma_manager_state* state) {
+  // Add listening socket.
+  add_conn(state, CONN_CONTROL, sock, POLLIN, NULL);
+  plasma_request req;
+  while (1) {
+    int num_ready = poll(state->waiting, state->num_conn, -1);
+    if (num_ready < 0) {
+      LOG_ERR("poll failed");
+      exit(-1);
+    }
+    for (int i = 0; i < state->num_conn; ++i) {
+      if (state->waiting[i].revents == 0)
+        continue;
+      if (state->waiting[i].fd == sock) {
+        // Handle new incoming connections.
+        int new_socket = accept(sock, NULL, NULL);
+        if (new_socket < 0) {
+          if (errno != EWOULDBLOCK) {
+            LOG_ERR("accept failed");
+            exit(-1);
+          }
+          break;
+        }
+        int conn_id = add_conn(state, CONN_CONTROL, new_socket, POLLIN, NULL);
+        LOG_INFO("new connection with id %d", conn_id);
+      } else {
+        read_from_socket(state, i, &req);
+      } 
+    }
+  }
+}
+
+// Register this plasma manager with the nameserver.
+void register_with_nameserver(const char* nameserver_addr, int nameserver_port,
+                              const char* manager_addr, int manager_port) {
+  int fd = socket(PF_INET, SOCK_STREAM, 0);
+  if (fd < 0) {
+    LOG_ERR("socket for nameserver connection could not be established");
+    exit(-1);
+  }
+  struct hostent *host = gethostbyname(nameserver_addr);
+  if (!host) {
+    LOG_ERR("nameserver %s not found", nameserver_addr);
+    exit(-1);
+  }
+  struct sockaddr_in nameserver;
+  memset(&nameserver, 0, sizeof(struct sockaddr_in));
+  nameserver.sin_family = AF_INET;
+  bcopy(host->h_addr, &nameserver.sin_addr.s_addr, host->h_length);
+  nameserver.sin_port = htons(nameserver_port);
+  if (connect(fd, (struct sockaddr*) &nameserver, sizeof(nameserver)) == -1) {
+    LOG_ERR("could not connect to nameserver %s:%d", nameserver_addr, nameserver_port);
+    exit(-1);
+  }
+  plasma_request req = { .type = PLASMA_REGISTER, .port = manager_port };
+  // TODO(pcm): input validation
+  sscanf(manager_addr, "%" SCNu8 ".%" SCNu8 ".%" SCNu8 ".%" SCNu8, &req.addr[0], &req.addr[1], &req.addr[2], &req.addr[3]);
+  plasma_send(fd, &req);
+  close(fd);
+}
+
+void start_server(const char *store_socket_name, const char* master_addr,
+                  const char* nameserver_addr, int nameserver_port) {
+  struct sockaddr_in name;
+  int sock = socket(PF_INET, SOCK_STREAM, 0);
+  if (sock < 0) {
+    LOG_ERR("could not create socket");
+    exit(-1);
+  }
+  name.sin_family = AF_INET;
+  name.sin_port = 0;
+  name.sin_addr.s_addr = htonl(INADDR_ANY);
+  int on = 1;
+  // TODO(pcm): http://stackoverflow.com/q/1150635
+  if (ioctl(sock, FIONBIO, (char*) &on) < 0) {
+    LOG_ERR("ioctl failed");
+    close(sock);
+    exit(-1);
+  }
+  if (bind(sock, (struct sockaddr*) &name, sizeof(name)) < 0) {
+    LOG_ERR("could not bind socket");
+    exit(-1);
+  }
+  socklen_t len = sizeof(name);
+  if (getsockname(sock, (struct sockaddr*) &name, &len) == -1) {
+    LOG_ERR("getsockname failed");
+  } else {
+    LOG_INFO("listening on port %d", ntohs(name.sin_port));
+  }
+  if (listen(sock, 5) == -1) {
+    LOG_ERR("could not listen to socket");
+    exit(-1);
+  }
+  register_with_nameserver(nameserver_addr, nameserver_port, master_addr, ntohs(name.sin_port));
+  plasma_manager_state state;
+  init_manager_state(&state, store_socket_name);
+  event_loop(sock, &state);
+}
+
+
+
+int main(int argc, char* argv[]) {
+  // Socket name of the plasma store this manager is connected to.
+  char *store_socket_name = NULL;
+  // IP address and port of the nameserver.
+  char *nameserver_addr_port = NULL;
+  // IP address this host can be reached at from the outside.
+  char *master_addr = NULL;
+  int c;
+  while ((c = getopt(argc, argv, "n:s:m:")) != -1) {
+    switch (c) {
+    case 's':
+      store_socket_name = optarg;
+      break;
+    case 'n':
+      nameserver_addr_port = optarg;
+      break;
+    case 'm':
+      master_addr = optarg;
+      break;
+    default:
+      LOG_ERR("unknown option %c", c);
+      exit(-1);
+    }
+  }
+  if (!store_socket_name) {
+    LOG_ERR("please specify socket for connecting to the plasma store with -s switch");
+    exit(-1);
+  }
+  if (!master_addr) {
+    LOG_ERR("please specify ip address of the current host in the format 123.456.789.10 with -m switch");
+    exit(-1);
+  }
+  // Parse nameserver address and port.
+  const char *format = "%15[0-9.]:%5[0-9]";
+  char nameserver_addr[16] = { 0 };
+  char nameserver_port[6] = { 0 };
+  if(!nameserver_addr_port || sscanf(nameserver_addr_port, format, nameserver_addr, nameserver_port) != 2) {
+    LOG_ERR("need to specify nameserver address in the format 123.456.789.10:12345 with -n switch");
+    exit(-1);
+  }
+  start_server(store_socket_name, master_addr, nameserver_addr, atoi(nameserver_port));
+}
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 3260cde9e..d67420c24 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -28,9 +28,12 @@
 #define MAX_NUM_CLIENTS 2048
 
 typedef struct {
-  int num_clients; // number of clients connected
-  int client_id[MAX_NUM_CLIENTS]; // unique identifier for the clients
-  struct pollfd waiting[MAX_NUM_CLIENTS]; // data structure for polling
+  // Number of clients connected.
+  int num_clients;
+  // Unique identifier for the clients.
+  int client_id[MAX_NUM_CLIENTS];
+  // Data structure for polling.
+  struct pollfd waiting[MAX_NUM_CLIENTS];
 } plasma_store_state;
 
 void init_state(plasma_store_state* s) {
@@ -48,8 +51,8 @@ int add_client(plasma_store_state* s, int fd) {
   return curr_id++;
 }
 
-// remove the client at index i by swapping it with the
-// client at index num_clients-1 and zeroing the latter out
+// Remove the client at index i by swapping it with the
+// client at index num_clients-1 and zeroing the latter out.
 void remove_client(plasma_store_state* s, int i) {
   memcpy(&s->waiting[i], &s->waiting[s->num_clients-1], sizeof(struct pollfd));
   memset(&s->waiting[s->num_clients-1], 0, sizeof(struct pollfd));
@@ -59,27 +62,35 @@ void remove_client(plasma_store_state* s, int i) {
 }
 
 typedef struct {
-  plasma_id object_id; // object id of this object
-  plasma_object_info info; // object info like size, creation time and owner
-  int fd; // memory mapped file containing the object
-  UT_hash_handle handle; // handle for the uthash table
+  // Object id of this object.
+  plasma_id object_id;
+  // Object info like size, creation time and owner.
+  plasma_object_info info;
+  // Memory mapped file containing the object.
+  int fd;
+  // Handle for the uthash table.
+  UT_hash_handle handle;
 } object_table_entry;
 
 // objects that are still being written by their owner process
 object_table_entry* open_objects = NULL;
 
-// objects that have already been sealed by their owner process and
-// can now be shared with other processes
+// Objects that have already been sealed by their owner process and
+// can now be shared with other processes.
 object_table_entry* sealed_objects = NULL;
 
 typedef struct {
-  plasma_id object_id; // object id of this object
-  int num_waiting; // number of processes waiting for the object
-  int conn[MAX_NUM_CLIENTS]; // socket connections to waiting clients
-  UT_hash_handle handle; // handle for the uthash table
+  // Object id of this object.
+  plasma_id object_id;
+  // Number of processes waiting for the object.
+  int num_waiting;
+  // Socket connections to waiting clients.
+  int conn[MAX_NUM_CLIENTS];
+  // Handle for the uthash table.
+  UT_hash_handle handle;
 } object_notify_entry;
 
-// objects that processes are waiting for
+// Objects that processes are waiting for.
 object_notify_entry* objects_notify = NULL;
 
 // Create a buffer. This is creating a temporary file and then
@@ -107,7 +118,7 @@ int create_buffer(int64_t size) {
   return fd;
 }
 
-// create a new object buffer in the hash table
+// Create a new object buffer in the hash table.
 void create_object(int conn, plasma_request* req) {
   LOG_INFO("creating object"); // TODO(pcm): add object_id here
   int fd = create_buffer(req->size);
@@ -125,7 +136,7 @@ void create_object(int conn, plasma_request* req) {
   send_fd(conn, fd, (char*) &reply, sizeof(plasma_reply));
 }
 
-// get an object from the hash table
+// Get an object from the hash table.
 void get_object(int conn, plasma_request* req) {
   object_table_entry *entry;
   HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
@@ -146,7 +157,7 @@ void get_object(int conn, plasma_request* req) {
   }
 }
 
-// seal an object that has been created in the hash table
+// Seal an object that has been created in the hash table.
 void seal_object(int conn, plasma_request* req) {
   LOG_INFO("sealing object"); // TODO(pcm): add object_id here
   object_table_entry *entry;
@@ -158,7 +169,7 @@ void seal_object(int conn, plasma_request* req) {
   int64_t size = entry->info.size;
   int fd = entry->fd;
   HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
-  // inform processes that the object is ready now
+  // Inform processes that the object is ready now.
   object_notify_entry* notify_entry;
   HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id), notify_entry);
   if (!notify_entry) {
@@ -205,7 +216,7 @@ void event_loop(int socket) {
         continue;
       if (state.waiting[i].fd == socket) {
         while (1) {
-          // handle new incoming connections
+          // Handle new incoming connections.
           int new_socket = accept(socket, NULL, NULL);
           if (new_socket < 0) {
             if (errno != EWOULDBLOCK) {
diff --git a/test/nameserver.py b/test/nameserver.py
new file mode 100644
index 000000000..6fcd965ca
--- /dev/null
+++ b/test/nameserver.py
@@ -0,0 +1,33 @@
+import collections
+import socket
+import ctypes
+import plasma
+
+DEFAULT_PORT = 16121
+
+Connection = collections.namedtuple("Connection", ["address", "port"])
+
+# list of IP addresses and ports of managers
+object_managers = []
+
+def send_addresses(conn, object_managers):
+  manager = plasma.PlasmaManager(conn.address, conn.port)
+  for (manager_id, object_manager) in enumerate(object_managers):
+    manager.register(manager_id, object_manager.address, object_manager.port)
+
+if __name__ == '__main__':
+  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+  sock.bind(('', DEFAULT_PORT))
+  sock.listen(5)
+  
+  while True:
+    (client, address) = sock.accept()
+    request = plasma.PlasmaRequest()
+    client.recv_into(request)
+    address = ".".join(map(str, request.addr[:]))
+    conn = Connection(address=address, port=request.port)
+    print "object manager " + str(conn) + " connected"
+    object_managers.append(conn)
+    for c in object_managers:
+      send_addresses(c, object_managers)
+    

From 3d6d1e8fe5b3017dca78048f51c1296d0f241add Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Thu, 18 Aug 2016 09:56:20 -0700
Subject: [PATCH 12/91] Test plasma manager.

---
 lib/python/plasma.py |  49 +++++++++++++++++++-
 src/plasma_manager.c |   8 ++--
 test/nameserver.py   |  35 +++++++++-----
 test/test.py         | 106 ++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 83de5f1b8..28b3f086d 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -16,6 +16,8 @@ PLASMA_SEAL = 2
 PLASMA_TRANSFER = 3
 PLASMA_DATA = 4
 PLASMA_REGISTER = 5
+PLASMA_GET_MANAGER_PORT = 6
+PLASMA_RETURN_MANAGER_PORT = 7
 
 class PlasmaRequest(ctypes.Structure):
   _fields_ = [("type", ctypes.c_int),
@@ -38,24 +40,43 @@ def make_plasma_id(string):
   return PlasmaID(plasma_id=ID(*object_id))
 
 class PlasmaManager(object):
+  """The PlasmaManager is used to manage a PlasmaStore.
+
+  There should be one PlasmaManager per PlasmaStore. The PlasmaManager is
+  responsible for interfacing with other PlasmaManagers in order to transfer
+  objects between PlasmaStores. This class sends commands to the C
+  implementation of the PlasmaManager using sockets.
+
+  Attributes:
+    sock: The socket used to communicate with the C implementation of the
+      PlasmaManager.
+  """
 
   def __init__(self, addr, port):
+    """Initialize the PlasmaManager."""
     self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     self.sock.connect((addr, port))
 
   def register(self, manager_id, addr, port):
-    "Register another object manager."
+    """Register another object manager."""
     req = PlasmaRequest(type=PLASMA_REGISTER, manager_id=manager_id,
                         addr=Addr(*map(int, addr.split("."))), port=port)
     self.sock.send(buffer(req)[:])
 
   def transfer(self, manager_id, object_id):
-    "Transfer local object with id object_id to manager with id manager_id."
+    """Transfer local object with id object_id to manager with id manager_id."""
     req = PlasmaRequest(type=PLASMA_TRANSFER, manager_id=manager_id,
                         object_id=make_plasma_id(object_id))
     self.sock.send(buffer(req)[:])
 
 class PlasmaClient(object):
+  """The PlasmaClient is used to interface with a PlasmaStore.
+
+  The PlasmaClient can ask the PlasmaStore to allocate a new buffer, seal a
+  buffer, and get a buffer. Buffers are referred to by object IDs, which are
+  strings.
+  """
+
   def __init__(self, socket_name):
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
     self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
@@ -82,12 +103,36 @@ class PlasmaClient(object):
     self.sock = self.client.plasma_store_connect(socket_name)
 
   def create(self, object_id, size):
+    """Create a new buffer in the PlasmaStore for a particular object ID.
+
+    The returned buffer is mutable until seal is called.
+
+    Args:
+      object_id (str): A string used to identify an object.
+      size (int): The size in bytes of the created buffer.
+    """
     buf = self.client.plasma_create(self.sock, make_plasma_id(object_id), size)
     return self.buffer_from_read_write_memory(buf.data, buf.size)
 
   def get(self, object_id):
+    """Create a buffer from the PlasmaStore based on object ID.
+
+    This method can only be called after the buffer has been sealed. The
+    retrieved buffer is immutable.
+
+    Args:
+      object_id (str): A string used to identify an object.
+    """
     buf = self.client.plasma_get(self.sock, make_plasma_id(object_id))
     return self.buffer_from_memory(buf.data, buf.size)
 
   def seal(self, object_id):
+    """Seal the buffer in the PlasmaStore for a particular object ID.
+
+    Once a buffer has been sealed, the buffer is immutable and can only be
+    accessed through get.
+
+    Args:
+      object_id (str): A string used to identify an object.
+    """
     self.client.plasma_seal(self.sock, make_plasma_id(object_id))
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 7ad2585b6..b439fcf35 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -128,7 +128,7 @@ void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
   int manager_id = req->manager_id;
   int c = plasma_store_connect(state->store_socket_name);
   plasma_buffer buf = plasma_get(c, req->object_id);
-  
+
   int fd = socket(PF_INET, SOCK_STREAM, 0);
   if (fd < 0) {
     LOG_ERR("could not create socket");
@@ -274,7 +274,7 @@ void event_loop(int sock, plasma_manager_state* state) {
         LOG_INFO("new connection with id %d", conn_id);
       } else {
         read_from_socket(state, i, &req);
-      } 
+      }
     }
   }
 }
@@ -346,8 +346,6 @@ void start_server(const char *store_socket_name, const char* master_addr,
   event_loop(sock, &state);
 }
 
-
-
 int main(int argc, char* argv[]) {
   // Socket name of the plasma store this manager is connected to.
   char *store_socket_name = NULL;
@@ -384,7 +382,7 @@ int main(int argc, char* argv[]) {
   const char *format = "%15[0-9.]:%5[0-9]";
   char nameserver_addr[16] = { 0 };
   char nameserver_port[6] = { 0 };
-  if(!nameserver_addr_port || sscanf(nameserver_addr_port, format, nameserver_addr, nameserver_port) != 2) {
+  if (!nameserver_addr_port || sscanf(nameserver_addr_port, format, nameserver_addr, nameserver_port) != 2) {
     LOG_ERR("need to specify nameserver address in the format 123.456.789.10:12345 with -n switch");
     exit(-1);
   }
diff --git a/test/nameserver.py b/test/nameserver.py
index 6fcd965ca..66fa6cf52 100644
--- a/test/nameserver.py
+++ b/test/nameserver.py
@@ -1,6 +1,8 @@
 import collections
 import socket
 import ctypes
+import atexit
+
 import plasma
 
 DEFAULT_PORT = 16121
@@ -15,19 +17,30 @@ def send_addresses(conn, object_managers):
   for (manager_id, object_manager) in enumerate(object_managers):
     manager.register(manager_id, object_manager.address, object_manager.port)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-  sock.bind(('', DEFAULT_PORT))
+  sock.bind(("", DEFAULT_PORT))
   sock.listen(5)
-  
+
+  def cleanup():
+    sock.shutdown(socket.SHUT_RDWR)
+    sock.close()
+  atexit.register(cleanup)
+
   while True:
-    (client, address) = sock.accept()
+    client, address = sock.accept()
     request = plasma.PlasmaRequest()
     client.recv_into(request)
-    address = ".".join(map(str, request.addr[:]))
-    conn = Connection(address=address, port=request.port)
-    print "object manager " + str(conn) + " connected"
-    object_managers.append(conn)
-    for c in object_managers:
-      send_addresses(c, object_managers)
-    
+    if request.type == plasma.PLASMA_REGISTER:
+      address = ".".join(map(str, request.addr[:]))
+      conn = Connection(address=address, port=request.port)
+      print "object manager " + str(conn) + " connected"
+      object_managers.append(conn)
+      for c in object_managers:
+        send_addresses(c, object_managers)
+    elif request.type == plasma.PLASMA_GET_MANAGER_PORT:
+      port = object_managers[request.manager_id].port
+      req = plasma.PlasmaRequest(type=plasma.PLASMA_RETURN_MANAGER_PORT, port=port)
+      client.send(buffer(req)[:])
+    else:
+      raise Exception("This code should be unreachable.")
diff --git a/test/test.py b/test/test.py
index 6d4bee003..ba9789e81 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,11 +1,17 @@
 import os
+import socket
 import subprocess
 import sys
 import unittest
+import random
+import time
 
 import plasma
 
-class TestPlasmaAPI(unittest.TestCase):
+def random_object_id():
+  return "".join([chr(random.randint(0, 256)) for _ in range(20)])
+
+class TestPlasmaClient(unittest.TestCase):
 
   def setUp(self):
     # Start Plasma.
@@ -15,12 +21,12 @@ class TestPlasmaAPI(unittest.TestCase):
     self.plasma_client = plasma.PlasmaClient("/tmp/store")
 
   def tearDown(self):
-    # Kill the plasma stoe process.
+    # Kill the plasma store process.
     self.p.kill()
 
   def test_create(self):
-    # Create an object string.
-    object_id = "id" + 18 * "x"
+    # Create an object id string.
+    object_id = random_object_id()
     # Create a new buffer and write to it.
     length = 1000
     memory_buffer = self.plasma_client.create(object_id, length)
@@ -34,8 +40,8 @@ class TestPlasmaAPI(unittest.TestCase):
       self.assertEqual(memory_buffer[i], chr(i % 256))
 
   def test_illegal_functionality(self):
-    # Create an object string.
-    object_id = "id" + 18 * "x"
+    # Create an object id string.
+    object_id = random_object_id()
     # Create a new buffer and write to it.
     length = 1000
     memory_buffer = self.plasma_client.create(object_id, length)
@@ -55,5 +61,93 @@ class TestPlasmaAPI(unittest.TestCase):
       memory_buffer[0] = chr(0)
     self.assertRaises(Exception, illegal_assignment)
 
+class TestPlasmaManager(unittest.TestCase):
+
+  def setUp(self):
+    # Start the nameserver.
+    nameserver_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "nameserver.py")
+    self.p1 = subprocess.Popen(["python", nameserver_path])
+    # Start two PlasmaStores.
+    plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
+    self.p2 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store1"])
+    self.p3 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store2"])
+    # Connect two PlasmaClients.
+    self.client1 = plasma.PlasmaClient("/tmp/store1")
+    self.client2 = plasma.PlasmaClient("/tmp/store2")
+    # Start two PlasmaManagers.
+    time.sleep(0.1)
+    plasma_manager_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_manager")
+    self.p4 = subprocess.Popen([plasma_manager_executable, "-n", "127.0.0.1:16121", "-s", "/tmp/store1", "-m", "127.0.0.1"])
+    self.p5 = subprocess.Popen([plasma_manager_executable, "-n", "127.0.0.1:16121", "-s", "/tmp/store2", "-m", "127.0.0.1"])
+    time.sleep(0.1)
+    # Connect to the nameserver.
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.connect(("127.0.0.1", 16121))
+    # Get the port for the first PlasmaManager.
+    req = plasma.PlasmaRequest(type=plasma.PLASMA_GET_MANAGER_PORT, manager_id=0)
+    sock.send(buffer(req)[:])
+    request = plasma.PlasmaRequest()
+    sock.recv_into(request)
+    port1 = request.port
+    time.sleep(0.1)
+    # Get the port for the second PlasmaManager.
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.connect(("127.0.0.1", 16121))
+    req = plasma.PlasmaRequest(type=plasma.PLASMA_GET_MANAGER_PORT, manager_id=1)
+    sock.send(buffer(req)[:])
+    request = plasma.PlasmaRequest()
+    sock.recv_into(request)
+    port2 = request.port
+    # Connect two Python PlasmaManagers.
+    self.manager1 = plasma.PlasmaManager("127.0.0.1", port1)
+    self.manager2 = plasma.PlasmaManager("127.0.0.1", port2)
+
+  def tearDown(self):
+    # Kill the nameserver, PlasmaStore and PlasmaManager processes.
+    self.p1.kill()
+    self.p2.kill()
+    self.p3.kill()
+    self.p4.kill()
+    self.p5.kill()
+
+  def test_transfer(self):
+    # Create an object id string.
+    object_id1 = random_object_id()
+    # Create a new buffer and write to it.
+    memory_buffer = self.client1.create(object_id1, 20000)
+    for i in range(len(memory_buffer)):
+      memory_buffer[i] = chr(i % 10)
+    # Seal the buffer.
+    self.client1.seal(object_id1)
+    # Transfer the buffer to the other PlasmaStore.
+    self.manager1.transfer(1, object_id1)
+    # Compare the two buffers.
+    self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+    # Transfer the buffer again.
+    self.manager1.transfer(1, object_id1)
+    # Compare the two buffers.
+    self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+    # Create a new object id string.
+    object_id2 = random_object_id()
+    # Create a new buffer and write to it.
+    memory_buffer = self.client2.create(object_id2, 20000)
+    for i in range(len(memory_buffer)):
+      memory_buffer[i] = chr(i % 10)
+    # Seal the buffer.
+    self.client2.seal(object_id2)
+    # Transfer the buffer to the other PlasmaStore.
+    self.manager2.transfer(0, object_id2)
+    # Compare the two buffers.
+    self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
+
+  def test_illegal_functionality(self):
+    # Create an object id string.
+    object_id = random_object_id()
+    # Create a new buffer.
+    memory_buffer = self.client1.create(object_id, 20000)
+    # This test is commented out because it currently fails.
+    # # Transferring the buffer before sealing it should fail.
+    # self.assertRaises(Exception, lambda : self.manager1.transfer(1, object_id))
+
 if __name__ == "__main__":
   unittest.main(verbosity=2)

From 87447256c4899ca3f6d209c59ea478e51ca4e63a Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Thu, 18 Aug 2016 23:26:08 -0700
Subject: [PATCH 13/91] Test Python 3 in Travis.

---
 .travis.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 3230e843f..15be440e8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,16 @@ matrix:
   include:
     - os: linux
       dist: trusty
+      python: "2.7"
+    - os: linux
+      dist: trusty
+      python: "3.5"
     - os: osx
       osx_image: xcode7
+      python: "2.7"
+    - os: osx
+      osx_image: xcode7
+      python: "3.5"
 
 install:
   - make

From e1627319b3ea03c4811e5c91178edf4efa193a9c Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Mon, 22 Aug 2016 15:30:16 -0700
Subject: [PATCH 14/91] clean up tests

---
 lib/python/plasma.py |  16 ++----
 src/plasma.h         |   2 -
 src/plasma_manager.c | 127 ++++++++++---------------------------------
 test/nameserver.py   |  46 ----------------
 test/test.py         |  43 ++++-----------
 5 files changed, 45 insertions(+), 189 deletions(-)
 delete mode 100644 test/nameserver.py

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 28b3f086d..713c3f3ad 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -16,12 +16,9 @@ PLASMA_SEAL = 2
 PLASMA_TRANSFER = 3
 PLASMA_DATA = 4
 PLASMA_REGISTER = 5
-PLASMA_GET_MANAGER_PORT = 6
-PLASMA_RETURN_MANAGER_PORT = 7
 
 class PlasmaRequest(ctypes.Structure):
   _fields_ = [("type", ctypes.c_int),
-              ("manager_id", ctypes.c_int),
               ("object_id", PlasmaID),
               ("size", ctypes.c_int64),
               ("addr", Addr),
@@ -57,16 +54,11 @@ class PlasmaManager(object):
     self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     self.sock.connect((addr, port))
 
-  def register(self, manager_id, addr, port):
-    """Register another object manager."""
-    req = PlasmaRequest(type=PLASMA_REGISTER, manager_id=manager_id,
-                        addr=Addr(*map(int, addr.split("."))), port=port)
-    self.sock.send(buffer(req)[:])
-
-  def transfer(self, manager_id, object_id):
+  def transfer(self, addr, port, object_id):
     """Transfer local object with id object_id to manager with id manager_id."""
-    req = PlasmaRequest(type=PLASMA_TRANSFER, manager_id=manager_id,
-                        object_id=make_plasma_id(object_id))
+    req = PlasmaRequest(type=PLASMA_TRANSFER, object_id=make_plasma_id(object_id),
+                        addr=Addr(*map(int, addr.split("."))), port=port)
+    print "sending port", port
     self.sock.send(buffer(req)[:])
 
 class PlasmaClient(object):
diff --git a/src/plasma.h b/src/plasma.h
index 2e4fbfdcd..39d04700f 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -38,12 +38,10 @@ enum plasma_request_type {
   PLASMA_SEAL, // seal an object
   PLASMA_TRANSFER, // request transfer to another store
   PLASMA_DATA, // header for sending data
-  PLASMA_REGISTER // register a plasma manager
 };
 
 typedef struct {
   int type;
-  int manager_id;
   plasma_id object_id;
   int64_t size;
   uint8_t addr[4];
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index b439fcf35..34a8e474a 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -49,13 +49,6 @@ typedef struct {
   int64_t cursor;
 } conn_state;
 
-typedef struct {
-  // Address of the manager.
-  struct sockaddr_in name;
-  // Is this manager connected?
-  int connected;
-} manager_state;
-
 typedef struct {
   // Name of the socket connecting to local plasma store.
   const char* store_socket_name;
@@ -65,36 +58,17 @@ typedef struct {
   struct pollfd waiting[MAX_CONNECTIONS];
   // Status of connections (both control and data).
   conn_state conn[MAX_CONNECTIONS];
-  // Other plasma managers in the cluster.
-  manager_state managers[MAX_NUM_MANAGERS];
 } plasma_manager_state;
 
 void init_manager_state(plasma_manager_state *s, const char* store_socket_name) {
   memset(&s->waiting, 0, sizeof(s->waiting));
   memset(&s->conn, 0, sizeof(s->conn));
-  memset(&s->managers, 0, sizeof(s->managers));
   s->num_conn = 0;
   s->store_socket_name = store_socket_name;
 }
 
 #define h_addr h_addr_list[0]
 
-// Add name info for another plasma manager from the cluster.
-void add_manager(plasma_manager_state *s, int manager_id, char *ip_address, int port) {
-  assert(ip_address);
-  assert(s);
-  struct hostent *manager = gethostbyname(ip_address);
-  if (!manager) {
-    LOG_ERR("plasma manager %s not found", ip_address);
-    exit(-1);
-  }
-  s->managers[manager_id].connected = 1;
-  struct sockaddr_in *name = &s->managers[manager_id].name;
-  name->sin_family = AF_INET;
-  bcopy(manager->h_addr, &name->sin_addr.s_addr, manager->h_length);
-  name->sin_port = htons(port);
-}
-
 // Add connection for sending commands or data to another plasma manager
 // (returns the connection id).
 int add_conn(plasma_manager_state* s, int type, int fd, int events, plasma_buffer* buf) {
@@ -125,7 +99,6 @@ void remove_conn(plasma_manager_state* s, int i) {
 // a connection to both the manager and the local object store and sends
 // the data header to the other object manager.
 void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
-  int manager_id = req->manager_id;
   int c = plasma_store_connect(state->store_socket_name);
   plasma_buffer buf = plasma_get(c, req->object_id);
 
@@ -134,9 +107,24 @@ void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
     LOG_ERR("could not create socket");
     exit(-1);
   }
-  int r = connect(fd, (struct sockaddr*) &state->managers[manager_id].name, sizeof(state->managers[manager_id].name));
+  
+  char ip_addr[16];
+  snprintf(ip_addr, 32, "%d.%d.%d.%d",
+                    req->addr[0], req->addr[1],
+                    req->addr[2], req->addr[3]);
+  struct hostent *manager = gethostbyname(ip_addr); // TODO(pcm): cache this
+  if (!manager) {
+    LOG_ERR("plasma manager %s not found", ip_addr);
+    exit(-1);
+  }
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  bcopy(manager->h_addr, &addr.sin_addr.s_addr, manager->h_length);
+  addr.sin_port = htons(req->port);
+  
+  int r = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
   if (r < 0) {
-    LOG_ERR("could not establish connection to manager with id %d", manager_id);
+    LOG_ERR("could not establish connection to manager with id %s:%d", &ip_addr[0], req->port);
     exit(-1);
   }
 
@@ -160,22 +148,9 @@ void setup_data_connection(int conn_idx, plasma_manager_state* state, plasma_req
 void process_command(int conn_idx, plasma_manager_state* state, plasma_request* req) {
   switch (req->type) {
   case PLASMA_TRANSFER:
-    LOG_INFO("transfering object to manager with id %d", req->manager_id);
+    LOG_INFO("transfering object to manager with port %d", req->port);
     initiate_transfer(state, req);
     break;
-  case PLASMA_REGISTER: {
-      char buff[16];
-      snprintf(buff, 32, "%d.%d.%d.%d",
-               req->addr[0], req->addr[1],
-               req->addr[2], req->addr[3]);
-      if (req->manager_id >= MAX_NUM_MANAGERS) {
-        LOG_ERR("manager_id %d out of bounds", req->manager_id);
-      } else {
-        add_manager(state, req->manager_id, buff, req->port);
-        LOG_INFO("registering %s:%d with id %d", buff, req->port, req->manager_id);
-      }
-    }
-    break;
   case PLASMA_DATA:
     LOG_INFO("starting to stream data");
     setup_data_connection(conn_idx, state, req);
@@ -279,37 +254,7 @@ void event_loop(int sock, plasma_manager_state* state) {
   }
 }
 
-// Register this plasma manager with the nameserver.
-void register_with_nameserver(const char* nameserver_addr, int nameserver_port,
-                              const char* manager_addr, int manager_port) {
-  int fd = socket(PF_INET, SOCK_STREAM, 0);
-  if (fd < 0) {
-    LOG_ERR("socket for nameserver connection could not be established");
-    exit(-1);
-  }
-  struct hostent *host = gethostbyname(nameserver_addr);
-  if (!host) {
-    LOG_ERR("nameserver %s not found", nameserver_addr);
-    exit(-1);
-  }
-  struct sockaddr_in nameserver;
-  memset(&nameserver, 0, sizeof(struct sockaddr_in));
-  nameserver.sin_family = AF_INET;
-  bcopy(host->h_addr, &nameserver.sin_addr.s_addr, host->h_length);
-  nameserver.sin_port = htons(nameserver_port);
-  if (connect(fd, (struct sockaddr*) &nameserver, sizeof(nameserver)) == -1) {
-    LOG_ERR("could not connect to nameserver %s:%d", nameserver_addr, nameserver_port);
-    exit(-1);
-  }
-  plasma_request req = { .type = PLASMA_REGISTER, .port = manager_port };
-  // TODO(pcm): input validation
-  sscanf(manager_addr, "%" SCNu8 ".%" SCNu8 ".%" SCNu8 ".%" SCNu8, &req.addr[0], &req.addr[1], &req.addr[2], &req.addr[3]);
-  plasma_send(fd, &req);
-  close(fd);
-}
-
-void start_server(const char *store_socket_name, const char* master_addr,
-                  const char* nameserver_addr, int nameserver_port) {
+void start_server(const char *store_socket_name, const char* master_addr, int port) {
   struct sockaddr_in name;
   int sock = socket(PF_INET, SOCK_STREAM, 0);
   if (sock < 0) {
@@ -317,7 +262,7 @@ void start_server(const char *store_socket_name, const char* master_addr,
     exit(-1);
   }
   name.sin_family = AF_INET;
-  name.sin_port = 0;
+  name.sin_port = htons(port);
   name.sin_addr.s_addr = htonl(INADDR_ANY);
   int on = 1;
   // TODO(pcm): http://stackoverflow.com/q/1150635
@@ -330,17 +275,11 @@ void start_server(const char *store_socket_name, const char* master_addr,
     LOG_ERR("could not bind socket");
     exit(-1);
   }
-  socklen_t len = sizeof(name);
-  if (getsockname(sock, (struct sockaddr*) &name, &len) == -1) {
-    LOG_ERR("getsockname failed");
-  } else {
-    LOG_INFO("listening on port %d", ntohs(name.sin_port));
-  }
+  LOG_INFO("listening on port %d", port);
   if (listen(sock, 5) == -1) {
     LOG_ERR("could not listen to socket");
     exit(-1);
   }
-  register_with_nameserver(nameserver_addr, nameserver_port, master_addr, ntohs(name.sin_port));
   plasma_manager_state state;
   init_manager_state(&state, store_socket_name);
   event_loop(sock, &state);
@@ -349,22 +288,22 @@ void start_server(const char *store_socket_name, const char* master_addr,
 int main(int argc, char* argv[]) {
   // Socket name of the plasma store this manager is connected to.
   char *store_socket_name = NULL;
-  // IP address and port of the nameserver.
-  char *nameserver_addr_port = NULL;
-  // IP address this host can be reached at from the outside.
+  // IP address of this node
   char *master_addr = NULL;
+  // Port number the manager should use
+  int port;
   int c;
-  while ((c = getopt(argc, argv, "n:s:m:")) != -1) {
+  while ((c = getopt(argc, argv, "s:m:p:")) != -1) {
     switch (c) {
     case 's':
       store_socket_name = optarg;
       break;
-    case 'n':
-      nameserver_addr_port = optarg;
-      break;
     case 'm':
       master_addr = optarg;
       break;
+    case 'p':
+      port = atoi(optarg);
+      break;
     default:
       LOG_ERR("unknown option %c", c);
       exit(-1);
@@ -378,13 +317,5 @@ int main(int argc, char* argv[]) {
     LOG_ERR("please specify ip address of the current host in the format 123.456.789.10 with -m switch");
     exit(-1);
   }
-  // Parse nameserver address and port.
-  const char *format = "%15[0-9.]:%5[0-9]";
-  char nameserver_addr[16] = { 0 };
-  char nameserver_port[6] = { 0 };
-  if (!nameserver_addr_port || sscanf(nameserver_addr_port, format, nameserver_addr, nameserver_port) != 2) {
-    LOG_ERR("need to specify nameserver address in the format 123.456.789.10:12345 with -n switch");
-    exit(-1);
-  }
-  start_server(store_socket_name, master_addr, nameserver_addr, atoi(nameserver_port));
+  start_server(store_socket_name, master_addr, port);
 }
diff --git a/test/nameserver.py b/test/nameserver.py
deleted file mode 100644
index 66fa6cf52..000000000
--- a/test/nameserver.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import collections
-import socket
-import ctypes
-import atexit
-
-import plasma
-
-DEFAULT_PORT = 16121
-
-Connection = collections.namedtuple("Connection", ["address", "port"])
-
-# list of IP addresses and ports of managers
-object_managers = []
-
-def send_addresses(conn, object_managers):
-  manager = plasma.PlasmaManager(conn.address, conn.port)
-  for (manager_id, object_manager) in enumerate(object_managers):
-    manager.register(manager_id, object_manager.address, object_manager.port)
-
-if __name__ == "__main__":
-  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-  sock.bind(("", DEFAULT_PORT))
-  sock.listen(5)
-
-  def cleanup():
-    sock.shutdown(socket.SHUT_RDWR)
-    sock.close()
-  atexit.register(cleanup)
-
-  while True:
-    client, address = sock.accept()
-    request = plasma.PlasmaRequest()
-    client.recv_into(request)
-    if request.type == plasma.PLASMA_REGISTER:
-      address = ".".join(map(str, request.addr[:]))
-      conn = Connection(address=address, port=request.port)
-      print "object manager " + str(conn) + " connected"
-      object_managers.append(conn)
-      for c in object_managers:
-        send_addresses(c, object_managers)
-    elif request.type == plasma.PLASMA_GET_MANAGER_PORT:
-      port = object_managers[request.manager_id].port
-      req = plasma.PlasmaRequest(type=plasma.PLASMA_RETURN_MANAGER_PORT, port=port)
-      client.send(buffer(req)[:])
-    else:
-      raise Exception("This code should be unreachable.")
diff --git a/test/test.py b/test/test.py
index ba9789e81..b0284cb54 100644
--- a/test/test.py
+++ b/test/test.py
@@ -5,11 +5,12 @@ import sys
 import unittest
 import random
 import time
+import tempfile
 
 import plasma
 
 def random_object_id():
-  return "".join([chr(random.randint(0, 256)) for _ in range(20)])
+  return "".join([chr(random.randint(0, 255)) for _ in range(20)])
 
 class TestPlasmaClient(unittest.TestCase):
 
@@ -64,9 +65,6 @@ class TestPlasmaClient(unittest.TestCase):
 class TestPlasmaManager(unittest.TestCase):
 
   def setUp(self):
-    # Start the nameserver.
-    nameserver_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "nameserver.py")
-    self.p1 = subprocess.Popen(["python", nameserver_path])
     # Start two PlasmaStores.
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
     self.p2 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store1"])
@@ -75,36 +73,19 @@ class TestPlasmaManager(unittest.TestCase):
     self.client1 = plasma.PlasmaClient("/tmp/store1")
     self.client2 = plasma.PlasmaClient("/tmp/store2")
     # Start two PlasmaManagers.
-    time.sleep(0.1)
+    self.port1 = random.randint(10000, 50000)
+    self.port2 = random.randint(10000, 50000)
     plasma_manager_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_manager")
-    self.p4 = subprocess.Popen([plasma_manager_executable, "-n", "127.0.0.1:16121", "-s", "/tmp/store1", "-m", "127.0.0.1"])
-    self.p5 = subprocess.Popen([plasma_manager_executable, "-n", "127.0.0.1:16121", "-s", "/tmp/store2", "-m", "127.0.0.1"])
+    self.p4 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store1", "-m", "127.0.0.1", "-p", str(self.port1)])
+    self.p5 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store2", "-m", "127.0.0.1", "-p", str(self.port2)])
     time.sleep(0.1)
-    # Connect to the nameserver.
-    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    sock.connect(("127.0.0.1", 16121))
-    # Get the port for the first PlasmaManager.
-    req = plasma.PlasmaRequest(type=plasma.PLASMA_GET_MANAGER_PORT, manager_id=0)
-    sock.send(buffer(req)[:])
-    request = plasma.PlasmaRequest()
-    sock.recv_into(request)
-    port1 = request.port
-    time.sleep(0.1)
-    # Get the port for the second PlasmaManager.
-    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    sock.connect(("127.0.0.1", 16121))
-    req = plasma.PlasmaRequest(type=plasma.PLASMA_GET_MANAGER_PORT, manager_id=1)
-    sock.send(buffer(req)[:])
-    request = plasma.PlasmaRequest()
-    sock.recv_into(request)
-    port2 = request.port
     # Connect two Python PlasmaManagers.
-    self.manager1 = plasma.PlasmaManager("127.0.0.1", port1)
-    self.manager2 = plasma.PlasmaManager("127.0.0.1", port2)
+    self.manager1 = plasma.PlasmaManager("127.0.0.1", self.port1)
+    self.manager2 = plasma.PlasmaManager("127.0.0.1", self.port2)
+    time.sleep(0.5)
 
   def tearDown(self):
     # Kill the nameserver, PlasmaStore and PlasmaManager processes.
-    self.p1.kill()
     self.p2.kill()
     self.p3.kill()
     self.p4.kill()
@@ -120,11 +101,11 @@ class TestPlasmaManager(unittest.TestCase):
     # Seal the buffer.
     self.client1.seal(object_id1)
     # Transfer the buffer to the the other PlasmaStore.
-    self.manager1.transfer(1, object_id1)
+    self.manager1.transfer("127.0.0.1", self.port2, object_id1)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
     # Transfer the buffer again.
-    self.manager1.transfer(1, object_id1)
+    self.manager1.transfer("127.0.0.1", self.port2, object_id1)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
     # Create a new object id string.
@@ -136,7 +117,7 @@ class TestPlasmaManager(unittest.TestCase):
     # Seal the buffer.
     self.client2.seal(object_id2)
     # Transfer the buffer to the the other PlasmaStore.
-    self.manager2.transfer(0, object_id2)
+    self.manager2.transfer("127.0.0.1", self.port1, object_id2)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
 

From ad1a8454d50273e5f7fb69aaa2544e95256d5b74 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Mon, 5 Sep 2016 15:34:11 -0700
Subject: [PATCH 15/91] remove C struct duplication and python plasma manager

---
 lib/python/plasma.py | 94 ++++++++++++++++++--------------------------
 src/example.c        |  7 ++--
 src/plasma.h         | 13 ++++--
 src/plasma_client.c  | 55 +++++++++++++++++++++-----
 src/plasma_manager.c | 80 ++++++-------------------------------
 src/plasma_manager.h | 42 ++++++++++++++++++++
 test/test.py         | 17 ++++----
 7 files changed, 157 insertions(+), 151 deletions(-)
 create mode 100644 src/plasma_manager.h

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 713c3f3ad..5d18b06e4 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -9,77 +9,38 @@ ID = ctypes.c_ubyte * 20
 class PlasmaID(ctypes.Structure):
   _fields_ = [("plasma_id", ID)]
 
-# these must be in sync with plasma_request_type in plasma.h (can we have a test for that?)
-PLASMA_CREATE = 0
-PLASMA_GET = 1
-PLASMA_SEAL = 2
-PLASMA_TRANSFER = 3
-PLASMA_DATA = 4
-PLASMA_REGISTER = 5
-
-class PlasmaRequest(ctypes.Structure):
-  _fields_ = [("type", ctypes.c_int),
-              ("object_id", PlasmaID),
-              ("size", ctypes.c_int64),
-              ("addr", Addr),
-              ("port", ctypes.c_int)]
-
-class PlasmaBuffer(ctypes.Structure):
-  _fields_ = [("plasma_id", PlasmaID),
-              ("data", ctypes.c_void_p),
-              ("size", ctypes.c_int64),
-              ("writable", ctypes.c_int)]
-
 def make_plasma_id(string):
   if len(string) != 20:
     raise Exception("PlasmaIDs must be 20 characters long")
   object_id = map(ord, string)
   return PlasmaID(plasma_id=ID(*object_id))
 
-class PlasmaManager(object):
-  """The PlasmaManager is used to manage a PlasmaStore.
-
-  There should be one PlasmaManager per PlasmaStore. The PlasmaManager is
-  responsible for interfacing with other PlasmaManagers in order to transfer
-  objects between PlasmaStores. This class sends commands to the C
-  implementation of the PlasmaManager using sockets.
-
-  Attributes:
-    sock: The socket used to communicate with the C implementation of the
-      PlasmaManager.
-  """
-
-  def __init__(self, addr, port):
-    """Initialize the PlasmaManager."""
-    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    self.sock.connect((addr, port))
-
-  def transfer(self, addr, port, object_id):
-    """Transfer local object with id object_id to manager with id manager_id."""
-    req = PlasmaRequest(type=PLASMA_TRANSFER, object_id=make_plasma_id(object_id),
-                        addr=Addr(*map(int, addr.split("."))), port=port)
-    print "sending port", port
-    self.sock.send(buffer(req)[:])
-
 class PlasmaClient(object):
-  """The PlasmaClient is used to interface with a PlasmaStore.
+  """The PlasmaClient is used to interface with a plasma store and a plasma manager.
 
   The PlasmaClient can ask the PlasmaStore to allocate a new buffer, seal a
   buffer, and get a buffer. Buffers are referred to by object IDs, which are
   strings.
   """
 
-  def __init__(self, socket_name):
+  def __init__(self, socket_name, addr=None, port=None):
+    """Initialize the PlasmaClient.
+    
+    Args:
+      socket_name (str): Name of the socket the plasma store is listening at.
+      addr (str): IPv4 address of plasma manager attached to the plasma store.
+      port (int): Port number of the plasma manager attached to the plasma store.
+    """
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
     self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
 
     self.client.plasma_store_connect.restype = ctypes.c_int
 
-    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64]
-    self.client.plasma_create.restype = PlasmaBuffer
+    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64, ctypes.POINTER(ctypes.c_void_p)]
+    self.client.plasma_create.restype = None
 
-    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID]
-    self.client.plasma_get.restype = PlasmaBuffer
+    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID, ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p)]
+    self.client.plasma_get.restype = None
 
     self.client.plasma_seal.argtypes = [ctypes.c_int, PlasmaID]
     self.client.plasma_seal.restype = None
@@ -94,6 +55,11 @@ class PlasmaClient(object):
 
     self.sock = self.client.plasma_store_connect(socket_name)
 
+    if addr is not None and port is not None:
+      self.manager_conn = self.client.plasma_manager_connect(addr, port)
+    else:
+      self.manager_conn = -1 # not connected
+
   def create(self, object_id, size):
     """Create a new buffer in the PlasmaStore for a particular object ID.
 
@@ -103,8 +69,9 @@ class PlasmaClient(object):
       object_id (str): A string used to identify an object.
       size (int): The size in bytes of the created buffer.
     """
-    buf = self.client.plasma_create(self.sock, make_plasma_id(object_id), size)
-    return self.buffer_from_read_write_memory(buf.data, buf.size)
+    data = ctypes.c_void_p()
+    self.client.plasma_create(self.sock, make_plasma_id(object_id), size, ctypes.byref(data))
+    return self.buffer_from_read_write_memory(data, size)
 
   def get(self, object_id):
     """Create a buffer from the PlasmaStore based on object ID.
@@ -115,8 +82,10 @@ class PlasmaClient(object):
     Args:
       object_id (str): A string used to identify an object.
     """
-    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id))
-    return self.buffer_from_memory(buf.data, buf.size)
+    size = ctypes.c_int64()
+    data = ctypes.c_void_p()
+    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data))
+    return self.buffer_from_memory(data, size)
 
   def seal(self, object_id):
     """Seal the buffer in the PlasmaStore for a particular object ID.
@@ -128,3 +97,16 @@ class PlasmaClient(object):
       object_id (str): A string used to identify an object.
     """
     self.client.plasma_seal(self.sock, make_plasma_id(object_id))
+
+  def transfer(self, addr, port, object_id):
+    """Transfer local object with id object_id to another plasma instance
+    
+    Args:
+      addr (str): IPv4 address of the plasma instance the object is sent to.
+      port (int): Port number of the plasma instance the object is sent to.
+      object_id (str): A string used to identify an object.
+    """
+    if self.manager_conn == -1:
+      raise Exception("Not connected to the plasma manager socket")
+    self.client.plasma_transfer(self.manager_conn, addr, port, make_plasma_id(object_id))
+    
diff --git a/src/example.c b/src/example.c
index 20ecd7b64..1d8d74b5c 100644
--- a/src/example.c
+++ b/src/example.c
@@ -16,6 +16,8 @@
 
 int main(int argc, char *argv[]) {
   int conn = -1;
+  int64_t size;
+  void *data;
   int c;
   plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
 		   255, 255, 255, 255, 255, 255, 255, 255}};
@@ -26,14 +28,14 @@ int main(int argc, char *argv[]) {
       break;
     case 'c':
       assert(conn != -1);
-      plasma_create(conn, id, 100);
+      plasma_create(conn, id, 100, &data);
       break;
     case 'f':
       assert(conn != -1);
       plasma_seal(conn, id);
       break;
     case 'g':
-      plasma_get(conn, id);
+      plasma_get(conn, id, &size, &data);
       break;
     default:
       abort();
@@ -42,4 +44,3 @@ int main(int argc, char *argv[]) {
   assert(conn != -1);
   close(conn);
 }
-
diff --git a/src/plasma.h b/src/plasma.h
index 39d04700f..9fdec86c2 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -65,11 +65,16 @@ typedef struct {
   int writable;
 } plasma_buffer;
 
+// Connect to the local plasma store UNIX domain socket
 int plasma_store_connect(const char* socket_name);
-plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size);
-plasma_buffer plasma_get(int conn, plasma_id object_id);
-void plasma_seal(int fd, plasma_id object_id);
 
-void plasma_send(int fd, plasma_request *req);
+// Connect to a possibly remote plasma manager
+int plasma_manager_connect(const char* addr, int port);
+
+void plasma_create(int store, plasma_id object_id, int64_t size, void **data);
+void plasma_get(int store, plasma_id object_id, int64_t *size, void **data);
+void plasma_seal(int store, plasma_id object_id);
+
+void plasma_send(int conn, plasma_request *req);
 
 #endif
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 5dd36ae7e..e8e160d70 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -23,7 +23,7 @@ void plasma_send(int fd, plasma_request *req) {
   }
 }
 
-plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size) {
+void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
   LOG_INFO("called plasma_create on conn %d with size %" PRId64, conn, size);
   plasma_request req = { .type = PLASMA_CREATE, .object_id = object_id, .size = size };
   plasma_send(conn, &req);
@@ -31,16 +31,14 @@ plasma_buffer plasma_create(int conn, plasma_id object_id, int64_t size) {
   int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
   assert(reply.type == PLASMA_OBJECT);
   assert(reply.size == size);
-  void *data = mmap(NULL, reply.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-  if (data == MAP_FAILED) {
+  *data = mmap(NULL, reply.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
-  plasma_buffer buffer = { object_id, data, size, 1 };
-  return buffer;
 }
 
-plasma_buffer plasma_get(int conn, plasma_id object_id) {
+void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
   plasma_request req = { .type = PLASMA_GET, .object_id = object_id };
   plasma_send(conn, &req);
   plasma_reply reply;
@@ -52,13 +50,12 @@ plasma_buffer plasma_get(int conn, plasma_id object_id) {
     fd = new_fd;
   }
   assert(reply.type == PLASMA_OBJECT);
-  void *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
-  if (data  == MAP_FAILED) {
+  *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
+  if (*data  == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
-  plasma_buffer buffer = { object_id, data, reply.size, 0 };
-  return buffer;
+  *size = reply.size;
 }
 
 void plasma_seal(int fd, plasma_id object_id) {
@@ -94,3 +91,41 @@ int plasma_store_connect(const char* socket_name) {
   }
   return fd;
 }
+
+#define h_addr h_addr_list[0]
+
+int plasma_manager_connect(const char* ip_addr, int port) {
+  int fd = socket(PF_INET, SOCK_STREAM, 0);
+  if (fd < 0) {
+    LOG_ERR("could not create socket");
+    exit(-1);
+  }
+
+  struct hostent *manager = gethostbyname(ip_addr); // TODO(pcm): cache this
+  if (!manager) {
+    LOG_ERR("plasma manager %s not found", ip_addr);
+    exit(-1);
+  }
+
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  bcopy(manager->h_addr, &addr.sin_addr.s_addr, manager->h_length);
+  addr.sin_port = htons(port);
+
+  int r = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
+  if (r < 0) {
+    LOG_ERR("could not establish connection to manager with id %s:%d", &ip_addr[0], port);
+    exit(-1);
+  }
+  return fd;
+}
+
+void plasma_transfer(int manager, const char* addr, int port, plasma_id object_id) {
+  plasma_request req = {.type = PLASMA_TRANSFER, .object_id = object_id, .port = port};
+  char* end = NULL;
+  for (int i = 0; i < 4; ++i) {
+    req.addr[i] = strtol(end ? end : addr, &end, 10);
+    end += 1; // skip the '.'
+  }
+  plasma_send(manager, &req);
+}
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 34a8e474a..90e0d4087 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -22,43 +22,7 @@
 #include <netdb.h>
 
 #include "plasma.h"
-
-#define MAX_CONNECTIONS 2048
-#define MAX_NUM_MANAGERS 1024
-
-enum conn_type {
-  // Connection to send commands to the manager.
-  CONN_CONTROL,
-  // Connection to send data to another manager.
-  CONN_WRITE_DATA,
-  // Connection to receive data from another manager.
-  CONN_READ_DATA
-};
-
-typedef struct {
-  // Unique identifier for the connection.
-  int id;
-  // Of type conn_type.
-  int type;
-  // Socket of the plasma store that is accessed for reading or writing data for
-  // this connection.
-  int store_conn;
-  // Buffer this connection is reading from or writing to.
-  plasma_buffer buf;
-  // Current position in the buffer.
-  int64_t cursor;
-} conn_state;
-
-typedef struct {
-  // Name of the socket connecting to local plasma store.
-  const char* store_socket_name;
-  // Number of connections.
-  int num_conn;
-  // For the "poll" system call.
-  struct pollfd waiting[MAX_CONNECTIONS];
-  // Status of connections (both control and data).
-  conn_state conn[MAX_CONNECTIONS];
-} plasma_manager_state;
+#include "plasma_manager.h"
 
 void init_manager_state(plasma_manager_state *s, const char* store_socket_name) {
   memset(&s->waiting, 0, sizeof(s->waiting));
@@ -67,22 +31,17 @@ void init_manager_state(plasma_manager_state *s, const char* store_socket_name)
   s->store_socket_name = store_socket_name;
 }
 
-#define h_addr h_addr_list[0]
-
 // Add connection for sending commands or data to another plasma manager
-// (returns the connection id).
+// (returns the connection index).
 int add_conn(plasma_manager_state* s, int type, int fd, int events, plasma_buffer* buf) {
-  static int conn_id = 0;
   s->waiting[s->num_conn].fd = fd;
   s->waiting[s->num_conn].events = events;
-  s->conn[s->num_conn].id = conn_id;
   s->conn[s->num_conn].type = type;
   if (buf) {
     s->conn[s->num_conn].buf = *buf;
   }
   s->conn[s->num_conn].cursor = 0;
-  s->num_conn += 1;
-  return conn_id++;
+  return s->num_conn++;
 }
 
 // Remove connection with index i by swapping it with the last element.
@@ -100,33 +59,15 @@ void remove_conn(plasma_manager_state* s, int i) {
 // the data header to the other object manager.
 void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
   int c = plasma_store_connect(state->store_socket_name);
-  plasma_buffer buf = plasma_get(c, req->object_id);
-
-  int fd = socket(PF_INET, SOCK_STREAM, 0);
-  if (fd < 0) {
-    LOG_ERR("could not create socket");
-    exit(-1);
-  }
+  plasma_buffer buf = { .object_id = req->object_id, .writable = 0 };
+  plasma_get(c, req->object_id, &buf.size, &buf.data);
   
   char ip_addr[16];
   snprintf(ip_addr, 32, "%d.%d.%d.%d",
                     req->addr[0], req->addr[1],
                     req->addr[2], req->addr[3]);
-  struct hostent *manager = gethostbyname(ip_addr); // TODO(pcm): cache this
-  if (!manager) {
-    LOG_ERR("plasma manager %s not found", ip_addr);
-    exit(-1);
-  }
-  struct sockaddr_in addr;
-  addr.sin_family = AF_INET;
-  bcopy(manager->h_addr, &addr.sin_addr.s_addr, manager->h_length);
-  addr.sin_port = htons(req->port);
-  
-  int r = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
-  if (r < 0) {
-    LOG_ERR("could not establish connection to manager with id %s:%d", &ip_addr[0], req->port);
-    exit(-1);
-  }
+
+  int fd = plasma_manager_connect(&ip_addr[0], req->port);
 
   add_conn(state, CONN_WRITE_DATA, fd, POLLOUT, &buf);
 
@@ -139,7 +80,10 @@ void setup_data_connection(int conn_idx, plasma_manager_state* state, plasma_req
   int store_conn = plasma_store_connect(state->store_socket_name);
   state->conn[conn_idx].type = CONN_READ_DATA;
   state->conn[conn_idx].store_conn = store_conn;
-  state->conn[conn_idx].buf = plasma_create(store_conn, req->object_id, req->size);
+  state->conn[conn_idx].buf.object_id = req->object_id;
+  state->conn[conn_idx].buf.size = req->size;
+  state->conn[conn_idx].buf.writable = 1;
+  plasma_create(store_conn, req->object_id, req->size, &state->conn[conn_idx].buf.data);
   state->conn[conn_idx].cursor = 0;
 }
 
@@ -170,7 +114,7 @@ void read_from_socket(plasma_manager_state* state, int i, plasma_request* req) {
       if (r == 1) {
         LOG_ERR("read error");
       } else if (r == 0) {
-        LOG_INFO("connection with id %d disconnected", state->conn[i].id);
+        LOG_INFO("connection with index %d disconnected", i);
         remove_conn(state, i);
       } else {
         process_command(i, state, req);
diff --git a/src/plasma_manager.h b/src/plasma_manager.h
new file mode 100644
index 000000000..efa326cb9
--- /dev/null
+++ b/src/plasma_manager.h
@@ -0,0 +1,42 @@
+#ifndef PLASMA_MANAGER_H
+#define PLASMA_MANAGER_H
+
+#include <poll.h>
+
+#define MAX_CONNECTIONS 2048
+
+enum conn_type {
+  // Connection to send commands to the manager.
+  CONN_CONTROL,
+  // Connection to send data to another manager.
+  CONN_WRITE_DATA,
+  // Connection to receive data from another manager.
+  CONN_READ_DATA
+};
+
+typedef struct {
+  // Of type conn_type.
+  int type;
+  // Socket of the plasma store that is accessed for reading or writing data for
+  // this connection.
+  int store_conn;
+  // Buffer this connection is reading from or writing to.
+  plasma_buffer buf;
+  // Current position in the buffer.
+  int64_t cursor;
+} conn_state;
+
+typedef struct {
+  // ID of this manager
+  int64_t manager_id;
+  // Name of the socket connecting to local plasma store.
+  const char* store_socket_name;
+  // Number of connections.
+  int num_conn;
+  // For the "poll" system call.
+  struct pollfd waiting[MAX_CONNECTIONS];
+  // Status of connections (both control and data).
+  conn_state conn[MAX_CONNECTIONS];
+} plasma_manager_state;
+
+#endif
diff --git a/test/test.py b/test/test.py
index b0284cb54..82ba406a8 100644
--- a/test/test.py
+++ b/test/test.py
@@ -69,9 +69,6 @@ class TestPlasmaManager(unittest.TestCase):
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
     self.p2 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store1"])
     self.p3 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store2"])
-    # Connect two PlasmaClients.
-    self.client1 = plasma.PlasmaClient("/tmp/store1")
-    self.client2 = plasma.PlasmaClient("/tmp/store2")
     # Start two PlasmaManagers.
     self.port1 = random.randint(10000, 50000)
     self.port2 = random.randint(10000, 50000)
@@ -79,13 +76,13 @@ class TestPlasmaManager(unittest.TestCase):
     self.p4 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store1", "-m", "127.0.0.1", "-p", str(self.port1)])
     self.p5 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store2", "-m", "127.0.0.1", "-p", str(self.port2)])
     time.sleep(0.1)
-    # Connect two Python PlasmaManagers.
-    self.manager1 = plasma.PlasmaManager("127.0.0.1", self.port1)
-    self.manager2 = plasma.PlasmaManager("127.0.0.1", self.port2)
+    # Connect two PlasmaClients.
+    self.client1 = plasma.PlasmaClient("/tmp/store1", "127.0.0.1", self.port1)
+    self.client2 = plasma.PlasmaClient("/tmp/store2", "127.0.0.1", self.port2)
     time.sleep(0.5)
 
   def tearDown(self):
-    # Kill the nameserver, PlasmaStore and PlasmaManager processes.
+    # Kill the PlasmaStore and PlasmaManager processes.
     self.p2.kill()
     self.p3.kill()
     self.p4.kill()
@@ -101,11 +98,11 @@ class TestPlasmaManager(unittest.TestCase):
     # Seal the buffer.
     self.client1.seal(object_id1)
     # Transfer the buffer to the the other PlasmaStore.
-    self.manager1.transfer("127.0.0.1", self.port2, object_id1)
+    self.client1.transfer("127.0.0.1", self.port2, object_id1)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
     # Transfer the buffer again.
-    self.manager1.transfer("127.0.0.1", self.port2, object_id1)
+    self.client1.transfer("127.0.0.1", self.port2, object_id1)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
     # Create a new object id string.
@@ -117,7 +114,7 @@ class TestPlasmaManager(unittest.TestCase):
     # Seal the buffer.
     self.client2.seal(object_id2)
     # Transfer the buffer to the the other PlasmaStore.
-    self.manager2.transfer("127.0.0.1", self.port1, object_id2)
+    self.client2.transfer("127.0.0.1", self.port1, object_id2)
     # Compare the two buffers.
     self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
 

From a62c0f8fac7c45f80cb47a6adda4205d97a610a9 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 7 Sep 2016 20:19:37 -0700
Subject: [PATCH 16/91] Allow arbitrary number of connections (#13)

* refactor plasma to use an event loop

* unify comment style

* Clean up Makefile flags.

* Randomize socket names in tests so multiple copies of the tests can be run in parallel without conflict.
---
 Makefile             |  14 +--
 src/event_loop.c     |  85 ++++++++++++++++
 src/event_loop.h     |  36 +++++++
 src/example.c        |  16 +--
 src/fling.c          |   8 +-
 src/fling.h          |  33 +++---
 src/plasma.h         |  28 +++--
 src/plasma_client.c  |  15 +--
 src/plasma_manager.c | 189 ++++++++++++++++------------------
 src/plasma_manager.h |  49 ++++-----
 src/plasma_store.c   | 129 ++++++++++-------------
 src/utarray.h        | 238 +++++++++++++++++++++++++++++++++++++++++++
 test/test.py         |  19 ++--
 13 files changed, 595 insertions(+), 264 deletions(-)
 create mode 100644 src/event_loop.c
 create mode 100644 src/event_loop.h
 create mode 100644 src/utarray.h

diff --git a/Makefile b/Makefile
index f2c7ff9a5..64f66c980 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500
 BUILD = build
 
 all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
@@ -7,14 +7,14 @@ all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(B
 clean:
 	rm -r $(BUILD)/*
 
-$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c
-	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_store.c src/fling.c -o $(BUILD)/plasma_store
+$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c
+	$(CC) $(CFLAGS) src/plasma_store.c src/event_loop.c src/fling.c -o $(BUILD)/plasma_store
 
-$(BUILD)/plasma_manager: src/plasma_manager.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_manager.c src/plasma_client.c src/fling.c -o $(BUILD)/plasma_manager
+$(BUILD)/plasma_manager: src/plasma_manager.c src/event_loop.h src/event_loop.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
+	$(CC) $(CFLAGS) src/plasma_manager.c src/event_loop.c src/plasma_client.c src/fling.c -o $(BUILD)/plasma_manager
 
 $(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/fling.c -fPIC -shared -o $(BUILD)/plasma_client.so
+	$(CC) $(CFLAGS) src/plasma_client.c src/fling.c -fPIC -shared -o $(BUILD)/plasma_client.so
 
 $(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) --std=c99 -D_XOPEN_SOURCE=500 src/plasma_client.c src/example.c src/fling.c -o $(BUILD)/example
+	$(CC) $(CFLAGS) src/plasma_client.c src/example.c src/fling.c -o $(BUILD)/example
diff --git a/src/event_loop.c b/src/event_loop.c
new file mode 100644
index 000000000..2fb6a856b
--- /dev/null
+++ b/src/event_loop.c
@@ -0,0 +1,85 @@
+#include "event_loop.h"
+
+#include <assert.h>
+#include <unistd.h>
+
+UT_icd item_icd = { sizeof(event_loop_item), NULL, NULL, NULL };
+UT_icd poll_icd = { sizeof(struct pollfd), NULL, NULL, NULL };
+
+/* Initializes the event loop.
+ * This function needs to be called before any other event loop function. */
+void event_loop_init(event_loop *loop) {
+  utarray_new(loop->items, &item_icd);
+  utarray_new(loop->waiting, &poll_icd);
+}
+
+/* Add a new file descriptor fd to the event loop.
+ * This function stores a user defined type for the file descriptor and, if
+ * connection is not NULL, a copy of the data connection state. The parameter
+ * events is the same as in http://linux.die.net/man/2/poll.
+ * Returns the index of the item in the event loop. */
+int64_t event_loop_attach(event_loop *loop, int type, data_connection* connection, int fd, int events) {
+  assert(utarray_len(loop->items) == utarray_len(loop->waiting));
+  int64_t index = utarray_len(loop->items);
+  event_loop_item item = { .type = type };
+  if (connection) {
+    item.connection = *connection;
+  }
+  utarray_push_back(loop->items, &item );
+  struct pollfd waiting = { .fd = fd, .events = events };
+  utarray_push_back(loop->waiting, &waiting);
+  return index;
+}
+
+/* Detach a file descriptor from the event loop, closing it if shall_close is nonzero.
+ * The last item is moved into the freed slot, so the index of that item
+ * changes to "index"; all other indices stay valid. */
+void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
+  struct pollfd *waiting_item = (struct pollfd*) utarray_eltptr(loop->waiting, index);
+  struct pollfd *waiting_back = (struct pollfd*) utarray_back(loop->waiting);
+  if (shall_close) {
+    close(waiting_item->fd);
+  }
+  *waiting_item = *waiting_back;
+  utarray_pop_back(loop->waiting);
+
+  event_loop_item *items_item = (event_loop_item*) utarray_eltptr(loop->items, index);
+  event_loop_item *items_back = (event_loop_item*) utarray_back(loop->items);
+  *items_item = *items_back;
+  utarray_pop_back(loop->items);
+}
+
+/* Poll the file descriptors associated to this event loop.
+ * See http://linux.die.net/man/2/poll */
+int event_loop_poll(event_loop *loop) {
+  return poll((struct pollfd*) utarray_front(loop->waiting), utarray_len(loop->waiting), -1);
+}
+
+/* Get the total number of file descriptors participating in the event loop. */
+int64_t event_loop_size(event_loop *loop) {
+  return utarray_len(loop->waiting);
+}
+
+/* Get the pollfd structure associated to a file descriptor participating in the event loop. */
+struct pollfd *event_loop_get(event_loop *loop, int64_t index) {
+  return (struct pollfd*) utarray_eltptr(loop->waiting, index);
+}
+
+/* Set the data connection information for participant in the event loop. */
+void event_loop_set_connection(event_loop *loop, int64_t index, const data_connection* conn) {
+  event_loop_item *item = (event_loop_item*) utarray_eltptr(loop->items, index);
+  item->connection = *conn;
+}
+
+/* Get the data connection information for participant in the event loop. */
+data_connection* event_loop_get_connection(event_loop *loop, int64_t index) {
+  event_loop_item *item = (event_loop_item*) utarray_eltptr(loop->items, index);
+  return &item->connection;
+}
+
+/* Free the space associated to the event loop.
+ * Does not free the event_loop datastructure itself. */
+void event_loop_free(event_loop *loop) {
+  utarray_free(loop->items);
+  utarray_free(loop->waiting);
+}
diff --git a/src/event_loop.h b/src/event_loop.h
new file mode 100644
index 000000000..fbb3aab97
--- /dev/null
+++ b/src/event_loop.h
@@ -0,0 +1,36 @@
+#ifndef EVENT_LOOP_H
+#define EVENT_LOOP_H
+
+#include <poll.h>
+
+#include "utarray.h"
+#include "plasma.h"
+#include "plasma_manager.h"
+
+typedef struct {
+  /* The type of connection (e.g. redis, client, manager, data transfer). */
+  int type;
+  /* If type is data transfer, this contains information about the status
+   * of the transfer. */
+	data_connection connection;
+} event_loop_item;
+
+typedef struct {
+  /* Array of event_loop_items that hold information for connections. */
+  UT_array *items; 
+  /* Array of file descriptors that are waiting, corresponding to items. */
+  UT_array *waiting; 
+} event_loop;
+
+/* Event loop functions. */
+void event_loop_init(event_loop *loop);
+void event_loop_free(event_loop *loop);
+int64_t event_loop_attach(event_loop *loop, int type, data_connection* connection, int fd, int events);
+void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
+int event_loop_poll(event_loop *loop);
+int64_t event_loop_size(event_loop *loop);
+struct pollfd *event_loop_get(event_loop *loop, int64_t index);
+void event_loop_set_connection(event_loop *loop, int64_t index, const data_connection* conn);
+data_connection *event_loop_get_connection(event_loop *loop, int64_t index);
+
+#endif
diff --git a/src/example.c b/src/example.c
index 1d8d74b5c..f42e26729 100644
--- a/src/example.c
+++ b/src/example.c
@@ -1,11 +1,11 @@
-// A simple example on how to use the plasma store
-// 
-// Can be called in the following way:
-// 
-// cd build
-// ./plasma_store -s /tmp/plasma_socket
-// ./example -s /tmp/plasma_socket -g
-// ./example -s /tmp/plasma_socket -c -f
+/* A simple example on how to use the plasma store
+ * 
+ * Can be called in the following way:
+ * 
+ * cd build
+ * ./plasma_store -s /tmp/plasma_socket
+ * ./example -s /tmp/plasma_socket -g
+ * ./example -s /tmp/plasma_socket -c -f */
 
 #include <stdlib.h>
 #include <getopt.h>
diff --git a/src/fling.c b/src/fling.c
index d614a051d..6b6e773de 100644
--- a/src/fling.c
+++ b/src/fling.c
@@ -26,7 +26,7 @@ int send_fd(int conn, int fd, const char* payload, int size) {
   header->cmsg_len = CMSG_LEN(sizeof(int));
   *(int *)CMSG_DATA(header) = fd;
 
-  // send file descriptor and payload
+  /* send file descriptor and payload */
   return sendmsg(conn, &msg, 0) != -1 && send(conn, payload, size, 0) == -1;
 }
 
@@ -55,9 +55,9 @@ int recv_fd(int conn, char* payload, int size) {
       }
     }
 
-  // The sender sent us more than one file descriptor. We've closed
-  // them all to prevent fd leaks but notify the caller that we got
-  // a bad message.
+  /* The sender sent us more than one file descriptor. We've closed
+   * them all to prevent fd leaks but notify the caller that we got
+   * a bad message. */
   if (oh_noes) {
     close(found_fd);
     errno = EBADMSG;
diff --git a/src/fling.h b/src/fling.h
index f6dc8a268..cc38f05ce 100644
--- a/src/fling.h
+++ b/src/fling.h
@@ -1,13 +1,13 @@
-// FLING: Exchanging file descriptors over sockets
-//
-// This is a little library for sending file descriptors over a socket
-// between processes. The reason for doing that (as opposed to using
-// filenames to share the files) is so (a) no files remain in the
-// filesystem after all the processes terminate, (b) to make sure that
-// there are no name collisions and (c) to be able to control who has
-// access to the data.
-//
-// Most of the code is from https://github.com/sharvil/flingfd
+/* FLING: Exchanging file descriptors over sockets
+ *
+ * This is a little library for sending file descriptors over a socket
+ * between processes. The reason for doing that (as opposed to using
+ * filenames to share the files) is so (a) no files remain in the
+ * filesystem after all the processes terminate, (b) to make sure that
+ * there are no name collisions and (c) to be able to control who has
+ * access to the data.
+ *
+ * Most of the code is from https://github.com/sharvil/flingfd */
 
 #include <unistd.h>
 #include <errno.h>
@@ -15,7 +15,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
-// This is neccessary for Mac OS X, see http://www.apuebook.com/faqs2e.html (10).
+/* This is necessary for Mac OS X, see http://www.apuebook.com/faqs2e.html (10). */
 #if !defined(CMSG_SPACE) && !defined(CMSG_LEN)
   #define CMSG_SPACE(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(len))
   #define CMSG_LEN(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (len))
@@ -24,12 +24,11 @@
 void init_msg(struct msghdr *msg, struct iovec *iov,
               char *buf, size_t buf_len);
 
-// Send a file descriptor "fd" and a payload "payload" of size "size"
-// over the socket "conn". Return 0 on success.
+/* Send a file descriptor "fd" and a payload "payload" of size "size"
+ * over the socket "conn". Return 0 on success. */
 int send_fd(int conn, int fd, const char* payload, int size);
 
-// Receive a file descriptor and a payload of size up to "size" from a
-// socket "conn". The payload will be written to "payload" and the file
-// descriptor will be returned. Returns -1 on failure.
+/* Receive a file descriptor and a payload of size up to "size" from a
+ * socket "conn". The payload will be written to "payload" and the file
+ * descriptor will be returned. Returns -1 on failure. */
 int recv_fd(int conn, char* payload, int size);
-
diff --git a/src/plasma.h b/src/plasma.h
index 9fdec86c2..eda883aa6 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -26,18 +26,22 @@ typedef struct {
   int64_t construct_duration;
 } plasma_object_info;
 
-// Represents an object id hash, can hold a full SHA1 hash
+/* Represents an object id hash, can hold a full SHA1 hash */
 typedef struct {
   unsigned char id[20];
 } plasma_id;
 
-// these values must be in sync with the ones in plasma.py (can we have a test for that?)
 enum plasma_request_type {
-  PLASMA_CREATE, // create a new object
-  PLASMA_GET, // get an object
-  PLASMA_SEAL, // seal an object
-  PLASMA_TRANSFER, // request transfer to another store
-  PLASMA_DATA, // header for sending data
+  /* Create a new object. */
+  PLASMA_CREATE,
+  /* Get an object. */
+  PLASMA_GET,
+  /* seal an object */
+  PLASMA_SEAL,
+  /* request transfer to another store */
+  PLASMA_TRANSFER,
+  /* Header for sending data */
+  PLASMA_DATA,
 };
 
 typedef struct {
@@ -49,8 +53,10 @@ typedef struct {
 } plasma_request;
 
 enum plasma_reply_type {
-  PLASMA_OBJECT, // the file descriptor represents an object
-  PLASMA_FUTURE, // the file descriptor represents a future
+  /* the file descriptor represents an object */
+  PLASMA_OBJECT,
+  /* the file descriptor represents a future */
+  PLASMA_FUTURE,
 };
 
 typedef struct {
@@ -65,10 +71,10 @@ typedef struct {
   int writable;
 } plasma_buffer;
 
-// Connect to the local plasma store UNIX domain socket
+/* Connect to the local plasma store UNIX domain socket */
 int plasma_store_connect(const char* socket_name);
 
-// Connect to a possibly remote plasma manager
+/* Connect to a possibly remote plasma manager */
 int plasma_manager_connect(const char* addr, int port);
 
 void plasma_create(int store, plasma_id object_id, int64_t size, void **data);
diff --git a/src/plasma_client.c b/src/plasma_client.c
index e8e160d70..d4f2d207f 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -1,4 +1,4 @@
-// PLASMA CLIENT: Client library for using the plasma store and manager
+/* PLASMA CLIENT: Client library for using the plasma store and manager */
 
 #include <assert.h>
 #include <stdlib.h>
@@ -42,7 +42,7 @@ void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
   plasma_request req = { .type = PLASMA_GET, .object_id = object_id };
   plasma_send(conn, &req);
   plasma_reply reply;
-  // the following loop is run at most twice
+  /* A PLASMA_FUTURE reply is followed by a second receive on the returned fd. */
   int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
   if (reply.type == PLASMA_FUTURE) {
     int new_fd = recv_fd(fd, (char*)&reply, sizeof(plasma_reply));
@@ -74,17 +74,17 @@ int plasma_store_connect(const char* socket_name) {
   memset(&addr, 0, sizeof(addr));
   addr.sun_family = AF_UNIX;
   strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
-  // Try to connect to the Plasma store. If unsuccessful, retry several times.
+  /* Try to connect to the Plasma store. If unsuccessful, retry several times. */
   int connected_successfully = 0;
   for (int num_attempts = 0; num_attempts < 50; ++num_attempts) {
     if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) {
       connected_successfully = 1;
       break;
     }
-    // Sleep for 100 milliseconds.
+    /* Sleep for 100 milliseconds. */
     usleep(100000);
   }
-  // If we could not connect to the Plasma store, exit.
+  /* If we could not connect to the Plasma store, exit. */
   if (!connected_successfully) {
     LOG_ERR("could not connect to store %s", socket_name);
     exit(-1);
@@ -101,7 +101,7 @@ int plasma_manager_connect(const char* ip_addr, int port) {
     exit(-1);
   }
 
-  struct hostent *manager = gethostbyname(ip_addr); // TODO(pcm): cache this
+  struct hostent *manager = gethostbyname(ip_addr); /* TODO(pcm): cache this */
   if (!manager) {
     LOG_ERR("plasma manager %s not found", ip_addr);
     exit(-1);
@@ -125,7 +125,8 @@ void plasma_transfer(int manager, const char* addr, int port, plasma_id object_i
   char* end = NULL;
   for (int i = 0; i < 4; ++i) {
     req.addr[i] = strtol(end ? end : addr, &end, 10);
-    end += 1; // skip the '.'
+    /* skip the '.' */
+    end += 1;
   }
   plasma_send(manager, &req);
 }
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 90e0d4087..321f40931 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -1,10 +1,9 @@
-// PLASMA MANAGER: Local to a node, connects to other managers to send and
-// receive objects from them
-//
-// The storage manager listens on its main listening port, and if a request for
-// transfering an object to another object store comes in, it ships the data
-// using a new connection to the target object manager. Also keeps a list of
-// other object managers.
+/* PLASMA MANAGER: Local to a node, connects to other managers to send and
+ * receive objects from them
+ *
+ * The storage manager listens on its main listening port, and if a request for
+ * transferring an object to another object store comes in, it ships the data
+ * using a new connection to the target object manager. */
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -21,46 +20,33 @@
 #include <netinet/in.h>
 #include <netdb.h>
 
+#include "event_loop.h"
 #include "plasma.h"
 #include "plasma_manager.h"
 
-void init_manager_state(plasma_manager_state *s, const char* store_socket_name) {
-  memset(&s->waiting, 0, sizeof(s->waiting));
-  memset(&s->conn, 0, sizeof(s->conn));
-  s->num_conn = 0;
+typedef struct {
+  /* Name of the socket connecting to local plasma store. */
+  const char* store_socket_name;
+  /* Event loop. */
+  event_loop* loop;
+} plasma_manager_state;
+
+/* Initialize the plasma manager. This function initializes the event loop
+ * of the plasma manager, and stores the address 'store_socket_name' of
+ * the local plasma store socket. */
+void init_plasma_manager(plasma_manager_state *s, const char* store_socket_name) {
+  s->loop = malloc(sizeof(event_loop));
+  event_loop_init(s->loop);
   s->store_socket_name = store_socket_name;
 }
 
-// Add connection for sending commands or data to another plasma manager
-// (returns the connection index).
-int add_conn(plasma_manager_state* s, int type, int fd, int events, plasma_buffer* buf) {
-  s->waiting[s->num_conn].fd = fd;
-  s->waiting[s->num_conn].events = events;
-  s->conn[s->num_conn].type = type;
-  if (buf) {
-    s->conn[s->num_conn].buf = *buf;
-  }
-  s->conn[s->num_conn].cursor = 0;
-  return s->num_conn++;
-}
-
-// Remove connection with index i by swapping it with the last element.
-void remove_conn(plasma_manager_state* s, int i) {
-  memcpy(&s->waiting[i], &s->waiting[s->num_conn-1], sizeof(struct pollfd));
-  memset(&s->waiting[s->num_conn-1], 0, sizeof(struct pollfd));
-  memcpy(&s->conn[i], &s->conn[s->num_conn-1], sizeof(conn_state));
-  memset(&s->conn[s->num_conn-1], 0, sizeof(conn_state));
-}
-
-#define BUFSIZE 4096
-
-// Start transfering data to another object store manager. This establishes
-// a connection to both the manager and the local object store and sends
-// the data header to the other object manager.
-void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
-  int c = plasma_store_connect(state->store_socket_name);
+/* Start transferring data to another object store manager. This establishes
+ * a connection to both the manager and the local object store and sends
+ * the data header to the other object manager. */
+void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
+  int store_conn = plasma_store_connect(s->store_socket_name);
   plasma_buffer buf = { .object_id = req->object_id, .writable = 0 };
-  plasma_get(c, req->object_id, &buf.size, &buf.data);
+  plasma_get(store_conn, req->object_id, &buf.size, &buf.data);
   
   char ip_addr[16];
   snprintf(ip_addr, 32, "%d.%d.%d.%d",
@@ -68,28 +54,27 @@ void initiate_transfer(plasma_manager_state* state, plasma_request* req) {
                     req->addr[2], req->addr[3]);
 
   int fd = plasma_manager_connect(&ip_addr[0], req->port);
-
-  add_conn(state, CONN_WRITE_DATA, fd, POLLOUT, &buf);
+  data_connection conn = { .type = DATA_CONNECTION_WRITE,  .store_conn = store_conn, .buf = buf, .cursor = 0 };
+  event_loop_attach(s->loop, CONNECTION_DATA, &conn, fd, POLLOUT);
 
   plasma_request manager_req = { .type = PLASMA_DATA, .object_id = req->object_id, .size = buf.size };
-  LOG_INFO("filedescriptor is %d", fd);
   plasma_send(fd, &manager_req);
 }
 
-void setup_data_connection(int conn_idx, plasma_manager_state* state, plasma_request* req) {
-  int store_conn = plasma_store_connect(state->store_socket_name);
-  state->conn[conn_idx].type = CONN_READ_DATA;
-  state->conn[conn_idx].store_conn = store_conn;
-  state->conn[conn_idx].buf.object_id = req->object_id;
-  state->conn[conn_idx].buf.size = req->size;
-  state->conn[conn_idx].buf.writable = 1;
-  plasma_create(store_conn, req->object_id, req->size, &state->conn[conn_idx].buf.data);
-  state->conn[conn_idx].cursor = 0;
+/* Start reading data from another object manager.
+ * Initializes the object we are going to write to in the
+ * local plasma store and then switches the data socket to reading mode. */
+void start_reading_data(int64_t index, plasma_manager_state* s, plasma_request* req) {
+  int store_conn = plasma_store_connect(s->store_socket_name);
+  plasma_buffer buf = { .object_id = req->object_id, .size = req->size, .writable = 1 };
+  plasma_create(store_conn, req->object_id, req->size, &buf.data);
+  data_connection conn = { .type = DATA_CONNECTION_READ, .store_conn = store_conn, .buf = buf, .cursor = 0 };
+  event_loop_set_connection(s->loop, index, &conn);
 }
 
-// Handle a command request that came in through a socket (transfering data,
-// registering object managers, accepting incoming data).
-void process_command(int conn_idx, plasma_manager_state* state, plasma_request* req) {
+/* Handle a command request that came in through a socket (transferring data,
+ * or accepting incoming data). */
+void process_command(int64_t id, plasma_manager_state* state, plasma_request* req) {
   switch (req->type) {
   case PLASMA_TRANSFER:
     LOG_INFO("transfering object to manager with port %d", req->port);
@@ -97,7 +82,7 @@ void process_command(int conn_idx, plasma_manager_state* state, plasma_request*
     break;
   case PLASMA_DATA:
     LOG_INFO("starting to stream data");
-    setup_data_connection(conn_idx, state, req);
+    start_reading_data(id, state, req);
     break;
   default:
     LOG_ERR("invalid request %d", req->type);
@@ -105,58 +90,59 @@ void process_command(int conn_idx, plasma_manager_state* state, plasma_request*
   }
 }
 
-// Handle data or command event incoming on socket with index i.
-void read_from_socket(plasma_manager_state* state, int i, plasma_request* req) {
+/* Handle data or command event incoming on socket with index "index". */
+void read_from_socket(plasma_manager_state* state, struct pollfd *waiting, int64_t index, plasma_request* req) {
   ssize_t r, s;
-  switch (state->conn[i].type) {
-    case CONN_CONTROL:
-      r = read(state->waiting[i].fd, req, sizeof(plasma_request));
-      if (r == 1) {
+  data_connection *conn = event_loop_get_connection(state->loop, index);
+  switch (conn->type) {
+    case DATA_CONNECTION_HEADER:
+      r = read(waiting->fd, req, sizeof(plasma_request));
+      if (r == -1) {
         LOG_ERR("read error");
       } else if (r == 0) {
-        LOG_INFO("connection with index %d disconnected", i);
-        remove_conn(state, i);
+        LOG_INFO("connection with id %" PRId64 " disconnected", index);
+        event_loop_detach(state->loop, index, 1);
       } else {
-        process_command(i, state, req);
+        process_command(index, state, req);
       }
       break;
-    case CONN_READ_DATA:
-      LOG_DEBUG("polled CONN_READ_DATA");
-      r = read(state->waiting[i].fd, state->conn[i].buf.data + state->conn[i].cursor, BUFSIZE);
+    case DATA_CONNECTION_READ:
+      LOG_DEBUG("polled DATA_CONNECTION_READ");
+      r = read(waiting->fd, conn->buf.data + conn->cursor, BUFSIZE);
       if (r == -1) {
         LOG_ERR("read error");
       } else if (r == 0) {
         LOG_INFO("end of file");
       } else {
-        state->conn[i].cursor += r;
+        conn->cursor += r;
       }
       if (r == 0) {
-        close(state->waiting[i].fd);
-        state->waiting[i].fd = 0;
-        state->waiting[i].events = 0;
-        plasma_seal(state->conn[i].store_conn, state->conn[i].buf.object_id);
+        LOG_DEBUG("reading on channel %" PRId64 " finished", index);
+        plasma_seal(conn->store_conn, conn->buf.object_id);
+        close(conn->store_conn);
+        event_loop_detach(state->loop, index, 1);
       }
       break;
-    case CONN_WRITE_DATA:
-      LOG_DEBUG("polled CONN_WRITE_DATA");
-      s = state->conn[i].buf.size - state->conn[i].cursor;
+    case DATA_CONNECTION_WRITE:
+      LOG_DEBUG("polled DATA_CONNECTION_WRITE");
+      s = conn->buf.size - conn->cursor;
       if (s > BUFSIZE)
         s = BUFSIZE;
-      r = write(state->waiting[i].fd, state->conn[i].buf.data + state->conn[i].cursor, s);
+      r = write(waiting->fd, conn->buf.data + conn->cursor, s);
       if (r != s) {
         if (r > 0) {
-          LOG_ERR("partial write on fd %d", state->waiting[i].fd);
+          LOG_ERR("partial write on fd %d", waiting->fd);
         } else {
           LOG_ERR("write error");
           exit(-1);
         }
-      } else {
-        state->conn[i].cursor += r;
-      }
+      }
+      if (r > 0)
+        conn->cursor += r; /* Also after a short write, so bytes already sent are not resent. */
       if (r == 0) {
-        close(state->waiting[i].fd);
-        state->waiting[i].fd = 0;
-        state->waiting[i].events = 0;
+        LOG_DEBUG("writing on channel %" PRId64 " finished", index);
+        close(conn->store_conn);
+        event_loop_detach(state->loop, index, 1);
       }
       break;
     default:
@@ -165,22 +151,23 @@ void read_from_socket(plasma_manager_state* state, int i, plasma_request* req) {
   }
 }
 
-// Main event loop of the plasma manager.
-void event_loop(int sock, plasma_manager_state* state) {
-  // Add listening socket.
-  add_conn(state, CONN_CONTROL, sock, POLLIN, NULL);
+/* Main event loop of the plasma manager. */
+void run_event_loop(int sock, plasma_manager_state* s) {
+  /* Add listening socket. */
+  event_loop_attach(s->loop, CONNECTION_LISTENER, NULL, sock, POLLIN);
   plasma_request req;
   while (1) {
-    int num_ready = poll(state->waiting, state->num_conn, -1);
+    int num_ready = event_loop_poll(s->loop);
     if (num_ready < 0) {
       LOG_ERR("poll failed");
       exit(-1);
     }
-    for (int i = 0; i < state->num_conn; ++i) {
-      if (state->waiting[i].revents == 0)
+    for (int i = 0; i < event_loop_size(s->loop); ++i) {
+      struct pollfd *waiting = event_loop_get(s->loop, i);
+      if (waiting->revents == 0)
         continue;
-      if (state->waiting[i].fd == sock) {
-        // Handle new incoming connections.
+      if (waiting->fd == sock) {
+        /* Handle new incoming connections. */
         int new_socket = accept(sock, NULL, NULL);
         if (new_socket < 0) {
           if (errno != EWOULDBLOCK) {
@@ -189,10 +176,11 @@ void event_loop(int sock, plasma_manager_state* state) {
           }
           break;
         }
-        int conn_id = add_conn(state, CONN_CONTROL, new_socket, POLLIN, NULL);
-        LOG_INFO("new connection with id %d", conn_id);
+        data_connection conn = { .type = DATA_CONNECTION_HEADER };
+        event_loop_attach(s->loop, CONNECTION_DATA, &conn, new_socket, POLLIN);
+        LOG_INFO("new connection with id %" PRId64, event_loop_size(s->loop));
       } else {
-        read_from_socket(state, i, &req);
+        read_from_socket(s, waiting, i, &req);
       }
     }
   }
@@ -209,12 +197,13 @@ void start_server(const char *store_socket_name, const char* master_addr, int po
   name.sin_port = htons(port);
   name.sin_addr.s_addr = htonl(INADDR_ANY);
   int on = 1;
-  // TODO(pcm): http://stackoverflow.com/q/1150635
+  /* TODO(pcm): http://stackoverflow.com/q/1150635 */
   if (ioctl(sock, FIONBIO, (char*) &on) < 0) {
     LOG_ERR("ioctl failed");
     close(sock);
     exit(-1);
   }
+  setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on));
   if (bind(sock, (struct sockaddr*) &name, sizeof(name)) < 0) {
     LOG_ERR("could not bind socket");
     exit(-1);
@@ -225,16 +214,16 @@ void start_server(const char *store_socket_name, const char* master_addr, int po
     exit(-1);
   }
   plasma_manager_state state;
-  init_manager_state(&state, store_socket_name);
-  event_loop(sock, &state);
+  init_plasma_manager(&state, store_socket_name);
+  run_event_loop(sock, &state);
 }
 
 int main(int argc, char* argv[]) {
-  // Socket name of the plasma store this manager is connected to.
+  /* Socket name of the plasma store this manager is connected to. */
   char *store_socket_name = NULL;
-  // IP address of this node
+  /* IP address of this node. */
   char *master_addr = NULL;
-  // Port number the manager should use
+  /* Port number the manager should use. */
   int port;
   int c;
   while ((c = getopt(argc, argv, "s:m:p:")) != -1) {
diff --git a/src/plasma_manager.h b/src/plasma_manager.h
index efa326cb9..d0efe4b65 100644
--- a/src/plasma_manager.h
+++ b/src/plasma_manager.h
@@ -2,41 +2,36 @@
 #define PLASMA_MANAGER_H
 
 #include <poll.h>
+#include "utarray.h"
 
-#define MAX_CONNECTIONS 2048
+/* The buffer size in bytes. Data will get transfered in multiples of this */
+#define BUFSIZE 4096
 
-enum conn_type {
-  // Connection to send commands to the manager.
-  CONN_CONTROL,
-  // Connection to send data to another manager.
-  CONN_WRITE_DATA,
-  // Connection to receive data from another manager.
-  CONN_READ_DATA
+enum connection_type {
+  CONNECTION_REDIS,
+  CONNECTION_LISTENER,
+  CONNECTION_DATA
+};
+
+enum data_connection_type {
+  /* Connection to send commands and metadata to the manager. */
+  DATA_CONNECTION_HEADER,
+  /* Connection to send data to another manager. */
+  DATA_CONNECTION_WRITE,
+  /* Connection to receive data from another manager. */
+  DATA_CONNECTION_READ
 };
 
 typedef struct {
-  // Of type conn_type.
+  /* Of type data_connection_type. */
   int type;
-  // Socket of the plasma store that is accessed for reading or writing data for
-  // this connection.
+  /* Local socket of the plasma store that is accessed for reading or writing
+   * data for this connection. */
   int store_conn;
-  // Buffer this connection is reading from or writing to.
+  /* Buffer this connection is reading from or writing to. */
   plasma_buffer buf;
-  // Current position in the buffer.
+  /* Current position in the buffer. */
   int64_t cursor;
-} conn_state;
-
-typedef struct {
-  // ID of this manager
-  int64_t manager_id;
-  // Name of the socket connecting to local plasma store.
-  const char* store_socket_name;
-  // Number of connections.
-  int num_conn;
-  // For the "poll" system call.
-  struct pollfd waiting[MAX_CONNECTIONS];
-  // Status of connections (both control and data).
-  conn_state conn[MAX_CONNECTIONS];
-} plasma_manager_state;
+} data_connection;
 
 #endif
diff --git a/src/plasma_store.c b/src/plasma_store.c
index d67420c24..1ddc6b06a 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -1,13 +1,13 @@
-// PLASMA STORE: This is a simple object store server process
-//
-// It accepts incoming client connections on a unix domain socket
-// (name passed in via the -s option of the executable) and uses a
-// single thread to serve the clients. Each client establishes a
-// connection and can create objects, wait for objects and seal
-// objects through that connection.
-//
-// It keeps a hash table that maps object_ids (which are 20 byte long,
-// just enough to store and SHA1 hash) to memory mapped files.
+/* PLASMA STORE: This is a simple object store server process
+ *
+ * It accepts incoming client connections on a unix domain socket
+ * (name passed in via the -s option of the executable) and uses a
+ * single thread to serve the clients. Each client establishes a
+ * connection and can create objects, wait for objects and seal
+ * objects through that connection.
+ *
+ * It keeps a hash table that maps object_ids (which are 20 byte long,
+ * just enough to store and SHA1 hash) to memory mapped files. */
 
 
 #include <stdio.h>
@@ -24,77 +24,54 @@
 #include "uthash.h"
 #include "fling.h"
 #include "plasma.h"
+#include "event_loop.h"
 
-#define MAX_NUM_CLIENTS 2048
+#define MAX_NUM_CLIENTS 100000
 
 typedef struct {
-  // Number of clients connected.
-  int num_clients;
-  // Unique identifier for the clients.
-  int client_id[MAX_NUM_CLIENTS];
-  // Data structure for polling.
-  struct pollfd waiting[MAX_NUM_CLIENTS];
+  /* Event loop for the plasma store. */
+  event_loop *loop;
 } plasma_store_state;
 
 void init_state(plasma_store_state* s) {
-  memset(&s->waiting, 0, sizeof(s->waiting));
-  memset(&s->client_id, 0, sizeof(s->client_id));
-  s->num_clients = 0;
-}
-
-int add_client(plasma_store_state* s, int fd) {
-  static int curr_id = 0;
-  s->waiting[s->num_clients].fd = fd;
-  s->waiting[s->num_clients].events = POLLIN;
-  s->client_id[s->num_clients] = curr_id;
-  s->num_clients += 1;
-  return curr_id++;
-}
-
-// Remove the client at index i by swapping it with the
-// client at index num_clients-1 and zeroing the latter out.
-void remove_client(plasma_store_state* s, int i) {
-  memcpy(&s->waiting[i], &s->waiting[s->num_clients-1], sizeof(struct pollfd));
-  memset(&s->waiting[s->num_clients-1], 0, sizeof(struct pollfd));
-  s->client_id[i] = s->client_id[s->num_clients-1];
-  s->client_id[s->num_clients-1] = 0;
-  s->num_clients -= 1;
+  s->loop = malloc(sizeof(event_loop));
+  event_loop_init(s->loop);
 }
 
 typedef struct {
-  // Object id of this object.
+  /* Object id of this object. */
   plasma_id object_id;
-  // Object info like size, creation time and owner.
+  /* Object info like size, creation time and owner. */
   plasma_object_info info;
-  // Memory mapped file containing the object.
+  /* Memory mapped file containing the object. */
   int fd;
-  // Handle for the uthash table.
+  /* Handle for the uthash table. */
   UT_hash_handle handle;
 } object_table_entry;
 
-// objects that are still being written by their owner process
+/* Objects that are still being written by their owner process. */
 object_table_entry* open_objects = NULL;
 
-// Objects that have already been sealed by their owner process and
-// can now be shared with other processes.
+/* Objects that have already been sealed by their owner process and
+ * can now be shared with other processes. */
 object_table_entry* sealed_objects = NULL;
 
 typedef struct {
-  // Object id of this object.
+  /* Object id of this object. */
   plasma_id object_id;
-  // Number of processes waiting for the object.
+  /* Number of processes waiting for the object. */
   int num_waiting;
-  // Socket connections to waiting clients.
+  /* Socket connections to waiting clients. */
   int conn[MAX_NUM_CLIENTS];
-  // Handle for the uthash table.
+  /* Handle for the uthash table. */
   UT_hash_handle handle;
 } object_notify_entry;
 
-// Objects that processes are waiting for.
+/* Objects that processes are waiting for. */
 object_notify_entry* objects_notify = NULL;
 
-// Create a buffer. This is creating a temporary file and then
-// immediately unlinking it so we do not leave traces in the system.
+/* Create a buffer. This is creating a temporary file and then
+ * immediately unlinking it so we do not leave traces in the system. */
 int create_buffer(int64_t size) {
   static char template[] = "/tmp/plasmaXXXXXX";
   char file_name[32];
@@ -118,9 +95,9 @@ int create_buffer(int64_t size) {
   return fd;
 }
 
-// Create a new object buffer in the hash table.
+/* Create a new object buffer in the hash table. */
 void create_object(int conn, plasma_request* req) {
-  LOG_INFO("creating object"); // TODO(pcm): add object_id here
+  LOG_INFO("creating object"); /* TODO(pcm): add object_id here */
   int fd = create_buffer(req->size);
   if (fd < 0) {
     LOG_ERR("could not create shared memory buffer");
@@ -129,14 +106,14 @@ void create_object(int conn, plasma_request* req) {
   object_table_entry *entry = malloc(sizeof(object_table_entry));
   memcpy(&entry->object_id, &req->object_id, 20);
   entry->info.size = req->size;
-  // TODO(pcm): set the other fields
+  /* TODO(pcm): set the other fields */
   entry->fd = fd;
   HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
   plasma_reply reply = { PLASMA_OBJECT, req->size };
   send_fd(conn, fd, (char*) &reply, sizeof(plasma_reply));
 }
 
-// Get an object from the hash table.
+/* Get an object from the hash table. */
 void get_object(int conn, plasma_request* req) {
   object_table_entry *entry;
   HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
@@ -157,19 +134,19 @@ void get_object(int conn, plasma_request* req) {
   }
 }
 
-// Seal an object that has been created in the hash table.
+/* Seal an object that has been created in the hash table. */
 void seal_object(int conn, plasma_request* req) {
   LOG_INFO("sealing object"); // TODO(pcm): add object_id here
   object_table_entry *entry;
   HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
   if (!entry) {
-    return; // TODO(pcm): return error
+    return; /* TODO(pcm): return error */
   }
   HASH_DELETE(handle, open_objects, entry);
   int64_t size = entry->info.size;
   int fd = entry->fd;
   HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
-  // Inform processes that the object is ready now.
+  /* Inform processes that the object is ready now. */
   object_notify_entry* notify_entry;
   HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id), notify_entry);
   if (!notify_entry) {
@@ -178,6 +155,7 @@ void seal_object(int conn, plasma_request* req) {
   plasma_reply reply = { PLASMA_OBJECT, size };
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
     send_fd(notify_entry->conn[i], fd, (char*) &reply, sizeof(plasma_reply));
+    close(notify_entry->conn[i]);
   }
   HASH_DELETE(handle, objects_notify, notify_entry);
   free(notify_entry);
@@ -200,23 +178,24 @@ void process_event(int conn, plasma_request* req) {
   }
 }
 
-void event_loop(int socket) {
+void run_event_loop(int socket) {
   plasma_store_state state;
   init_state(&state);
-  add_client(&state, socket);
+  event_loop_attach(state.loop, 0, NULL, socket, POLLIN);
   plasma_request req;
   while (1) {
-    int num_ready = poll(state.waiting, state.num_clients, -1);
+    int num_ready = event_loop_poll(state.loop);
     if (num_ready < 0) {
       LOG_ERR("poll failed");
       exit(-1);
     }
-    for (int i = 0; i < state.num_clients; ++i) {
-      if (state.waiting[i].revents == 0)
+    for (int i = 0; i < event_loop_size(state.loop); ++i) {
+      struct pollfd *waiting = event_loop_get(state.loop, i);
+      if (waiting->revents == 0)
         continue;
-      if (state.waiting[i].fd == socket) {
+      if (waiting->fd == socket) {
         while (1) {
-          // Handle new incoming connections.
+          /* Handle new incoming connections. */
           int new_socket = accept(socket, NULL, NULL);
           if (new_socket < 0) {
             if (errno != EWOULDBLOCK) {
@@ -225,19 +204,19 @@ void event_loop(int socket) {
             }
             break;
           }
-          int client_id = add_client(&state, new_socket);
-          LOG_INFO("adding new client with id %d", client_id);
+          event_loop_attach(state.loop, 0, NULL, new_socket, POLLIN);
+          LOG_INFO("adding new client");
         }
       } else {
-        int r = read(state.waiting[i].fd, &req, sizeof(plasma_request));
+        int r = read(waiting->fd, &req, sizeof(plasma_request));
         if (r == -1) {
           LOG_ERR("read error");
           continue;
         } else if (r == 0) {
-          LOG_INFO("client with id %d disconnected", state.client_id[i]);
-          remove_client(&state, i);
+          LOG_INFO("connection %d disconnected", i);
+          event_loop_detach(state.loop, i, 1);
         } else {
-          process_event(state.waiting[i].fd, &req);
+          process_event(waiting->fd, &req);
         }
       }
     }
@@ -256,7 +235,7 @@ void start_server(char* socket_name) {
     close(fd);
     exit(-1);
   }
-  // TODO(pcm): http://stackoverflow.com/q/1150635
+  /* TODO(pcm): http://stackoverflow.com/q/1150635 */
   if (ioctl(fd, FIONBIO, (char*) &on) < 0) {
     LOG_ERR("ioctl failed");
     close(fd);
@@ -269,7 +248,7 @@ void start_server(char* socket_name) {
   unlink(socket_name);
   bind(fd, (struct sockaddr*)&addr, sizeof(addr));
   listen(fd, 5);
-  event_loop(fd);
+  run_event_loop(fd);
 }
 
 int main(int argc, char* argv[]) {
diff --git a/src/utarray.h b/src/utarray.h
new file mode 100644
index 000000000..979e99e98
--- /dev/null
+++ b/src/utarray.h
@@ -0,0 +1,238 @@
+/*
+Copyright (c) 2008-2016, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* a dynamic array implementation using macros
+ */
+#ifndef UTARRAY_H
+#define UTARRAY_H
+
+#define UTARRAY_VERSION 2.0.1
+
+#ifdef __GNUC__
+#define _UNUSED_ __attribute__ ((__unused__))
+#else
+#define _UNUSED_
+#endif
+
+#include <stddef.h>  /* size_t */
+#include <string.h>  /* memset, etc */
+#include <stdlib.h>  /* exit */
+
+#ifndef oom
+#define oom() exit(-1)
+#endif
+
+typedef void (ctor_f)(void *dst, const void *src);
+typedef void (dtor_f)(void *elt);
+typedef void (init_f)(void *elt);
+typedef struct {
+    size_t sz;
+    init_f *init;
+    ctor_f *copy;
+    dtor_f *dtor;
+} UT_icd;
+
+typedef struct {
+    unsigned i,n;/* i: index of next available slot, n: num slots */
+    UT_icd icd;  /* initializer, copy and destructor functions */
+    char *d;     /* n slots of size icd->sz*/
+} UT_array;
+
+#define utarray_init(a,_icd) do {                                             \
+  memset(a,0,sizeof(UT_array));                                               \
+  (a)->icd = *(_icd);                                                         \
+} while(0)
+
+#define utarray_done(a) do {                                                  \
+  if ((a)->n) {                                                               \
+    if ((a)->icd.dtor) {                                                      \
+      unsigned _ut_i;                                                         \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(utarray_eltptr(a,_ut_i));                               \
+      }                                                                       \
+    }                                                                         \
+    free((a)->d);                                                             \
+  }                                                                           \
+  (a)->n=0;                                                                   \
+} while(0)
+
+#define utarray_new(a,_icd) do {                                              \
+  (a) = (UT_array*)malloc(sizeof(UT_array));                                  \
+  if ((a) == NULL) oom();                                                     \
+  utarray_init(a,_icd);                                                       \
+} while(0)
+
+#define utarray_free(a) do {                                                  \
+  utarray_done(a);                                                            \
+  free(a);                                                                    \
+} while(0)
+
+#define utarray_reserve(a,by) do {                                            \
+  if (((a)->i+(by)) > (a)->n) {                                               \
+    char *utarray_tmp;                                                        \
+    while (((a)->i+(by)) > (a)->n) { (a)->n = ((a)->n ? (2*(a)->n) : 8); }    \
+    utarray_tmp=(char*)realloc((a)->d, (a)->n*(a)->icd.sz);                   \
+    if (utarray_tmp == NULL) oom();                                           \
+    (a)->d=utarray_tmp;                                                       \
+  }                                                                           \
+} while(0)
+
+#define utarray_push_back(a,p) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,(a)->i++), p); }      \
+  else { memcpy(_utarray_eltptr(a,(a)->i++), p, (a)->icd.sz); };              \
+} while(0)
+
+#define utarray_pop_back(a) do {                                              \
+  if ((a)->icd.dtor) { (a)->icd.dtor( _utarray_eltptr(a,--((a)->i))); }       \
+  else { (a)->i--; }                                                          \
+} while(0)
+
+#define utarray_extend_back(a) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.init) { (a)->icd.init(_utarray_eltptr(a,(a)->i)); }            \
+  else { memset(_utarray_eltptr(a,(a)->i),0,(a)->icd.sz); }                   \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_len(a) ((a)->i)
+
+#define utarray_eltptr(a,j) (((j) < (a)->i) ? _utarray_eltptr(a,j) : NULL)
+#define _utarray_eltptr(a,j) ((a)->d + ((a)->icd.sz * (j)))
+
+#define utarray_insert(a,p,j) do {                                            \
+  if ((j) > (a)->i) utarray_resize(a,j);                                      \
+  utarray_reserve(a,1);                                                       \
+  if ((j) < (a)->i) {                                                         \
+    memmove( _utarray_eltptr(a,(j)+1), _utarray_eltptr(a,j),                  \
+             ((a)->i - (j))*((a)->icd.sz));                                   \
+  }                                                                           \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,j), p); }             \
+  else { memcpy(_utarray_eltptr(a,j), p, (a)->icd.sz); };                     \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_inserta(a,w,j) do {                                           \
+  if (utarray_len(w) == 0) break;                                             \
+  if ((j) > (a)->i) utarray_resize(a,j);                                      \
+  utarray_reserve(a,utarray_len(w));                                          \
+  if ((j) < (a)->i) {                                                         \
+    memmove(_utarray_eltptr(a,(j)+utarray_len(w)),                            \
+            _utarray_eltptr(a,j),                                             \
+            ((a)->i - (j))*((a)->icd.sz));                                    \
+  }                                                                           \
+  if ((a)->icd.copy) {                                                        \
+    unsigned _ut_i;                                                           \
+    for(_ut_i=0;_ut_i<(w)->i;_ut_i++) {                                       \
+      (a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), _utarray_eltptr(w, _ut_i)); \
+    }                                                                         \
+  } else {                                                                    \
+    memcpy(_utarray_eltptr(a,j), _utarray_eltptr(w,0),                        \
+           utarray_len(w)*((a)->icd.sz));                                     \
+  }                                                                           \
+  (a)->i += utarray_len(w);                                                   \
+} while(0)
+
+#define utarray_resize(dst,num) do {                                          \
+  unsigned _ut_i;                                                             \
+  if ((dst)->i > (unsigned)(num)) {                                           \
+    if ((dst)->icd.dtor) {                                                    \
+      for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) {                        \
+        (dst)->icd.dtor(_utarray_eltptr(dst, _ut_i));                         \
+      }                                                                       \
+    }                                                                         \
+  } else if ((dst)->i < (unsigned)(num)) {                                    \
+    utarray_reserve(dst, (num) - (dst)->i);                                   \
+    if ((dst)->icd.init) {                                                    \
+      for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) {              \
+        (dst)->icd.init(_utarray_eltptr(dst, _ut_i));                         \
+      }                                                                       \
+    } else {                                                                  \
+      memset(_utarray_eltptr(dst, (dst)->i), 0, (dst)->icd.sz*((num) - (dst)->i)); \
+    }                                                                         \
+  }                                                                           \
+  (dst)->i = (num);                                                           \
+} while(0)
+
+#define utarray_concat(dst,src) do {                                          \
+  utarray_inserta(dst, src, utarray_len(dst));                                \
+} while(0)
+
+#define utarray_erase(a,pos,len) do {                                         \
+  if ((a)->icd.dtor) {                                                        \
+    unsigned _ut_i;                                                           \
+    for (_ut_i = 0; _ut_i < (len); _ut_i++) {                                 \
+      (a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i));                        \
+    }                                                                         \
+  }                                                                           \
+  if ((a)->i > ((pos) + (len))) {                                             \
+    memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)),       \
+            ((a)->i - ((pos) + (len))) * (a)->icd.sz);                        \
+  }                                                                           \
+  (a)->i -= (len);                                                            \
+} while(0)
+
+#define utarray_renew(a,u) do {                                               \
+  if (a) utarray_clear(a);                                                    \
+  else utarray_new(a, u);                                                     \
+} while(0)
+
+#define utarray_clear(a) do {                                                 \
+  if ((a)->i > 0) {                                                           \
+    if ((a)->icd.dtor) {                                                      \
+      unsigned _ut_i;                                                         \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(_utarray_eltptr(a, _ut_i));                             \
+      }                                                                       \
+    }                                                                         \
+    (a)->i = 0;                                                               \
+  }                                                                           \
+} while(0)
+
+#define utarray_sort(a,cmp) do {                                              \
+  qsort((a)->d, (a)->i, (a)->icd.sz, cmp);                                    \
+} while(0)
+
+#define utarray_find(a,v,cmp) bsearch((v),(a)->d,(a)->i,(a)->icd.sz,cmp)
+
+#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a,0)) : NULL)
+#define utarray_next(a,e) (((e)==NULL) ? utarray_front(a) : ((((a)->i) > (utarray_eltidx(a,e)+1)) ? _utarray_eltptr(a,utarray_eltidx(a,e)+1) : NULL))
+#define utarray_prev(a,e) (((e)==NULL) ? utarray_back(a) : ((utarray_eltidx(a,e) > 0) ? _utarray_eltptr(a,utarray_eltidx(a,e)-1) : NULL))
+#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a,(a)->i-1)) : NULL)
+#define utarray_eltidx(a,e) (((char*)(e) >= (a)->d) ? (((char*)(e) - (a)->d)/(a)->icd.sz) : -1)
+
+/* last we pre-define a few icd for common utarrays of ints and strings */
+static void utarray_str_cpy(void *dst, const void *src) {
+  char **_src = (char**)src, **_dst = (char**)dst;
+  *_dst = (*_src == NULL) ? NULL : strdup(*_src);
+}
+static void utarray_str_dtor(void *elt) {
+  char **eltc = (char**)elt;
+  if (*eltc != NULL) free(*eltc);
+}
+static const UT_icd ut_str_icd _UNUSED_ = {sizeof(char*),NULL,utarray_str_cpy,utarray_str_dtor};
+static const UT_icd ut_int_icd _UNUSED_ = {sizeof(int),NULL,NULL,NULL};
+static const UT_icd ut_ptr_icd _UNUSED_ = {sizeof(void*),NULL,NULL,NULL};
+
+
+#endif /* UTARRAY_H */
diff --git a/test/test.py b/test/test.py
index 82ba406a8..0fb54d3f2 100644
--- a/test/test.py
+++ b/test/test.py
@@ -17,9 +17,10 @@ class TestPlasmaClient(unittest.TestCase):
   def setUp(self):
     # Start Plasma.
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
-    self.p = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store"])
+    store_name = "/tmp/store{}".format(random.randint(0, 10000))
+    self.p = subprocess.Popen([plasma_store_executable, "-s", store_name])
     # Connect to Plasma.
-    self.plasma_client = plasma.PlasmaClient("/tmp/store")
+    self.plasma_client = plasma.PlasmaClient(store_name)
 
   def tearDown(self):
     # Kill the plasma store process.
@@ -67,18 +68,20 @@ class TestPlasmaManager(unittest.TestCase):
   def setUp(self):
     # Start two PlasmaStores.
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
-    self.p2 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store1"])
-    self.p3 = subprocess.Popen([plasma_store_executable, "-s", "/tmp/store2"])
+    store_name1 = "/tmp/store{}".format(random.randint(0, 10000))
+    store_name2 = "/tmp/store{}".format(random.randint(0, 10000))
+    self.p2 = subprocess.Popen([plasma_store_executable, "-s", store_name1])
+    self.p3 = subprocess.Popen([plasma_store_executable, "-s", store_name2])
     # Start two PlasmaManagers.
     self.port1 = random.randint(10000, 50000)
     self.port2 = random.randint(10000, 50000)
     plasma_manager_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_manager")
-    self.p4 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store1", "-m", "127.0.0.1", "-p", str(self.port1)])
-    self.p5 = subprocess.Popen([plasma_manager_executable, "-s", "/tmp/store2", "-m", "127.0.0.1", "-p", str(self.port2)])
+    self.p4 = subprocess.Popen([plasma_manager_executable, "-s", store_name1, "-m", "127.0.0.1", "-p", str(self.port1)])
+    self.p5 = subprocess.Popen([plasma_manager_executable, "-s", store_name2, "-m", "127.0.0.1", "-p", str(self.port2)])
     time.sleep(0.1)
     # Connect two PlasmaClients.
-    self.client1 = plasma.PlasmaClient("/tmp/store1", "127.0.0.1", self.port1)
-    self.client2 = plasma.PlasmaClient("/tmp/store2", "127.0.0.1", self.port2)
+    self.client1 = plasma.PlasmaClient(store_name1, "127.0.0.1", self.port1)
+    self.client2 = plasma.PlasmaClient(store_name2, "127.0.0.1", self.port2)
     time.sleep(0.5)
 
   def tearDown(self):

From 04737f3f56db58ad02cffad214ea80885142860f Mon Sep 17 00:00:00 2001
From: Richard Shin <shin.richard@gmail.com>
Date: Thu, 8 Sep 2016 15:28:27 -0700
Subject: [PATCH 17/91] Run clang-format and check in Travis CI (#14)

* Run clang-format and add pre-commit hook for it.

* Modify .travis.yml to check clang-format output

* Try to fix problems with .travis.yml

* Try to fix .travis.yml yet again

* Update .clang-format to Philipp's preferences

* Don't allow lint to fail in Travis

* Remove git-hooks directory

* Improve clang-format failure output

* Fix clang-format error

* Report which commit clang-format is comparing against, and add whitespace error

* Handle non-PR Travis in clang-format, and add another error

* Check $TRAVIS_PULL_REQUEST correctly and add another error

* Fix syntax error in check-git-clang-format-output.sh

* Add whitespace error

* Remove extra whitespace, add clang-format to README
---
 .clang-format                            |   5 +
 .travis.yml                              |  13 +
 .travis/check-git-clang-format-output.sh |  18 +
 .travis/git-clang-format                 | 485 +++++++++++++++++++++++
 README.md                                |   3 +
 src/event_loop.c                         |  46 ++-
 src/event_loop.h                         |  16 +-
 src/example.c                            |   8 +-
 src/fling.c                              |  21 +-
 src/fling.h                              |  15 +-
 src/plasma.h                             |  20 +-
 src/plasma_client.c                      |  41 +-
 src/plasma_manager.c                     | 167 ++++----
 src/plasma_manager.h                     |   6 +-
 src/plasma_store.c                       |  37 +-
 15 files changed, 736 insertions(+), 165 deletions(-)
 create mode 100644 .clang-format
 create mode 100755 .travis/check-git-clang-format-output.sh
 create mode 100755 .travis/git-clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..8957bdc17
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,5 @@
+BasedOnStyle: Chromium
+DerivePointerAlignment: true
+IndentCaseLabels: false
+PointerAlignment: Right
+SpaceAfterCStyleCast: true
diff --git a/.travis.yml b/.travis.yml
index 15be440e8..5873bfca2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,19 @@ matrix:
     - os: osx
       osx_image: xcode7
       python: "3.5"
+    - os: linux
+      dist: trusty
+      python: "2.7"
+      env: LINT=1
+      before_install:
+        # In case we ever want to use a different version of clang-format:
+        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq clang-format-3.8
+      install: []
+      script:
+        - .travis/check-git-clang-format-output.sh
 
 install:
   - make
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
new file mode 100755
index 000000000..ea84041a1
--- /dev/null
+++ b/.travis/check-git-clang-format-output.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
+  # Not in a pull request, so compare against parent commit
+  base_commit="HEAD^"
+  echo "Running clang-format against parent commit $(git rev-parse $base_commit)"
+else
+  base_commit="$TRAVIS_BRANCH"
+  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
+fi
+output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff)"
+if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
+  echo "clang-format passed."
+  exit 0
+else
+  echo "clang-format failed:"
+  echo "$output"
+  exit 1
+fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
new file mode 100755
index 000000000..0c45762ea
--- /dev/null
+++ b/.travis/git-clang-format
@@ -0,0 +1,485 @@
+#!/usr/bin/env python
+#
+#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""                                                                             
+clang-format git integration                                                     
+============================                                                     
+                                                                                 
+This file provides a clang-format integration for git. Put it somewhere in your  
+path and ensure that it is executable. Then, "git clang-format" will invoke      
+clang-format on the changes in current files or a specific commit.               
+                                                                                 
+For further details, run:                                                        
+git clang-format -h                                                              
+                                                                                 
+Requires Python 2.7                                                              
+"""               
+
+import argparse
+import collections
+import contextlib
+import errno
+import os
+import re
+import subprocess
+import sys
+
+usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
+
+desc = '''
+Run clang-format on all lines that differ between the working directory
+and <commit>, which defaults to HEAD.  Changes are only applied to the working
+directory.
+
+The following git-config settings set the default of the corresponding option:
+  clangFormat.binary
+  clangFormat.commit
+  clangFormat.extension
+  clangFormat.style
+'''
+
+# Name of the temporary index file in which to save the output of clang-format.
+# This file is created within the .git directory.
+temp_index_basename = 'clang-format-index'
+
+
+Range = collections.namedtuple('Range', 'start, count')
+
+
+def main():
+  config = load_git_config()
+
+  # In order to keep '--' yet allow options after positionals, we need to
+  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
+  # nargs=argparse.REMAINDER disallows options after positionals.)
+  argv = sys.argv[1:]
+  try:
+    idx = argv.index('--')
+  except ValueError:
+    dash_dash = []
+  else:
+    dash_dash = argv[idx:]
+    argv = argv[:idx]
+
+  default_extensions = ','.join([
+      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
+      'c', 'h',  # C
+      'm',  # ObjC
+      'mm',  # ObjC++
+      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
+      # Other languages that clang-format supports
+      'proto', 'protodevel',  # Protocol Buffers
+      'js',  # JavaScript
+      'ts',  # TypeScript
+      ])
+
+  p = argparse.ArgumentParser(
+    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=desc)
+  p.add_argument('--binary',
+                 default=config.get('clangformat.binary', 'clang-format'),
+                 help='path to clang-format'),
+  p.add_argument('--commit',
+                 default=config.get('clangformat.commit', 'HEAD'),
+                 help='default commit to use if none is specified'),
+  p.add_argument('--diff', action='store_true',
+                 help='print a diff instead of applying the changes')
+  p.add_argument('--extensions',
+                 default=config.get('clangformat.extensions',
+                                    default_extensions),
+                 help=('comma-separated list of file extensions to format, '
+                       'excluding the period and case-insensitive')),
+  p.add_argument('-f', '--force', action='store_true',
+                 help='allow changes to unstaged files')
+  p.add_argument('-p', '--patch', action='store_true',
+                 help='select hunks interactively')
+  p.add_argument('-q', '--quiet', action='count', default=0,
+                 help='print less information')
+  p.add_argument('--style',
+                 default=config.get('clangformat.style', None),
+                 help='passed to clang-format'),
+  p.add_argument('-v', '--verbose', action='count', default=0,
+                 help='print extra information')
+  # We gather all the remaining positional arguments into 'args' since we need
+  # to use some heuristics to determine whether or not <commit> was present.
+  # However, to print pretty messages, we make use of metavar and help.
+  p.add_argument('args', nargs='*', metavar='<commit>',
+                 help='revision from which to compute the diff')
+  p.add_argument('ignored', nargs='*', metavar='<file>...',
+                 help='if specified, only consider differences in these files')
+  opts = p.parse_args(argv)
+
+  opts.verbose -= opts.quiet
+  del opts.quiet
+
+  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
+  changed_lines = compute_diff_and_extract_lines(commit, files)
+  if opts.verbose >= 1:
+    ignored_files = set(changed_lines)
+  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
+  if opts.verbose >= 1:
+    ignored_files.difference_update(changed_lines)
+    if ignored_files:
+      print 'Ignoring changes in the following files (wrong extension):'
+      for filename in ignored_files:
+        print '   ', filename
+    if changed_lines:
+      print 'Running clang-format on the following files:'
+      for filename in changed_lines:
+        print '   ', filename
+  if not changed_lines:
+    print 'no modified files to format'
+    return
+  # The computed diff outputs absolute paths, so we must cd before accessing
+  # those files.
+  cd_to_toplevel()
+  old_tree = create_tree_from_workdir(changed_lines)
+  new_tree = run_clang_format_and_save_to_tree(changed_lines,
+                                               binary=opts.binary,
+                                               style=opts.style)
+  if opts.verbose >= 1:
+    print 'old tree:', old_tree
+    print 'new tree:', new_tree
+  if old_tree == new_tree:
+    if opts.verbose >= 0:
+      print 'clang-format did not modify any files'
+  elif opts.diff:
+    print_diff(old_tree, new_tree)
+  else:
+    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
+                                  patch_mode=opts.patch)
+    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
+      print 'changed files:'
+      for filename in changed_files:
+        print '   ', filename
+
+
+def load_git_config(non_string_options=None):
+  """Return the git configuration as a dictionary.
+
+  All options are assumed to be strings unless in `non_string_options`, which
+  is a dictionary mapping option name (in lower case) to either "--bool" or
+  "--int"."""
+  if non_string_options is None:
+    non_string_options = {}
+  out = {}
+  for entry in run('git', 'config', '--list', '--null').split('\0'):
+    if entry:
+      name, value = entry.split('\n', 1)
+      if name in non_string_options:
+        value = run('git', 'config', non_string_options[name], name)
+      out[name] = value
+  return out
+
+
+def interpret_args(args, dash_dash, default_commit):
+  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
+
+  It is assumed that "--" and everything that follows has been removed from
+  args and placed in `dash_dash`.
+
+  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
+  left (if present) is taken as commit.  Otherwise, the first argument is
+  checked if it is a commit or a file.  If commit is not given,
+  `default_commit` is used."""
+  if dash_dash:
+    if len(args) == 0:
+      commit = default_commit
+    elif len(args) > 1:
+      die('at most one commit allowed; %d given' % len(args))
+    else:
+      commit = args[0]
+    object_type = get_object_type(commit)
+    if object_type not in ('commit', 'tag'):
+      if object_type is None:
+        die("'%s' is not a commit" % commit)
+      else:
+        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
+    files = dash_dash[1:]
+  elif args:
+    if disambiguate_revision(args[0]):
+      commit = args[0]
+      files = args[1:]
+    else:
+      commit = default_commit
+      files = args
+  else:
+    commit = default_commit
+    files = []
+  return commit, files
+
+
+def disambiguate_revision(value):
+  """Returns True if `value` is a revision, False if it is a file, or dies."""
+  # If `value` is ambiguous (neither a commit nor a file), the following
+  # command will die with an appropriate error message.
+  run('git', 'rev-parse', value, verbose=False)
+  object_type = get_object_type(value)
+  if object_type is None:
+    return False
+  if object_type in ('commit', 'tag'):
+    return True
+  die('`%s` is a %s, but a commit or filename was expected' %
+      (value, object_type))
+
+
+def get_object_type(value):
+  """Returns a string description of an object's type, or None if it is not
+  a valid git object."""
+  cmd = ['git', 'cat-file', '-t', value]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  stdout, stderr = p.communicate()
+  if p.returncode != 0:
+    return None
+  return stdout.strip()
+
+
+def compute_diff_and_extract_lines(commit, files):
+  """Calls compute_diff() followed by extract_lines()."""
+  diff_process = compute_diff(commit, files)
+  changed_lines = extract_lines(diff_process.stdout)
+  diff_process.stdout.close()
+  diff_process.wait()
+  if diff_process.returncode != 0:
+    # Assume error was already printed to stderr.
+    sys.exit(2)
+  return changed_lines
+
+
+def compute_diff(commit, files):
+  """Return a subprocess object producing the diff from `commit`.
+
+  The return value's `stdout` file object will produce a patch with the
+  differences between the working directory and `commit`, filtered on `files`
+  (if non-empty).  Zero context lines are used in the patch."""
+  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
+  cmd.extend(files)
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  p.stdin.close()
+  return p
+
+
+def extract_lines(patch_file):
+  """Extract the changed lines in `patch_file`.
+
+  The return value is a dictionary mapping filename to a list of (start_line,
+  line_count) pairs.
+
+  The input must have been produced with ``-U0``, meaning unidiff format with
+  zero lines of context.  The return value is a dict mapping filename to a
+  list of line `Range`s."""
+  matches = {}
+  for line in patch_file:
+    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
+    if match:
+      filename = match.group(1).rstrip('\r\n')
+    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
+    if match:
+      start_line = int(match.group(1))
+      line_count = 1
+      if match.group(3):
+        line_count = int(match.group(3))
+      if line_count > 0:
+        matches.setdefault(filename, []).append(Range(start_line, line_count))
+  return matches
+
+
+def filter_by_extension(dictionary, allowed_extensions):
+  """Delete every key in `dictionary` that doesn't have an allowed extension.
+
+  `allowed_extensions` must be a collection of lowercase file extensions,
+  excluding the period."""
+  allowed_extensions = frozenset(allowed_extensions)
+  for filename in dictionary.keys():
+    base_ext = filename.rsplit('.', 1)
+    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
+      del dictionary[filename]
+
+
+def cd_to_toplevel():
+  """Change to the top level of the git repository."""
+  toplevel = run('git', 'rev-parse', '--show-toplevel')
+  os.chdir(toplevel)
+
+
+def create_tree_from_workdir(filenames):
+  """Create a new git tree with the given files from the working directory.
+
+  Returns the object ID (SHA-1) of the created tree."""
+  return create_tree(filenames, '--stdin')
+
+
+def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
+                                      style=None):
+  """Run clang-format on each file and save the result to a git tree.
+
+  Returns the object ID (SHA-1) of the created tree."""
+  def index_info_generator():
+    for filename, line_ranges in changed_lines.iteritems():
+      mode = oct(os.stat(filename).st_mode)
+      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
+                                     style=style)
+      yield '%s %s\t%s' % (mode, blob_id, filename)
+  return create_tree(index_info_generator(), '--index-info')
+
+
+def create_tree(input_lines, mode):
+  """Create a tree object from the given input.
+
+  If mode is '--stdin', it must be a list of filenames.  If mode is
+  '--index-info' it must be a list of values suitable for "git update-index
+  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
+  is invalid."""
+  assert mode in ('--stdin', '--index-info')
+  cmd = ['git', 'update-index', '--add', '-z', mode]
+  with temporary_index_file():
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+    for line in input_lines:
+      p.stdin.write('%s\0' % line)
+    p.stdin.close()
+    if p.wait() != 0:
+      die('`%s` failed' % ' '.join(cmd))
+    tree_id = run('git', 'write-tree')
+    return tree_id
+
+
+def clang_format_to_blob(filename, line_ranges, binary='clang-format',
+                         style=None):
+  """Run clang-format on the given file and save the result to a git blob.
+
+  Returns the object ID (SHA-1) of the created blob."""
+  clang_format_cmd = [binary, filename]
+  if style:
+    clang_format_cmd.extend(['-style='+style])
+  clang_format_cmd.extend([
+      '-lines=%s:%s' % (start_line, start_line+line_count-1)
+      for start_line, line_count in line_ranges])
+  try:
+    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE)
+  except OSError as e:
+    if e.errno == errno.ENOENT:
+      die('cannot find executable "%s"' % binary)
+    else:
+      raise
+  clang_format.stdin.close()
+  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
+  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
+                                 stdout=subprocess.PIPE)
+  clang_format.stdout.close()
+  stdout = hash_object.communicate()[0]
+  if hash_object.returncode != 0:
+    die('`%s` failed' % ' '.join(hash_object_cmd))
+  if clang_format.wait() != 0:
+    die('`%s` failed' % ' '.join(clang_format_cmd))
+  return stdout.rstrip('\r\n')
+
+
+@contextlib.contextmanager
+def temporary_index_file(tree=None):
+  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
+  the file afterward."""
+  index_path = create_temporary_index(tree)
+  old_index_path = os.environ.get('GIT_INDEX_FILE')
+  os.environ['GIT_INDEX_FILE'] = index_path
+  try:
+    yield
+  finally:
+    if old_index_path is None:
+      del os.environ['GIT_INDEX_FILE']
+    else:
+      os.environ['GIT_INDEX_FILE'] = old_index_path
+    os.remove(index_path)
+
+
+def create_temporary_index(tree=None):
+  """Create a temporary index file and return the created file's path.
+
+  If `tree` is not None, use that as the tree to read in.  Otherwise, an
+  empty index is created."""
+  gitdir = run('git', 'rev-parse', '--git-dir')
+  path = os.path.join(gitdir, temp_index_basename)
+  if tree is None:
+    tree = '--empty'
+  run('git', 'read-tree', '--index-output='+path, tree)
+  return path
+
+
+def print_diff(old_tree, new_tree):
+  """Print the diff between the two trees to stdout."""
+  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
+  # is expected to be viewed by the user, and only the former does nice things
+  # like color and pagination.
+  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
+
+
+def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
+  """Apply the changes in `new_tree` to the working directory.
+
+  Bails if there are local changes in those files and not `force`.  If
+  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
+  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
+                      new_tree).rstrip('\0').split('\0')
+  if not force:
+    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
+    if unstaged_files:
+      print >>sys.stderr, ('The following files would be modified but '
+                           'have unstaged changes:')
+      print >>sys.stderr, unstaged_files
+      print >>sys.stderr, 'Please commit, stage, or stash them first.'
+      sys.exit(2)
+  if patch_mode:
+    # In patch mode, we could just as well create an index from the new tree
+    # and checkout from that, but then the user will be presented with a
+    # message saying "Discard ... from worktree".  Instead, we use the old
+    # tree as the index and checkout from new_tree, which gives the slightly
+    # better message, "Apply ... to index and worktree".  This is not quite
+    # right, since it won't be applied to the user's index, but oh well.
+    with temporary_index_file(old_tree):
+      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
+    index_tree = old_tree
+  else:
+    with temporary_index_file(new_tree):
+      run('git', 'checkout-index', '-a', '-f')
+  return changed_files
+
+
+def run(*args, **kwargs):
+  stdin = kwargs.pop('stdin', '')
+  verbose = kwargs.pop('verbose', True)
+  strip = kwargs.pop('strip', True)
+  for name in kwargs:
+    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
+  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                       stdin=subprocess.PIPE)
+  stdout, stderr = p.communicate(input=stdin)
+  if p.returncode == 0:
+    if stderr:
+      if verbose:
+        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
+      print >>sys.stderr, stderr.rstrip()
+    if strip:
+      stdout = stdout.rstrip('\r\n')
+    return stdout
+  if verbose:
+    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
+  if stderr:
+    print >>sys.stderr, stderr.rstrip()
+  sys.exit(2)
+
+
+def die(message):
+  print >>sys.stderr, 'error:', message
+  sys.exit(2)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/README.md b/README.md
index eedccd357..563088235 100644
--- a/README.md
+++ b/README.md
@@ -2,3 +2,6 @@
 
 Plasma is an experimental in-memory object manager. It is under development and
 not ready for general use.
+
+## clang-format
+Run .travis/git-clang-format to automatically format changes in the checkout.
diff --git a/src/event_loop.c b/src/event_loop.c
index 2fb6a856b..de61c0e05 100644
--- a/src/event_loop.c
+++ b/src/event_loop.c
@@ -3,8 +3,8 @@
 #include <assert.h>
 #include <unistd.h>
 
-UT_icd item_icd = { sizeof(event_loop_item), NULL, NULL, NULL };
-UT_icd poll_icd = { sizeof(struct pollfd), NULL, NULL, NULL };
+UT_icd item_icd = {sizeof(event_loop_item), NULL, NULL, NULL};
+UT_icd poll_icd = {sizeof(struct pollfd), NULL, NULL, NULL};
 
 /* Initializes the event loop.
  * This function needs to be called before any other event loop function. */
@@ -18,15 +18,19 @@ void event_loop_init(event_loop *loop) {
  * which can be queried using event_loop_type and event_loop_id. The parameter
  * events is the same as in http://linux.die.net/man/2/poll.
  * Returns the index of the item in the event loop. */
-int64_t event_loop_attach(event_loop *loop, int type, data_connection* connection, int fd, int events) {
+int64_t event_loop_attach(event_loop *loop,
+                          int type,
+                          data_connection *connection,
+                          int fd,
+                          int events) {
   assert(utarray_len(loop->items) == utarray_len(loop->waiting));
   int64_t index = utarray_len(loop->items);
-  event_loop_item item = { .type = type };
+  event_loop_item item = {.type = type};
   if (connection) {
     item.connection = *connection;
   }
-  utarray_push_back(loop->items, &item );
-  struct pollfd waiting = { .fd = fd, .events = events };
+  utarray_push_back(loop->items, &item);
+  struct pollfd waiting = {.fd = fd, .events = events};
   utarray_push_back(loop->waiting, &waiting);
   return index;
 }
@@ -35,16 +39,18 @@ int64_t event_loop_attach(event_loop *loop, int type, data_connection* connectio
  * This invalidates all other indices into the event loop items, but leaves
  * the ids of the event loop items valid. */
 void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
-  struct pollfd *waiting_item = (struct pollfd*) utarray_eltptr(loop->waiting, index);
-  struct pollfd *waiting_back = (struct pollfd*) utarray_back(loop->waiting);
+  struct pollfd *waiting_item =
+      (struct pollfd *) utarray_eltptr(loop->waiting, index);
+  struct pollfd *waiting_back = (struct pollfd *) utarray_back(loop->waiting);
   if (shall_close) {
     close(waiting_item->fd);
   }
   *waiting_item = *waiting_back;
   utarray_pop_back(loop->waiting);
 
-  event_loop_item *items_item = (event_loop_item*) utarray_eltptr(loop->items, index);
-  event_loop_item *items_back = (event_loop_item*) utarray_back(loop->items);
+  event_loop_item *items_item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
+  event_loop_item *items_back = (event_loop_item *) utarray_back(loop->items);
   *items_item = *items_back;
   utarray_pop_back(loop->items);
 }
@@ -52,7 +58,8 @@ void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
 /* Poll the file descriptors associated to this event loop.
  * See http://linux.die.net/man/2/poll */
 int event_loop_poll(event_loop *loop) {
-  return poll((struct pollfd*) utarray_front(loop->waiting), utarray_len(loop->waiting), -1);
+  return poll((struct pollfd *) utarray_front(loop->waiting),
+              utarray_len(loop->waiting), -1);
 }
 
 /* Get the total number of file descriptors participating in the event loop. */
@@ -60,20 +67,25 @@ int64_t event_loop_size(event_loop *loop) {
   return utarray_len(loop->waiting);
 }
 
-/* Get the pollfd structure associated to a file descriptor participating in the event loop. */
+/* Get the pollfd structure associated to a file descriptor participating in the
+ * event loop. */
 struct pollfd *event_loop_get(event_loop *loop, int64_t index) {
-  return (struct pollfd*) utarray_eltptr(loop->waiting, index);
+  return (struct pollfd *) utarray_eltptr(loop->waiting, index);
 }
 
 /* Set the data connection information for participant in the event loop. */
-void event_loop_set_connection(event_loop *loop, int64_t index, const data_connection* conn) {
-  event_loop_item *item = (event_loop_item*) utarray_eltptr(loop->items, index);
+void event_loop_set_connection(event_loop *loop,
+                               int64_t index,
+                               const data_connection *conn) {
+  event_loop_item *item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
   item->connection = *conn;
 }
 
 /* Get the data connection information for participant in the event loop. */
-data_connection* event_loop_get_connection(event_loop *loop, int64_t index) {
-  event_loop_item *item = (event_loop_item*) utarray_eltptr(loop->items, index);
+data_connection *event_loop_get_connection(event_loop *loop, int64_t index) {
+  event_loop_item *item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
   return &item->connection;
 }
 
diff --git a/src/event_loop.h b/src/event_loop.h
index fbb3aab97..b83e1df63 100644
--- a/src/event_loop.h
+++ b/src/event_loop.h
@@ -12,25 +12,31 @@ typedef struct {
   int type;
   /* If type is data transfer, this contains information about the status
    * of the transfer. */
-	data_connection connection;
+  data_connection connection;
 } event_loop_item;
 
 typedef struct {
   /* Array of event_loop_items that hold information for connections. */
-  UT_array *items; 
+  UT_array *items;
   /* Array of file descriptors that are waiting, corresponding to items. */
-  UT_array *waiting; 
+  UT_array *waiting;
 } event_loop;
 
 /* Event loop functions. */
 void event_loop_init(event_loop *loop);
 void event_loop_free(event_loop *loop);
-int64_t event_loop_attach(event_loop *loop, int type, data_connection* connection, int fd, int events);
+int64_t event_loop_attach(event_loop *loop,
+                          int type,
+                          data_connection *connection,
+                          int fd,
+                          int events);
 void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
 int event_loop_poll(event_loop *loop);
 int64_t event_loop_size(event_loop *loop);
 struct pollfd *event_loop_get(event_loop *loop, int64_t index);
-void event_loop_set_connection(event_loop *loop, int64_t index, const data_connection* conn);
+void event_loop_set_connection(event_loop *loop,
+                               int64_t index,
+                               const data_connection *conn);
 data_connection *event_loop_get_connection(event_loop *loop, int64_t index);
 
 #endif
diff --git a/src/example.c b/src/example.c
index f42e26729..f2b445675 100644
--- a/src/example.c
+++ b/src/example.c
@@ -1,7 +1,7 @@
 /* A simple example on how to use the plasma store
- * 
+ *
  * Can be called in the following way:
- * 
+ *
  * cd build
  * ./plasma_store -s /tmp/plasma_socket
  * ./example -s /tmp/plasma_socket -g
@@ -19,8 +19,8 @@ int main(int argc, char *argv[]) {
   int64_t size;
   void *data;
   int c;
-  plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-		   255, 255, 255, 255, 255, 255, 255, 255}};
+  plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+                   255, 255, 255, 255, 255, 255, 255, 255, 255, 255}};
   while ((c = getopt(argc, argv, "s:cfg")) != -1) {
     switch (c) {
     case 's':
diff --git a/src/fling.c b/src/fling.c
index 6b6e773de..6e4d8cf0a 100644
--- a/src/fling.c
+++ b/src/fling.c
@@ -1,7 +1,9 @@
 #include "fling.h"
 
-void init_msg(struct msghdr *msg, struct iovec *iov,
-	      char *buf, size_t buf_len) {
+void init_msg(struct msghdr *msg,
+              struct iovec *iov,
+              char *buf,
+              size_t buf_len) {
   iov->iov_base = buf;
   iov->iov_len = 1;
 
@@ -13,7 +15,7 @@ void init_msg(struct msghdr *msg, struct iovec *iov,
   msg->msg_namelen = 0;
 }
 
-int send_fd(int conn, int fd, const char* payload, int size) {
+int send_fd(int conn, int fd, const char *payload, int size) {
   struct msghdr msg;
   struct iovec iov;
   char buf[CMSG_SPACE(sizeof(int))];
@@ -24,13 +26,13 @@ int send_fd(int conn, int fd, const char* payload, int size) {
   header->cmsg_level = SOL_SOCKET;
   header->cmsg_type = SCM_RIGHTS;
   header->cmsg_len = CMSG_LEN(sizeof(int));
-  *(int *)CMSG_DATA(header) = fd;
+  *(int *) CMSG_DATA(header) = fd;
 
   /* send file descriptor and payload */
   return sendmsg(conn, &msg, 0) != -1 && send(conn, payload, size, 0) == -1;
 }
 
-int recv_fd(int conn, char* payload, int size) {
+int recv_fd(int conn, char *payload, int size) {
   struct msghdr msg;
   struct iovec iov;
   char buf[CMSG_SPACE(sizeof(int))];
@@ -41,11 +43,14 @@ int recv_fd(int conn, char* payload, int size) {
 
   int found_fd = -1;
   int oh_noes = 0;
-  for (struct cmsghdr *header = CMSG_FIRSTHDR(&msg); header != NULL; header = CMSG_NXTHDR(&msg, header))
+  for (struct cmsghdr *header = CMSG_FIRSTHDR(&msg); header != NULL;
+       header = CMSG_NXTHDR(&msg, header))
     if (header->cmsg_level == SOL_SOCKET && header->cmsg_type == SCM_RIGHTS) {
-      int count = (header->cmsg_len - (CMSG_DATA(header) - (unsigned char *)header)) / sizeof(int);
+      int count =
+          (header->cmsg_len - (CMSG_DATA(header) - (unsigned char *) header)) /
+          sizeof(int);
       for (int i = 0; i < count; ++i) {
-        int fd = ((int *)CMSG_DATA(header))[i];
+        int fd = ((int *) CMSG_DATA(header))[i];
         if (found_fd == -1) {
           found_fd = fd;
         } else {
diff --git a/src/fling.h b/src/fling.h
index cc38f05ce..56f08dd46 100644
--- a/src/fling.h
+++ b/src/fling.h
@@ -15,20 +15,21 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
-/* This is neccessary for Mac OS X, see http://www.apuebook.com/faqs2e.html (10). */
+/* This is necessary for Mac OS X, see http://www.apuebook.com/faqs2e.html
+ * (10). */
 #if !defined(CMSG_SPACE) && !defined(CMSG_LEN)
-  #define CMSG_SPACE(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(len))
-  #define CMSG_LEN(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (len))
+#define CMSG_SPACE(len) \
+  (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(len))
+#define CMSG_LEN(len) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (len))
 #endif
 
-void init_msg(struct msghdr *msg, struct iovec *iov,
-              char *buf, size_t buf_len);
+void init_msg(struct msghdr *msg, struct iovec *iov, char *buf, size_t buf_len);
 
 /* Send a file descriptor "fd" and a payload "payload" of size "size"
  * over the socket "conn". Return 0 on success. */
-int send_fd(int conn, int fd, const char* payload, int size);
+int send_fd(int conn, int fd, const char *payload, int size);
 
 /* Receive a file descriptor and a payload of size up to "size" from a
  * socket "conn". The payload will be written to "payload" and the file
  * descriptor will be returned. Returns -1 on failure. */
-int recv_fd(int conn, char* payload, int size);
+int recv_fd(int conn, char *payload, int size);
diff --git a/src/plasma.h b/src/plasma.h
index eda883aa6..017b85a3a 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -7,15 +7,15 @@
 #include <string.h>
 
 #ifdef NDEBUG
-  #define LOG_DEBUG(M, ...)
+#define LOG_DEBUG(M, ...)
 #else
-  #define LOG_DEBUG(M, ...) \
-    fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#define LOG_DEBUG(M, ...) \
+  fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
 #endif
 
-#define LOG_ERR(M, ...) \
-  fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", \
-    __FILE__, __LINE__, errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
+#define LOG_ERR(M, ...)                                                     \
+  fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+          errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
 
 #define LOG_INFO(M, ...) \
   fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
@@ -27,9 +27,7 @@ typedef struct {
 } plasma_object_info;
 
 /* Represents an object id hash, can hold a full SHA1 hash */
-typedef struct {
-  unsigned char id[20];
-} plasma_id;
+typedef struct { unsigned char id[20]; } plasma_id;
 
 enum plasma_request_type {
   /* Create a new object. */
@@ -72,10 +70,10 @@ typedef struct {
 } plasma_buffer;
 
 /* Connect to the local plasma store UNIX domain socket */
-int plasma_store_connect(const char* socket_name);
+int plasma_store_connect(const char *socket_name);
 
 /* Connect to a possibly remote plasma manager */
-int plasma_manager_connect(const char* addr, int port);
+int plasma_manager_connect(const char *addr, int port);
 
 void plasma_create(int store, plasma_id object_id, int64_t size, void **data);
 void plasma_get(int store, plasma_id object_id, int64_t *size, void **data);
diff --git a/src/plasma_client.c b/src/plasma_client.c
index d4f2d207f..76eaac154 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -25,10 +25,11 @@ void plasma_send(int fd, plasma_request *req) {
 
 void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
   LOG_INFO("called plasma_create on conn %d with size %" PRId64, conn, size);
-  plasma_request req = { .type = PLASMA_CREATE, .object_id = object_id, .size = size };
+  plasma_request req = {
+      .type = PLASMA_CREATE, .object_id = object_id, .size = size};
   plasma_send(conn, &req);
   plasma_reply reply;
-  int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
+  int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
   assert(reply.type == PLASMA_OBJECT);
   assert(reply.size == size);
   *data = mmap(NULL, reply.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
@@ -39,19 +40,19 @@ void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
 }
 
 void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
-  plasma_request req = { .type = PLASMA_GET, .object_id = object_id };
+  plasma_request req = {.type = PLASMA_GET, .object_id = object_id};
   plasma_send(conn, &req);
   plasma_reply reply;
   /* The following loop is run at most twice. */
-  int fd = recv_fd(conn, (char*)&reply, sizeof(plasma_reply));
+  int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
   if (reply.type == PLASMA_FUTURE) {
-    int new_fd = recv_fd(fd, (char*)&reply, sizeof(plasma_reply));
+    int new_fd = recv_fd(fd, (char *) &reply, sizeof(plasma_reply));
     close(fd);
     fd = new_fd;
   }
   assert(reply.type == PLASMA_OBJECT);
   *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
-  if (*data  == MAP_FAILED) {
+  if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
@@ -59,11 +60,11 @@ void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
 }
 
 void plasma_seal(int fd, plasma_id object_id) {
-  plasma_request req = { .type = PLASMA_SEAL, .object_id = object_id };
+  plasma_request req = {.type = PLASMA_SEAL, .object_id = object_id};
   plasma_send(fd, &req);
 }
 
-int plasma_store_connect(const char* socket_name) {
+int plasma_store_connect(const char *socket_name) {
   assert(socket_name);
   struct sockaddr_un addr;
   int fd;
@@ -73,11 +74,12 @@ int plasma_store_connect(const char* socket_name) {
   }
   memset(&addr, 0, sizeof(addr));
   addr.sun_family = AF_UNIX;
-  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
-  /* Try to connect to the Plasma store. If unsuccessful, retry several times. */
+  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
+  /* Try to connect to the Plasma store. If unsuccessful, retry several times.
+   */
   int connected_successfully = 0;
   for (int num_attempts = 0; num_attempts < 50; ++num_attempts) {
-    if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) {
+    if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == 0) {
       connected_successfully = 1;
       break;
     }
@@ -94,7 +96,7 @@ int plasma_store_connect(const char* socket_name) {
 
 #define h_addr h_addr_list[0]
 
-int plasma_manager_connect(const char* ip_addr, int port) {
+int plasma_manager_connect(const char *ip_addr, int port) {
   int fd = socket(PF_INET, SOCK_STREAM, 0);
   if (fd < 0) {
     LOG_ERR("could not create socket");
@@ -112,17 +114,22 @@ int plasma_manager_connect(const char* ip_addr, int port) {
   bcopy(manager->h_addr, &addr.sin_addr.s_addr, manager->h_length);
   addr.sin_port = htons(port);
 
-  int r = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
+  int r = connect(fd, (struct sockaddr *) &addr, sizeof(addr));
   if (r < 0) {
-    LOG_ERR("could not establish connection to manager with id %s:%d", &ip_addr[0], port);
+    LOG_ERR("could not establish connection to manager with id %s:%d",
+            &ip_addr[0], port);
     exit(-1);
   }
   return fd;
 }
 
-void plasma_transfer(int manager, const char* addr, int port, plasma_id object_id) {
-  plasma_request req = {.type = PLASMA_TRANSFER, .object_id = object_id, .port = port};
-  char* end = NULL;
+void plasma_transfer(int manager,
+                     const char *addr,
+                     int port,
+                     plasma_id object_id) {
+  plasma_request req = {
+      .type = PLASMA_TRANSFER, .object_id = object_id, .port = port};
+  char *end = NULL;
   for (int i = 0; i < 4; ++i) {
     req.addr[i] = strtol(end ? end : addr, &end, 10);
     /* skip the '.' */
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 321f40931..2e37d5f69 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -34,7 +34,8 @@ typedef struct {
 /* Initialize the plasma manager. This function initializes the event loop
  * of the plasma manager, and stores the address 'store_socket_name' of
  * the local plasma store socket. */
-void init_plasma_manager(plasma_manager_state *s, const char* store_socket_name) {
+void init_plasma_manager(plasma_manager_state* s,
+                         const char* store_socket_name) {
   s->loop = malloc(sizeof(event_loop));
   event_loop_init(s->loop);
   s->store_socket_name = store_socket_name;
@@ -45,36 +46,47 @@ void init_plasma_manager(plasma_manager_state *s, const char* store_socket_name)
  * the data header to the other object manager. */
 void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
   int store_conn = plasma_store_connect(s->store_socket_name);
-  plasma_buffer buf = { .object_id = req->object_id, .writable = 0 };
+  plasma_buffer buf = {.object_id = req->object_id, .writable = 0};
   plasma_get(store_conn, req->object_id, &buf.size, &buf.data);
-  
+
   char ip_addr[16];
-  snprintf(ip_addr, 32, "%d.%d.%d.%d",
-                    req->addr[0], req->addr[1],
-                    req->addr[2], req->addr[3]);
+  snprintf(ip_addr, 32, "%d.%d.%d.%d", req->addr[0], req->addr[1], req->addr[2],
+           req->addr[3]);
 
   int fd = plasma_manager_connect(&ip_addr[0], req->port);
-  data_connection conn = { .type = DATA_CONNECTION_WRITE,  .store_conn = store_conn, .buf = buf, .cursor = 0 };
+  data_connection conn = {.type = DATA_CONNECTION_WRITE,
+                          .store_conn = store_conn,
+                          .buf = buf,
+                          .cursor = 0};
   event_loop_attach(s->loop, CONNECTION_DATA, &conn, fd, POLLOUT);
 
-  plasma_request manager_req = { .type = PLASMA_DATA, .object_id = req->object_id, .size = buf.size };
+  plasma_request manager_req = {
+      .type = PLASMA_DATA, .object_id = req->object_id, .size = buf.size};
   plasma_send(fd, &manager_req);
 }
 
 /* Start reading data from another object manager.
  * Initializes the object we are going to write to in the
  * local plasma store and then switches the data socket to reading mode. */
-void start_reading_data(int64_t index, plasma_manager_state* s, plasma_request* req) {
+void start_reading_data(int64_t index,
+                        plasma_manager_state* s,
+                        plasma_request* req) {
   int store_conn = plasma_store_connect(s->store_socket_name);
-  plasma_buffer buf = { .object_id = req->object_id, .size = req->size, .writable = 1 };
+  plasma_buffer buf = {
+      .object_id = req->object_id, .size = req->size, .writable = 1};
   plasma_create(store_conn, req->object_id, req->size, &buf.data);
-  data_connection conn = { .type = DATA_CONNECTION_READ, .store_conn = store_conn, .buf = buf, .cursor = 0 };
+  data_connection conn = {.type = DATA_CONNECTION_READ,
+                          .store_conn = store_conn,
+                          .buf = buf,
+                          .cursor = 0};
   event_loop_set_connection(s->loop, index, &conn);
 }
 
 /* Handle a command request that came in through a socket (transfering data,
  * or accepting incoming data). */
-void process_command(int64_t id, plasma_manager_state* state, plasma_request* req) {
+void process_command(int64_t id,
+                     plasma_manager_state* state,
+                     plasma_request* req) {
   switch (req->type) {
   case PLASMA_TRANSFER:
     LOG_INFO("transfering object to manager with port %d", req->port);
@@ -91,63 +103,66 @@ void process_command(int64_t id, plasma_manager_state* state, plasma_request* re
 }
 
 /* Handle data or command event incoming on socket with index "index". */
-void read_from_socket(plasma_manager_state* state, struct pollfd *waiting, int64_t index, plasma_request* req) {
+void read_from_socket(plasma_manager_state* state,
+                      struct pollfd* waiting,
+                      int64_t index,
+                      plasma_request* req) {
   ssize_t r, s;
-  data_connection *conn = event_loop_get_connection(state->loop, index);
+  data_connection* conn = event_loop_get_connection(state->loop, index);
   switch (conn->type) {
-    case DATA_CONNECTION_HEADER:
-      r = read(waiting->fd, req, sizeof(plasma_request));
-      if (r == -1) {
-        LOG_ERR("read error");
-      } else if (r == 0) {
-        LOG_INFO("connection with id %" PRId64 " disconnected", index);
-        event_loop_detach(state->loop, index, 1);
+  case DATA_CONNECTION_HEADER:
+    r = read(waiting->fd, req, sizeof(plasma_request));
+    if (r == -1) {
+      LOG_ERR("read error");
+    } else if (r == 0) {
+      LOG_INFO("connection with id %" PRId64 " disconnected", index);
+      event_loop_detach(state->loop, index, 1);
+    } else {
+      process_command(index, state, req);
+    }
+    break;
+  case DATA_CONNECTION_READ:
+    LOG_DEBUG("polled DATA_CONNECTION_READ");
+    r = read(waiting->fd, conn->buf.data + conn->cursor, BUFSIZE);
+    if (r == -1) {
+      LOG_ERR("read error");
+    } else if (r == 0) {
+      LOG_INFO("end of file");
+    } else {
+      conn->cursor += r;
+    }
+    if (r == 0) {
+      LOG_DEBUG("reading on channel %" PRId64 " finished", index);
+      plasma_seal(conn->store_conn, conn->buf.object_id);
+      close(conn->store_conn);
+      event_loop_detach(state->loop, index, 1);
+    }
+    break;
+  case DATA_CONNECTION_WRITE:
+    LOG_DEBUG("polled DATA_CONNECTION_WRITE");
+    s = conn->buf.size - conn->cursor;
+    if (s > BUFSIZE)
+      s = BUFSIZE;
+    r = write(waiting->fd, conn->buf.data + conn->cursor, s);
+    if (r != s) {
+      if (r > 0) {
+        LOG_ERR("partial write on fd %d", waiting->fd);
       } else {
-        process_command(index, state, req);
+        LOG_ERR("write error");
+        exit(-1);
       }
-      break;
-    case DATA_CONNECTION_READ:
-      LOG_DEBUG("polled DATA_CONNECTION_READ");
-      r = read(waiting->fd, conn->buf.data + conn->cursor, BUFSIZE);
-      if (r == -1) {
-        LOG_ERR("read error");
-      } else if (r == 0) {
-        LOG_INFO("end of file");
-      } else {
-        conn->cursor += r;
-      }
-      if (r == 0) {
-        LOG_DEBUG("reading on channel %" PRId64 " finished", index);
-        plasma_seal(conn->store_conn, conn->buf.object_id);
-        close(conn->store_conn);
-        event_loop_detach(state->loop, index, 1);
-      }
-      break;
-    case DATA_CONNECTION_WRITE:
-      LOG_DEBUG("polled DATA_CONNECTION_WRITE");
-      s = conn->buf.size - conn->cursor;
-      if (s > BUFSIZE)
-        s = BUFSIZE;
-      r = write(waiting->fd, conn->buf.data + conn->cursor, s);
-      if (r != s) {
-        if (r > 0) {
-          LOG_ERR("partial write on fd %d", waiting->fd);
-        } else {
-          LOG_ERR("write error");
-          exit(-1);
-        }
-      } else {
-        conn->cursor += r;
-      }
-      if (r == 0) {
-        LOG_DEBUG("writing on channel %" PRId64 " finished", index);
-        close(conn->store_conn);
-        event_loop_detach(state->loop, index, 1);
-      }
-      break;
-    default:
-      LOG_ERR("invalid connection type");
-      exit(-1);
+    } else {
+      conn->cursor += r;
+    }
+    if (r == 0) {
+      LOG_DEBUG("writing on channel %" PRId64 " finished", index);
+      close(conn->store_conn);
+      event_loop_detach(state->loop, index, 1);
+    }
+    break;
+  default:
+    LOG_ERR("invalid connection type");
+    exit(-1);
   }
 }
 
@@ -163,7 +178,7 @@ void run_event_loop(int sock, plasma_manager_state* s) {
       exit(-1);
     }
     for (int i = 0; i < event_loop_size(s->loop); ++i) {
-      struct pollfd *waiting = event_loop_get(s->loop, i);
+      struct pollfd* waiting = event_loop_get(s->loop, i);
       if (waiting->revents == 0)
         continue;
       if (waiting->fd == sock) {
@@ -176,7 +191,7 @@ void run_event_loop(int sock, plasma_manager_state* s) {
           }
           break;
         }
-        data_connection conn = { .type = DATA_CONNECTION_HEADER };
+        data_connection conn = {.type = DATA_CONNECTION_HEADER};
         event_loop_attach(s->loop, CONNECTION_DATA, &conn, new_socket, POLLIN);
         LOG_INFO("new connection with id %" PRId64, event_loop_size(s->loop));
       } else {
@@ -186,7 +201,9 @@ void run_event_loop(int sock, plasma_manager_state* s) {
   }
 }
 
-void start_server(const char *store_socket_name, const char* master_addr, int port) {
+void start_server(const char* store_socket_name,
+                  const char* master_addr,
+                  int port) {
   struct sockaddr_in name;
   int sock = socket(PF_INET, SOCK_STREAM, 0);
   if (sock < 0) {
@@ -203,7 +220,7 @@ void start_server(const char *store_socket_name, const char* master_addr, int po
     close(sock);
     exit(-1);
   }
-  setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on));
+  setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
   if (bind(sock, (struct sockaddr*) &name, sizeof(name)) < 0) {
     LOG_ERR("could not bind socket");
     exit(-1);
@@ -220,9 +237,9 @@ void start_server(const char *store_socket_name, const char* master_addr, int po
 
 int main(int argc, char* argv[]) {
   /* Socket name of the plasma store this manager is connected to. */
-  char *store_socket_name = NULL;
+  char* store_socket_name = NULL;
   /* IP address of this node. */
-  char *master_addr = NULL;
+  char* master_addr = NULL;
   /* Port number the manager should use. */
   int port;
   int c;
@@ -243,11 +260,15 @@ int main(int argc, char* argv[]) {
     }
   }
   if (!store_socket_name) {
-    LOG_ERR("please specify socket for connecting to the plasma store with -s switch");
+    LOG_ERR(
+        "please specify socket for connecting to the plasma store with -s "
+        "switch");
     exit(-1);
   }
   if (!master_addr) {
-    LOG_ERR("please specify ip address of the current host in the format 123.456.789.10 with -m switch");
+    LOG_ERR(
+        "please specify ip address of the current host in the format "
+        "123.456.789.10 with -m switch");
     exit(-1);
   }
   start_server(store_socket_name, master_addr, port);
diff --git a/src/plasma_manager.h b/src/plasma_manager.h
index d0efe4b65..f7cf6b480 100644
--- a/src/plasma_manager.h
+++ b/src/plasma_manager.h
@@ -7,11 +7,7 @@
 /* The buffer size in bytes. Data will get transfered in multiples of this */
 #define BUFSIZE 4096
 
-enum connection_type {
-  CONNECTION_REDIS,
-  CONNECTION_LISTENER,
-  CONNECTION_DATA
-};
+enum connection_type { CONNECTION_REDIS, CONNECTION_LISTENER, CONNECTION_DATA };
 
 enum data_connection_type {
   /* Connection to send commands and metadata to the manager. */
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 1ddc6b06a..b6a64d1a6 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -9,7 +9,6 @@
  * It keeps a hash table that maps object_ids (which are 20 byte long,
  * just enough to store and SHA1 hash) to memory mapped files. */
 
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -30,7 +29,7 @@
 
 typedef struct {
   /* Event loop for the plasma store. */
-  event_loop *loop;
+  event_loop* loop;
 } plasma_store_state;
 
 void init_state(plasma_store_state* s) {
@@ -103,41 +102,42 @@ void create_object(int conn, plasma_request* req) {
     LOG_ERR("could not create shared memory buffer");
     exit(-1);
   }
-  object_table_entry *entry = malloc(sizeof(object_table_entry));
+  object_table_entry* entry = malloc(sizeof(object_table_entry));
   memcpy(&entry->object_id, &req->object_id, 20);
   entry->info.size = req->size;
   /* TODO(pcm): set the other fields */
   entry->fd = fd;
   HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
-  plasma_reply reply = { PLASMA_OBJECT, req->size };
+  plasma_reply reply = {PLASMA_OBJECT, req->size};
   send_fd(conn, fd, (char*) &reply, sizeof(plasma_reply));
 }
 
 /* Get an object from the hash table. */
 void get_object(int conn, plasma_request* req) {
-  object_table_entry *entry;
+  object_table_entry* entry;
   HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
   if (entry) {
-    plasma_reply reply = { PLASMA_OBJECT, entry->info.size };
+    plasma_reply reply = {PLASMA_OBJECT, entry->info.size};
     send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
   } else {
     LOG_INFO("object not in hash table of sealed objects");
     int fd[2];
     socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
-    object_notify_entry *notify_entry = malloc(sizeof(object_notify_entry));
+    object_notify_entry* notify_entry = malloc(sizeof(object_notify_entry));
     memcpy(&notify_entry->object_id, &req->object_id, 20);
     notify_entry->conn[notify_entry->num_waiting] = fd[0];
     notify_entry->num_waiting += 1;
-    HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id), notify_entry);
-    plasma_reply reply = { PLASMA_FUTURE, -1 };
+    HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id),
+             notify_entry);
+    plasma_reply reply = {PLASMA_FUTURE, -1};
     send_fd(conn, fd[1], (char*) &reply, sizeof(plasma_reply));
   }
 }
 
 /* Seal an object that has been created in the hash table. */
 void seal_object(int conn, plasma_request* req) {
-  LOG_INFO("sealing object"); // TODO(pcm): add object_id here
-  object_table_entry *entry;
+  LOG_INFO("sealing object");  // TODO(pcm): add object_id here
+  object_table_entry* entry;
   HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
   if (!entry) {
     return; /* TODO(pcm): return error */
@@ -148,11 +148,12 @@ void seal_object(int conn, plasma_request* req) {
   HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
   /* Inform processes that the object is ready now. */
   object_notify_entry* notify_entry;
-  HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id), notify_entry);
+  HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id),
+            notify_entry);
   if (!notify_entry) {
     return;
   }
-  plasma_reply reply = { PLASMA_OBJECT, size };
+  plasma_reply reply = {PLASMA_OBJECT, size};
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
     send_fd(notify_entry->conn[i], fd, (char*) &reply, sizeof(plasma_reply));
     close(notify_entry->conn[i]);
@@ -190,7 +191,7 @@ void run_event_loop(int socket) {
       exit(-1);
     }
     for (int i = 0; i < event_loop_size(state.loop); ++i) {
-      struct pollfd *waiting = event_loop_get(state.loop, i);
+      struct pollfd* waiting = event_loop_get(state.loop, i);
       if (waiting->revents == 0)
         continue;
       if (waiting->fd == socket) {
@@ -230,7 +231,7 @@ void start_server(char* socket_name) {
     exit(-1);
   }
   int on = 1;
-  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) < 0) {
+  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*) &on, sizeof(on)) < 0) {
     LOG_ERR("setsockopt failed");
     close(fd);
     exit(-1);
@@ -244,15 +245,15 @@ void start_server(char* socket_name) {
   struct sockaddr_un addr;
   memset(&addr, 0, sizeof(addr));
   addr.sun_family = AF_UNIX;
-  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);
+  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
   unlink(socket_name);
-  bind(fd, (struct sockaddr*)&addr, sizeof(addr));
+  bind(fd, (struct sockaddr*) &addr, sizeof(addr));
   listen(fd, 5);
   run_event_loop(fd);
 }
 
 int main(int argc, char* argv[]) {
-  char *socket_name = NULL;
+  char* socket_name = NULL;
   int c;
   while ((c = getopt(argc, argv, "s:")) != -1) {
     switch (c) {

From d52bf7d1468544a89cd49742e14d3dd94df34b45 Mon Sep 17 00:00:00 2001
From: Richard Shin <shin.richard@gmail.com>
Date: Sat, 10 Sep 2016 16:39:24 -0700
Subject: [PATCH 18/91] Use dlmalloc to manage shared memory (#15)

* Use dlmalloc to manage shared memory

* add stresstest
---
 .gitignore                               |    1 +
 .travis/check-git-clang-format-output.sh |    2 +-
 .travis/git-clang-format                 |    7 +-
 Makefile                                 |    6 +-
 src/fling.c                              |    1 +
 src/malloc.c                             |  123 +
 src/malloc.h                             |    9 +
 src/plasma.h                             |   15 +-
 src/plasma_client.c                      |   19 +-
 src/plasma_store.c                       |   70 +-
 test/test.py                             |   16 +
 third_party/dlmalloc.c                   | 6280 ++++++++++++++++++++++
 12 files changed, 6496 insertions(+), 53 deletions(-)
 create mode 100644 src/malloc.c
 create mode 100644 src/malloc.h
 create mode 100644 third_party/dlmalloc.c

diff --git a/.gitignore b/.gitignore
index faf17a085..ea25290f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 build/*
 *~
+*.pyc
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
index ea84041a1..c3431f68b 100755
--- a/.travis/check-git-clang-format-output.sh
+++ b/.travis/check-git-clang-format-output.sh
@@ -7,7 +7,7 @@ else
   base_commit="$TRAVIS_BRANCH"
   echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
 fi
-output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff)"
+output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^third_party/)"
 if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
   echo "clang-format passed."
   exit 0
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
index 0c45762ea..116635ab7 100755
--- a/.travis/git-clang-format
+++ b/.travis/git-clang-format
@@ -97,6 +97,7 @@ def main():
                                     default_extensions),
                  help=('comma-separated list of file extensions to format, '
                        'excluding the period and case-insensitive')),
+  p.add_argument('--exclude', help='Exclude files matching this regex.')
   p.add_argument('-f', '--force', action='store_true',
                  help='allow changes to unstaged files')
   p.add_argument('-p', '--patch', action='store_true',
@@ -125,10 +126,14 @@ def main():
   if opts.verbose >= 1:
     ignored_files = set(changed_lines)
   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
+  if opts.exclude:
+    for filename in changed_lines.keys():
+      if re.match(opts.exclude, filename):
+        del changed_lines[filename]
   if opts.verbose >= 1:
     ignored_files.difference_update(changed_lines)
     if ignored_files:
-      print 'Ignoring changes in the following files (wrong extension):'
+      print 'Ignoring changes in the following files:'
       for filename in ignored_files:
         print '   ', filename
     if changed_lines:
diff --git a/Makefile b/Makefile
index 64f66c980..c317cc938 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -I.
 BUILD = build
 
 all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
@@ -7,8 +7,8 @@ all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(B
 clean:
 	rm -r $(BUILD)/*
 
-$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) src/plasma_store.c src/event_loop.c src/fling.c -o $(BUILD)/plasma_store
+$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c src/malloc.c src/malloc.h third_party/dlmalloc.c
+	$(CC) $(CFLAGS) src/plasma_store.c src/event_loop.c src/fling.c src/malloc.c -o $(BUILD)/plasma_store
 
 $(BUILD)/plasma_manager: src/plasma_manager.c src/event_loop.h src/event_loop.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
 	$(CC) $(CFLAGS) src/plasma_manager.c src/event_loop.c src/plasma_client.c src/fling.c -o $(BUILD)/plasma_manager
diff --git a/src/fling.c b/src/fling.c
index 6e4d8cf0a..6363fdfb0 100644
--- a/src/fling.c
+++ b/src/fling.c
@@ -19,6 +19,7 @@ int send_fd(int conn, int fd, const char *payload, int size) {
   struct msghdr msg;
   struct iovec iov;
   char buf[CMSG_SPACE(sizeof(int))];
+  memset(&buf, 0, CMSG_SPACE(sizeof(int)));
 
   init_msg(&msg, &iov, buf, sizeof(buf));
 
diff --git a/src/malloc.c b/src/malloc.c
new file mode 100644
index 000000000..bbfccd462
--- /dev/null
+++ b/src/malloc.c
@@ -0,0 +1,166 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "plasma.h"
+#include "uthash.h"
+
+void *fake_mmap(size_t);
+int fake_munmap(void *, size_t);
+
+/* Configure dlmalloc to obtain and release all of its memory through the
+ * file-backed fake_mmap()/fake_munmap() defined below (never sbrk), and to
+ * export its API with a "dl" prefix instead of replacing malloc()/free(). */
+#define MMAP(s) fake_mmap(s)
+#define MUNMAP(a, s) fake_munmap(a, s)
+#define DIRECT_MMAP(s) fake_mmap(s)
+#define DIRECT_MUNMAP(a, s) fake_munmap(a, s)
+#define USE_DL_PREFIX
+#define HAVE_MORECORE 0
+
+#include "third_party/dlmalloc.c"
+
+#undef MMAP
+#undef MUNMAP
+#undef DIRECT_MMAP
+#undef DIRECT_MUNMAP
+#undef USE_DL_PREFIX
+#undef HAVE_MORECORE
+
+/* Bookkeeping for one file-backed mapping created by fake_mmap(). */
+struct mmap_record {
+  /* Descriptor of the unlinked file that backs the mapping. */
+  int fd;
+  /* Start of the mapping, i.e. the address of byte 0 of the file. */
+  void *pointer;
+  /* Length of the mapping (and of the file) in bytes. */
+  int64_t size;
+  /* uthash handle for records_by_fd. */
+  UT_hash_handle hh_fd;
+  /* uthash handle for records_by_pointer. */
+  UT_hash_handle hh_pointer;
+};
+
+/* All live mappings, keyed by file descriptor. */
+struct mmap_record *records_by_fd = NULL;
+/* The same records, keyed by mapping start address. */
+struct mmap_record *records_by_pointer = NULL;
+
+/* Create an unnamed file of "size" bytes to back a shared memory mapping. The
+ * file is created with mkstemp() and unlinked immediately so that no trace is
+ * left in the file system. Returns the open descriptor, or -1 on failure; on
+ * failure the descriptor is closed again so it does not leak. */
+int create_buffer(int64_t size) {
+  char file_name[] = "/tmp/plasmaXXXXXX";
+  int fd = mkstemp(file_name);
+  if (fd < 0) {
+    LOG_ERR("mkstemp error");
+    return -1;
+  }
+  if (unlink(file_name) != 0) {
+    LOG_ERR("unlink error");
+    close(fd);
+    return -1;
+  }
+  if (ftruncate(fd, (off_t) size) != 0) {
+    LOG_ERR("ftruncate error");
+    close(fd);
+    return -1;
+  }
+  return fd;
+}
+
+/* mmap() replacement used by dlmalloc. Maps a fresh unlinked file that is
+ * sizeof(size_t) bytes longer than "size" and returns a pointer just past that
+ * prefix. The returned pointer is therefore deliberately not page-aligned,
+ * which ensures that the segments returned by fake_mmap are never contiguous.
+ * Returns MAP_FAILED on failure. */
+void *fake_mmap(size_t size) {
+  size_t map_size = size + sizeof(size_t);
+  struct mmap_record *record = malloc(sizeof(*record));
+  if (record == NULL) {
+    return MAP_FAILED;
+  }
+  int fd = create_buffer(map_size);
+  if (fd < 0) {
+    free(record);
+    return MAP_FAILED;
+  }
+  char *base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (base == MAP_FAILED) {
+    close(fd);
+    free(record);
+    return MAP_FAILED;
+  }
+  /* Record the true start and length of the mapping, not the shifted pointer:
+   * fake_munmap() looks records up by the unshifted address, and a process
+   * that maps the descriptor sees the file from byte 0, so the offset and size
+   * reported by get_malloc_mapinfo() have to be relative to that. */
+  record->fd = fd;
+  record->pointer = base;
+  record->size = map_size;
+  HASH_ADD(hh_fd, records_by_fd, fd, sizeof(record->fd), record);
+  HASH_ADD(hh_pointer, records_by_pointer, pointer, sizeof(record->pointer),
+           record);
+
+  void *result = base + sizeof(size_t);
+  LOG_DEBUG("%p = fake_mmap(%zu)", result, size);
+  return result;
+}
+
+/* munmap() replacement used by dlmalloc. "addr" and "size" are the values
+ * fake_mmap() returned and was called with; the real mapping starts
+ * sizeof(size_t) bytes earlier and is that much longer. A request that does
+ * not match a whole mapping made by fake_mmap() is rejected with -1 and
+ * nothing is unmapped. Returns 0 on success and -1 on failure. */
+int fake_munmap(void *addr, size_t size) {
+  LOG_DEBUG("fake_munmap(%p, %zu)", addr, size);
+
+  void *base = (char *) addr - sizeof(size_t);
+  size_t map_size = size + sizeof(size_t);
+  struct mmap_record *record;
+  HASH_FIND(hh_pointer, records_by_pointer, &base, sizeof(base), record);
+  if (record == NULL || record->size != (int64_t) map_size) {
+    return -1;
+  }
+  if (munmap(base, map_size) != 0) {
+    return -1;
+  }
+
+  /* The mapping is gone; drop the bookkeeping and the backing file. */
+  HASH_DELETE(hh_fd, records_by_fd, record);
+  HASH_DELETE(hh_pointer, records_by_pointer, record);
+  close(record->fd);
+  free(record);
+  return 0;
+}
+
+/* Find the mapping that contains "addr". On success stores the descriptor of
+ * the backing file in "*fd", the length of the mapping in "*map_size" and the
+ * distance of "addr" from the start of the mapping in "*offset". If "addr" is
+ * not inside any mapping made by fake_mmap(), "*fd" is set to -1 and the other
+ * two outputs are set to 0. */
+void get_malloc_mapinfo(void *addr,
+                        int *fd,
+                        int64_t *map_size,
+                        ptrdiff_t *offset) {
+  const char *target = addr;
+  struct mmap_record *record;
+  // TODO(rshin): Implement a more efficient search through records_by_fd.
+  for (record = records_by_fd; record != NULL; record = record->hh_fd.next) {
+    const char *start = record->pointer;
+    if (target >= start && target < start + record->size) {
+      *fd = record->fd;
+      *map_size = record->size;
+      *offset = target - start;
+      return;
+    }
+  }
+  *fd = -1;
+  *map_size = 0;
+  *offset = 0;
+}
diff --git a/src/malloc.h b/src/malloc.h
new file mode 100644
index 000000000..2b7395eba
--- /dev/null
+++ b/src/malloc.h
@@ -0,0 +1,17 @@
+#ifndef MALLOC_H
+#define MALLOC_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Look up the shared memory mapping that contains "addr". On return "*fd" is
+ * the descriptor of the file backing that mapping, "*map_length" the length
+ * recorded for the mapping and "*offset" the distance of "addr" from the
+ * mapping's recorded start. If "addr" does not lie inside any recorded
+ * mapping, "*fd" is -1 and the other two outputs are 0. */
+void get_malloc_mapinfo(void *addr,
+                        int *fd,
+                        int64_t *map_length,
+                        ptrdiff_t *offset);
+
+#endif  // MALLOC_H
diff --git a/src/plasma.h b/src/plasma.h
index 017b85a3a..0bcc2615f 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -4,6 +4,7 @@
 #include <inttypes.h>
 #include <stdio.h>
 #include <errno.h>
+#include <stddef.h>
 #include <string.h>
 
 #ifdef NDEBUG
@@ -13,13 +14,17 @@
   fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
 #endif
 
+#ifdef PLASMA_LOGGIN_ON
+#define LOG_INFO(M, ...) \
+  fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#else
+#define LOG_INFO(M, ...)
+#endif
+
 #define LOG_ERR(M, ...)                                                     \
   fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
           errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
 
-#define LOG_INFO(M, ...) \
-  fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
-
 typedef struct {
   int64_t size;
   int64_t create_time;
@@ -59,7 +64,9 @@ enum plasma_reply_type {
 
 typedef struct {
   int type;
-  int64_t size;
+  ptrdiff_t offset;
+  int64_t map_size;
+  int64_t object_size;
 } plasma_reply;
 
 typedef struct {
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 76eaac154..4442555c4 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -31,12 +31,15 @@ void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
   plasma_reply reply;
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
   assert(reply.type == PLASMA_OBJECT);
-  assert(reply.size == size);
-  *data = mmap(NULL, reply.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  assert(reply.object_size == size); /* the store must echo back the requested size */
+  *data = /* map the whole region, then step to this object's offset inside it */
+      mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) +
+      reply.offset; /* NOTE(review): added before the MAP_FAILED test below, which then misses a failed mmap when offset != 0 */
   if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
+  close(fd); /* the mapping remains valid after the descriptor is closed */
 }
 
 void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
@@ -51,12 +54,14 @@ void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
     fd = new_fd;
   }
   assert(reply.type == PLASMA_OBJECT);
-  *data = mmap(NULL, reply.size, PROT_READ, MAP_SHARED, fd, 0);
+  *data = /* read-only map of the whole region, then step to this object's offset */
+      mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0) + reply.offset; /* NOTE(review): offset is added before the MAP_FAILED test below, which then misses a failed mmap when offset != 0 */
   if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
-  *size = reply.size;
+  close(fd); /* the mapping remains valid after the descriptor is closed */
+  *size = reply.object_size; /* report the object's size, not the size of the mapping */
 }
 
 void plasma_seal(int fd, plasma_id object_id) {
@@ -116,8 +121,10 @@ int plasma_manager_connect(const char *ip_addr, int port) {
 
   int r = connect(fd, (struct sockaddr *) &addr, sizeof(addr));
   if (r < 0) {
-    LOG_ERR("could not establish connection to manager with id %s:%d",
-            &ip_addr[0], port);
+    LOG_ERR(
+        "could not establish connection to manager with id %s:%d (probably ran "
+        "out of ports)",
+        &ip_addr[0], port);
     exit(-1);
   }
   return fd;
diff --git a/src/plasma_store.c b/src/plasma_store.c
index b6a64d1a6..386107a6a 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -9,6 +9,7 @@
  * It keeps a hash table that maps object_ids (which are 20 byte long,
  * just enough to store and SHA1 hash) to memory mapped files. */
 
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -22,11 +23,14 @@
 
 #include "uthash.h"
 #include "fling.h"
+#include "malloc.h"
 #include "plasma.h"
 #include "event_loop.h"
 
 #define MAX_NUM_CLIENTS 100000
 
+void* dlmalloc(size_t);
+
 typedef struct {
   /* Event loop for the plasma store. */
   event_loop* loop;
@@ -44,6 +48,10 @@ typedef struct {
   plasma_object_info info;
   /* Memory mapped file containing the object. */
   int fd;
+  /* Size of the underlying map. */
+  int64_t map_size;
+  /* Offset from the base of the mmap. */
+  ptrdiff_t offset;
   /* Handle for the uthash table. */
   UT_hash_handle handle;
 } object_table_entry;
@@ -69,47 +77,32 @@ typedef struct {
 /* Objects that processes are waiting for. */
 object_notify_entry* objects_notify = NULL;
 
-/* Create a buffer. This is creating a temporary file and then
- * immediately unlinking it so we do not leave traces in the system. */
-int create_buffer(int64_t size) {
-  static char template[] = "/tmp/plasmaXXXXXX";
-  char file_name[32];
-  strncpy(file_name, template, 32);
-  int fd = mkstemp(file_name);
-  if (fd < 0)
-    return -1;
-  FILE* file = fdopen(fd, "a+");
-  if (!file) {
-    close(fd);
-    return -1;
-  }
-  if (unlink(file_name) != 0) {
-    LOG_ERR("unlink error");
-    return -1;
-  }
-  if (ftruncate(fd, (off_t) size) != 0) {
-    LOG_ERR("ftruncate error");
-    return -1;
-  }
-  return fd;
-}
-
 /* Create a new object buffer in the hash table. */
 void create_object(int conn, plasma_request* req) {
   LOG_INFO("creating object"); /* TODO(pcm): add object_id here */
-  int fd = create_buffer(req->size);
-  if (fd < 0) {
-    LOG_ERR("could not create shared memory buffer");
-    exit(-1);
-  }
+  /* Allocate the object with dlmalloc, then look up the mapping that holds it. */
+  void* pointer = dlmalloc(req->size); /* NOTE(review): a NULL result is not checked */
+  int fd;
+  int64_t map_size;
+  ptrdiff_t offset;
+  get_malloc_mapinfo(pointer, &fd, &map_size, &offset);
+  assert(fd != -1); /* NOTE(review): this check is compiled out when NDEBUG is defined */
+  /* Track the object in open_objects; seal_object later moves it to sealed_objects. */
   object_table_entry* entry = malloc(sizeof(object_table_entry));
   memcpy(&entry->object_id, &req->object_id, 20);
   entry->info.size = req->size;
   /* TODO(pcm): set the other fields */
   entry->fd = fd;
+  entry->map_size = map_size; /* kept so later replies can describe the mapping */
+  entry->offset = offset;
   HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
-  plasma_reply reply = {PLASMA_OBJECT, req->size};
-  send_fd(conn, fd, (char*) &reply, sizeof(plasma_reply));
+  plasma_reply reply;
+  memset(&reply, 0, sizeof(reply)); /* zero padding too: the struct is sent as raw bytes */
+  reply.type = PLASMA_OBJECT;
+  reply.offset = offset;
+  reply.map_size = map_size;
+  reply.object_size = req->size;
+  send_fd(conn, fd, (char*) &reply, sizeof(reply)); /* the client receives fd together with the reply */
 }
 
 /* Get an object from the hash table. */
@@ -117,7 +110,8 @@ void get_object(int conn, plasma_request* req) {
   object_table_entry* entry;
   HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
   if (entry) {
-    plasma_reply reply = {PLASMA_OBJECT, entry->info.size};
+    plasma_reply reply = {PLASMA_OBJECT, entry->offset, entry->map_size,
+                          entry->info.size};
     send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
   } else {
     LOG_INFO("object not in hash table of sealed objects");
@@ -129,7 +123,7 @@ void get_object(int conn, plasma_request* req) {
     notify_entry->num_waiting += 1;
     HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id),
              notify_entry);
-    plasma_reply reply = {PLASMA_FUTURE, -1};
+    plasma_reply reply = {PLASMA_FUTURE, 0, 0, -1};
     send_fd(conn, fd[1], (char*) &reply, sizeof(plasma_reply));
   }
 }
@@ -143,8 +137,6 @@ void seal_object(int conn, plasma_request* req) {
     return; /* TODO(pcm): return error */
   }
   HASH_DELETE(handle, open_objects, entry);
-  int64_t size = entry->info.size;
-  int fd = entry->fd;
   HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
   /* Inform processes that the object is ready now. */
   object_notify_entry* notify_entry;
@@ -153,9 +145,11 @@ void seal_object(int conn, plasma_request* req) {
   if (!notify_entry) {
     return;
   }
-  plasma_reply reply = {PLASMA_OBJECT, size};
+  plasma_reply reply = {PLASMA_OBJECT, entry->offset, entry->map_size,
+                        entry->info.size};
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
-    send_fd(notify_entry->conn[i], fd, (char*) &reply, sizeof(plasma_reply));
+    send_fd(notify_entry->conn[i], entry->fd, (char*) &reply,
+            sizeof(plasma_reply));
     close(notify_entry->conn[i]);
   }
   HASH_DELETE(handle, objects_notify, notify_entry);
diff --git a/test/test.py b/test/test.py
index 0fb54d3f2..6b1f8c546 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+
 import os
 import socket
 import subprocess
@@ -130,5 +132,19 @@ class TestPlasmaManager(unittest.TestCase):
     # # Transferring the buffer before sealing it should fail.
     # self.assertRaises(Exception, lambda : self.manager1.transfer(1, object_id))
 
+  def test_stresstest(self):
+    a = time.time()  # start timestamp for the whole create/seal/transfer run
+    object_ids = []
+    for i in range(10000): # TODO(pcm): increase this to 100000
+      object_id = random_object_id()
+      object_ids.append(object_id)
+      self.client1.create(object_id, 1)
+      self.client1.seal(object_id)
+    for object_id in object_ids:
+      self.client1.transfer("127.0.0.1", self.port2, object_id)
+    b = time.time() - a  # elapsed seconds
+    # The timing is only printed; this test asserts nothing about the transfers.
+    print("it took", b, "seconds to put and transfer the objects")
+
 if __name__ == "__main__":
   unittest.main(verbosity=2)
diff --git a/third_party/dlmalloc.c b/third_party/dlmalloc.c
new file mode 100644
index 000000000..649cfbc70
--- /dev/null
+++ b/third_party/dlmalloc.c
@@ -0,0 +1,6280 @@
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain, as explained at
+  http://creativecommons.org/publicdomain/zero/1.0/ Send questions,
+  comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.6 Wed Aug 29 06:57:58 2012  Doug Lea
+   Note: There may be an updated version of this malloc obtainable at
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+         Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O3), and link it into another program. All of
+  the compile-time options default to reasonable values for use on
+  most platforms.  You might later want to step through various
+  compile-time and dynamic tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files.  The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters.  If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below. Note that you may already by default be using a C
+  library containing a malloc that is based on some version of this
+  malloc (for example in linux). You might still want to use the one
+  in this file to customize settings or to avoid overheads associated
+  with library versions.
+
+* Vital statistics:
+
+  Supported pointer/size_t representation:       4 or 8 bytes
+       size_t MUST be an unsigned type of the same width as
+       pointers. (If you are using an ancient system that declares
+       size_t as a signed type, or need it to be a different width
+       than pointers, you can use a previous release of this malloc
+       (e.g. 2.7.2) supporting these.)
+
+  Alignment:                                     8 bytes (minimum)
+       This suffices for nearly all current machines and C compilers.
+       However, you can define MALLOC_ALIGNMENT to be wider than this
+       if necessary (up to 128bytes), at the expense of using more space.
+
+  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
+                                          8 or 16 bytes (if 8byte sizes)
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information, and additional cross-check word
+       if FOOTERS is defined.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
+                          8-byte ptrs:  32 bytes    (including overhead)
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is about
+       32 bytes plus the remainder from a system page (the minimal
+       mmap unit); typically 4096 or 8192 bytes.
+
+  Security: static-safe; optionally more or less
+       The "security" of malloc refers to the ability of malicious
+       code to accentuate the effects of errors (for example, freeing
+       space that is not currently malloc'ed or overwriting past the
+       ends of chunks) in code that calls malloc.  This malloc
+       guarantees not to modify any memory locations below the base of
+       heap, i.e., static variables, even in the presence of usage
+       errors.  The routines additionally detect most improper frees
+       and reallocs.  All this holds as long as the static bookkeeping
+       for malloc itself is not corrupted by some other means.  This
+       is only one aspect of security -- these checks do not, and
+       cannot, detect all possible programming errors.
+
+       If FOOTERS is defined nonzero, then each allocated chunk
+       carries an additional check word to verify that it was malloced
+       from its space.  These check words are the same within each
+       execution of a program using malloc, but differ across
+       executions, so externally crafted fake chunks cannot be
+       freed. This improves security by rejecting frees/reallocs that
+       could corrupt heap memory, in addition to the checks preventing
+       writes to statics that are always on.  This may further improve
+       security at the expense of time and space overhead.  (Note that
+       FOOTERS may also be worth using with MSPACES.)
+
+       By default detected errors cause the program to abort (calling
+       "abort()"). You can override this to instead proceed past
+       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
+       has no effect, and a malloc that encounters a bad address
+       caused by user overwrites will ignore the bad address by
+       dropping pointers and indices to all known memory. This may
+       be appropriate for programs that should continue if at all
+       possible in the face of programming errors, although they may
+       run out of memory because dropped memory is never reclaimed.
+
+       If you don't like either of these options, you can define
+       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+       else. And if you are sure that your program using malloc has
+       no errors or vulnerabilities, you can define INSECURE to 1,
+       which might (or might not) provide a small performance improvement.
+
+       It is also possible to limit the maximum total allocatable
+       space, using malloc_set_footprint_limit. This is not
+       designed as a security feature in itself (calls to set limits
+       are not screened or privileged), but may be useful as one
+       aspect of a secure implementation.
+
+  Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero
+       When USE_LOCKS is defined, each public call to malloc, free,
+       etc is surrounded with a lock. By default, this uses a plain
+       pthread mutex, win32 critical section, or a spin-lock if
+       available for the platform and not disabled by setting
+       USE_SPIN_LOCKS=0.  However, if USE_RECURSIVE_LOCKS is defined,
+       recursive versions are used instead (which are not required for
+       base functionality but may be needed in layered extensions).
+       Using a global lock is not especially fast, and can be a major
+       bottleneck.  It is designed only to provide minimal protection
+       in concurrent environments, and to provide a basis for
+       extensions.  If you are using malloc in a concurrent program,
+       consider instead using nedmalloc
+       (http://www.nedprod.com/programs/portable/nedmalloc/) or
+       ptmalloc (See http://www.malloc.de), which are derived from
+       versions of this malloc.
+
+  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+       This malloc can use unix sbrk or any emulation (invoked using
+       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+       memory.  On most unix systems, it tends to work best if both
+       MORECORE and MMAP are enabled.  On Win32, it uses emulations
+       based on VirtualAlloc. It also uses common C library functions
+       like memset.
+
+  Compliance: I believe it is compliant with the Single Unix Specification
+       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Overview of algorithms
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and
+  tunable.  Consistent balance across these factors results in a good
+  general-purpose allocator for malloc-intensive programs.
+
+  In most ways, this malloc is a best-fit allocator. Generally, it
+  chooses the best-fitting existing chunk for a request, with ties
+  broken in approximately least-recently-used order. (This strategy
+  normally maintains low fragmentation.) However, for requests less
+  than 256bytes, it deviates from best-fit when there is not an
+  exactly fitting available chunk by preferring to use space adjacent
+  to that used for the previous small request, as well as by breaking
+  ties in approximately most-recently-used order. (These enhance
+  locality of series of small allocations.)  And for very large requests
+  (>= 256Kb by default), it relies on system memory mapping
+  facilities, if supported.  (This helps avoid carrying around and
+  possibly fragmenting memory used only for large chunks.)
+
+  All operations (except malloc_stats and mallinfo) have execution
+  times that are bounded by a constant factor of the number of bits in
+  a size_t, not counting any clearing in calloc or copying in realloc,
+  or actions surrounding MORECORE and MMAP that have times
+  proportional to the number of non-contiguous regions returned by
+  system allocation routines, which is often just 1. In real-time
+  applications, you can optionally suppress segment traversals using
+  NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+  system allocators return non-contiguous spaces, at the typical
+  expense of carrying around more memory and increased fragmentation.
+
+  The implementation is not very modular and seriously overuses
+  macros. Perhaps someday all C compilers will do as good a job
+  inlining modular code as can now be done by brute-force expansion,
+  but now, enough of them seem not to.
+
+  Some compilers issue a lot of warnings about code that is
+  dead/unreachable only on some platforms, and also about intentional
+  uses of negation on unsigned types. All known cases of each can be
+  ignored.
+
+  For a longer but out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+  If MSPACES is defined, then in addition to malloc, free, etc.,
+  this file also defines mspace_malloc, mspace_free, etc. These
+  are versions of malloc routines that take an "mspace" argument
+  obtained using create_mspace, to control all internal bookkeeping.
+  If ONLY_MSPACES is defined, only these versions are compiled.
+  So if you would like to use this allocator for only some allocations,
+  and your system malloc for others, you can compile with
+  ONLY_MSPACES and then do something like...
+    static mspace mymspace = create_mspace(0,0); // for example
+    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
+
+  (Note: If you only need one instance of an mspace, you can instead
+  use "USE_DL_PREFIX" to relabel the global malloc.)
+
+  You can similarly create thread-local allocators by storing
+  mspaces as thread-locals. For example:
+    static __thread mspace tlms = 0;
+    void*  tlmalloc(size_t bytes) {
+      if (tlms == 0) tlms = create_mspace(0, 0);
+      return mspace_malloc(tlms, bytes);
+    }
+    void  tlfree(void* mem) { mspace_free(tlms, mem); }
+
+  Unless FOOTERS is defined, each mspace is completely independent.
+  You cannot allocate from one and free to another (although
+  conformance is only weakly checked, so usage errors are not always
+  caught). If FOOTERS is defined, then each chunk carries around a tag
+  indicating its originating mspace, and frees are directed to their
+  originating spaces. Normally, this requires use of locks.
+
+ -------------------------  Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly cast. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32                    default: defined if _WIN32 defined
+  Defining WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix. Beware that there seem to be some
+  cases where this malloc might not be a pure drop-in replacement for
+  Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
+  SetDIBits()) may be due to bugs in some video driver implementations
+  when pixel buffers are malloc()ed, and the region spans more than
+  one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
+  default granularity, pixel buffers may straddle virtual allocation
+  regions more often than when using the Microsoft allocator.  You can
+  avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+  buffers rather than using malloc().  If this is not possible,
+  recompile this malloc with a larger DEFAULT_GRANULARITY. Note:
+  in cases where MSC and gcc (cygwin) are known to differ on WIN32,
+  conditions use _MSC_VER to distinguish them.
+
+DLMALLOC_EXPORT       default: extern
+  Defines how public APIs are declared. If you want to export via a
+  Windows DLL, you might define this as
+    #define DLMALLOC_EXPORT extern  __declspec(dllexport)
+  If you want a POSIX ELF shared object, you might use
+    #define DLMALLOC_EXPORT extern __attribute__((visibility("default")))
+
+MALLOC_ALIGNMENT         default: (size_t)(2 * sizeof(void *))
+  Controls the minimum alignment for malloc'ed chunks.  It must be a
+  power of two and at least 8, even on machines for which smaller
+  alignments would suffice. It may be defined as larger than this
+  though. Note however that code and data structures are optimized for
+  the case of 8-byte alignment.
+
+MSPACES                  default: 0 (false)
+  If true, compile in support for independent allocation spaces.
+  This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES             default: 0 (false)
+  If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS                default: 0 (false)
+  Causes each call to each public routine to be surrounded with
+  pthread or WIN32 mutex lock/unlock. (If set true, this can be
+  overridden on a per-mspace basis for mspace versions.) If set to a
+  non-zero value other than 1, locks are used, but their
+  implementation is left out, so lock functions must be supplied manually,
+  as described below.
+
+USE_SPIN_LOCKS           default: 1 iff USE_LOCKS and spin locks available
+  If true, uses custom spin locks for locking. This is currently
+  supported only on gcc >= 4.1, older gccs on x86 platforms, and recent
+  MS compilers.  Otherwise, posix locks or win32 critical sections are
+  used.
+
+USE_RECURSIVE_LOCKS      default: not defined
+  If defined nonzero, uses recursive (aka reentrant) locks, otherwise
+  uses plain mutexes. This is not required for malloc proper, but may
+  be needed for layered allocators such as nedmalloc.
+
+LOCK_AT_FORK            default: not defined
+  If defined nonzero, performs pthread_atfork upon initialization
+  to initialize child lock while holding parent lock. The implementation
+  assumes that pthread locks (not custom locks) are being used. In other
+  cases, you may need to customize the implementation.
+
+FOOTERS                  default: 0
+  If true, provide extra checking and dispatching by placing
+  information in the footers of allocated chunks. This adds
+  space and time overhead.
+
+INSECURE                 default: 0
+  If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX            default: NOT defined
+  Causes compiler to prefix all public routines with the string 'dl'.
+  This can be useful when you only want to use this malloc in one part
+  of a program, using your regular system malloc elsewhere.
+
+MALLOC_INSPECT_ALL       default: NOT defined
+  If defined, compiles malloc_inspect_all and mspace_inspect_all, that
+  perform traversal of all heap space.  Unless access to these
+  functions is otherwise restricted, you probably do not want to
+  include them in secure implementations.
+
+ABORT                    default: defined as abort()
+  Defines how to abort on failed checks.  On most systems, a failed
+  check cannot die with an "assert" or even print an informative
+  message, because the underlying print routines in turn call malloc,
+  which will fail again.  Generally, the best policy is to simply call
+  abort(). It's not very useful to do more than this because many
+  errors due to overwriting will show up as address faults (null, odd
+  addresses etc) rather than malloc-triggered checks, so will also
+  abort.  Also, most compilers know that abort() does not return, so
+  can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR           default: defined as 0 (false)
+  Controls whether detected bad addresses cause them to be bypassed
+  rather than aborting. If set, detected bad arguments to free and
+  realloc are ignored. And all bookkeeping information is zeroed out
+  upon a detected overwrite of freed heap space, thus losing the
+  ability to ever return it from malloc again, but enabling the
+  application to proceed. If PROCEED_ON_ERROR is defined, the
+  static variable malloc_corruption_error_count is compiled in
+  and can be examined to see if errors have occurred. This option
+  generates slower code than the default abort policy.
+
+DEBUG                    default: NOT defined
+  The DEBUG setting is mainly intended for people trying to modify
+  this code or diagnose problems when porting to new platforms.
+  However, it may also be able to better isolate user errors than just
+  using runtime checks.  The assertions in the check routines spell
+  out in more detail the assumptions and invariants underlying the
+  algorithms.  The checking is fairly extensive, and will slow down
+  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+  set will attempt to check every non-mmapped allocated and free chunk
+  in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
+  Debugging assertion failures can be nearly impossible if your
+  version of the assert macro causes malloc to be called, which will
+  lead to a cascade of further failures, blowing the runtime stack.
+  ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
+  which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
+  The action to take before "return 0" when malloc fails to be able to
+  return memory because there is none available.
+
+HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
+  True if this system supports sbrk or an emulation of it.
+
+MORECORE                  default: sbrk
+  The name of the sbrk-style system routine to call to obtain more
+  memory.  See below for guidance on writing custom MORECORE
+  functions. The type of the argument to sbrk/MORECORE varies across
+  systems.  It cannot be size_t, because it supports negative
+  arguments, so it is normally the signed type of the same width as
+  size_t (sometimes declared as "intptr_t").  It doesn't much matter
+  though. Internally, we only call it with arguments less than half
+  the max value of a size_t, which should work across all reasonable
+  possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS       default: 1 (true) if HAVE_MORECORE
+  If true, take advantage of fact that consecutive calls to MORECORE
+  with positive arguments always return contiguous increasing
+  addresses.  This is true of unix sbrk. It does not hurt too much to
+  set it true anyway, since malloc copes with non-contiguities.
+  Setting it false when definitely non-contiguous saves time
+  and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM      default: NOT defined
+  True if MORECORE cannot release space back to the system when given
+  negative arguments. This is generally necessary only if you are
+  using a hand-crafted MORECORE function that cannot handle negative
+  arguments.
+
+NO_SEGMENT_TRAVERSAL       default: 0
+  If non-zero, suppresses traversals of memory segments
+  returned by either MORECORE or CALL_MMAP. This disables
+  merging of segments that are contiguous, and selectively
+  releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP                 default: 1 (true)
+  True if this system supports mmap or an emulation of it.  If so, and
+  HAVE_MORECORE is not true, MMAP is used for all system
+  allocation. If set and HAVE_MORECORE is true as well, MMAP is
+  primarily used to directly allocate very large blocks. It is also
+  used as a backup strategy in cases where MORECORE fails to provide
+  space from system. Note: A single call to MUNMAP is assumed to be
+  able to unmap memory that may have been allocated using multiple calls
+  to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP               default: 1 on linux, else 0
+  If true realloc() uses mremap() to re-allocate large blocks and
+  extend or shrink allocation spaces.
+
+MMAP_CLEARS               default: 1 except on WINCE.
+  True if mmap clears memory so calloc doesn't need to. This is true
+  for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS            default: 0 (i.e., not used)
+  Causes malloc to use the builtin ffs() function to compute indices.
+  Some compilers may recognize and intrinsify ffs to be faster than the
+  supplied C version. Also, the case of x86 using gcc is special-cased
+  to an asm instruction, so is already as fast as it can be, and so
+  this setting has no effect. Similarly for Win32 under recent MS compilers.
+  (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize         default: derive from system includes, or 4096.
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  This may be (and
+  usually is) a function rather than a constant. This is ignored
+  if WIN32, where page size is determined using getSystemInfo during
+  initialization.
+
+USE_DEV_RANDOM             default: 0 (i.e., not used)
+  Causes malloc to use /dev/random to initialize secure magic seed for
+  stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO                default: 0
+  If defined, don't compile "mallinfo". This can be a simple way
+  of dealing with mismatches between system declarations and
+  those in this file.
+
+MALLINFO_FIELD_TYPE        default: size_t
+  The type of the fields in the mallinfo struct. This was originally
+  defined as "int" in SVID etc, but is more usefully defined as
+  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
+
+NO_MALLOC_STATS            default: 0
+  If defined, don't compile "malloc_stats". This avoids calls to
+  fprintf and bringing in stdio dependencies you might not want.
+
+REALLOC_ZERO_BYTES_FREES    default: not defined
+  This should be set if a call to realloc with zero bytes should
+  be the same as a call to free. Some people think it should. Otherwise,
+  since this malloc returns a unique pointer for malloc(0), so does
+  realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
+LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H  default: NOT defined unless on WIN32
+  Define these if your system does not have these header files.
+  You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
+                                system_info.dwAllocationGranularity in WIN32,
+                                otherwise 64K.
+      Also settable using mallopt(M_GRANULARITY, x)
+  The unit for allocating and deallocating memory from the system.  On
+  most systems with contiguous MORECORE, there is no reason to
+  make this more than a page. However, systems with MMAP tend to
+  either require or encourage larger granularities.  You can increase
+  this value to prevent system allocation functions from being called so
+  often, especially if they are slow.  The value must be at least one
+  page and must be a power of two.  Setting to 0 causes initialization
+  to either page size or win32 region size.  (Note: In previous
+  versions of malloc, the equivalent of this option was called
+  "TOP_PAD")
+
+DEFAULT_TRIM_THRESHOLD    default: 2MB
+      Also settable using mallopt(M_TRIM_THRESHOLD, x)
+  The maximum amount of unused top-most memory to keep before
+  releasing via malloc_trim in free().  Automatic trimming is mainly
+  useful in long-lived programs using contiguous MORECORE.  Because
+  trimming via sbrk can be slow on some systems, and can sometimes be
+  wasteful (in cases where programs immediately afterward allocate
+  more large chunks) the value should be high enough so that your
+  overall system performance would improve by releasing this much
+  memory.  As a rough guide, you might set to a value close to the
+  average size of a process (program) running on your system.
+  Releasing this much memory would allow such a process to run in
+  memory.  Generally, it is worth tuning trim thresholds when a
+  program undergoes phases where several large chunks are allocated
+  and released in ways that can reuse each other's storage, perhaps
+  mixed with phases where there are no such chunks at all. The trim
+  value must be greater than page size to have any useful effect.  To
+  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+  some people use of mallocing a huge space and then freeing it at
+  program startup, in an attempt to reserve system memory, doesn't
+  have the intended effect under automatic trimming, since that memory
+  will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD       default: 256K
+      Also settable using mallopt(M_MMAP_THRESHOLD, x)
+  The request size threshold for using MMAP to directly service a
+  request. Requests of at least this size that cannot be allocated
+  using already-existing space will be serviced via mmap.  (If enough
+  normal freed space already exists it is used instead.)  Using mmap
+  segregates relatively large chunks of memory so that they can be
+  individually obtained and released from the host system. A request
+  serviced through mmap is never reused by any other request (at least
+  not directly; the system may just so happen to remap successive
+  requests to the same locations).  Segregating space in this way has
+  the benefits that: Mmapped space can always be individually released
+  back to the system, which helps keep the system level memory demands
+  of a long-lived program low.  Also, mapped memory doesn't become
+  `locked' between other chunks, as can happen with normally allocated
+  chunks, which means that even trimming via malloc_trim would not
+  release them.  However, it has the disadvantage that the space
+  cannot be reclaimed, consolidated, and then used to service later
+  requests, as happens with normal chunks.  The advantages of mmap
+  nearly always outweigh disadvantages for "large" chunks, but the
+  value of "large" may vary across systems.  The default is an
+  empirically derived value that works well in most systems. You can
+  disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
+  The number of consolidated frees between checks to release
+  unused segments when freeing. When using non-contiguous segments,
+  especially with multiple mspaces, checking only for topmost space
+  doesn't always suffice to trigger trimming. To compensate for this,
+  free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+  current number of segments, if greater) try to release unused
+  segments to the OS when freeing chunks that result in
+  consolidation. The best value for this parameter is a compromise
+  between slowing down frees with relatively costly checks that
+  rarely trigger versus holding on to unused memory. To effectively
+  disable, set to MAX_SIZE_T. This may lead to a very slight speed
+  improvement at the expense of carrying around more memory.
+*/
+
+/* Numeric version tag; lets client code cope with several dlmalloc versions */
+#if !defined(DLMALLOC_VERSION)
+#define DLMALLOC_VERSION 20806
+#endif /* !defined(DLMALLOC_VERSION) */
+
+#if !defined(DLMALLOC_EXPORT) /* linkage prefix put on the public declarations */
+#define DLMALLOC_EXPORT extern
+#endif /* !defined(DLMALLOC_EXPORT) */
+
+#ifndef WIN32 /* derive WIN32 from _WIN32 / _WIN32_WCE when not set explicitly */
+#ifdef _WIN32
+#define WIN32 1
+#endif  /* _WIN32 */
+#ifdef _WIN32_WCE
+#define LACKS_FCNTL_H
+#define WIN32 1
+#endif /* _WIN32_WCE */
+#endif  /* WIN32 */
+#ifdef WIN32 /* Windows build: fixed feature choices and missing-header flags */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+#define HAVE_MMAP 1 /* set unconditionally here (no #ifndef guard) */
+#define HAVE_MORECORE 0 /* set unconditionally here (no #ifndef guard) */
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#define LACKS_SCHED_H
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION /* empty by default on WIN32 (LACKS_ERRNO_H is set) */
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef MMAP_CLEARS
+#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
+#define MMAP_CLEARS 0
+#else
+#define MMAP_CLEARS 1
+#endif /* _WIN32_WCE */
+#endif /*MMAP_CLEARS */
+#endif  /* WIN32 */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE /* everything below applies only if HAVE_MORECORE was not preset */
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1 /* no #ifndef guard: assumes HAVE_MMAP was not preset */
+/* OSX allocators provide 16 byte alignment; match it unless overridden */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)16U)
+#endif /* MALLOC_ALIGNMENT */
+#endif  /* HAVE_MORECORE */
+#endif  /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h>  /* For size_t */
+#endif  /* LACKS_SYS_TYPES_H */
+
+/* Largest size_t value (all bits set); the "disabled" setting for thresholds */
+#define MAX_SIZE_T           (~(size_t)0)
+
+#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */
+/* No `defined` in the expansion: that is undefined in #if (C11 6.10.1p4) */
+#define USE_LOCKS  ((USE_SPIN_LOCKS != 0) || (USE_RECURSIVE_LOCKS != 0))
+#endif /* USE_LOCKS */
+
+#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
+#if ((defined(__GNUC__) &&                                              \
+      ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) ||      \
+       defined(__i386__) || defined(__x86_64__))) ||                    \
+     (defined(_MSC_VER) && _MSC_VER>=1310))
+#ifndef USE_SPIN_LOCKS /* recognized compiler: spin locks become the default */
+#define USE_SPIN_LOCKS 1
+#endif /* USE_SPIN_LOCKS */
+#elif USE_SPIN_LOCKS /* requested explicitly, but compiler not recognized above */
+#error "USE_SPIN_LOCKS defined without implementation"
+#endif /* ... locks available... */
+#elif !defined(USE_SPIN_LOCKS) /* locking is off: give the macro a value of 0 */
+#define USE_SPIN_LOCKS 0
+#endif /* USE_LOCKS */
+
+#ifndef ONLY_MSPACES /* default 0: the non-mspace public routines are declared */
+#define ONLY_MSPACES 0
+#endif  /* ONLY_MSPACES */
+#ifndef MSPACES /* defaults to on exactly when ONLY_MSPACES is on */
+#if ONLY_MSPACES
+#define MSPACES 1
+#else   /* ONLY_MSPACES */
+#define MSPACES 0
+#endif  /* ONLY_MSPACES */
+#endif  /* MSPACES */
+#ifndef MALLOC_ALIGNMENT /* default: twice the size of a pointer */
+#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+#endif  /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0
+#endif  /* FOOTERS */
+#ifndef ABORT
+#define ABORT  abort()
+#endif  /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif  /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif  /* PROCEED_ON_ERROR */
+
+#ifndef INSECURE
+#define INSECURE 0
+#endif  /* INSECURE */
+#ifndef MALLOC_INSPECT_ALL /* off by default; gates the malloc_inspect_all declaration */
+#define MALLOC_INSPECT_ALL 0
+#endif  /* MALLOC_INSPECT_ALL */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif  /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif  /* MMAP_CLEARS */
+#if !defined(HAVE_MREMAP) && defined(linux) /* default: mremap only on linux */
+#define HAVE_MREMAP 1
+#ifndef _GNU_SOURCE /* may already come from the build flags; do not redefine */
+#define _GNU_SOURCE /* Turns on mremap() definition */
+#endif  /* _GNU_SOURCE */
+#elif !defined(HAVE_MREMAP) /* not linux and not preset by the user */
+#define HAVE_MREMAP 0
+#endif  /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION  errno = ENOMEM; /* note: expansion ends in ';' */
+#endif  /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE /* default: MORECORE is used unless building mspaces only */
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0
+#else   /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif  /* ONLY_MSPACES */
+#endif  /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0 /* no #ifndef guard: assumes it was not preset */
+#else   /* !HAVE_MORECORE */
+#define MORECORE_DEFAULT sbrk
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif  /* MORECORE_CONTIGUOUS */
+#endif  /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if (MORECORE_CONTIGUOUS || defined(WIN32))
+#define DEFAULT_GRANULARITY (0)  /* 0 means to compute in init_mparams */
+#else   /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) /* 64K */
+#endif  /* MORECORE_CONTIGUOUS */
+#endif  /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) /* 2MB */
+#else   /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T /* disables trimming */
+#endif  /* MORECORE_CANNOT_TRIM */
+#endif  /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) /* 256K */
+#else   /* HAVE_MMAP */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T /* disables direct mmap of requests */
+#endif  /* HAVE_MMAP */
+#endif  /* DEFAULT_MMAP_THRESHOLD */
+#ifndef MAX_RELEASE_CHECK_RATE
+#if HAVE_MMAP
+#define MAX_RELEASE_CHECK_RATE 4095
+#else
+#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T /* effectively disables the checks */
+#endif /* HAVE_MMAP */
+#endif /* MAX_RELEASE_CHECK_RATE */
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 0
+#endif  /* USE_BUILTIN_FFS */
+#ifndef USE_DEV_RANDOM /* default 0: /dev/random is not used for the magic seed */
+#define USE_DEV_RANDOM 0
+#endif  /* USE_DEV_RANDOM */
+#ifndef NO_MALLINFO /* default 0: struct mallinfo and mallinfo() are declared */
+#define NO_MALLINFO 0
+#endif  /* NO_MALLINFO */
+#ifndef MALLINFO_FIELD_TYPE /* field type of the struct mallinfo declared in this file */
+#define MALLINFO_FIELD_TYPE size_t
+#endif  /* MALLINFO_FIELD_TYPE */
+#ifndef NO_MALLOC_STATS
+#define NO_MALLOC_STATS 0
+#endif  /* NO_MALLOC_STATS */
+#ifndef NO_SEGMENT_TRAVERSAL
+#define NO_SEGMENT_TRAVERSAL 0
+#endif /* NO_SEGMENT_TRAVERSAL */
+
+/*
+  mallopt tuning options.  SVID/XPG defines four standard parameter
+  numbers for mallopt, normally defined in malloc.h.  None of these
+  are used in this malloc, so setting them has no effect. But this
+  malloc does support the following options.
+*/
+
+#define M_TRIM_THRESHOLD     (-1) /* run-time setting of the trim threshold */
+#define M_GRANULARITY        (-2) /* run-time setting of the granularity */
+#define M_MMAP_THRESHOLD     (-3) /* run-time setting of the mmap threshold */
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+#if !NO_MALLINFO
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any system that has a
+  /usr/include/malloc.h defining struct mallinfo.  The main
+  declaration needed is the mallinfo struct that is returned (by-copy)
+  by mallinfo().  The mallinfo struct contains a bunch of fields that
+  are not even meaningful in this version of malloc.  These fields
+  are instead filled by mallinfo() with other numbers that might be of
+  interest.
+
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else a compliant version is
+  declared below.  These must be precisely the same for mallinfo() to
+  work.  The original SVID version of this struct, defined on most
+  systems with mallinfo, declares all fields as ints. But some others
+  define as unsigned long. If your system defines the fields using a
+  type of different width than listed here, you MUST #include your
+  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
+#ifndef STRUCT_MALLINFO_DECLARED
+/* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */
+#define _STRUCT_MALLINFO
+#define STRUCT_MALLINFO_DECLARED 1
+struct mallinfo {
+  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
+  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
+  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
+  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
+  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
+  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
+  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
+  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
+  MALLINFO_FIELD_TYPE fordblks; /* total free space */
+  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
+};
+#endif /* STRUCT_MALLINFO_DECLARED */
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+#endif /* !NO_MALLINFO */
+
+/*
+  Try to persuade compilers to inline. The most critical functions for
+  inlining are defined as macros, so these aren't used for them.
+*/
+
+#if !defined(FORCEINLINE) /* may stay undefined here; fallbacks follow below */
+#ifdef __GNUC__
+#define FORCEINLINE __inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define FORCEINLINE __forceinline
+#endif /* compiler-specific FORCEINLINE */
+#endif /* !defined(FORCEINLINE) */
+#if !defined(NOINLINE)
+#ifdef __GNUC__
+#define NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER)
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE /* unrecognized compiler: expands to nothing */
+#endif /* compiler-specific NOINLINE */
+#endif /* !defined(NOINLINE) */
+
+#ifdef __cplusplus
+extern "C" { /* NOTE(review): the closing brace is not in this part of the file */
+#ifndef FORCEINLINE /* C++ with an unrecognized compiler: plain inline */
+ #define FORCEINLINE inline
+#endif
+#endif /* __cplusplus */
+#ifndef FORCEINLINE /* last resort: FORCEINLINE expands to nothing */
+ #define FORCEINLINE
+#endif
+
+#if !ONLY_MSPACES
+
+/* ------------------- Declarations of public routines ------------------- */
+
+#ifndef USE_DL_PREFIX /* without the prefix, each dl* name maps to the plain name */
+#define dlcalloc               calloc
+#define dlfree                 free
+#define dlmalloc               malloc
+#define dlmemalign             memalign
+#define dlposix_memalign       posix_memalign
+#define dlrealloc              realloc
+#define dlrealloc_in_place     realloc_in_place
+#define dlvalloc               valloc
+#define dlpvalloc              pvalloc
+#define dlmallinfo             mallinfo
+#define dlmallopt              mallopt
+#define dlmalloc_trim          malloc_trim
+#define dlmalloc_stats         malloc_stats
+#define dlmalloc_usable_size   malloc_usable_size
+#define dlmalloc_footprint     malloc_footprint
+#define dlmalloc_max_footprint malloc_max_footprint
+#define dlmalloc_footprint_limit malloc_footprint_limit
+#define dlmalloc_set_footprint_limit malloc_set_footprint_limit
+#define dlmalloc_inspect_all   malloc_inspect_all
+#define dlindependent_calloc   independent_calloc
+#define dlindependent_comalloc independent_comalloc
+#define dlbulk_free            bulk_free
+#endif /* USE_DL_PREFIX */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
+  null if no space is available, in which case errno is set to ENOMEM
+  on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+  systems.)  Note that size_t is an unsigned type, so calls with
+  arguments that would be negative if signed are interpreted as
+  requests for huge amounts of space, which will often fail. The
+  maximum supported value of n differs across systems, but is in all
+  cases less than the maximum representable value of a size_t.
+*/
+DLMALLOC_EXPORT void* dlmalloc(size_t n);
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. If p was not malloced or already
+  freed, free(p) will by default cause the current program to abort.
+*/
+DLMALLOC_EXPORT void  dlfree(void* p);
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+DLMALLOC_EXPORT void* dlcalloc(size_t n_elements, size_t element_size);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p in most cases when possible, otherwise it
+  employs the equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  realloc with a size
+  argument of zero (re)allocates a minimum-sized chunk.
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+DLMALLOC_EXPORT void* dlrealloc(void* p, size_t n);
+
+/*
+  realloc_in_place(void* p, size_t n)
+  Resizes the space allocated for p to size n, only if this can be
+  done without moving p (i.e., only if there is adjacent space
+  available if n is greater than p's current allocated size, or n is
+  less than or equal to p's size). This may be used instead of plain
+  realloc if an alternative allocation strategy is needed upon failure
+  to expand space; for example, reallocation of a buffer that must be
+  memory-aligned or cleared. You can use realloc_in_place to trigger
+  these alternatives only when needed.
+
+  Returns p if successful; otherwise null.
+*/
+DLMALLOC_EXPORT void* dlrealloc_in_place(void* p, size_t n);
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+DLMALLOC_EXPORT void* dlmemalign(size_t alignment, size_t n);
+
+/*
+  int posix_memalign(void** pp, size_t alignment, size_t n);
+  Allocates a chunk of n bytes, aligned in accord with the alignment
+  argument. Differs from memalign only in that it (1) assigns the
+  allocated memory to *pp rather than returning it, (2) fails and
+  returns EINVAL if the alignment is not a power of two (3) fails and
+  returns ENOMEM if memory cannot be allocated.
+*/
+DLMALLOC_EXPORT int dlposix_memalign(void** pp, size_t alignment, size_t n);
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+DLMALLOC_EXPORT void* dlvalloc(size_t n);
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  To workaround the fact that mallopt is specified to use int,
+  not size_t parameters, the value -1 is specially treated as the
+  maximum unsigned size_t value.
+
+  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  None of these are used in this malloc,
+  so setting them has no effect. But this malloc also supports other
+  options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #  default    allowed param values
+  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (-1 disables)
+  M_GRANULARITY        -2     page size   any power of 2 >= page size
+  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
+*/
+DLMALLOC_EXPORT int dlmallopt(int parameter_number, int parameter_value);
+
+/*
+  malloc_footprint(void);
+  Returns the number of bytes obtained from the system.  The total
+  number of bytes allocated by malloc, realloc etc., is less than this
+  value. Unlike mallinfo, this function returns only a precomputed
+  result, so it can be called frequently to monitor memory consumption.
+  Even if locks are otherwise defined, this function does not use them,
+  so results might not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint(void);
+
+/*
+  malloc_max_footprint(void);
+  Returns the maximum number of bytes obtained from the system. This
+  value will be greater than current footprint if deallocated space
+  has been reclaimed by the system. The peak number of bytes allocated
+  by malloc, realloc etc., is less than this value. Unlike mallinfo,
+  this function returns only a precomputed result, so it can be called
+  frequently to monitor memory consumption.  Even if locks are
+  otherwise defined, this function does not use them, so results might
+  not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void);
+
+/*
+  malloc_footprint_limit(void);
+  Returns the number of bytes that the heap is allowed to obtain from
+  the system: the last value returned by malloc_set_footprint_limit,
+  or the maximum size_t value if never set. The returned value
+  reflects a permission. There is no guarantee that this number of
+  bytes can actually be obtained from the system.
+  Declared with (void) so that C compilers type-check calls to it.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(void);
+
+/*
+  malloc_set_footprint_limit(size_t bytes);
+  Sets the maximum number of bytes to obtain from the system, causing
+  failure returns from malloc and related functions upon attempts to
+  exceed this value. The argument value may be subject to page
+  rounding to an enforceable limit; this actual value is returned.
+  Using an argument of the maximum possible size_t effectively
+  disables checks. If the argument is less than or equal to the
+  current malloc_footprint, then all future allocations that require
+  additional system memory will fail. However, invocation cannot
+  retroactively deallocate existing used memory.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes);
+
+#if MALLOC_INSPECT_ALL
+/*
+  malloc_inspect_all(void(*handler)(void *start,
+                                    void *end,
+                                    size_t used_bytes,
+                                    void* callback_arg),
+                      void* arg);
+  Traverses the heap and calls the given handler for each managed
+  region, skipping all bytes that are (or may be) used for bookkeeping
+  purposes.  Traversal does not include chunks that have been
+  directly memory mapped. Each reported region begins at the start
+  address, and continues up to but not including the end address.  The
+  first used_bytes of the region contain allocated data. If
+  used_bytes is zero, the region is unallocated. The handler is
+  invoked with the given callback argument. If locks are defined, they
+  are held during the entire traversal. It is a bad idea to invoke
+  other malloc functions from within the handler.
+
+  For example, to count the number of in-use chunks with size of at
+  least 1000, you could write:
+  static int count = 0;
+  void count_chunks(void* start, void* end, size_t used, void* arg) {
+    if (used >= 1000) ++count;
+  }
+  then:
+    malloc_inspect_all(count_chunks, NULL);
+
+  malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined.
+*/
+DLMALLOC_EXPORT void dlmalloc_inspect_all(
+    void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
+
+#endif /* MALLOC_INSPECT_ALL */
+
+#if !NO_MALLINFO
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    always zero.
+  hblks:     current number of mmapped regions (NOTE(review): the struct
+               mallinfo declaration in this file says "always 0" -- confirm)
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   always zero
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally (ignoring
+               page restrictions etc.) be released via malloc_trim.
+
+  Because these fields may be narrower (e.g. int; see
+  MALLINFO_FIELD_TYPE) than the internal bookkeeping, the reported
+  values may wrap around zero and thus be inaccurate.
+*/
+DLMALLOC_EXPORT struct mallinfo dlmallinfo(void);
+#endif /* !NO_MALLINFO */
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements that can hold contents of size element_size, each
+  of which starts out cleared, and can be independently freed,
+  realloc'ed etc. The elements are guaranteed to be adjacently
+  allocated (this is not guaranteed to occur with multiple callocs or
+  mallocs), which may also improve cache locality in some
+  applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be freed when it is no longer needed. This can be
+  done all at once using bulk_free.
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools.  It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+  struct Node { int item; struct Node* next; };
+
+  struct Node* build_list() {
+    struct Node** pool;
+    int n = read_number_of_nodes_needed();
+    if (n <= 0) return 0;
+    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
+    if (pool == 0) die();
+    // organize into a linked list...
+    struct Node* first = pool[0];
+    for (int i = 0; i < n-1; ++i)
+      pool[i]->next = pool[i+1];
+    free(pool);     // Can now free the array (or not, if it is needed later)
+    return first;
+  }
+*/
+DLMALLOC_EXPORT void** dlindependent_calloc(size_t n_elements, size_t element_size, void** chunks);
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array.    It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be freed when it is no longer needed. This can be
+  done all at once using bulk_free.
+
+  independent_comalloc differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time.  For example:
+
+  struct Head { ... };
+  struct Foot { ... };
+
+  void send_message(char* msg) {
+    int msglen = strlen(msg);
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+    void* chunks[3];
+    if (independent_comalloc(3, sizes, chunks) == 0)
+      die();
+    struct Head* head = (struct Head*)(chunks[0]);
+    char*        body = (char*)(chunks[1]);
+    struct Foot* foot = (struct Foot*)(chunks[2]);
+    // ...
+  }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+DLMALLOC_EXPORT void** dlindependent_comalloc(size_t n_elements, size_t* sizes, void** chunks);
+
+/*
+  bulk_free(void* array[], size_t n_elements)
+  Frees and clears (sets to null) each non-null pointer in the given
+  array.  This is likely to be faster than freeing them one-by-one.
+  If footers are used, pointers that have been allocated in different
+  mspaces are not freed or cleared, and the count of all such pointers
+  is returned.  For large arrays of pointers with poor locality, it
+  may be worthwhile to sort this array before calling bulk_free.
+*/
+DLMALLOC_EXPORT size_t  dlbulk_free(void** array, size_t n_elements);
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+DLMALLOC_EXPORT void*  dlpvalloc(size_t n);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative arguments
+  to sbrk) if there is unused memory at the `high' end of the malloc
+  pool or in unused MMAP segments. You can call this after freeing
+  large blocks of memory to potentially reduce the system-level memory
+  requirements of a program. However, it cannot guarantee to reduce
+  memory. Under some allocation patterns, some large free blocks of
+  memory will be locked between two used chunks, so they cannot be
+  given back to the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero, only
+  the minimum amount of memory to maintain internal data structures
+  will be left. Non-zero arguments can be supplied to maintain enough
+  trailing space to service future expected allocations without having
+  to re-obtain memory from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+*/
+DLMALLOC_EXPORT int  dlmalloc_trim(size_t pad);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics;
+  call mallinfo for more. NOTE(review): not guarded by NO_MALLOC_STATS.
+*/
+DLMALLOC_EXPORT void  dlmalloc_stats(void);
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+*/
+DLMALLOC_EXPORT size_t dlmalloc_usable_size(void* p);
+
+#endif /* ONLY_MSPACES */
+
+#if MSPACES
+
+/*
+  mspace is an opaque type representing an independent
+  region of space that supports mspace_malloc, etc.
+*/
+typedef void* mspace;
+
+/*
+  create_mspace creates and returns a new independent space with the
+  given initial capacity, or, if 0, the default granularity size.  It
+  returns null if there is no system memory available to create the
+  space.  If argument locked is non-zero, the space uses a separate
+  lock to control access. The capacity of the space will grow
+  dynamically as needed to service mspace_malloc requests.  You can
+  control the sizes of incremental increases of this space by
+  compiling with a different DEFAULT_GRANULARITY or dynamically
+  setting with mallopt(M_GRANULARITY, value).
+*/
+DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked);
+
+/*
+  destroy_mspace destroys the given space, and attempts to return all
+  of its memory back to the system, returning the total number of
+  bytes freed. After destruction, the results of access to all memory
+  used by the space become undefined.
+*/
+DLMALLOC_EXPORT size_t destroy_mspace(mspace msp);
+
+/*
+  create_mspace_with_base uses the memory supplied as the initial base
+  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+  space is used for bookkeeping, so the capacity must be at least this
+  large. (Otherwise 0 is returned.) When this initial space is
+  exhausted, additional memory will be obtained from the system.
+  Destroying this space will deallocate all additionally allocated
+  space (if possible) but not the initial base.
+*/
+DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked);
+
+/*
+  mspace_track_large_chunks controls whether requests for large chunks
+  are allocated in their own untracked mmapped regions, separate from
+  others in this mspace. By default large chunks are not tracked,
+  which reduces fragmentation. However, such chunks are not
+  necessarily released to the system upon destroy_mspace.  Enabling
+  tracking by setting to true may increase fragmentation, but avoids
+  leakage when relying on destroy_mspace to release all memory
+  allocated using this space.  The function returns the previous
+  setting.
+*/
+DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable);
+
+
+/*
+  mspace_malloc behaves as malloc, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes);
+
+/*
+  mspace_free behaves as free, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_free is not actually needed.
+  free may be called instead of mspace_free because freed chunks from
+  any space are handled by their originating spaces.
+*/
+DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem);
+
+/*
+  mspace_realloc behaves as realloc, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_realloc is not actually
+  needed.  realloc may be called instead of mspace_realloc because
+  realloced chunks from any space are handled by their originating
+  spaces.
+*/
+DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize);
+
+/*
+  mspace_calloc behaves as calloc, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
+
+/*
+  mspace_memalign behaves as memalign, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
+
+/*
+  mspace_independent_calloc behaves as independent_calloc, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void* chunks[]);
+
+/*
+  mspace_independent_comalloc behaves as independent_comalloc, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void* chunks[]);
+
+/*
+  mspace_footprint() returns the number of bytes obtained from the
+  system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_footprint(mspace msp);
+
+/*
+  mspace_max_footprint() returns the peak number of bytes obtained from the
+  system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp);
+
+
+#if !NO_MALLINFO
+/*
+  mspace_mallinfo behaves as mallinfo, but reports properties of
+  the given space.
+*/
+DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp);
+#endif /* NO_MALLINFO */
+
+/*
+  mspace_usable_size(const void* mem) behaves the same as malloc_usable_size.
+*/
+DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem);
+
+/*
+  mspace_malloc_stats behaves as malloc_stats, but reports
+  properties of the given space.
+*/
+DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp);
+
+/*
+  mspace_trim behaves as malloc_trim, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
+
+/*
+  An alias for mallopt.
+*/
+DLMALLOC_EXPORT int mspace_mallopt(int, int);
+
+#endif /* MSPACES */
+
+#ifdef __cplusplus
+}  /* end of extern "C" */
+#endif /* __cplusplus */
+
+/*
+  ========================================================================
+  To make a fully customizable malloc.h header file, cut everything
+  above this line, put into file malloc.h, edit to suit, and #include it
+  on the next line, as well as in programs that use this malloc.
+  ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
+#endif /* _MSC_VER */
+#if !NO_MALLOC_STATS
+#include <stdio.h>       /* for printing in malloc_stats */
+#endif /* NO_MALLOC_STATS */
+#ifndef LACKS_ERRNO_H
+#include <errno.h>       /* for MALLOC_FAILURE_ACTION */
+#endif /* LACKS_ERRNO_H */
+#ifdef DEBUG
+#if ABORT_ON_ASSERT_FAILURE
+#undef assert
+#define assert(x) if(!(x)) ABORT
+#else /* ABORT_ON_ASSERT_FAILURE */
+#include <assert.h>
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#else  /* DEBUG */
+#ifndef assert
+#define assert(x)
+#endif
+#define DEBUG 0
+#endif /* DEBUG */
+#if !defined(WIN32) && !defined(LACKS_TIME_H)
+#include <time.h>        /* for magic initialization */
+#endif /* WIN32 */
+#ifndef LACKS_STDLIB_H
+#include <stdlib.h>      /* for abort() */
+#endif /* LACKS_STDLIB_H */
+#ifndef LACKS_STRING_H
+#include <string.h>      /* for memset etc */
+#endif  /* LACKS_STRING_H */
+#if USE_BUILTIN_FFS
+#ifndef LACKS_STRINGS_H
+#include <strings.h>     /* for ffs */
+#endif /* LACKS_STRINGS_H */
+#endif /* USE_BUILTIN_FFS */
+#if HAVE_MMAP
+#ifndef LACKS_SYS_MMAN_H
+/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+#if (defined(linux) && !defined(__USE_GNU))
+#define __USE_GNU 1
+#include <sys/mman.h>    /* for mmap */
+#undef __USE_GNU
+#else
+#include <sys/mman.h>    /* for mmap */
+#endif /* linux */
+#endif /* LACKS_SYS_MMAN_H */
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif /* LACKS_FCNTL_H */
+#endif /* HAVE_MMAP */
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>     /* for sbrk, sysconf */
+#else /* LACKS_UNISTD_H */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void*     sbrk(ptrdiff_t);
+#endif /* FreeBSD etc */
+#endif /* LACKS_UNISTD_H */
+
+/* Declarations for locking */
+#if USE_LOCKS
+#ifndef WIN32
+#if defined (__SVR4) && defined (__sun)  /* solaris */
+#include <thread.h>
+#elif !defined(LACKS_SCHED_H)
+#include <sched.h>
+#endif /* solaris or LACKS_SCHED_H */
+#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS
+#include <pthread.h>
+#endif /* USE_RECURSIVE_LOCKS ... */
+#elif defined(_MSC_VER)
+#ifndef _M_AMD64
+/* These are already defined on AMD64 builds */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
+LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _M_AMD64 */
+#pragma intrinsic (_InterlockedCompareExchange)
+#pragma intrinsic (_InterlockedExchange)
+#define interlockedcompareexchange _InterlockedCompareExchange
+#define interlockedexchange _InterlockedExchange
+#elif defined(WIN32) && defined(__GNUC__)
+#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b)
+#define interlockedexchange __sync_lock_test_and_set
+#endif /* Win32 */
+#else /* USE_LOCKS */
+#endif /* USE_LOCKS */
+
+#ifndef LOCK_AT_FORK
+#define LOCK_AT_FORK 0
+#endif
+
+/* Declarations for bit scanning on win32 */
+#if defined(_MSC_VER) && _MSC_VER>=1300
+#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#define BitScanForward _BitScanForward
+#define BitScanReverse _BitScanReverse
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#endif /* BitScanForward */
+#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
+
+#ifndef WIN32
+#ifndef malloc_getpagesize
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
+#    ifndef _SC_PAGE_SIZE
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
+#    endif
+#  endif
+#  ifdef _SC_PAGE_SIZE
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#  else
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+       extern size_t getpagesize();
+#      define malloc_getpagesize getpagesize()
+#    else
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
+#        define malloc_getpagesize getpagesize()
+#      else
+#        ifndef LACKS_SYS_PARAM_H
+#          include <sys/param.h>
+#        endif
+#        ifdef EXEC_PAGESIZE
+#          define malloc_getpagesize EXEC_PAGESIZE
+#        else
+#          ifdef NBPG
+#            ifndef CLSIZE
+#              define malloc_getpagesize NBPG
+#            else
+#              define malloc_getpagesize (NBPG * CLSIZE)
+#            endif
+#          else
+#            ifdef NBPC
+#              define malloc_getpagesize NBPC
+#            else
+#              ifdef PAGESIZE
+#                define malloc_getpagesize PAGESIZE
+#              else /* just guess */
+#                define malloc_getpagesize ((size_t)4096U)
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+#endif
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE         (sizeof(size_t))
+#define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO         ((size_t)0)
+#define SIZE_T_ONE          ((size_t)1)
+#define SIZE_T_TWO          ((size_t)2)
+#define SIZE_T_FOUR         ((size_t)4)
+#define TWO_SIZE_T_SIZES    (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES   (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES    (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+#define HALF_MAX_SIZE_T     (MAX_SIZE_T / 2U)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK    (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* True if address a has acceptable alignment */
+#define is_aligned(A)       (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
+
+/* -------------------------- MMAP preliminaries ------------------------- */
+
+/*
+   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+   checks to fail so compiler optimizer can delete code rather than
+   using so many "#if"s.
+*/
+
+
+/* MORECORE and MMAP must return MFAIL on failure */
+#define MFAIL                ((void*)(MAX_SIZE_T))
+#define CMFAIL               ((char*)(MFAIL)) /* defined for convenience */
+
+#if HAVE_MMAP
+
+#ifndef WIN32
+#define MUNMAP_DEFAULT(a, s)  munmap((a), (s))
+#define MMAP_PROT            (PROT_READ|PROT_WRITE)
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS        MAP_ANON
+#endif /* MAP_ANON */
+#ifdef MAP_ANONYMOUS
+#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
+#define MMAP_DEFAULT(s)       mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#else /* MAP_ANONYMOUS */
+/*
+   Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+   is unlikely to be needed, but is supplied just in case.
+*/
+#define MMAP_FLAGS           (MAP_PRIVATE)
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
+           (dev_zero_fd = open("/dev/zero", O_RDWR), \
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+#endif /* MAP_ANONYMOUS */
+
+#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+
+#else /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc: reserve and commit `size` bytes as read/write memory; yields MFAIL on failure */
+static FORCEINLINE void* win32mmap(size_t size) {
+  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  return (ptr != 0)? ptr: MFAIL;  /* translate VirtualAlloc's null result into the MFAIL sentinel */
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference; reserves and commits `size` bytes, MFAIL on failure */
+static FORCEINLINE void* win32direct_mmap(size_t size) {
+  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+                           PAGE_READWRITE);
+  return (ptr != 0)? ptr: MFAIL;  /* translate VirtualAlloc's null result into the MFAIL sentinel */
+}
+
+/* Releases `size` bytes at `ptr` one region at a time, so it supports coalesced segments; returns 0 on success, -1 on failure */
+static FORCEINLINE int win32munmap(void* ptr, size_t size) {
+  MEMORY_BASIC_INFORMATION minfo;
+  char* cptr = (char*)ptr;  /* cursor that walks forward through the range being released */
+  while (size) {  /* `size` counts the bytes still to be released */
+    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)  /* look up the region that contains the cursor */
+      return -1;
+    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||  /* cursor must sit exactly at an allocation base */
+        minfo.State != MEM_COMMIT || minfo.RegionSize > size)  /* region must be committed and fit in what remains */
+      return -1;
+    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)  /* size argument 0 with MEM_RELEASE frees the whole allocation */
+      return -1;
+    cptr += minfo.RegionSize;  /* step past the region just released */
+    size -= minfo.RegionSize;
+  }
+  return 0;
+}
+
+#define MMAP_DEFAULT(s)             win32mmap(s)
+#define MUNMAP_DEFAULT(a, s)        win32munmap((a), (s))
+#define DIRECT_MMAP_DEFAULT(s)      win32direct_mmap(s)
+#endif /* WIN32 */
+#endif /* HAVE_MMAP */
+
+#if HAVE_MREMAP
+#ifndef WIN32
+#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#endif /* WIN32 */
+#endif /* HAVE_MREMAP */
+
+/**
+ * Define CALL_MORECORE
+ */
+#if HAVE_MORECORE
+    #ifdef MORECORE
+        #define CALL_MORECORE(S)    MORECORE(S)
+    #else  /* MORECORE */
+        #define CALL_MORECORE(S)    MORECORE_DEFAULT(S)
+    #endif /* MORECORE */
+#else  /* HAVE_MORECORE */
+    #define CALL_MORECORE(S)        MFAIL
+#endif /* HAVE_MORECORE */
+
+/**
+ * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+ */
+#if HAVE_MMAP
+    #define USE_MMAP_BIT            (SIZE_T_ONE)
+
+    #ifdef MMAP
+        #define CALL_MMAP(s)        MMAP(s)
+    #else /* MMAP */
+        #define CALL_MMAP(s)        MMAP_DEFAULT(s)
+    #endif /* MMAP */
+    #ifdef MUNMAP
+        #define CALL_MUNMAP(a, s)   MUNMAP((a), (s))
+    #else /* MUNMAP */
+        #define CALL_MUNMAP(a, s)   MUNMAP_DEFAULT((a), (s))
+    #endif /* MUNMAP */
+    #ifdef DIRECT_MMAP
+        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+    #else /* DIRECT_MMAP */
+        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+    #endif /* DIRECT_MMAP */
+#else  /* HAVE_MMAP */
+    #define USE_MMAP_BIT            (SIZE_T_ZERO)
+
+    #define MMAP(s)                 MFAIL
+    #define MUNMAP(a, s)            (-1)
+    #define DIRECT_MMAP(s)          MFAIL
+    #define CALL_DIRECT_MMAP(s)     DIRECT_MMAP(s)
+    #define CALL_MMAP(s)            MMAP(s)
+    #define CALL_MUNMAP(a, s)       MUNMAP((a), (s))
+#endif /* HAVE_MMAP */
+
+/**
+ * Define CALL_MREMAP
+ */
+#if HAVE_MMAP && HAVE_MREMAP
+    #ifdef MREMAP
+        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+    #else /* MREMAP */
+        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+    #endif /* MREMAP */
+#else  /* HAVE_MMAP && HAVE_MREMAP */
+    #define CALL_MREMAP(addr, osz, nsz, mv)     MFAIL
+#endif /* HAVE_MMAP && HAVE_MREMAP */
+
+/* mstate bit set if contiguous morecore disabled or failed */
+#define USE_NONCONTIGUOUS_BIT (4U)
+
+/* segment bit set in create_mspace_with_base */
+#define EXTERN_BIT            (8U)
+
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+/*
+  When locks are defined, there is one global lock, plus
+  one per-mspace lock.
+
+  The global lock ensures that mparams.magic and other unique
+  mparams values are initialized only once. It also protects
+  sequences of calls to MORECORE.  In many cases sys_alloc requires
+  two calls, that should not be interleaved with calls by other
+  threads.  This does not protect against direct calls to MORECORE
+  by other threads not using this lock, so there is still code to
+  cope as best we can with interference.
+
+  Per-mspace locks surround calls to malloc, free, etc.
+  By default, locks are simple non-reentrant mutexes.
+
+  Because lock-protected regions generally have bounded times, it is
+  OK to use the supplied simple spinlocks. Spinlocks are likely to
+  improve performance for lightly contended applications, but worsen
+  performance under heavy contention.
+
+  If USE_LOCKS is > 1, the definitions of lock routines here are
+  bypassed, in which case you will need to define the type MLOCK_T,
+  and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK
+  and TRY_LOCK.  You must also declare a
+    static MLOCK_T malloc_global_mutex = { initialization values };.
+
+*/
+
+#if !USE_LOCKS
+#define USE_LOCK_BIT               (0U)
+#define INITIAL_LOCK(l)            (0)
+#define DESTROY_LOCK(l)            (0)
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()
+#define RELEASE_MALLOC_GLOBAL_LOCK()
+
+#else
+#if USE_LOCKS > 1
+/* -----------------------  User-defined locks ------------------------ */
+/* Define your own lock implementation here */
+/* #define INITIAL_LOCK(lk)  ... */
+/* #define DESTROY_LOCK(lk)  ... */
+/* #define ACQUIRE_LOCK(lk)  ... */
+/* #define RELEASE_LOCK(lk)  ... */
+/* #define TRY_LOCK(lk) ... */
+/* static MLOCK_T malloc_global_mutex = ... */
+
+#elif USE_SPIN_LOCKS
+
+/* First, define CAS_LOCK and CLEAR_LOCK on ints */
+/* Note CAS_LOCK defined to return 0 on success */
+
+#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+#define CAS_LOCK(sl)     __sync_lock_test_and_set(sl, 1)
+#define CLEAR_LOCK(sl)   __sync_lock_release(sl)
+
+#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+/* Custom spin locks for older gcc on x86; x86_cas_lock returns 0 when the lock was taken, nonzero when it was already held */
+static FORCEINLINE int x86_cas_lock(int *sl) {
+  int ret;  /* receives the value *sl held before the instruction (via eax) */
+  int val = 1;  /* value stored into *sl when the compare succeeds */
+  int cmp = 0;  /* expected "unlocked" value, preloaded into eax through the "0" constraint */
+  __asm__ __volatile__  ("lock; cmpxchgl %1, %2"
+                         : "=a" (ret)
+                         : "r" (val), "m" (*(sl)), "0"(cmp)
+                         : "memory", "cc");
+  return ret;  /* 0 means *sl was free and is now set to 1 */
+}
+
+static FORCEINLINE void x86_clear_lock(int* sl) {  /* release the spin word by atomically exchanging 0 into *sl */
+  assert(*sl != 0);  /* the lock must currently be held */
+  int prev = 0;  /* the "unlocked" value that gets swapped into *sl */
+  int ret;  /* receives the old lock value; not used afterwards */
+  __asm__ __volatile__ ("lock; xchgl %0, %1"
+                        : "=r" (ret)
+                        : "m" (*(sl)), "0"(prev)
+                        : "memory");
+}
+
+#define CAS_LOCK(sl)     x86_cas_lock(sl)
+#define CLEAR_LOCK(sl)   x86_clear_lock(sl)
+
+#else /* Win32 MSC */
+#define CAS_LOCK(sl)     interlockedexchange(sl, (LONG)1)
+#define CLEAR_LOCK(sl)   interlockedexchange (sl, (LONG)0)
+
+#endif /* ... gcc spins locks ... */
+
+/* How to yield for a spin lock */
+#define SPINS_PER_YIELD       63
+#if defined(_MSC_VER)
+#define SLEEP_EX_DURATION     50 /* delay for yield/sleep */
+#define SPIN_LOCK_YIELD  SleepEx(SLEEP_EX_DURATION, FALSE)
+#elif defined (__SVR4) && defined (__sun) /* solaris */
+#define SPIN_LOCK_YIELD   thr_yield();
+#elif !defined(LACKS_SCHED_H)
+#define SPIN_LOCK_YIELD   sched_yield();
+#else
+#define SPIN_LOCK_YIELD
+#endif /* ... yield ... */
+
+#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
+/* Plain spin locks use single word (embedded in malloc_states); spin_acquire_lock loops until the word is taken, then returns 0 */
+static int spin_acquire_lock(int *sl) {
+  int spins = 0;  /* loop iterations so far; used only to decide when to yield */
+  while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {  /* plain read first; attempt the atomic CAS_LOCK only when the word looks free */
+    if ((++spins & SPINS_PER_YIELD) == 0) {  /* true whenever the masked spin counter wraps to zero */
+      SPIN_LOCK_YIELD;  /* platform-specific yield; may expand to nothing */
+    }
+  }
+  return 0;
+}
+
+#define MLOCK_T               int
+#define TRY_LOCK(sl)          !CAS_LOCK(sl)
+#define RELEASE_LOCK(sl)      CLEAR_LOCK(sl)
+#define ACQUIRE_LOCK(sl)      (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0)
+#define INITIAL_LOCK(sl)      (*sl = 0)
+#define DESTROY_LOCK(sl)      (0)
+static MLOCK_T malloc_global_mutex = 0;
+
+#else /* USE_RECURSIVE_LOCKS */
+/* types for lock owners */
+#ifdef WIN32
+#define THREAD_ID_T           DWORD
+#define CURRENT_THREAD        GetCurrentThreadId()
+#define EQ_OWNER(X,Y)         ((X) == (Y))
+#else
+/*
+  Note: the following assumes that pthread_t is a type that can be
+  initialized to (cast) zero. If this is not the case, you will need to
+  redefine these somehow or not use spin locks.
+*/
+#define THREAD_ID_T           pthread_t
+#define CURRENT_THREAD        pthread_self()
+#define EQ_OWNER(X,Y)         pthread_equal(X, Y)
+#endif
+
+struct malloc_recursive_lock {  /* spin lock that its owning thread may acquire repeatedly */
+  int sl;  /* spin word operated on by CAS_LOCK/CLEAR_LOCK; nonzero while held */
+  unsigned int c;  /* nesting count: set to 1 on first acquire, incremented on re-acquire, decremented on release */
+  THREAD_ID_T threadid;  /* thread recorded as owner when the spin word was taken */
+};
+
+#define MLOCK_T  struct malloc_recursive_lock
+static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0};
+
+static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) {  /* drop one nesting level of a held recursive lock */
+  assert(lk->sl != 0);  /* the spin word must currently be held */
+  if (--lk->c == 0) {  /* outermost release: the nesting count has reached zero */
+    CLEAR_LOCK(&lk->sl);  /* free the spin word so another thread can take it */
+  }
+}
+
+static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) {  /* spin until lk is held by the calling thread; always returns 0 */
+  THREAD_ID_T mythreadid = CURRENT_THREAD;
+  int spins = 0;  /* loop iterations so far; used only to decide when to yield */
+  for (;;) {
+    if (*((volatile int *)(&lk->sl)) == 0) {  /* spin word looks free: try to take it */
+      if (!CAS_LOCK(&lk->sl)) {  /* CAS_LOCK returns 0 on success */
+        lk->threadid = mythreadid;  /* record the new owner */
+        lk->c = 1;  /* first nesting level */
+        return 0;
+      }
+    }
+    else if (EQ_OWNER(lk->threadid, mythreadid)) {  /* already held by this thread: re-entrant acquire */
+      ++lk->c;
+      return 0;
+    }
+    if ((++spins & SPINS_PER_YIELD) == 0) {  /* true whenever the masked spin counter wraps to zero */
+      SPIN_LOCK_YIELD;  /* platform-specific yield; may expand to nothing */
+    }
+  }
+}
+
+static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) {  /* single non-blocking attempt: returns 1 if lk is now held by the caller, else 0 */
+  THREAD_ID_T mythreadid = CURRENT_THREAD;
+  if (*((volatile int *)(&lk->sl)) == 0) {  /* spin word looks free: try to take it once */
+    if (!CAS_LOCK(&lk->sl)) {  /* CAS_LOCK returns 0 on success */
+      lk->threadid = mythreadid;  /* record the new owner */
+      lk->c = 1;  /* first nesting level */
+      return 1;
+    }
+  }
+  else if (EQ_OWNER(lk->threadid, mythreadid)) {  /* already held by this thread: re-entrant acquire */
+    ++lk->c;
+    return 1;
+  }
+  return 0;  /* held by another thread, or the CAS attempt lost a race */
+}
+
+#define RELEASE_LOCK(lk)      recursive_release_lock(lk)
+#define TRY_LOCK(lk)          recursive_try_lock(lk)
+#define ACQUIRE_LOCK(lk)      recursive_acquire_lock(lk)
+#define INITIAL_LOCK(lk)      ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0)
+#define DESTROY_LOCK(lk)      (0)
+#endif /* USE_RECURSIVE_LOCKS */
+
+#elif defined(WIN32) /* Win32 critical sections */
+#define MLOCK_T               CRITICAL_SECTION
+#define ACQUIRE_LOCK(lk)      (EnterCriticalSection(lk), 0)
+#define RELEASE_LOCK(lk)      LeaveCriticalSection(lk)
+#define TRY_LOCK(lk)          TryEnterCriticalSection(lk)
+#define INITIAL_LOCK(lk)      (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000))
+#define DESTROY_LOCK(lk)      (DeleteCriticalSection(lk), 0)
+#define NEED_GLOBAL_LOCK_INIT
+
+static MLOCK_T malloc_global_mutex;
+static volatile LONG malloc_global_mutex_status;
+
+/* Use spin loop to initialize global lock; exactly one caller runs InitializeCriticalSection, the others wait for it */
+static void init_malloc_global_mutex() {
+  for (;;) {
+    long stat = malloc_global_mutex_status;  /* 0 = untouched, < 0 = being initialized, > 0 = ready */
+    if (stat > 0)  /* already initialized: nothing to do */
+      return;
+    /* transition to < 0 while initializing, then to > 0 */
+    if (stat == 0 &&
+        interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) {
+      InitializeCriticalSection(&malloc_global_mutex);  /* reached only by the caller that moved the status from 0 to -1 */
+      interlockedexchange(&malloc_global_mutex_status, (LONG)1);  /* publish "ready" to the waiting callers */
+      return;
+    }
+    SleepEx(0, FALSE);  /* another caller is initializing: give up the time slice and re-check */
+  }
+}
+
+#else /* pthreads-based locks */
+#define MLOCK_T               pthread_mutex_t
+#define ACQUIRE_LOCK(lk)      pthread_mutex_lock(lk)
+#define RELEASE_LOCK(lk)      pthread_mutex_unlock(lk)
+#define TRY_LOCK(lk)          (!pthread_mutex_trylock(lk))
+#define INITIAL_LOCK(lk)      pthread_init_lock(lk)
+#define DESTROY_LOCK(lk)      pthread_mutex_destroy(lk)
+
+#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE)
+/* Cope with old-style linux recursive lock initialization by adding */
+/* skipped internal declaration from pthread.h */
+extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
+                                              int __kind));
+#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
+#endif /* USE_RECURSIVE_LOCKS ... */
+
+static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int pthread_init_lock (MLOCK_T *lk) {  /* init *lk (recursive if USE_RECURSIVE_LOCKS != 0); returns 0 on success, 1 on failure */
+  pthread_mutexattr_t attr;
+  int failed = 0;  /* sticky error flag so every path below still reaches the attr destroy */
+  if (pthread_mutexattr_init(&attr)) return 1;  /* attr was never initialized, so there is nothing to clean up */
+#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0
+  failed = (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) != 0);
+#endif
+  if (!failed) failed = (pthread_mutex_init(lk, &attr) != 0);  /* skip mutex creation if the attr could not be configured */
+  return (pthread_mutexattr_destroy(&attr) != 0) || failed;  /* destroy is the left operand of ||, so it always runs */
+}
+
+#endif /* ... lock types ... */
+
+/* Common code for all lock types */
+#define USE_LOCK_BIT               (2U)
+
+#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()  ACQUIRE_LOCK(&malloc_global_mutex);
+#endif
+
+#ifndef RELEASE_MALLOC_GLOBAL_LOCK
+#define RELEASE_MALLOC_GLOBAL_LOCK()  RELEASE_LOCK(&malloc_global_mutex);
+#endif
+
+#endif /* USE_LOCKS */
+
+/* -----------------------  Chunk representations ------------------------ */
+
+/*
+  (The following includes lightly edited explanations by Colin Plumb.)
+
+  The malloc_chunk declaration below is misleading (but accurate and
+  necessary).  It declares a "view" into memory allowing access to
+  necessary fields at known offsets from a given base.
+
+  Chunks of memory are maintained using a `boundary tag' method as
+  originally described by Knuth.  (See the paper by Paul Wilson
+  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+  techniques.)  Sizes of free chunks are stored both in the front of
+  each chunk and at the end.  This makes consolidating fragmented
+  chunks into bigger chunks fast.  The head fields also hold bits
+  representing whether chunks are free or in use.
+
+  Here are some pictures to make it clearer.  They are "exploded" to
+  show that the state of a chunk can be thought of as extending from
+  the high 31 bits of the head field of its header through the
+  prev_foot and PINUSE_BIT bit of the following chunk header.
+
+  A chunk that's in use looks like:
+
+   chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+           | Size of previous chunk (if P = 0)                             |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         1| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               |
+         +-                                                             -+
+         |                                                               |
+         +-                                                             -+
+         |                                                               :
+         +-      size - sizeof(size_t) available payload bytes          -+
+         :                                                               |
+ chunk-> +-                                                             -+
+         |                                                               |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+       | Size of next chunk (may or may not be in use)               | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    And if it's free, it looks like this:
+
+   chunk-> +-                                                             -+
+           | User payload (must be in use, or we would have merged!)       |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         0| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Next pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Prev pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               :
+         +-      size - sizeof(struct chunk) unused bytes               -+
+         :                                                               |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Size of this chunk                                            |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+       | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               :
+       +- User payload                                                -+
+       :                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                                                                     |0|
+                                                                     +-+
+  Note that since we always merge adjacent free chunks, the chunks
+  adjacent to a free chunk must be in use.
+
+  Given a pointer to a chunk (which can be derived trivially from the
+  payload pointer) we can, in O(1) time, find out whether the adjacent
+  chunks are free, and if so, unlink them from the lists that they
+  are on and merge them with the current chunk.
+
+  Chunks always begin on even word boundaries, so the mem portion
+  (which is returned to the user) is also on an even word boundary, and
+  thus at least double-word aligned.
+
+  The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+  chunk size (which is always a multiple of two words), is an in-use
+  bit for the *previous* chunk.  If that bit is *clear*, then the
+  word before the current chunk size contains the previous chunk
+  size, and can be used to find the front of the previous chunk.
+  The very first chunk allocated always has this bit set, preventing
+  access to non-existent (or non-owned) memory. If pinuse is set for
+  any given chunk, then you CANNOT determine the size of the
+  previous chunk, and might even get a memory addressing fault when
+  trying to do so.
+
+  The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+  the chunk size redundantly records whether the current chunk is
+  inuse (unless the chunk is mmapped). This redundancy enables usage
+  checks within free and realloc, and reduces indirection when freeing
+  and consolidating chunks.
+
+  Each freshly allocated chunk must have both cinuse and pinuse set.
+  That is, each allocated chunk borders either a previously allocated
+  and still in-use chunk, or the base of its memory arena. This is
+  ensured by making all allocations from the `lowest' part of any
+  found chunk.  Further, no free chunk physically borders another one,
+  so each free chunk is known to be preceded and followed by either
+  inuse chunks or the ends of memory.
+
+  Note that the `foot' of the current chunk is actually represented
+  as the prev_foot of the NEXT chunk. This makes it easier to
+  deal with alignments etc but can be very confusing when trying
+  to extend or adapt this code.
+
+  The exceptions to all this are
+
+     1. The special chunk `top' is the top-most available chunk (i.e.,
+        the one bordering the end of available memory). It is treated
+        specially.  Top is never included in any bin, is used only if
+        no other chunk is available, and is released back to the
+        system if it is very large (see M_TRIM_THRESHOLD).  In effect,
+        the top chunk is treated as larger (and thus less well
+        fitting) than any other available chunk.  The top chunk
+        doesn't update its trailing size field since there is no next
+        contiguous chunk that would have to index off it. However,
+        space is still allocated for it (TOP_FOOT_SIZE) to enable
+        separation or merging when space is extended.
+
+     2. Chunks allocated via mmap have both cinuse and pinuse bits
+        cleared in their head fields.  Because they are allocated
+        one-by-one, each must carry its own prev_foot field, which is
+        also used to hold the offset this chunk has within its mmapped
+        region, which is needed to preserve alignment. Each mmapped
+        chunk is trailed by the first two fields of a fake next-chunk
+        for sake of usage checks.
+
+*/
+
+struct malloc_chunk {
+  size_t               prev_foot;  /* Size of previous chunk (if free).  */
+  size_t               head;       /* Size and inuse bits. */
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+};
+
+typedef struct malloc_chunk  mchunk;
+typedef struct malloc_chunk* mchunkptr;
+typedef struct malloc_chunk* sbinptr;  /* The type of bins of chunks */
+typedef unsigned int bindex_t;         /* Described below */
+typedef unsigned int binmap_t;         /* Described below */
+typedef unsigned int flag_t;           /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE         (sizeof(mchunk))
+
+#if FOOTERS
+#define CHUNK_OVERHEAD      (TWO_SIZE_T_SIZES)
+#else /* FOOTERS */
+#define CHUNK_OVERHEAD      (SIZE_T_SIZE)
+#endif /* FOOTERS */
+
+/* MMapped chunks need a second word of overhead ... */
+#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define MMAP_FOOT_PAD       (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p)        ((void*)((char*)(p)       + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem)      ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A (fully parenthesized so `->' can follow) */
+#define align_as_chunk(A)   ((mchunkptr)((A) + align_offset(chunk2mem(A))))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST         ((-MIN_CHUNK_SIZE) << 2)
+#define MIN_REQUEST         (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
+
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+  The head field of a chunk is or'ed with PINUSE_BIT when previous
+  adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
+  use, unless mmapped, in which case both bits are cleared.
+
+  FLAG4_BIT is not used by this malloc, but might be useful in extensions.
+*/
+
+#define PINUSE_BIT          (SIZE_T_ONE)
+#define CINUSE_BIT          (SIZE_T_TWO)
+#define FLAG4_BIT           (SIZE_T_FOUR)
+#define INUSE_BITS          (PINUSE_BIT|CINUSE_BIT)
+#define FLAG_BITS           (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD      (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p)           ((p)->head & CINUSE_BIT)
+#define pinuse(p)           ((p)->head & PINUSE_BIT)
+#define flag4inuse(p)       ((p)->head & FLAG4_BIT)
+#define is_inuse(p)         (((p)->head & INUSE_BITS) != PINUSE_BIT)
+#define is_mmapped(p)       (((p)->head & INUSE_BITS) == 0)
+
+#define chunksize(p)        ((p)->head & ~(FLAG_BITS))
+
+#define clear_pinuse(p)     ((p)->head &= ~PINUSE_BIT)
+#define set_flag4(p)        ((p)->head |= FLAG4_BIT)
+#define clear_flag4(p)      ((p)->head &= ~FLAG4_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p)  ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot)
+#define set_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot (s is parenthesized: it may be an expression) */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+  ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* Return true if malloced space is not necessarily cleared */
+#if MMAP_CLEARS
+#define calloc_must_clear(p) (!is_mmapped(p))
+#else /* MMAP_CLEARS */
+#define calloc_must_clear(p) (1)
+#endif /* MMAP_CLEARS */
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+  When chunks are not in use, they are treated as nodes of either
+  lists or trees.
+
+  "Small"  chunks are stored in circular doubly-linked lists, and look
+  like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk in list             |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk in list            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space (may be 0 bytes long)                .
+            .                                                               .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Larger chunks are kept in a form of bitwise digital trees (aka
+  tries) keyed on chunksizes.  Because malloc_tree_chunks are only for
+  free chunks greater than 256 bytes, their size doesn't impose any
+  constraints on user chunk sizes.  Each node looks like:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk of same size        |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk of same size       |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to left child (child[0])                  |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to right child (child[1])                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to parent                                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             bin index of this chunk                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space                                      .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Each tree holding treenodes is a tree of unique chunk sizes.  Chunks
+  of the same size are arranged in a circularly-linked list, with only
+  the oldest chunk (the next to be used, in our FIFO ordering)
+  actually in the tree.  (Tree members are distinguished by a non-null
+  parent pointer.)  If a chunk with the same size as an existing node
+  is inserted, it is linked off the existing node using pointers that
+  work in the same way as fd/bk pointers of small chunks.
+
+  Each tree contains a power of 2 sized range of chunk sizes (the
+  smallest is 0x100 <= x < 0x180), which is divided in half at each
+  tree level, with the chunks in the smaller half of the range (0x100
+  <= x < 0x140 for the top node) in the left subtree and the larger
+  half (0x140 <= x < 0x180) in the right subtree.  This is, of course,
+  done by inspecting individual bits.
+
+  Using these rules, each node's left subtree contains all smaller
+  sizes than its right subtree.  However, the node at the root of each
+  subtree has no particular ordering relationship to either.  (The
+  dividing line between the subtree sizes is based on trie relation.)
+  If we remove the last chunk of a given size from the interior of the
+  tree, we need to replace it with a leaf node.  The tree ordering
+  rules permit a node to be replaced by any leaf below it.
+
+  The smallest chunk in a tree (a common operation in a best-fit
+  allocator) can be found by walking a path to the leftmost leaf in
+  the tree.  Unlike a usual binary tree, where we follow left child
+  pointers until we reach a null, here we follow the right child
+  pointer any time the left one is null, until we reach a leaf with
+  both child pointers null. The smallest chunk in the tree will be
+  somewhere along that path.
+
+  The worst case number of steps to add, find, or remove a node is
+  bounded by the number of bits differentiating chunks within
+  bins. Under current bin calculations, this ranges from 6 up to 21
+  (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+  is of course much better.
+*/
+
+struct malloc_tree_chunk {
+  /* The first four fields must be compatible with malloc_chunk */
+  size_t                    prev_foot; /* size of previous chunk */
+  size_t                    head;      /* size of this chunk, in bytes, plus flag bits */
+  struct malloc_tree_chunk* fd;        /* forward pointer to next chunk of same size */
+  struct malloc_tree_chunk* bk;        /* back pointer to previous chunk of same size */
+
+  struct malloc_tree_chunk* child[2];  /* [0] = left child, [1] = right child */
+  struct malloc_tree_chunk* parent;    /* parent node; non-null marks a tree member */
+  bindex_t                  index;     /* bin index of this chunk */
+};
+
+typedef struct malloc_tree_chunk  tchunk;
+typedef struct malloc_tree_chunk* tchunkptr;
+typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+  Each malloc space may include non-contiguous segments, held in a
+  list headed by an embedded malloc_segment record representing the
+  top-most space. Segments also include flags holding properties of
+  the space. Large chunks that are directly allocated by mmap are not
+  included in this list. They are instead independently created and
+  destroyed without otherwise keeping track of them.
+
+  Segment management mainly comes into play for spaces allocated by
+  MMAP.  Any call to MMAP might or might not return memory that is
+  adjacent to an existing segment.  MORECORE normally contiguously
+  extends the current space, so this space is almost always adjacent,
+  which is simpler and faster to deal with. (This is why MORECORE is
+  used preferentially to MMAP when both are available -- see
+  sys_alloc.)  When allocating using MMAP, we don't use any of the
+  hinting mechanisms (inconsistently) supported in various
+  implementations of unix mmap, or distinguish reserving from
+  committing memory. Instead, we just ask for space, and exploit
+  contiguity when we get it.  It is probably possible to do
+  better than this on some systems, but no general scheme seems
+  to be significantly better.
+
+  Management entails a simpler variant of the consolidation scheme
+  used for chunks to reduce fragmentation -- new adjacent memory is
+  normally prepended or appended to an existing segment. However,
+  there are limitations compared to chunk consolidation that mostly
+  reflect the fact that segment processing is relatively infrequent
+  (occurring only when getting memory from system) and that we
+  don't expect to have huge numbers of segments:
+
+  * Segments are not indexed, so traversal requires linear scans.  (It
+    would be possible to index these, but is not worth the extra
+    overhead and complexity for most programs on most platforms.)
+  * New segments are only appended to old ones when holding top-most
+    memory; if they cannot be prepended to others, they are held in
+    different segments.
+
+  Except for the top-most segment of an mstate, each segment record
+  is kept at the tail of its segment. Segments are added by pushing
+  segment records onto the list headed by &mstate.seg for the
+  containing mstate.
+
+  Segment flags control allocation/merge/deallocation policies:
+  * If EXTERN_BIT set, then we did not allocate this segment,
+    and so should not try to deallocate or merge with others.
+    (This currently holds only for the initial segment passed
+    into create_mspace_with_base.)
+  * If USE_MMAP_BIT set, the segment may be merged with
+    other surrounding mmapped segments and trimmed/de-allocated
+    using munmap.
+  * If neither bit is set, then the segment was obtained using
+    MORECORE so can be merged with surrounding MORECORE'd segments
+    and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+struct malloc_segment {
+  char*        base;             /* base address */
+  size_t       size;             /* allocated size */
+  struct malloc_segment* next;   /* ptr to next segment */
+  flag_t       sflags;           /* mmap and extern flag */
+};
+
+#define is_mmapped_segment(S)  ((S)->sflags & USE_MMAP_BIT)
+#define is_extern_segment(S)   ((S)->sflags & EXTERN_BIT)
+
+typedef struct malloc_segment  msegment;
+typedef struct malloc_segment* msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/*
+   A malloc_state holds all of the bookkeeping for a space.
+   The main fields are:
+
+  Top
+    The topmost chunk of the currently active segment. Its size is
+    cached in topsize.  The actual size of topmost space is
+    topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+    fenceposts and segment records if necessary when getting more
+    space from the system.  The size at which to autotrim top is
+    cached from mparams in trim_check, except that it is disabled if
+    an autotrim fails.
+
+  Designated victim (dv)
+    This is the preferred chunk for servicing small requests that
+    don't have exact fits.  It is normally the chunk split off most
+    recently to service another small request.  Its size is cached in
+    dvsize. The link fields of this chunk are not maintained since it
+    is not kept in a bin.
+
+  SmallBins
+    An array of bin headers for free chunks.  These bins hold chunks
+    with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+    chunks of all the same size, spaced 8 bytes apart.  To simplify
+    use in double-linked lists, each bin header acts as a malloc_chunk
+    pointing to the real first node, if it exists (else pointing to
+    itself).  This avoids special-casing for headers.  But to avoid
+    waste, we allocate only the fd/bk pointers of bins, and then use
+    repositioning tricks to treat these as the fields of a chunk.
+
+  TreeBins
+    Treebins are pointers to the roots of trees holding a range of
+    sizes. There are 2 equally spaced treebins for each power of two
+    from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
+    larger.
+
+  Bin maps
+    There is one bit map for small bins ("smallmap") and one for
+    treebins ("treemap").  Each bin sets its bit when non-empty, and
+    clears the bit when empty.  Bit operations are then used to avoid
+    bin-by-bin searching -- nearly all "search" is done without ever
+    looking at bins that won't be selected.  The bit maps
+    conservatively use 32 bits per map word, even if on 64bit system.
+    For a good description of some of the bit-based techniques used
+    here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+    supplement at http://hackersdelight.org/). Many of these are
+    intended to reduce the branchiness of paths through malloc etc, as
+    well as to reduce the number of memory locations read or written.
+
+  Segments
+    A list of segments headed by an embedded malloc_segment record
+    representing the initial space.
+
+  Address check support
+    The least_addr field is the least address ever obtained from
+    MORECORE or MMAP. Attempted frees and reallocs of any address less
+    than this are trapped (unless INSECURE is defined).
+
+  Magic tag
+    A cross-check field that should always hold same value as mparams.magic.
+
+  Max allowed footprint
+    The maximum allowed bytes to allocate from system (zero means no limit)
+
+  Flags
+    Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+  Statistics
+    Each space keeps track of current and maximum system memory
+    obtained via MORECORE or MMAP.
+
+  Trim support
+    Fields holding the amount of unused topmost memory that should trigger
+    trimming, and a counter to force periodic scanning to release unused
+    non-topmost segments.
+
+  Locking
+    If USE_LOCKS is defined, the "mutex" lock is acquired and released
+    around every public call using this mspace.
+
+  Extension support
+    A void* pointer and a size_t field that can be used to help implement
+    extensions to this malloc.
+*/
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS        (32U)
+#define NTREEBINS         (32U)
+#define SMALLBIN_SHIFT    (3U)
+#define SMALLBIN_WIDTH    (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT     (8U)
+#define MIN_LARGE_SIZE    (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE    (MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+  binmap_t   smallmap;  /* bit map of small bins: bit set when bin non-empty */
+  binmap_t   treemap;   /* bit map of tree bins: bit set when bin non-empty */
+  size_t     dvsize;    /* cached size of dv */
+  size_t     topsize;   /* cached size of top */
+  char*      least_addr; /* least address ever obtained from MORECORE or MMAP */
+  mchunkptr  dv;        /* designated victim: preferred chunk for small requests */
+  mchunkptr  top;       /* topmost chunk of the currently active segment */
+  size_t     trim_check; /* size at which to autotrim top (cached from mparams) */
+  size_t     release_checks; /* counter forcing periodic scans for unused segments */
+  size_t     magic;     /* cross-check; should always equal mparams.magic */
+  mchunkptr  smallbins[(NSMALLBINS+1)*2]; /* only the fd/bk pair of each bin header */
+  tbinptr    treebins[NTREEBINS]; /* roots of the trees of larger chunks */
+  size_t     footprint; /* current system memory obtained via MORECORE or MMAP */
+  size_t     max_footprint; /* maximum system memory obtained so far */
+  size_t     footprint_limit; /* zero means no limit */
+  flag_t     mflags;    /* bits: use MMAP, locks, or contiguous MORECORE */
+#if USE_LOCKS
+  MLOCK_T    mutex;     /* locate lock among fields that rarely change */
+#endif /* USE_LOCKS */
+  msegment   seg;       /* embedded record heading the list of segments */
+  void*      extp;      /* Unused but available for extensions */
+  size_t     exts;      /* size_t companion to extp, also for extensions */
+};
+
+typedef struct malloc_state*    mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+/*
+  malloc_params holds global properties, including those that can be
+  dynamically set using mallopt. There is a single instance, mparams,
+  initialized in init_mparams. Note that the non-zeroness of "magic"
+  also serves as an initialization flag.
+*/
+
+struct malloc_params {
+  size_t magic;
+  size_t page_size;
+  size_t granularity;
+  size_t mmap_threshold;
+  size_t trim_threshold;
+  flag_t default_mflags;
+};
+
+static struct malloc_params mparams;
+
+/* Ensure mparams initialized */
+#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+
+#if !ONLY_MSPACES
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+#define gm                 (&_gm_)
+#define is_global(M)       ((M) == &_gm_)
+
+#endif /* !ONLY_MSPACES */
+
+#define is_initialized(M)  ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+
+#define use_lock(M)           ((M)->mflags &   USE_LOCK_BIT)
+#define enable_lock(M)        ((M)->mflags |=  USE_LOCK_BIT)
+#if USE_LOCKS
+#define disable_lock(M)       ((M)->mflags &= ~USE_LOCK_BIT)
+#else
+#define disable_lock(M)
+#endif
+
+#define use_mmap(M)           ((M)->mflags &   USE_MMAP_BIT)
+#define enable_mmap(M)        ((M)->mflags |=  USE_MMAP_BIT)
+#if HAVE_MMAP
+#define disable_mmap(M)       ((M)->mflags &= ~USE_MMAP_BIT)
+#else
+#define disable_mmap(M)
+#endif
+
+#define use_noncontiguous(M)  ((M)->mflags &   USE_NONCONTIGUOUS_BIT)
+#define disable_contiguous(M) ((M)->mflags |=  USE_NONCONTIGUOUS_BIT)
+
+#define set_lock(M,L)\
+ ((M)->mflags = (L)?\
+  ((M)->mflags | USE_LOCK_BIT) :\
+  ((M)->mflags & ~USE_LOCK_BIT))
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+  (((S) + (mparams.granularity - SIZE_T_ONE))\
+   & ~(mparams.granularity - SIZE_T_ONE))
+
+
+/* For mmap, use granularity alignment on windows, else page-align */
+#ifdef WIN32
+#define mmap_align(S) granularity_align(S)
+#else
+#define mmap_align(S) page_align(S)
+#endif
+
+/* For sys_alloc, enough padding to ensure can malloc request on success */
+#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+#define is_page_aligned(S)\
+   (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+   (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/*  True if segment S holds address A, i.e. base <= A < base + size */
+#define segment_holds(S, A)\
+  ((char*)(A) >= (S)->base && (char*)(A) < (S)->base + (S)->size)
+
+/* Find the segment of m whose range [base, base+size) contains addr; 0 if none */
+static msegmentptr segment_holding(mstate m, char* addr) {
+  msegmentptr seg;
+  /* Walk the list starting at the record embedded in the mstate. */
+  for (seg = &m->seg; seg != 0; seg = seg->next) {
+    if (seg->base <= addr && addr < seg->base + seg->size)
+      return seg;
+  }
+  return 0;
+}
+
+/* Return 1 if any segment record on m's list is stored inside segment ss, else 0 */
+static int has_segment_link(mstate m, msegmentptr ss) {
+  msegmentptr rec;
+  /* Compare the address of each record with the span [base, base+size). */
+  for (rec = &m->seg; rec != 0; rec = rec->next) {
+    if (ss->base <= (char*)rec && (char*)rec < ss->base + ss->size)
+      return 1;
+  }
+  return 0;
+}
+
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s)  ((s) > (M)->trim_check)
+#else  /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s)  (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+  TOP_FOOT_SIZE is padding at the end of a segment, including space
+  that may be needed to place segment records and fenceposts when new
+  noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+  (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* -------------------------------  Hooks -------------------------------- */
+
+/*
+  PREACTION should be defined to return 0 on success, and nonzero on
+  failure. If you are not using locking, you can redefine these to do
+  anything you like.
+*/
+
+#if USE_LOCKS
+#define PREACTION(M)  ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif  /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif  /* POSTACTION */
+
+#endif /* USE_LOCKS */
+
+/*
+  CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+  USAGE_ERROR_ACTION is triggered on detected bad frees and
+  reallocs. The argument p is an address that might have triggered the
+  fault. It is ignored by the two predefined actions, but might be
+  useful in custom actions that try to help diagnose errors.
+*/
+
+#if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+#define CORRUPTION_ERROR_ACTION(m)  reset_on_error(m)
+#define USAGE_ERROR_ACTION(m, p)
+
+#else /* PROCEED_ON_ERROR */
+
+#ifndef CORRUPTION_ERROR_ACTION
+#define CORRUPTION_ERROR_ACTION(m) ABORT
+#endif /* CORRUPTION_ERROR_ACTION */
+
+#ifndef USAGE_ERROR_ACTION
+#define USAGE_ERROR_ACTION(m,p) ABORT
+#endif /* USAGE_ERROR_ACTION */
+
+#endif /* PROCEED_ON_ERROR */
+
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+#if ! DEBUG
+
+#define check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)
+#define check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)
+#define check_malloc_state(M)
+#define check_top_chunk(M,P)
+
+#else /* DEBUG */
+#define check_free_chunk(M,P)       do_check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)      do_check_inuse_chunk(M,P)
+#define check_top_chunk(M,P)        do_check_top_chunk(M,P)
+#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)    do_check_mmapped_chunk(M,P)
+#define check_malloc_state(M)       do_check_malloc_state(M)
+
+static void   do_check_any_chunk(mstate m, mchunkptr p);
+static void   do_check_top_chunk(mstate m, mchunkptr p);
+static void   do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void   do_check_inuse_chunk(mstate m, mchunkptr p);
+static void   do_check_free_chunk(mstate m, mchunkptr p);
+static void   do_check_malloced_chunk(mstate m, void* mem, size_t s);
+static void   do_check_tree(mstate m, tchunkptr t);
+static void   do_check_treebin(mstate m, bindex_t i);
+static void   do_check_smallbin(mstate m, bindex_t i);
+static void   do_check_malloc_state(mstate m);
+static int    bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+#endif /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s)         (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s)      (bindex_t)((s)  >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i)  << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX     (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i)   ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i)     (&((M)->treebins[i]))
+
+/* Assign tree index for size S to variable I. Use bit-scan builtins if possible */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_tree_index(S, I)\
+{\
+  size_t X = (S) >> TREEBIN_SHIFT; /* size_t: a 64-bit S must not be truncated */\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K = (unsigned) sizeof(unsigned)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz((unsigned) X); \
+    I =  (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_tree_index(S, I)\
+{\
+  size_t X = (S) >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K = _bit_scan_reverse (X); \
+    I =  (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_tree_index(S, I)\
+{\
+  size_t X = (S) >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K;\
+    _BitScanReverse((DWORD *) &K, (DWORD) X);\
+    I =  (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#else /* GNUC */
+#define compute_tree_index(S, I)\
+{\
+  size_t X = (S) >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int Y = (unsigned int)X; /* safe: X <= 0xFFFF on this path */\
+    unsigned int N = ((Y - 0x100) >> 16) & 8;\
+    unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
+    N += K;\
+    N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
+    K = 14 - N + ((Y <<= K) >> 15);\
+    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+#endif /* GNUC */
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+   (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2))
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+   (((i) == NTREEBINS-1)? 0 : \
+    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
+   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i)              ((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i)      ((M)->smallmap |=  idx2bit(i))
+#define clear_smallmap(M,i)     ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap &   idx2bit(i))
+
+#define mark_treemap(M,i)       ((M)->treemap  |=  idx2bit(i))
+#define clear_treemap(M,i)      ((M)->treemap  &= ~idx2bit(i))
+#define treemap_is_marked(M,i)  ((M)->treemap  &   idx2bit(i))
+
+/* isolate the least set bit of a bitmap */
+#define least_bit(x)         ((x) & -(x))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x)         (((x)<<1) | -((x)<<1))
+
+/* mask with all bits to left of or equal to least bit of x on */
+#define same_or_left_bits(x) ((x) | -(x))
+
+/* compute_bit2idx(X, I): set I to the index of the set bit in X, via a compiler intrinsic when one is available */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  J = __builtin_ctz(X); \
+  I = (bindex_t)J;\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  J = _bit_scan_forward (X); \
+  I = (bindex_t)J;\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  _BitScanForward((DWORD *) &J, X);\
+  I = (bindex_t)J;\
+}
+
+#elif USE_BUILTIN_FFS
+#define compute_bit2idx(X, I) I = ffs(X)-1 /* ffs is 1-based; NOTE(review): unbraced, unlike the other variants */
+
+#else
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int Y = X - 1; /* NOTE(review): appears to assume X has exactly one bit set */\
+  unsigned int K = Y >> (16-4) & 16;\
+  unsigned int N = K;        Y >>= K;\
+  N += K = Y >> (8-3) &  8;  Y >>= K;\
+  N += K = Y >> (4-2) &  4;  Y >>= K;\
+  N += K = Y >> (2-1) &  2;  Y >>= K;\
+  N += K = Y >> (1-0) &  1;  Y >>= K;\
+  I = (bindex_t)(N + Y);\
+}
+#endif /* GNUC */
+
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+  For security, the main invariant is that malloc/free/etc never
+  writes to a static address other than malloc_state, unless static
+  malloc_state itself has been corrupted, which cannot occur via
+  malloc (because of these checks). In essence this means that we
+  believe all pointers, sizes, maps etc held in malloc_state, but
+  check all of those linked or offsetted from other embedded data
+  structures.  These checks are interspersed with main code in a way
+  that tends to minimize their run-time cost.
+
+  When FOOTERS is defined, in addition to range checking, we also
+  verify footer fields of inuse chunks, which can be used to guarantee
+  that the mstate controlling malloc/free is intact.  This is a
+  streamlined version of the approach described by William Robertson
+  et al in "Run-time Detection of Heap-based Overflows" LISA'03
+  http://www.usenix.org/events/lisa03/tech/robertson.html The footer
+  of an inuse chunk holds the xor of its mstate and a random seed,
+  that is checked upon calls to free() and realloc().  This is
+  (probabilistically) unguessable from outside the program, but can be
+  computed by any code successfully malloc'ing any chunk, so does not
+  itself provide protection against code that has already broken
+  security through some other means.  Unlike Robertson et al, we
+  always dynamically check addresses of all offset chunks (previous,
+  next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+#if !INSECURE
+/* True when address a is not below M->least_addr, the lowest address recorded from MORECORE or MMAP */
+#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
+/* True when the next chunk n lies at a strictly higher address than base chunk p */
+#define ok_next(p, n)    ((char*)(p) < (char*)(n))
+/* True when p has inuse status (as reported by is_inuse) */
+#define ok_inuse(p)     is_inuse(p)
+/* True when p has its pinuse bit on */
+#define ok_pinuse(p)     pinuse(p)
+
+#else /* INSECURE: every check is compiled out and reports success */
+#define ok_address(M, a) (1)
+#define ok_next(b, n)    (1)
+#define ok_inuse(p)      (1)
+#define ok_pinuse(p)     (1)
+#endif /* !INSECURE */
+
+#if (FOOTERS && !INSECURE)
+/* True when the (alleged) mstate M carries the same magic value as mparams */
+#define ok_magic(M)      ((M)->magic == mparams.magic)
+#else  /* no footers, or INSECURE: the magic check always succeeds */
+#define ok_magic(M)      (1)
+#endif /* (FOOTERS && !INSECURE) */
+
+/* RTCHECK(e): runtime check wrapper. In gcc (>= 3) it hints that e is expected to be true */
+#if !INSECURE
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define RTCHECK(e)  __builtin_expect(e, 1)
+#else /* GNUC */
+#define RTCHECK(e)  (e)
+#endif /* GNUC */
+#else /* INSECURE: e is not evaluated at all; the check always passes */
+#define RTCHECK(e)  (1)
+#endif /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers; the size argument s is always parenthesized */
+
+#if !FOOTERS
+
+#define mark_inuse_foot(M,p,s)
+
+/* Macros for setting head/foot of non-mmapped chunks */
+
+/* Set size s and cinuse bit of p (keeping its pinuse bit), and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|(s)|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT))
+
+#else /* FOOTERS */
+
+/* Set foot of inuse chunk to be xor of mstate and seed */
+#define mark_inuse_foot(M,p,s)\
+  (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
+
+#define get_mstate_for(p)\
+  ((mstate)(((mchunkptr)((char*)(p) +\
+    (chunksize(p))))->prev_foot ^ mparams.magic))
+
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|(s)|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
+  mark_inuse_foot(M,p,s))
+
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
+ mark_inuse_foot(M,p,s))
+
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  mark_inuse_foot(M, p, s))
+
+#endif /* !FOOTERS */
+
+/* ---------------------------- setting mparams -------------------------- */
+
+#if LOCK_AT_FORK
+static void pre_fork(void)         { ACQUIRE_LOCK(&(gm)->mutex); } /* atfork prepare handler: take gm's lock */
+static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } /* atfork parent handler: release it */
+static void post_fork_child(void)  { INITIAL_LOCK(&(gm)->mutex); } /* atfork child handler: reinitialize it */
+#endif /* LOCK_AT_FORK */
+
+/* Initialize mparams once, under the global lock (a no-op when magic is already nonzero); always returns 1 */
+static int init_mparams(void) {
+#ifdef NEED_GLOBAL_LOCK_INIT
+  if (malloc_global_mutex_status <= 0)
+    init_malloc_global_mutex();
+#endif
+
+  ACQUIRE_MALLOC_GLOBAL_LOCK();
+  if (mparams.magic == 0) {
+    size_t magic;
+    size_t psize;
+    size_t gsize;
+
+#ifndef WIN32
+    psize = malloc_getpagesize;
+    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
+#else /* WIN32 */
+    {
+      SYSTEM_INFO system_info;
+      GetSystemInfo(&system_info);
+      psize = system_info.dwPageSize;
+      gsize = ((DEFAULT_GRANULARITY != 0)?
+               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
+    }
+#endif /* WIN32 */
+
+    /* Sanity-check configuration:
+       size_t must be unsigned and as wide as pointer type.
+       ints must be at least 4 bytes.
+       alignment must be at least 8.
+       Alignment, min chunk size, and page size must all be powers of 2.
+    */
+    if ((sizeof(size_t) != sizeof(char*)) ||
+        (MAX_SIZE_T < MIN_CHUNK_SIZE)  ||
+        (sizeof(int) < 4)  ||
+        (MALLOC_ALIGNMENT < (size_t)8U) ||
+        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
+        ((MCHUNK_SIZE      & (MCHUNK_SIZE-SIZE_T_ONE))      != 0) ||
+        ((gsize            & (gsize-SIZE_T_ONE))            != 0) ||
+        ((psize            & (psize-SIZE_T_ONE))            != 0))
+      ABORT;
+    mparams.granularity = gsize;
+    mparams.page_size = psize;
+    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+#if MORECORE_CONTIGUOUS
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
+#else  /* MORECORE_CONTIGUOUS */
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
+#endif /* MORECORE_CONTIGUOUS */
+
+#if !ONLY_MSPACES
+    /* Set up lock for main malloc area */
+    gm->mflags = mparams.default_mflags;
+    (void)INITIAL_LOCK(&gm->mutex);
+#endif
+#if LOCK_AT_FORK
+    pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child);
+#endif
+
+    {
+#if USE_DEV_RANDOM
+      unsigned char buf[sizeof(size_t)];
+      /* Try to use /dev/urandom, else fall back on using time */
+      int fd = open("/dev/urandom", O_RDONLY);
+      int got_seed = (fd >= 0 && read(fd, buf, sizeof(buf)) == sizeof(buf));
+      if (fd >= 0)
+        close(fd); /* close on every path, including a failed or short read */
+      if (got_seed)
+        magic = *((size_t *) buf);
+      else
+#endif /* USE_DEV_RANDOM */
+#ifdef WIN32
+      magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
+#elif defined(LACKS_TIME_H)
+      magic = (size_t)&magic ^ (size_t)0x55555555U;
+#else
+      magic = (size_t)(time(0) ^ (size_t)0x55555555U);
+#endif
+      magic |= (size_t)8U;    /* ensure nonzero */
+      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
+      /* Until memory modes commonly available, use volatile-write */
+      (*(volatile size_t *)(&(mparams.magic))) = magic;
+    }
+  }
+
+  RELEASE_MALLOC_GLOBAL_LOCK();
+  return 1;
+}
+
+/* Backend for mallopt: store one tunable into mparams.
+   A value of -1 stands for MAX_SIZE_T. Returns 1 when the setting is stored,
+   0 for an unknown parameter number or a rejected granularity. */
+static int change_mparam(int param_number, int value) {
+  size_t setting;
+  int accepted = 1;
+  ensure_initialization();
+  if (value == -1)
+    setting = MAX_SIZE_T;
+  else
+    setting = (size_t)value;
+  if (param_number == M_TRIM_THRESHOLD)
+    mparams.trim_threshold = setting;
+  else if (param_number == M_MMAP_THRESHOLD)
+    mparams.mmap_threshold = setting;
+  else if (param_number == M_GRANULARITY &&
+           setting >= mparams.page_size &&
+           (setting & (setting - 1)) == 0) /* power of two, at least a page */
+    mparams.granularity = setting;
+  else
+    accepted = 0;
+  return accepted;
+}
+
+#if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Checks valid for any chunk (free, inuse, mmapped): aligned payload unless head is FENCEPOST_HEAD, and ok_address */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+}
+
+/* Check the top chunk p of m: segment membership, size bookkeeping, pinuse bits */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+  size_t  top_bytes = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
+  msegmentptr holder = segment_holding(m, (char*)p);
+  assert(holder != 0);
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(top_bytes == m->topsize);
+  assert(top_bytes > 0);
+  assert(top_bytes == ((holder->base + holder->size) - (char*)p) - TOP_FOOT_SIZE);
+  assert(pinuse(p));
+  assert(!pinuse(chunk_plus_offset(p, top_bytes)));
+}
+
+/* Check an (inuse) mmapped chunk: flags, alignment, page-multiple mapping length, trailing fenceposts */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+  size_t  csize = chunksize(p);
+  size_t maplen = (csize + (p->prev_foot) + MMAP_FOOT_PAD);
+  assert(is_mmapped(p));
+  assert(use_mmap(m));
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(!is_small(csize));
+  assert((maplen & (mparams.page_size-SIZE_T_ONE)) == 0);
+  assert(chunk_plus_offset(p, csize)->head == FENCEPOST_HEAD);
+  assert(chunk_plus_offset(p, csize+SIZE_T_SIZE)->head == 0);
+}
+
+/* Check an inuse chunk p: generic checks, inuse bits, link back from the previous chunk, plus mmapped checks when applicable */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+  do_check_any_chunk(m, p);
+  assert(is_inuse(p));
+  assert(next_pinuse(p));
+  /* If not pinuse and not mmapped, previous chunk has OK offset */
+  assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+  if (is_mmapped(p))
+    do_check_mmapped_chunk(m, p);
+}
+
+/* Check a free chunk p; chunks other than dv and top also get their size, footer and list links verified */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+  size_t csize = chunksize(p);
+  mchunkptr after = chunk_plus_offset(p, csize);
+  do_check_any_chunk(m, p);
+  assert(!is_inuse(p));
+  assert(!next_pinuse(p));
+  assert (!is_mmapped(p));
+  if (p == m->dv || p == m->top)
+    return; /* dv and top get only the checks above */
+  if (csize < MIN_CHUNK_SIZE) {
+    assert(csize == SIZE_T_SIZE); /* markers are always of size SIZE_T_SIZE */
+    return;
+  }
+  assert((csize & CHUNK_ALIGN_MASK) == 0);
+  assert(is_aligned(chunk2mem(p)));
+  assert(after->prev_foot == csize);
+  assert(pinuse(p));
+  assert (after == m->top || is_inuse(after));
+  assert(p->fd->bk == p);
+  assert(p->bk->fd == p);
+}
+
+/* Check a chunk at the point it is malloced for a request of s bytes; a null mem is accepted and skipped */
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
+  mchunkptr p;
+  size_t csize;
+  if (mem == 0) return;
+  p = mem2chunk(mem);
+  csize = p->head & ~INUSE_BITS;
+  do_check_inuse_chunk(m, p);
+  assert((csize & CHUNK_ALIGN_MASK) == 0);
+  assert(csize >= MIN_CHUNK_SIZE);
+  assert(csize >= s);
+  assert(is_mmapped(p) || csize < (s + MIN_CHUNK_SIZE)); /* under MIN_CHUNK_SIZE of slack unless mmapped */
+}
+
+/* Check tree node t, the chain of same-sized chunks hanging off it, and (recursively) both subtrees */
+static void do_check_tree(mstate m, tchunkptr t) {
+  tchunkptr head = 0;
+  tchunkptr u = t;
+  bindex_t tindex = t->index;
+  size_t tsize = chunksize(t);
+  bindex_t idx;
+  compute_tree_index(tsize, idx);
+  assert(tindex == idx);
+  assert(tsize >= MIN_LARGE_SIZE);
+  assert(tsize >= minsize_for_tree_index(idx));
+  assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
+
+  do { /* traverse through chain of same-sized nodes */
+    do_check_any_chunk(m, ((mchunkptr)u));
+    assert(u->index == tindex);
+    assert(chunksize(u) == tsize);
+    assert(!is_inuse(u));
+    assert(!next_pinuse(u));
+    assert(u->fd->bk == u);
+    assert(u->bk->fd == u);
+    if (u->parent == 0) { /* chained node without tree links: must have no children */
+      assert(u->child[0] == 0);
+      assert(u->child[1] == 0);
+    }
+    else {
+      assert(head == 0); /* only one node on chain has parent */
+      head = u;
+      assert(u->parent != u);
+      assert (u->parent->child[0] == u ||
+              u->parent->child[1] == u ||
+              *((tbinptr*)(u->parent)) == u); /* or parent is the treebin slot itself */
+      if (u->child[0] != 0) {
+        assert(u->child[0]->parent == u);
+        assert(u->child[0] != u);
+        do_check_tree(m, u->child[0]);
+      }
+      if (u->child[1] != 0) {
+        assert(u->child[1]->parent == u);
+        assert(u->child[1] != u);
+        do_check_tree(m, u->child[1]);
+      }
+      if (u->child[0] != 0 && u->child[1] != 0) {
+        assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+      }
+    }
+    u = u->fd;
+  } while (u != t);
+  assert(head != 0); /* exactly one node of the chain carried the tree links */
+}
+
+/* Verify treebin i of m: an empty bin must have its map bit clear; a set map bit triggers a tree check */
+static void do_check_treebin(mstate m, bindex_t i) {
+  tchunkptr root = *treebin_at(m, i);
+  int bit_clear = (m->treemap & (1U << i)) == 0;
+  if (root == 0)
+    assert(bit_clear);
+  if (!bit_clear) {
+    do_check_tree(m, root);
+  }
+}
+
+/* Verify smallbin i of m: an empty list must have its map bit clear; otherwise check every chunk on it */
+static void do_check_smallbin(mstate m, bindex_t i) {
+  sbinptr bin = smallbin_at(m, i);
+  mchunkptr cur = bin->bk;
+  unsigned int bit_clear = (m->smallmap & (1U << i)) == 0;
+  if (cur == bin)
+    assert(bit_clear);
+  if (bit_clear)
+    return;
+  while (cur != bin) { /* walk the list through the bk links */
+    size_t csize = chunksize(cur);
+    mchunkptr follower;
+    /* each chunk claims to be free */
+    do_check_free_chunk(m, cur);
+    /* chunk belongs in bin */
+    assert(small_index(csize) == i);
+    assert(cur->bk == bin || chunksize(cur->bk) == chunksize(cur));
+    follower = next_chunk(cur); /* chunk is followed by an inuse chunk */
+    if (follower->head != FENCEPOST_HEAD)
+      do_check_inuse_chunk(m, follower);
+    cur = cur->bk;
+  }
+}
+
+/* Return 1 when chunk x is linked into the bin matching its size, else 0. Used in other check functions. */
+static int bin_find(mstate m, mchunkptr x) {
+  size_t want = chunksize(x);
+  if (is_small(want)) {
+    bindex_t slot = small_index(want);
+    sbinptr bin = smallbin_at(m, slot);
+    if (smallmap_is_marked(m, slot)) {
+      mchunkptr cur = bin;
+      do { /* scan the circular list, starting at the bin header */
+        if (cur == x)
+          return 1;
+      } while ((cur = cur->fd) != bin);
+    }
+  }
+  else {
+    bindex_t slot;
+    compute_tree_index(want, slot);
+    if (treemap_is_marked(m, slot)) {
+      tchunkptr node = *treebin_at(m, slot);
+      size_t path = want << leftshift_for_tree_index(slot);
+      while (node != 0 && chunksize(node) != want) { /* top bit of path picks the child */
+        node = node->child[(path >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+        path <<= 1;
+      }
+      if (node != 0) {
+        tchunkptr peer = node;
+        do { /* scan the chain of same-sized chunks */
+          if (peer == (tchunkptr)x)
+            return 1;
+        } while ((peer = peer->fd) != node);
+      }
+    }
+  }
+  return 0;
+}
+
+/* Walk every chunk of every segment of m, checking each one;
+   returns the bytes covered (top and its footer included), or 0 when m is uninitialized */
+static size_t traverse_and_check(mstate m) {
+  size_t total = 0;
+  msegmentptr seg;
+  if (!is_initialized(m))
+    return total;
+  total = m->topsize + TOP_FOOT_SIZE; /* top is counted up front */
+  for (seg = &m->seg; seg != 0; seg = seg->next) {
+    mchunkptr cur = align_as_chunk(seg->base);
+    mchunkptr prev = 0;
+    assert(pinuse(cur));
+    while (segment_holds(seg, cur) &&
+           cur != m->top && cur->head != FENCEPOST_HEAD) {
+      total += chunksize(cur);
+      if (!is_inuse(cur)) {
+        assert(cur == m->dv || bin_find(m, cur));
+        assert(prev == 0 || is_inuse(prev)); /* Not 2 consecutive free */
+        do_check_free_chunk(m, cur);
+      }
+      else {
+        assert(!bin_find(m, cur));
+        do_check_inuse_chunk(m, cur);
+      }
+      prev = cur;
+      cur = next_chunk(cur);
+    }
+  }
+  return total;
+}
+
+
+/* Full consistency check of m: every bin, the dv and top chunks, then a walk over all segments */
+static void do_check_malloc_state(mstate m) {
+  bindex_t slot;
+  size_t walked;
+  for (slot = 0; slot < NSMALLBINS; ++slot) /* small bins */
+    do_check_smallbin(m, slot);
+  for (slot = 0; slot < NTREEBINS; ++slot)  /* tree bins */
+    do_check_treebin(m, slot);
+
+  /* dv chunk, when present: sizes agree and it is not in any bin */
+  if (m->dvsize != 0) {
+    do_check_any_chunk(m, m->dv);
+    assert(m->dvsize == chunksize(m->dv));
+    assert(m->dvsize >= MIN_CHUNK_SIZE);
+    assert(bin_find(m, m->dv) == 0);
+  }
+
+  /* top chunk: do_check_top_chunk already compares topsize with the header */
+  if (m->top != 0) {
+    do_check_top_chunk(m, m->top);
+    assert(m->topsize > 0);
+    assert(bin_find(m, m->top) == 0);
+  }
+
+  walked = traverse_and_check(m);
+  assert(walked <= m->footprint);
+  assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+#if !NO_MALLINFO
+/* Build a mallinfo snapshot for m; all fields stay zero when PREACTION fails or m is uninitialized */
+static struct mallinfo internal_mallinfo(mstate m) {
+  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  ensure_initialization();
+  if (PREACTION(m))
+    return nm;
+  check_malloc_state(m);
+  if (is_initialized(m)) {
+    /* top is always free, so the free counters start from it */
+    size_t free_chunks = SIZE_T_ONE;
+    size_t free_bytes = m->topsize + TOP_FOOT_SIZE;
+    size_t arena_bytes = free_bytes;
+    msegmentptr seg;
+    for (seg = &m->seg; seg != 0; seg = seg->next) {
+      mchunkptr cur = align_as_chunk(seg->base);
+      while (segment_holds(seg, cur) &&
+             cur != m->top && cur->head != FENCEPOST_HEAD) {
+        size_t csize = chunksize(cur);
+        arena_bytes += csize;
+        if (!is_inuse(cur)) {
+          free_bytes += csize;
+          ++free_chunks;
+        }
+        cur = next_chunk(cur);
+      }
+    }
+
+    nm.arena    = arena_bytes;
+    nm.ordblks  = free_chunks;
+    nm.hblkhd   = m->footprint - arena_bytes;
+    nm.usmblks  = m->max_footprint;
+    nm.uordblks = m->footprint - free_bytes;
+    nm.fordblks = free_bytes;
+    nm.keepcost = m->topsize;
+  }
+  POSTACTION(m);
+  return nm;
+}
+#endif /* !NO_MALLINFO */
+
+#if !NO_MALLOC_STATS
+/* Print max footprint, current footprint and bytes in use for m to stderr (nothing when PREACTION fails) */
+static void internal_malloc_stats(mstate m) {
+  size_t peak_bytes = 0;
+  size_t system_bytes = 0;
+  size_t inuse_bytes = 0;
+  ensure_initialization();
+  if (PREACTION(m))
+    return;
+  check_malloc_state(m);
+  if (is_initialized(m)) {
+    msegmentptr seg;
+    peak_bytes = m->max_footprint;
+    system_bytes = m->footprint;
+    /* start from everything but top, then subtract each free chunk */
+    inuse_bytes = system_bytes - (m->topsize + TOP_FOOT_SIZE);
+    for (seg = &m->seg; seg != 0; seg = seg->next) {
+      mchunkptr cur = align_as_chunk(seg->base);
+      while (segment_holds(seg, cur) &&
+             cur != m->top && cur->head != FENCEPOST_HEAD) {
+        if (!is_inuse(cur))
+          inuse_bytes -= chunksize(cur);
+        cur = next_chunk(cur);
+      }
+    }
+  }
+  POSTACTION(m); /* drop lock */
+  fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(peak_bytes));
+  fprintf(stderr, "system bytes     = %10lu\n", (unsigned long)(system_bytes));
+  fprintf(stderr, "in use bytes     = %10lu\n", (unsigned long)(inuse_bytes));
+}
+#endif /* !NO_MALLOC_STATS */
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+  Various forms of linking and unlinking are defined as macros.  Even
+  the ones for trees, which are very long but have very short typical
+  paths.  This is ugly but reduces reliance on inlining support of
+  compilers.
+*/
+
+/* Link free chunk P of size S into its smallbin in M, directly after the bin header */
+#define insert_small_chunk(M, P, S) {\
+  bindex_t I  = small_index(S);\
+  mchunkptr B = smallbin_at(M, I);\
+  mchunkptr F = B;\
+  assert(S >= MIN_CHUNK_SIZE);\
+  if (!smallmap_is_marked(M, I))\
+    mark_smallmap(M, I); /* bin was empty: F stays the header B */\
+  else if (RTCHECK(ok_address(M, B->fd)))\
+    F = B->fd; /* current first chunk becomes P's successor */\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+  B->fd = P;\
+  F->bk = P;\
+  P->fd = F;\
+  P->bk = B;\
+}
+
+/* Unlink chunk P of size S from its smallbin in M, validating both neighbours first */
+#define unlink_small_chunk(M, P, S) {\
+  mchunkptr F = P->fd;\
+  mchunkptr B = P->bk;\
+  bindex_t I = small_index(S);\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \
+    if (B == F) { /* P was the only chunk: just clear the map bit */\
+      clear_smallmap(M, I);\
+    }\
+    else if (RTCHECK(B == smallbin_at(M,I) ||\
+                     (ok_address(M, B) && B->fd == P))) {\
+      F->bk = B;\
+      B->fd = F;\
+    }\
+    else {\
+      CORRUPTION_ERROR_ACTION(M);\
+    }\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+/* Unlink P, the first chunk of smallbin B (index I) in M; only the forward neighbour needs checking */
+#define unlink_first_small_chunk(M, B, P, I) {\
+  mchunkptr F = P->fd;\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (B == F) { /* P was the only chunk: just clear the map bit */\
+    clear_smallmap(M, I);\
+  }\
+  else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\
+    F->bk = B;\
+    B->fd = F;\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+/* Make P (size S) the dv chunk of M, first moving any existing dv chunk into its smallbin */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+  size_t DVS = M->dvsize;\
+  assert(is_small(DVS));\
+  if (DVS != 0) {\
+    mchunkptr DV = M->dv;\
+    insert_small_chunk(M, DV, DVS);\
+  }\
+  M->dvsize = S;\
+  M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk X of size S into the treebin of M chosen by compute_tree_index */
+#define insert_large_chunk(M, X, S) {\
+  tbinptr* H;\
+  bindex_t I;\
+  compute_tree_index(S, I);\
+  H = treebin_at(M, I);\
+  X->index = I;\
+  X->child[0] = X->child[1] = 0;\
+  if (!treemap_is_marked(M, I)) { /* empty bin: X becomes the root */\
+    mark_treemap(M, I);\
+    *H = X;\
+    X->parent = (tchunkptr)H;\
+    X->fd = X->bk = X;\
+  }\
+  else {\
+    tchunkptr T = *H;\
+    size_t K = S << leftshift_for_tree_index(I);\
+    for (;;) {\
+      if (chunksize(T) != S) { /* descend: the top bit of K selects the child */\
+        tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+        K <<= 1;\
+        if (*C != 0)\
+          T = *C;\
+        else if (RTCHECK(ok_address(M, C))) {\
+          *C = X;\
+          X->parent = T;\
+          X->fd = X->bk = X;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+      else { /* same size as T: chain X next to T; chained nodes get a null parent */\
+        tchunkptr F = T->fd;\
+        if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+          T->fd = F->bk = X;\
+          X->fd = F;\
+          X->bk = T;\
+          X->parent = 0;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+    }\
+  }\
+}
+
+/*
+  Unlink steps:
+
+  1. If x is a chained node, unlink it from its same-sized fd/bk links
+     and choose its bk node as its replacement.
+  2. If x was the last node of its size, but not a leaf node, it must
+     be replaced with a leaf node (not merely one with an open left or
+     right), to make sure that lefts and rights of descendents
+     correspond properly to bit masks.  We use the rightmost descendent
+     of x.  We could use any other leaf, but this is easy to locate and
+     tends to counteract removal of leftmosts elsewhere, and so keeps
+     paths shorter than minimally guaranteed.  This doesn't loop much
+     because on average a node in a tree is near the bottom.
+  3. If x is the base of a chain (i.e., has parent links) relink
+     x's parent and children to x's replacement (or null if none).
+*/
+
+#define unlink_large_chunk(M, X) {\
+  tchunkptr XP = X->parent;\
+  tchunkptr R;\
+  if (X->bk != X) { /* step 1: X shares its size with others; its bk node R replaces it */\
+    tchunkptr F = X->fd;\
+    R = X->bk;\
+    if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\
+      F->bk = R;\
+      R->fd = F;\
+    }\
+    else {\
+      CORRUPTION_ERROR_ACTION(M);\
+    }\
+  }\
+  else { /* step 2: find a leaf under X, preferring child[1] at every level; R is null if X has no child */\
+    tchunkptr* RP;\
+    if (((R = *(RP = &(X->child[1]))) != 0) ||\
+        ((R = *(RP = &(X->child[0]))) != 0)) {\
+      tchunkptr* CP;\
+      while ((*(CP = &(R->child[1])) != 0) ||\
+             (*(CP = &(R->child[0])) != 0)) {\
+        R = *(RP = CP);\
+      }\
+      if (RTCHECK(ok_address(M, RP)))\
+        *RP = 0; /* detach the leaf from its old parent */\
+      else {\
+        CORRUPTION_ERROR_ACTION(M);\
+      }\
+    }\
+  }\
+  if (XP != 0) { /* step 3: X carried tree links; hand them over to R */\
+    tbinptr* H = treebin_at(M, X->index);\
+    if (X == *H) {\
+      if ((*H = R) == 0) \
+        clear_treemap(M, X->index);\
+    }\
+    else if (RTCHECK(ok_address(M, XP))) {\
+      if (XP->child[0] == X) \
+        XP->child[0] = R;\
+      else \
+        XP->child[1] = R;\
+    }\
+    else\
+      CORRUPTION_ERROR_ACTION(M);\
+    if (R != 0) {\
+      if (RTCHECK(ok_address(M, R))) {\
+        tchunkptr C0, C1;\
+        R->parent = XP;\
+        if ((C0 = X->child[0]) != 0) {\
+          if (RTCHECK(ok_address(M, C0))) {\
+            R->child[0] = C0;\
+            C0->parent = R;\
+          }\
+          else\
+            CORRUPTION_ERROR_ACTION(M);\
+        }\
+        if ((C1 = X->child[1]) != 0) {\
+          if (RTCHECK(ok_address(M, C1))) {\
+            R->child[1] = C1;\
+            C1->parent = R;\
+          }\
+          else\
+            CORRUPTION_ERROR_ACTION(M);\
+        }\
+      }\
+      else\
+        CORRUPTION_ERROR_ACTION(M);\
+    }\
+  }\
+}
+
+/* Dispatch on size S: small sizes use the smallbin macros, others are cast to tchunkptr for the tree macros. Each expands to a bare if/else. */
+
+#define insert_chunk(M, P, S)\
+  if (is_small(S)) insert_small_chunk(M, P, S)\
+  else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+  if (is_small(S)) unlink_small_chunk(M, P, S)\
+  else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+
+/* Relays used by realloc, memalign etc: pick the dl* or mspace_* entry point depending on build flags and on m */
+
+#if ONLY_MSPACES
+#define internal_malloc(m, b) mspace_malloc(m, b)
+#define internal_free(m, mem) mspace_free(m,mem); /* NOTE(review): expansion already ends in ';' */
+#else /* ONLY_MSPACES */
+#if MSPACES
+#define internal_malloc(m, b)\
+  ((m == gm)? dlmalloc(b) : mspace_malloc(m, b))
+#define internal_free(m, mem)\
+   if (m == gm) dlfree(mem); else mspace_free(m,mem); /* NOTE(review): bare if/else, unsafe inside an unbraced if */
+#else /* MSPACES */
+#define internal_malloc(m, b) dlmalloc(b)
+#define internal_free(m, mem) dlfree(mem)
+#endif /* MSPACES */
+#endif /* ONLY_MSPACES */
+
+/* -----------------------  Direct-mmapping chunks ----------------------- */
+
+/*
+  Directly mmapped chunks are set up with an offset to the start of
+  the mmapped region stored in the prev_foot field of the chunk. This
+  allows reconstruction of the required argument to MUNMAP when freed,
+  and also allows adjustment of the returned chunk to meet alignment
+  requirements (especially in memalign).
+*/
+
+/* Serve a request of nb bytes with its own mmapped region; returns the payload pointer, or 0 on failure */
+static void* mmap_alloc(mstate m, size_t nb) {
+  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  if (m->footprint_limit != 0) { /* a limit of 0 means no limit is enforced here */
+    size_t fp = m->footprint + mmsize;
+    if (fp <= m->footprint || fp > m->footprint_limit)
+      return 0;
+  }
+  if (mmsize > nb) {     /* Check for wrap around 0 */
+    char* mm = (char*)(CALL_DIRECT_MMAP(mmsize));
+    if (mm != CMFAIL) {
+      size_t offset = align_offset(chunk2mem(mm));
+      size_t psize = mmsize - offset - MMAP_FOOT_PAD;
+      mchunkptr p = (mchunkptr)(mm + offset);
+      p->prev_foot = offset; /* distance back to the start of the mapping */
+      p->head = psize;
+      mark_inuse_foot(m, p, psize);
+      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+
+      if (m->least_addr == 0 || mm < m->least_addr)
+        m->least_addr = mm;
+      if ((m->footprint += mmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      assert(is_aligned(chunk2mem(p)));
+      check_mmapped_chunk(m, p);
+      return chunk2mem(p);
+    }
+  }
+  return 0;
+}
+
+/* Resize mmapped chunk oldp to hold nb bytes; returns oldp, a remapped chunk, or 0 when it cannot be done */
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) {
+  size_t oldsize = chunksize(oldp);
+  (void)flags; /* placate people compiling -Wunused */
+  if (is_small(nb)) /* Can't shrink mmap regions below small size */
+    return 0;
+  /* Keep old chunk if big enough but not too big */
+  if (oldsize >= nb + SIZE_T_SIZE &&
+      (oldsize - nb) <= (mparams.granularity << 1))
+    return oldp;
+  else {
+    size_t offset = oldp->prev_foot;
+    size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+    char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
+                                  oldmmsize, newmmsize, flags);
+    if (cp != CMFAIL) {
+      mchunkptr newp = (mchunkptr)(cp + offset); /* same offset into the new mapping */
+      size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
+      newp->head = psize;
+      mark_inuse_foot(m, newp, psize);
+      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+
+      if (cp < m->least_addr)
+        m->least_addr = cp;
+      if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      check_mmapped_chunk(m, newp);
+      return newp;
+    }
+  }
+  return 0;
+}
+
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Install p, moved forward to an aligned position, as the top chunk of m; psize is the size before alignment */
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+  /* Bytes to skip so that the payload is aligned */
+  size_t skip = align_offset(chunk2mem(p));
+  mchunkptr aligned = (mchunkptr)((char*)p + skip);
+  size_t usable = psize - skip;
+
+  aligned->head = usable | PINUSE_BIT;
+  /* set size of fake trailing chunk holding overhead space only once */
+  chunk_plus_offset(aligned, usable)->head = TOP_FOOT_SIZE;
+  m->top = aligned;
+  m->topsize = usable;
+  m->trim_check = mparams.trim_threshold; /* reset on each update */
+}
+
+/* Turn every smallbin of a new (otherwise zeroed) mstate into an empty circular list */
+static void init_bins(mstate m) {
+  bindex_t slot = 0;
+  while (slot < NSMALLBINS) {
+    sbinptr self = smallbin_at(m, slot);
+    self->fd = self->bk = self; /* an empty bin links to itself both ways */
+    ++slot;
+  }
+}
+
+#if PROCEED_ON_ERROR
+
+/* Default corruption action: count the error, then make m forget about all memory */
+static void reset_on_error(mstate m) {
+  int bin;
+  ++malloc_corruption_error_count;
+  /* Segment record, top/dv chunks and both bin maps go back to zero */
+  m->seg.next = 0;
+  m->seg.size = 0;
+  m->seg.base = 0;
+  m->top = m->dv = 0;
+  m->dvsize = m->topsize = 0;
+  m->smallmap = m->treemap = 0;
+  for (bin = 0; bin < NTREEBINS; bin++)
+    *treebin_at(m, bin) = 0;
+  init_bins(m); /* smallbins become empty lists again */
+}
+#endif /* PROCEED_ON_ERROR */
+
+/* Carve an inuse chunk of nb bytes at newbase and merge the space left over with the first chunk of oldbase */
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+                           size_t nb) {
+  mchunkptr p = align_as_chunk(newbase);
+  mchunkptr oldfirst = align_as_chunk(oldbase);
+  size_t psize = (char*)oldfirst - (char*)p;
+  mchunkptr q = chunk_plus_offset(p, nb); /* remainder starts right after the allocated chunk */
+  size_t qsize = psize - nb;
+  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+  assert((char*)oldfirst > (char*)q);
+  assert(pinuse(oldfirst));
+  assert(qsize >= MIN_CHUNK_SIZE);
+
+  /* consolidate remainder with first chunk of old base */
+  if (oldfirst == m->top) { /* remainder extends top downwards */
+    size_t tsize = m->topsize += qsize;
+    m->top = q;
+    q->head = tsize | PINUSE_BIT;
+    check_top_chunk(m, q);
+  }
+  else if (oldfirst == m->dv) { /* remainder extends the dv chunk downwards */
+    size_t dsize = m->dvsize += qsize;
+    m->dv = q;
+    set_size_and_pinuse_of_free_chunk(q, dsize);
+  }
+  else {
+    if (!is_inuse(oldfirst)) { /* absorb a free first chunk before binning the remainder */
+      size_t nsize = chunksize(oldfirst);
+      unlink_chunk(m, oldfirst, nsize);
+      oldfirst = chunk_plus_offset(oldfirst, nsize);
+      qsize += nsize;
+    }
+    set_free_with_pinuse(q, qsize, oldfirst);
+    insert_chunk(m, q, qsize);
+    check_free_chunk(m, q);
+  }
+
+  check_malloced_chunk(m, chunk2mem(p), nb);
+  return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region; top moves to tbase */
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+  /* Determine locations and sizes of segment, fenceposts, old top */
+  char* old_top = (char*)m->top;
+  msegmentptr oldsp = segment_holding(m, old_top);
+  char* old_end = oldsp->base + oldsp->size;
+  size_t ssize = pad_request(sizeof(struct malloc_segment));
+  char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  size_t offset = align_offset(chunk2mem(rawsp));
+  char* asp = rawsp + offset;
+  char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; /* use old_top if too close */
+  mchunkptr sp = (mchunkptr)csp; /* chunk whose payload stores the saved record */
+  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+  mchunkptr tnext = chunk_plus_offset(sp, ssize);
+  mchunkptr p = tnext; /* first fencepost position */
+  int nfences = 0;
+
+  /* reset top to new space */
+  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+  /* Set up segment record */
+  assert(is_aligned(ss));
+  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+  *ss = m->seg; /* Push current record */
+  m->seg.base = tbase;
+  m->seg.size = tsize;
+  m->seg.sflags = mmapped;
+  m->seg.next = ss; /* new head record links to the saved copy */
+
+  /* Insert trailing fenceposts */
+  for (;;) {
+    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+    p->head = FENCEPOST_HEAD;
+    ++nfences;
+    if ((char*)(&(nextp->head)) < old_end) /* another head still fits */
+      p = nextp;
+    else
+      break;
+  }
+  assert(nfences >= 2);
+
+  /* Insert the rest of old top into a bin as an ordinary free chunk */
+  if (csp != old_top) {
+    mchunkptr q = (mchunkptr)old_top;
+    size_t psize = csp - old_top; /* space left in front of the record chunk */
+    mchunkptr tn = chunk_plus_offset(q, psize);
+    set_free_with_pinuse(q, psize, tn);
+    insert_chunk(m, q, psize);
+  }
+
+  check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP; returns user memory or 0 */
+static void* sys_alloc(mstate m, size_t nb) {
+  char* tbase = CMFAIL; /* base of newly obtained space; CMFAIL until one is found */
+  size_t tsize = 0; /* size of that space in bytes */
+  flag_t mmap_flag = 0;
+  size_t asize; /* allocation size */
+
+  ensure_initialization();
+
+  /* Directly map large chunks, but only if already initialized */
+  if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+    void* mem = mmap_alloc(m, nb);
+    if (mem != 0)
+      return mem;
+  }
+
+  asize = granularity_align(nb + SYS_ALLOC_PADDING);
+  if (asize <= nb)
+    return 0; /* wraparound */
+  if (m->footprint_limit != 0) { /* limit is checked only when nonzero */
+    size_t fp = m->footprint + asize;
+    if (fp <= m->footprint || fp > m->footprint_limit) /* wrapped or over limit */
+      return 0;
+  }
+
+  /*
+    Try getting memory in any of three ways (in most-preferred to
+    least-preferred order):
+    1. A call to MORECORE that can normally contiguously extend memory.
+       (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+       main space is mmapped or a previous contiguous call failed)
+    2. A call to MMAP new space (disabled if not HAVE_MMAP).
+       Note that under the default settings, if MORECORE is unable to
+       fulfill a request, and HAVE_MMAP is true, then mmap is
+       used as a noncontiguous system allocator. This is a useful backup
+       strategy for systems with holes in address spaces -- in this case
+       sbrk cannot contiguously expand the heap, but mmap may be able to
+       find space.
+    3. A call to MORECORE that cannot usually contiguously extend memory.
+       (disabled if not HAVE_MORECORE)
+
+   In all cases, we need to request enough bytes from system to ensure
+   we can malloc nb bytes upon success, so pad with enough space for
+   top_foot, plus alignment-pad to make sure we don't lose bytes if
+   not on boundary, and round this up to a granularity unit.
+  */
+
+  if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+    char* br = CMFAIL;
+    size_t ssize = asize; /* sbrk call size */
+    msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+    ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+    if (ss == 0) {  /* First time through or recovery */
+      char* base = (char*)CALL_MORECORE(0);
+      if (base != CMFAIL) {
+        size_t fp;
+        /* Adjust to end on a page boundary */
+        if (!is_page_aligned(base))
+          ssize += (page_align((size_t)base) - (size_t)base);
+        fp = m->footprint + ssize; /* recheck limits */
+        if (ssize > nb && ssize < HALF_MAX_SIZE_T &&
+            (m->footprint_limit == 0 ||
+             (fp > m->footprint && fp <= m->footprint_limit)) &&
+            (br = (char*)(CALL_MORECORE(ssize))) == base) {
+          tbase = base;
+          tsize = ssize;
+        }
+      }
+    }
+    else {
+      /* Subtract out existing available top space from MORECORE request. */
+      ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+      /* Use mem here only if it did continuously extend old space */
+      if (ssize < HALF_MAX_SIZE_T &&
+          (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) {
+        tbase = br;
+        tsize = ssize;
+      }
+    }
+
+    if (tbase == CMFAIL) {    /* Cope with partial failure */
+      if (br != CMFAIL) {    /* Try to use/extend the space we did get */
+        if (ssize < HALF_MAX_SIZE_T &&
+            ssize < nb + SYS_ALLOC_PADDING) {
+          size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize);
+          if (esize < HALF_MAX_SIZE_T) {
+            char* end = (char*)CALL_MORECORE(esize);
+            if (end != CMFAIL)
+              ssize += esize;
+            else {            /* Can't use; try to release */
+              (void) CALL_MORECORE(-ssize);
+              br = CMFAIL;
+            }
+          }
+        }
+      }
+      if (br != CMFAIL) {    /* Use the space we did get */
+        tbase = br;
+        tsize = ssize;
+      }
+      else
+        disable_contiguous(m); /* Don't try contiguous path in the future */
+    }
+
+    RELEASE_MALLOC_GLOBAL_LOCK();
+  }
+
+  if (HAVE_MMAP && tbase == CMFAIL) {  /* Try MMAP */
+    char* mp = (char*)(CALL_MMAP(asize));
+    if (mp != CMFAIL) {
+      tbase = mp;
+      tsize = asize;
+      mmap_flag = USE_MMAP_BIT;
+    }
+  }
+
+  if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+    if (asize < HALF_MAX_SIZE_T) {
+      char* br = CMFAIL;
+      char* end = CMFAIL;
+      ACQUIRE_MALLOC_GLOBAL_LOCK();
+      br = (char*)(CALL_MORECORE(asize));
+      end = (char*)(CALL_MORECORE(0));
+      RELEASE_MALLOC_GLOBAL_LOCK();
+      if (br != CMFAIL && end != CMFAIL && br < end) {
+        size_t ssize = end - br; /* size actually spanned by the two calls */
+        if (ssize > nb + TOP_FOOT_SIZE) {
+          tbase = br;
+          tsize = ssize;
+        }
+      }
+    }
+  }
+
+  if (tbase != CMFAIL) {
+
+    if ((m->footprint += tsize) > m->max_footprint)
+      m->max_footprint = m->footprint;
+
+    if (!is_initialized(m)) { /* first-time initialization */
+      if (m->least_addr == 0 || tbase < m->least_addr)
+        m->least_addr = tbase;
+      m->seg.base = tbase;
+      m->seg.size = tsize;
+      m->seg.sflags = mmap_flag;
+      m->magic = mparams.magic;
+      m->release_checks = MAX_RELEASE_CHECK_RATE;
+      init_bins(m);
+#if !ONLY_MSPACES
+      if (is_global(m))
+        init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+      else
+#endif
+      {
+        /* Offset top by embedded malloc_state */
+        mchunkptr mn = next_chunk(mem2chunk(m));
+        init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+      }
+    }
+
+    else {
+      /* Try to merge with an existing segment */
+      msegmentptr sp = &m->seg;
+      /* Only consider most recent segment if traversal suppressed */
+      while (sp != 0 && tbase != sp->base + sp->size)
+        sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+      if (sp != 0 &&
+          !is_extern_segment(sp) &&
+          (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+          segment_holds(sp, m->top)) { /* append */
+        sp->size += tsize;
+        init_top(m, m->top, m->topsize + tsize);
+      }
+      else {
+        if (tbase < m->least_addr)
+          m->least_addr = tbase;
+        sp = &m->seg;
+        while (sp != 0 && sp->base != tbase + tsize) /* look for a segment starting at our end */
+          sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+        if (sp != 0 &&
+            !is_extern_segment(sp) &&
+            (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+          char* oldbase = sp->base;
+          sp->base = tbase;
+          sp->size += tsize;
+          return prepend_alloc(m, tbase, oldbase, nb); /* new space ends where sp began */
+        }
+        else
+          add_segment(m, tbase, tsize, mmap_flag);
+      }
+    }
+
+    if (nb < m->topsize) { /* Allocate from new or extended top space */
+      size_t rsize = m->topsize -= nb;
+      mchunkptr p = m->top;
+      mchunkptr r = m->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+      check_top_chunk(m, m->top);
+      check_malloced_chunk(m, chunk2mem(p), nb);
+      return chunk2mem(p);
+    }
+  }
+
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+/* -----------------------  system deallocation -------------------------- */
+
+/* Unmap and unlink mmapped segments holding no used chunks; returns bytes released */
+static size_t release_unused_segments(mstate m) {
+  size_t released = 0;
+  int nsegs = 0;
+  msegmentptr pred = &m->seg;
+  msegmentptr sp = pred->next; /* scan starts after the record embedded in m */
+  while (sp != 0) {
+    char* base = sp->base;
+    size_t size = sp->size;
+    msegmentptr next = sp->next; /* saved now: sp may be unmapped below */
+    ++nsegs;
+    if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+      mchunkptr p = align_as_chunk(base);
+      size_t psize = chunksize(p);
+      /* Can unmap if first chunk holds entire segment and not pinned */
+      if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+        tchunkptr tp = (tchunkptr)p;
+        assert(segment_holds(sp, (char*)sp)); /* record lies within its own segment */
+        if (p == m->dv) {
+          m->dv = 0;
+          m->dvsize = 0;
+        }
+        else {
+          unlink_large_chunk(m, tp);
+        }
+        if (CALL_MUNMAP(base, size) == 0) {
+          released += size;
+          m->footprint -= size;
+          /* unlink obsoleted record */
+          sp = pred;
+          sp->next = next;
+        }
+        else { /* back out if cannot unmap */
+          insert_large_chunk(m, tp, psize);
+        }
+      }
+    }
+    if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
+      break;
+    pred = sp;
+    sp = next;
+  }
+  /* Reset check counter to max(nsegs, MAX_RELEASE_CHECK_RATE) */
+  m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)?
+                       (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE);
+  return released;
+}
+
+static int sys_trim(mstate m, size_t pad) { /* shrink top, keeping pad bytes */
+  size_t released = 0; /* bytes given back to the system so far */
+  ensure_initialization();
+  if (pad < MAX_REQUEST && is_initialized(m)) {
+    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+    if (m->topsize > pad) {
+      /* Shrink top space in granularity-size units, keeping at least one */
+      size_t unit = mparams.granularity;
+      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+                      SIZE_T_ONE) * unit;
+      msegmentptr sp = segment_holding(m, (char*)m->top);
+
+      if (!is_extern_segment(sp)) {
+        if (is_mmapped_segment(sp)) {
+          if (HAVE_MMAP &&
+              sp->size >= extra &&
+              !has_segment_link(m, sp)) { /* can't shrink if pinned */
+            size_t newsize = sp->size - extra;
+            (void)newsize; /* placate people compiling -Wunused-variable */
+            /* Prefer mremap, fall back to munmap */
+            if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+                (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+              released = extra;
+            }
+          }
+        }
+        else if (HAVE_MORECORE) {
+          if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
+            extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+          ACQUIRE_MALLOC_GLOBAL_LOCK();
+          {
+            /* Make sure end of memory is where we last set it. */
+            char* old_br = (char*)(CALL_MORECORE(0));
+            if (old_br == sp->base + sp->size) {
+              char* rel_br = (char*)(CALL_MORECORE(-extra));
+              char* new_br = (char*)(CALL_MORECORE(0));
+              if (rel_br != CMFAIL && new_br < old_br)
+                released = old_br - new_br; /* measured from the break, not assumed */
+            }
+          }
+          RELEASE_MALLOC_GLOBAL_LOCK();
+        }
+      }
+
+      if (released != 0) { /* shrink segment, footprint and top to match */
+        sp->size -= released;
+        m->footprint -= released;
+        init_top(m, m->top, m->topsize - released);
+        check_top_chunk(m, m->top);
+      }
+    }
+
+    /* Unmap any unused mmapped segments */
+    if (HAVE_MMAP)
+      released += release_unused_segments(m);
+
+    /* On failure, disable autotrim to avoid repeated failed future calls */
+    if (released == 0 && m->topsize > m->trim_check)
+      m->trim_check = MAX_SIZE_T;
+  }
+
+  return (released != 0)? 1 : 0; /* 1 if any memory was released, else 0 */
+}
+
+/* Consolidate and bin a chunk. Differs from exported versions
+   of free mainly in that the chunk need not be marked as inuse.
+*/
+static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
+  mchunkptr next = chunk_plus_offset(p, psize);
+  if (!pinuse(p)) {
+    mchunkptr prev;
+    size_t prevsize = p->prev_foot;
+    if (is_mmapped(p)) { /* unmap instead of binning */
+      psize += prevsize + MMAP_FOOT_PAD;
+      if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+        m->footprint -= psize;
+      return;
+    }
+    prev = chunk_minus_offset(p, prevsize);
+    psize += prevsize;
+    p = prev; /* from here on p is the merged chunk */
+    if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */
+      if (p != m->dv) {
+        unlink_chunk(m, p, prevsize);
+      }
+      else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* prev is dv: just grow it */
+        m->dvsize = psize;
+        set_free_with_pinuse(p, psize, next);
+        return;
+      }
+    }
+    else {
+      CORRUPTION_ERROR_ACTION(m);
+      return;
+    }
+  }
+  if (RTCHECK(ok_address(m, next))) {
+    if (!cinuse(next)) {  /* consolidate forward */
+      if (next == m->top) { /* merge into top; p becomes the new top */
+        size_t tsize = m->topsize += psize;
+        m->top = p;
+        p->head = tsize | PINUSE_BIT;
+        if (p == m->dv) {
+          m->dv = 0;
+          m->dvsize = 0;
+        }
+        return;
+      }
+      else if (next == m->dv) { /* merge into dv; p becomes the new dv */
+        size_t dsize = m->dvsize += psize;
+        m->dv = p;
+        set_size_and_pinuse_of_free_chunk(p, dsize);
+        return;
+      }
+      else {
+        size_t nsize = chunksize(next);
+        psize += nsize;
+        unlink_chunk(m, next, nsize);
+        set_size_and_pinuse_of_free_chunk(p, psize);
+        if (p == m->dv) { /* dv is not kept in a bin, so skip insert_chunk */
+          m->dvsize = psize;
+          return;
+        }
+      }
+    }
+    else {
+      set_free_with_pinuse(p, psize, next);
+    }
+    insert_chunk(m, p, psize);
+  }
+  else {
+    CORRUPTION_ERROR_ACTION(m);
+  }
+}
+
+/* ---------------------------- malloc --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin, or return 0 */
+static void* tmalloc_large(mstate m, size_t nb) {
+  tchunkptr v = 0; /* best candidate found so far */
+  size_t rsize = -nb; /* Unsigned negation */
+  tchunkptr t;
+  bindex_t idx;
+  compute_tree_index(nb, idx);
+  if ((t = *treebin_at(m, idx)) != 0) {
+    /* Traverse tree for this bin looking for node with size == nb */
+    size_t sizebits = nb << leftshift_for_tree_index(idx);
+    tchunkptr rst = 0;  /* The deepest untaken right subtree */
+    for (;;) {
+      tchunkptr rt;
+      size_t trem = chunksize(t) - nb; /* wraps to a huge value when t is too small */
+      if (trem < rsize) {
+        v = t;
+        if ((rsize = trem) == 0)
+          break;
+      }
+      rt = t->child[1];
+      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+      if (rt != 0 && rt != t)
+        rst = rt;
+      if (t == 0) {
+        t = rst; /* set t to least subtree holding sizes > nb */
+        break;
+      }
+      sizebits <<= 1;
+    }
+  }
+  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+    if (leftbits != 0) {
+      bindex_t i;
+      binmap_t leastbit = least_bit(leftbits);
+      compute_bit2idx(leastbit, i);
+      t = *treebin_at(m, i);
+    }
+  }
+
+  while (t != 0) { /* find smallest of tree or subtree */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+    t = leftmost_child(t);
+  }
+
+  /*  If dv is a better fit, return 0 so malloc will use it */
+  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+    if (RTCHECK(ok_address(m, v))) { /* split */
+      mchunkptr r = chunk_plus_offset(v, nb);
+      assert(chunksize(v) == rsize + nb);
+      if (RTCHECK(ok_next(v, r))) {
+        unlink_large_chunk(m, v);
+        if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone */
+          set_inuse_and_pinuse(m, v, (rsize + nb));
+        else {
+          set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+          set_size_and_pinuse_of_free_chunk(r, rsize);
+          insert_chunk(m, r, rsize);
+        }
+        return chunk2mem(v);
+      }
+    }
+    CORRUPTION_ERROR_ACTION(m);
+  }
+  return 0; /* no tree chunk was used */
+}
+
+/* allocate a small request from the best fitting chunk in the lowest nonempty treebin */
+static void* tmalloc_small(mstate m, size_t nb) {
+  tchunkptr t, v;
+  size_t rsize;
+  bindex_t i;
+  binmap_t leastbit = least_bit(m->treemap); /* dlmalloc calls this only when treemap != 0 */
+  compute_bit2idx(leastbit, i);
+  v = t = *treebin_at(m, i);
+  rsize = chunksize(t) - nb;
+
+  while ((t = leftmost_child(t)) != 0) { /* keep the smallest remainder seen */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+  }
+
+  if (RTCHECK(ok_address(m, v))) {
+    mchunkptr r = chunk_plus_offset(v, nb);
+    assert(chunksize(v) == rsize + nb);
+    if (RTCHECK(ok_next(v, r))) {
+      unlink_large_chunk(m, v);
+      if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone */
+        set_inuse_and_pinuse(m, v, (rsize + nb));
+      else {
+        set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        replace_dv(m, r, rsize); /* remainder becomes the new dv */
+      }
+      return chunk2mem(v);
+    }
+  }
+
+  CORRUPTION_ERROR_ACTION(m);
+  return 0;
+}
+
+#if !ONLY_MSPACES
+
+void* dlmalloc(size_t bytes) {
+  /*
+     Basic algorithm:
+     If a small request (< 256 bytes minus per-chunk overhead):
+       1. If one exists, use a remainderless chunk in associated smallbin.
+          (Remainderless means that there are too few excess bytes to
+          represent as a chunk.)
+       2. If it is big enough, use the dv chunk, which is normally the
+          chunk adjacent to the one used for the most recent small request.
+       3. If one exists, split the smallest available chunk in a bin,
+          saving remainder in dv.
+       4. If it is big enough, use the top chunk.
+       5. If available, get memory from system and use it
+     Otherwise, for a large request:
+       1. Find the smallest available binned chunk that fits, and use it
+          if it is better fitting than dv chunk, splitting if necessary.
+       2. If better fitting than any binned chunk, use the dv chunk.
+       3. If it is big enough, use the top chunk.
+       4. If request size >= mmap threshold, try to directly mmap this chunk.
+       5. If available, get memory from system and use it
+
+     The ugly gotos here ensure that postaction occurs along all paths.
+  */
+
+#if USE_LOCKS
+  ensure_initialization(); /* initialize in sys_alloc if not using locks */
+#endif
+
+  if (!PREACTION(gm)) {
+    void* mem;
+    size_t nb; /* padded request size */
+    if (bytes <= MAX_SMALL_REQUEST) {
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = gm->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
+        b = smallbin_at(gm, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(gm, b, p, idx);
+        set_inuse_and_pinuse(gm, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+      }
+
+      else if (nb > gm->dvsize) {
+        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t rsize;
+          bindex_t i;
+          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(gm, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(gm, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if 4byte sizes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(gm, p, small_index2size(i));
+          else {
+            set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(gm, r, rsize); /* remainder becomes the new dv */
+          }
+          mem = chunk2mem(p);
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+        }
+
+        else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+        }
+      }
+    }
+    else if (bytes >= MAX_REQUEST)
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+    else {
+      nb = pad_request(bytes);
+      if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+      }
+    }
+
+    if (nb <= gm->dvsize) { /* Use dv */
+      size_t rsize = gm->dvsize - nb;
+      mchunkptr p = gm->dv;
+      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+        mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+        gm->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+      }
+      else { /* exhaust dv */
+        size_t dvs = gm->dvsize;
+        gm->dvsize = 0;
+        gm->dv = 0;
+        set_inuse_and_pinuse(gm, p, dvs);
+      }
+      mem = chunk2mem(p);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+    }
+
+    else if (nb < gm->topsize) { /* Split top */
+      size_t rsize = gm->topsize -= nb;
+      mchunkptr p = gm->top;
+      mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(gm, gm->top);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+    }
+
+    mem = sys_alloc(gm, nb); /* nothing on hand fits; ask the system */
+
+  postaction:
+    POSTACTION(gm);
+    return mem;
+  }
+
+  return 0; /* reached only when PREACTION(gm) is nonzero */
+}
+
+/* ---------------------------- free --------------------------- */
+
+void dlfree(void* mem) {
+  /*
+     Consolidate freed chunks with preceding or succeeding bordering
+     free chunks, if they exist, and then place in a bin.  Intermixed
+     with special cases for top, dv, mmapped chunks, and usage errors.
+  */
+
+  if (mem != 0) { /* a null pointer is ignored */
+    mchunkptr p  = mem2chunk(mem);
+#if FOOTERS
+    mstate fm = get_mstate_for(p);
+    if (!ok_magic(fm)) {
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+    }
+#else /* FOOTERS */
+#define fm gm
+#endif /* FOOTERS */
+    if (!PREACTION(fm)) {
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+        size_t psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) { /* unmap instead of binning */
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+          }
+          else {
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev; /* from here on p is the merged chunk */
+            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+              if (p != fm->dv) {
+                unlink_chunk(fm, p, prevsize);
+              }
+              else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* prev is dv: just grow it */
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+              }
+            }
+            else
+              goto erroraction;
+          }
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+          if (!cinuse(next)) {  /* consolidate forward */
+            if (next == fm->top) { /* merge into top; p becomes the new top */
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+                fm->dv = 0;
+                fm->dvsize = 0;
+              }
+              if (should_trim(fm, tsize))
+                sys_trim(fm, 0);
+              goto postaction;
+            }
+            else if (next == fm->dv) { /* merge into dv; p becomes the new dv */
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+            }
+            else {
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) { /* dv is not kept in a bin, so skip the insert */
+                fm->dvsize = psize;
+                goto postaction;
+              }
+            }
+          }
+          else
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) {
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+          }
+          else {
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0) /* periodic sweep for unused segments */
+              release_unused_segments(fm);
+          }
+          goto postaction;
+        }
+      }
+    erroraction:
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+    }
+  }
+#if !FOOTERS
+#undef fm
+#endif /* FOOTERS */
+}
+
+void* dlcalloc(size_t n_elements, size_t elem_size) {
+  void* mem;
+  size_t req = 0; /* stays 0 when n_elements is 0 */
+  if (n_elements != 0) {
+    req = n_elements * elem_size;
+    if (((n_elements | elem_size) & ~(size_t)0xffff) && /* divide only if an operand exceeds 16 bits */
+        (req / n_elements != elem_size))
+      req = MAX_SIZE_T; /* force downstream failure on overflow */
+  }
+  mem = dlmalloc(req);
+  if (mem != 0 && calloc_must_clear(mem2chunk(mem))) /* zero only when the chunk requires it */
+    memset(mem, 0, req);
+  return mem;
+}
+
+#endif /* !ONLY_MSPACES */
+
+/* ------------ Internal support for realloc, memalign, etc -------------- */
+
+/* Try to realloc p to nb bytes; only in-place unless can_move true. Returns chunk or 0 */
+static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
+                                   int can_move) {
+  mchunkptr newp = 0; /* stays 0 if no case below can satisfy the request */
+  size_t oldsize = chunksize(p);
+  mchunkptr next = chunk_plus_offset(p, oldsize);
+  if (RTCHECK(ok_address(m, p) && ok_inuse(p) &&
+              ok_next(p, next) && ok_pinuse(next))) {
+    if (is_mmapped(p)) {
+      newp = mmap_resize(m, p, nb, can_move); /* the only path that uses can_move */
+    }
+    else if (oldsize >= nb) {             /* already big enough */
+      size_t rsize = oldsize - nb;
+      if (rsize >= MIN_CHUNK_SIZE) {      /* split off remainder */
+        mchunkptr r = chunk_plus_offset(p, nb);
+        set_inuse(m, p, nb);
+        set_inuse(m, r, rsize);
+        dispose_chunk(m, r, rsize);
+      }
+      newp = p;
+    }
+    else if (next == m->top) {  /* extend into top */
+      if (oldsize + m->topsize > nb) { /* strict: top keeps a nonzero size */
+        size_t newsize = oldsize + m->topsize;
+        size_t newtopsize = newsize - nb;
+        mchunkptr newtop = chunk_plus_offset(p, nb);
+        set_inuse(m, p, nb);
+        newtop->head = newtopsize |PINUSE_BIT;
+        m->top = newtop;
+        m->topsize = newtopsize;
+        newp = p;
+      }
+    }
+    else if (next == m->dv) { /* extend into dv */
+      size_t dvs = m->dvsize;
+      if (oldsize + dvs >= nb) {
+        size_t dsize = oldsize + dvs - nb; /* what would remain of dv */
+        if (dsize >= MIN_CHUNK_SIZE) {
+          mchunkptr r = chunk_plus_offset(p, nb);
+          mchunkptr n = chunk_plus_offset(r, dsize);
+          set_inuse(m, p, nb);
+          set_size_and_pinuse_of_free_chunk(r, dsize);
+          clear_pinuse(n);
+          m->dvsize = dsize;
+          m->dv = r;
+        }
+        else { /* exhaust dv */
+          size_t newsize = oldsize + dvs;
+          set_inuse(m, p, newsize);
+          m->dvsize = 0;
+          m->dv = 0;
+        }
+        newp = p;
+      }
+    }
+    else if (!cinuse(next)) { /* extend into next free chunk */
+      size_t nextsize = chunksize(next);
+      if (oldsize + nextsize >= nb) {
+        size_t rsize = oldsize + nextsize - nb;
+        unlink_chunk(m, next, nextsize);
+        if (rsize < MIN_CHUNK_SIZE) { /* too small to split: keep it all */
+          size_t newsize = oldsize + nextsize;
+          set_inuse(m, p, newsize);
+        }
+        else {
+          mchunkptr r = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, r, rsize);
+          dispose_chunk(m, r, rsize);
+        }
+        newp = p;
+      }
+    }
+  }
+  else {
+    USAGE_ERROR_ACTION(m, chunk2mem(p));
+  }
+  return newp;
+}
+
+static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
+  void* mem = 0; /* result; stays 0 if the request is rejected or malloc fails */
+  if (alignment <  MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
+    alignment = MIN_CHUNK_SIZE;
+  if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
+    size_t a = MALLOC_ALIGNMENT << 1;
+    while (a < alignment) a <<= 1; /* round up to the next power of 2 */
+    alignment = a;
+  }
+  if (bytes >= MAX_REQUEST - alignment) {
+    if (m != 0)  { /* Test isn't needed but avoids compiler warning */
+      MALLOC_FAILURE_ACTION;
+    }
+  }
+  else {
+    size_t nb = request2size(bytes);
+    size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; /* slack to realign */
+    mem = internal_malloc(m, req);
+    if (mem != 0) {
+      mchunkptr p = mem2chunk(mem);
+      if (PREACTION(m))
+        return 0; /* NOTE(review): mem is not released on this path - confirm intended */
+      if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */
+        /*
+          Find an aligned spot inside chunk.  Since we need to give
+          back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+          the first calculation places us at a spot with less than
+          MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+          We've allocated enough total room so that this is always
+          possible.
+        */
+        char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment -
+                                                       SIZE_T_ONE)) &
+                                             -alignment));
+        char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
+          br : br+alignment;
+        mchunkptr newp = (mchunkptr)pos;
+        size_t leadsize = pos - (char*)(p); /* bytes in front of the aligned chunk */
+        size_t newsize = chunksize(p) - leadsize;
+
+        if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
+          newp->prev_foot = p->prev_foot + leadsize;
+          newp->head = newsize;
+        }
+        else { /* Otherwise, give back leader, use the rest */
+          set_inuse(m, newp, newsize);
+          set_inuse(m, p, leadsize);
+          dispose_chunk(m, p, leadsize);
+        }
+        p = newp;
+      }
+
+      /* Give back spare room at the end */
+      if (!is_mmapped(p)) {
+        size_t size = chunksize(p);
+        if (size > nb + MIN_CHUNK_SIZE) {
+          size_t remainder_size = size - nb;
+          mchunkptr remainder = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, remainder, remainder_size);
+          dispose_chunk(m, remainder, remainder_size);
+        }
+      }
+
+      mem = chunk2mem(p);
+      assert (chunksize(p) >= nb);
+      assert(((size_t)mem & (alignment - 1)) == 0);
+      check_inuse_chunk(m, p);
+      POSTACTION(m);
+    }
+  }
+  return mem;
+}
+
+/*
+  Common support for the independent_X routines: one aggregate malloc
+    is split into n_elements chunks (plus the pointer array if chunks == 0).
+  The opts arg has:
+    bit 0 set if all elements are same size (using sizes[0])
+    bit 1 set if elements should be zeroed
+*/
+static void** ialloc(mstate m,
+                     size_t n_elements,
+                     size_t* sizes,
+                     int opts,
+                     void* chunks[]) {
+  /* Returns chunks when supplied, else an array carved from the same malloc; 0 on failure. */
+  size_t    element_size;   /* chunksize of each element, if all same */
+  size_t    contents_size;  /* total size of elements */
+  size_t    array_size;     /* request size of pointer array */
+  void*     mem;            /* malloced aggregate space */
+  mchunkptr p;              /* corresponding chunk */
+  size_t    remainder_size; /* remaining bytes while splitting */
+  void**    marray;         /* either "chunks" or malloced ptr array */
+  mchunkptr array_chunk;    /* chunk for malloced ptr array */
+  flag_t    was_enabled;    /* to disable mmap */
+  size_t    size;
+  size_t    i;
+
+  ensure_initialization();
+  /* compute array length, if needed */
+  if (chunks != 0) {
+    if (n_elements == 0)
+      return chunks; /* nothing to do */
+    marray = chunks;
+    array_size = 0;
+  }
+  else {
+    /* if empty req, must still return chunk representing empty array */
+    if (n_elements == 0)
+      return (void**)internal_malloc(m, 0);
+    marray = 0;
+    array_size = request2size(n_elements * (sizeof(void*)));
+  }
+
+  /* compute total element size */
+  if (opts & 0x1) { /* all-same-size */
+    element_size = request2size(*sizes);
+    contents_size = n_elements * element_size;
+  }
+  else { /* add up all the sizes */
+    element_size = 0;
+    contents_size = 0;
+    for (i = 0; i != n_elements; ++i)
+      contents_size += request2size(sizes[i]);
+  }
+  /* NOTE(review): no overflow check on the multiplications/sums above -- confirm inputs are bounded */
+  size = contents_size + array_size;
+
+  /*
+     Allocate the aggregate chunk.  First disable direct-mmapping so
+     malloc won't use it, since we would not be able to later
+     free/realloc space internal to a segregated mmap region.
+  */
+  was_enabled = use_mmap(m);
+  disable_mmap(m);
+  mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+  if (was_enabled)
+    enable_mmap(m);
+  if (mem == 0)
+    return 0;
+
+  if (PREACTION(m)) return 0; /* NOTE(review): mem is not released on this path */
+  p = mem2chunk(mem);
+  remainder_size = chunksize(p);
+
+  assert(!is_mmapped(p));
+
+  if (opts & 0x2) {       /* optionally clear the elements */
+    memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
+  }
+
+  /* If not provided, allocate the pointer array as final part of chunk */
+  if (marray == 0) {
+    size_t  array_chunk_size;
+    array_chunk = chunk_plus_offset(p, contents_size);
+    array_chunk_size = remainder_size - contents_size;
+    marray = (void**) (chunk2mem(array_chunk));
+    set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+    remainder_size = contents_size;
+  }
+
+  /* split out elements */
+  for (i = 0; ; ++i) {
+    marray[i] = chunk2mem(p);
+    if (i != n_elements-1) {
+      if (element_size != 0)
+        size = element_size;
+      else
+        size = request2size(sizes[i]);
+      remainder_size -= size;
+      set_size_and_pinuse_of_inuse_chunk(m, p, size);
+      p = chunk_plus_offset(p, size);
+    }
+    else { /* the final element absorbs any overallocation slop */
+      set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+      break;
+    }
+  }
+
+#if DEBUG
+  if (marray != chunks) {
+    /* final element must have exactly exhausted chunk */
+    if (element_size != 0) {
+      assert(remainder_size == element_size);
+    }
+    else {
+      assert(remainder_size == request2size(sizes[i]));
+    }
+    check_inuse_chunk(m, mem2chunk(marray));
+  }
+  for (i = 0; i != n_elements; ++i)
+    check_inuse_chunk(m, mem2chunk(marray[i]));
+
+#endif /* DEBUG */
+
+  POSTACTION(m);
+  return marray;
+}
+
+/* Try to free all pointers in the given array, zeroing each slot handled.
+   Note: this could be made faster, by delaying consolidation,
+   at the price of disabling some user integrity checks. We
+   still optimize some consolidations by combining adjacent
+   chunks before freeing, which will occur often if allocated
+   with ialloc or the array is sorted.
+*/
+static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) {
+  size_t unfreed = 0; /* return value; only incremented in FOOTERS builds (foreign mstate) */
+  if (!PREACTION(m)) {
+    void** a;
+    void** fence = &(array[nelem]); /* one past the last slot */
+    for (a = array; a != fence; ++a) {
+      void* mem = *a;
+      if (mem != 0) { /* null slots are skipped */
+        mchunkptr p = mem2chunk(mem);
+        size_t psize = chunksize(p);
+#if FOOTERS
+        if (get_mstate_for(p) != m) {
+          ++unfreed;
+          continue;
+        }
+#endif
+        check_inuse_chunk(m, p);
+        *a = 0;
+        if (RTCHECK(ok_address(m, p) && ok_inuse(p))) {
+          void ** b = a + 1; /* try to merge with next chunk */
+          mchunkptr next = next_chunk(p);
+          if (b != fence && *b == chunk2mem(next)) {
+            size_t newsize = chunksize(next) + psize;
+            set_inuse(m, p, newsize);
+            *b = chunk2mem(p); /* next slot now names the merged chunk; freed on the next pass */
+          }
+          else
+            dispose_chunk(m, p, psize);
+        }
+        else {
+          CORRUPTION_ERROR_ACTION(m);
+          break; /* remaining slots are left untouched */
+        }
+      }
+    }
+    if (should_trim(m, m->topsize))
+      sys_trim(m, 0);
+    POSTACTION(m);
+  }
+  return unfreed;
+}
+
+/* Traversal */
+#if MALLOC_INSPECT_ALL
+static void internal_inspect_all(mstate m, /* walks m's segments, reporting chunks to handler */
+                                 void(*handler)(void *start,
+                                                void *end,
+                                                size_t used_bytes,
+                                                void* callback_arg),
+                                 void* arg) { /* arg is passed through as callback_arg */
+  if (is_initialized(m)) { /* otherwise nothing is reported */
+    mchunkptr top = m->top;
+    msegmentptr s;
+    for (s = &m->seg; s != 0; s = s->next) {
+      mchunkptr q = align_as_chunk(s->base);
+      while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) {
+        mchunkptr next = next_chunk(q);
+        size_t sz = chunksize(q);
+        size_t used; /* used_bytes reported: payload size if in use, else 0 */
+        void* start;
+        if (is_inuse(q)) {
+          used = sz - CHUNK_OVERHEAD; /* must not be mmapped */
+          start = chunk2mem(q);
+        }
+        else {
+          used = 0;
+          if (is_small(sz)) {     /* offset by possible bookkeeping */
+            start = (void*)((char*)q + sizeof(struct malloc_chunk));
+          }
+          else {
+            start = (void*)((char*)q + sizeof(struct malloc_tree_chunk));
+          }
+        }
+        if (start < (void*)next)  /* skip if all space is bookkeeping */
+          handler(start, next, used, arg);
+        if (q == top) /* the top chunk ends the walk of this segment */
+          break;
+        q = next;
+      }
+    }
+  }
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+/* ------------------ Exported realloc, memalign, etc -------------------- */
+
+#if !ONLY_MSPACES
+
+void* dlrealloc(void* oldmem, size_t bytes) { /* resize oldmem to bytes; returns the block or 0 */
+  void* mem = 0;
+  if (oldmem == 0) { /* null oldmem: same as dlmalloc(bytes) */
+    mem = dlmalloc(bytes);
+  }
+  else if (bytes >= MAX_REQUEST) { /* too large: fail without touching oldmem */
+    MALLOC_FAILURE_ACTION;
+  }
+#ifdef REALLOC_ZERO_BYTES_FREES
+  else if (bytes == 0) { /* frees oldmem and returns 0 */
+    dlfree(oldmem);
+  }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+  else {
+    size_t nb = request2size(bytes);
+    mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+    mstate m = gm;
+#else /* FOOTERS */
+    mstate m = get_mstate_for(oldp); /* owning state is looked up from the chunk */
+    if (!ok_magic(m)) {
+      USAGE_ERROR_ACTION(m, oldmem);
+      return 0;
+    }
+#endif /* FOOTERS */
+    if (!PREACTION(m)) {
+      mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); /* last arg is 0 in dlrealloc_in_place */
+      POSTACTION(m); /* released before the malloc/copy/free fallback below */
+      if (newp != 0) {
+        check_inuse_chunk(m, newp);
+        mem = chunk2mem(newp);
+      }
+      else { /* fallback: new block, copy, free the old one */
+        mem = internal_malloc(m, bytes);
+        if (mem != 0) {
+          size_t oc = chunksize(oldp) - overhead_for(oldp); /* old chunk size less its overhead */
+          memcpy(mem, oldmem, (oc < bytes)? oc : bytes); /* copy the smaller of the two */
+          internal_free(m, oldmem);
+        }
+      }
+    }
+  }
+  return mem;
+}
+
+void* dlrealloc_in_place(void* oldmem, size_t bytes) { /* returns oldmem on success, else 0 */
+  void* mem = 0;
+  if (oldmem != 0) { /* null oldmem yields 0 */
+    if (bytes >= MAX_REQUEST) {
+      MALLOC_FAILURE_ACTION;
+    }
+    else {
+      size_t nb = request2size(bytes);
+      mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+      mstate m = gm;
+#else /* FOOTERS */
+      mstate m = get_mstate_for(oldp); /* owning state is looked up from the chunk */
+      if (!ok_magic(m)) {
+        USAGE_ERROR_ACTION(m, oldmem);
+        return 0;
+      }
+#endif /* FOOTERS */
+      if (!PREACTION(m)) {
+        mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); /* last arg is 1 in dlrealloc */
+        POSTACTION(m);
+        if (newp == oldp) { /* only an unmoved chunk counts as success */
+          check_inuse_chunk(m, newp);
+          mem = oldmem;
+        }
+      }
+    }
+  }
+  return mem;
+}
+
+void* dlmemalign(size_t alignment, size_t bytes) {
+  /* Alignments above MALLOC_ALIGNMENT go to internal_memalign on gm;
+     anything else is handed to plain dlmalloc. */
+  return (alignment > MALLOC_ALIGNMENT)? internal_memalign(gm, alignment, bytes)
+                                       : dlmalloc(bytes);
+}
+
+int dlposix_memalign(void** pp, size_t alignment, size_t bytes) {
+  void* block = 0;
+  if (alignment != MALLOC_ALIGNMENT) {
+    /* alignment must be sizeof(void*) times a nonzero power of two */
+    size_t words = alignment / sizeof(void*);
+    if ((alignment % sizeof(void*)) != 0 || words == 0 ||
+        (words & (words-SIZE_T_ONE)) != 0)
+      return EINVAL;
+    if (bytes <= MAX_REQUEST - alignment) {
+      /* internal_memalign is never asked for less than MIN_CHUNK_SIZE alignment */
+      size_t eff = (alignment <  MIN_CHUNK_SIZE)? MIN_CHUNK_SIZE : alignment;
+      block = internal_memalign(gm, eff, bytes);
+    }
+  }
+  else
+    block = dlmalloc(bytes);
+  /* *pp is written only on success */
+  if (block == 0)
+    return ENOMEM;
+  *pp = block;
+  return 0;
+}
+
+void* dlvalloc(size_t bytes) {
+  /* ensure_initialization() runs before mparams.page_size is read */
+  ensure_initialization();
+  /* the page size itself is used as the alignment */
+  return dlmemalign(mparams.page_size, bytes);
+}
+
+void* dlpvalloc(size_t bytes) { /* page-aligned block whose size is rounded up to whole pages */
+  size_t mask; /* page size minus one; a size that would wrap when rounded is sent as MAX_SIZE_T */
+  ensure_initialization();
+  mask = mparams.page_size - SIZE_T_ONE;
+  return dlmemalign(mask + SIZE_T_ONE, (bytes > MAX_SIZE_T - mask)? MAX_SIZE_T : ((bytes + mask) & ~mask));
+}
+
+void** dlindependent_calloc(size_t n_elements, size_t elem_size,
+                            void* chunks[]) {
+  size_t sz = elem_size; /* serves as 1-element array */
+  return ialloc(gm, n_elements, &sz, 3, chunks); /* opts 3: same-size elements, zeroed */
+}
+
+void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
+                              void* chunks[]) {
+  return ialloc(gm, n_elements, sizes, 0, chunks); /* opts 0: per-element sizes, not zeroed */
+}
+
+size_t dlbulk_free(void* array[], size_t nelem) {
+  return internal_bulk_free(gm, array, nelem); /* global-state form; passes back the unfreed count */
+}
+
+#if MALLOC_INSPECT_ALL
+void dlmalloc_inspect_all(void(*handler)(void *start,
+                                         void *end,
+                                         size_t used_bytes,
+                                         void* callback_arg),
+                          void* arg) {
+  ensure_initialization();
+  if (PREACTION(gm))
+    return; /* PREACTION reported failure: nothing is inspected */
+  internal_inspect_all(gm, handler, arg);
+  POSTACTION(gm);
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+int dlmalloc_trim(size_t pad) {
+  int released; /* whatever sys_trim reports for the global state */
+  ensure_initialization();
+  if (PREACTION(gm))
+    return 0;
+  released = sys_trim(gm, pad);
+  POSTACTION(gm);
+  return released;
+}
+
+size_t dlmalloc_footprint(void) {
+  return gm->footprint; /* footprint field of the global state, read without PREACTION */
+}
+
+size_t dlmalloc_max_footprint(void) {
+  return gm->max_footprint; /* max_footprint field of the global state, read without PREACTION */
+}
+
+size_t dlmalloc_footprint_limit(void) {
+  size_t limit = gm->footprint_limit;
+  return (limit != 0) ? limit : MAX_SIZE_T; /* a stored 0 is reported as MAX_SIZE_T */
+}
+
+size_t dlmalloc_set_footprint_limit(size_t bytes) { /* returns the limit actually stored in gm */
+  size_t result;  /* invert sense of 0 */
+  if (bytes == 0)
+    result = granularity_align(1); /* Use minimal size */
+  else if (bytes == MAX_SIZE_T)    /* chained so the bytes == 0 result is not overwritten */
+    result = 0;                    /* disable */
+  else
+    result = granularity_align(bytes);
+  return gm->footprint_limit = result;
+}
+
+#if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) {
+  return internal_mallinfo(gm); /* result of internal_mallinfo for the global state gm */
+}
+#endif /* NO_MALLINFO */
+
+#if !NO_MALLOC_STATS
+void dlmalloc_stats(void) { /* prototype form, matching the other no-argument entry points here */
+  internal_malloc_stats(gm);
+}
+#endif /* NO_MALLOC_STATS */
+
+int dlmallopt(int param_number, int value) {
+  return change_mparam(param_number, value); /* change_mparam's result is passed straight back */
+}
+
+size_t dlmalloc_usable_size(void* mem) {
+  mchunkptr chunk;
+  if (mem == 0)
+    return 0; /* null has no usable bytes */
+  chunk = mem2chunk(mem);
+  /* chunk size less its overhead; 0 when the chunk is not marked in use */
+  return (is_inuse(chunk)) ? chunksize(chunk) - overhead_for(chunk) : 0;
+}
+
+#endif /* !ONLY_MSPACES */
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+
+static mstate init_user_mstate(char* tbase, size_t tsize) { /* sets up an mstate inside the region at tbase */
+  size_t msize = pad_request(sizeof(struct malloc_state));
+  mchunkptr mn;
+  mchunkptr msp = align_as_chunk(tbase);
+  mstate m = (mstate)(chunk2mem(msp)); /* the state is stored in the region's first chunk */
+  memset(m, 0, msize);
+  (void)INITIAL_LOCK(&m->mutex);
+  msp->head = (msize|INUSE_BITS); /* mark the chunk holding the state as in use */
+  m->seg.base = m->least_addr = tbase;
+  m->seg.size = m->footprint = m->max_footprint = tsize;
+  m->magic = mparams.magic;
+  m->release_checks = MAX_RELEASE_CHECK_RATE;
+  m->mflags = mparams.default_mflags;
+  m->extp = 0;
+  m->exts = 0;
+  disable_contiguous(m);
+  init_bins(m);
+  mn = next_chunk(mem2chunk(m)); /* first chunk after the one holding the state */
+  init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); /* rest of region, less TOP_FOOT_SIZE */
+  check_top_chunk(m, m->top);
+  return m;
+}
+
+mspace create_mspace(size_t capacity, int locked) {
+  mstate ms;
+  size_t overhead, request, mapped;
+  char* region;
+  ensure_initialization();
+  overhead = pad_request(sizeof(struct malloc_state));
+  if (capacity >= (size_t) -(overhead + TOP_FOOT_SIZE + mparams.page_size))
+    return (mspace)0; /* capacity too large once the bookkeeping is added */
+  /* capacity 0 asks for one granularity unit; otherwise add room for the state */
+  request = (capacity != 0)? (capacity + TOP_FOOT_SIZE + overhead) : mparams.granularity;
+  mapped = granularity_align(request);
+  region = (char*)(CALL_MMAP(mapped));
+  if (region == CMFAIL) return (mspace)0; /* mapping failed */
+  ms = init_user_mstate(region, mapped);
+  ms->seg.sflags = USE_MMAP_BIT;
+  set_lock(ms, locked);
+  return (mspace)ms;
+}
+
+mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
+  mstate ms;
+  size_t needed;     /* padded state size plus TOP_FOOT_SIZE */
+  ensure_initialization();
+  needed = (pad_request(sizeof(struct malloc_state))) + TOP_FOOT_SIZE;
+  /* capacity must exceed the bookkeeping yet stay below the wrap-around bound */
+  if (capacity <= needed || capacity >= (size_t) -(needed + mparams.page_size))
+    return (mspace)0;
+  ms = init_user_mstate((char*)base, capacity);
+  ms->seg.sflags = EXTERN_BIT;
+  set_lock(ms, locked);
+  return (mspace)ms;
+}
+
+int mspace_track_large_chunks(mspace msp, int enable) {
+  mstate ms = (mstate)msp;
+  int was_off;
+  if (PREACTION(ms))
+    return 0;
+  /* the return value is 1 when mmap use was off on entry, else 0 */
+  was_off = (!use_mmap(ms)) ? 1 : 0;
+  /* a nonzero enable switches mmap use off; zero switches it on */
+  if (enable) {
+    disable_mmap(ms);
+  } else {
+    enable_mmap(ms);
+  }
+  POSTACTION(ms);
+  return was_off;
+}
+
+size_t destroy_mspace(mspace msp) {
+  size_t released = 0;
+  mstate ms = (mstate)msp;
+  msegmentptr seg, following;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return released;
+  }
+  (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */
+  for (seg = &ms->seg; seg != 0; seg = following) {
+    char* start = seg->base;     /* fields are copied out before CALL_MUNMAP may run */
+    size_t length = seg->size;
+    flag_t bits = seg->sflags;
+    following = seg->next;
+    (void)start; /* placate people compiling -Wunused-variable */
+    /* only segments flagged USE_MMAP_BIT and not EXTERN_BIT are unmapped and counted */
+    if ((bits & USE_MMAP_BIT) && !(bits & EXTERN_BIT) &&
+        CALL_MUNMAP(start, length) == 0)
+      released += length;
+  }
+  return released;
+}
+
+/*
+  mspace versions of routines are near-clones of the global
+  versions. This is not so nice but better than the alternatives.
+*/
+
+void* mspace_malloc(mspace msp, size_t bytes) { /* allocate bytes from msp; returns 0 on failure */
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (!PREACTION(ms)) {
+    void* mem;
+    size_t nb; /* padded chunk size for the request */
+    if (bytes <= MAX_SMALL_REQUEST) {
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = ms->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
+        b = smallbin_at(ms, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(ms, b, p, idx);
+        set_inuse_and_pinuse(ms, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+      }
+
+      else if (nb > ms->dvsize) {
+        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t rsize;
+          bindex_t i;
+          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(ms, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(ms, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if 4byte sizes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(ms, p, small_index2size(i));
+          else { /* split: the remainder r replaces the current dv */
+            set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(ms, r, rsize);
+          }
+          mem = chunk2mem(p);
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+        }
+
+        else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+        }
+      }
+    }
+    else if (bytes >= MAX_REQUEST)
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+    else {
+      nb = pad_request(bytes);
+      if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+      }
+    }
+
+    if (nb <= ms->dvsize) { /* serve the request from the dv chunk */
+      size_t rsize = ms->dvsize - nb;
+      mchunkptr p = ms->dv;
+      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+        mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+        ms->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+      }
+      else { /* exhaust dv */
+        size_t dvs = ms->dvsize;
+        ms->dvsize = 0;
+        ms->dv = 0;
+        set_inuse_and_pinuse(ms, p, dvs);
+      }
+      mem = chunk2mem(p);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+    }
+
+    else if (nb < ms->topsize) { /* Split top */
+      size_t rsize = ms->topsize -= nb;
+      mchunkptr p = ms->top;
+      mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(ms, ms->top);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+    }
+
+    mem = sys_alloc(ms, nb); /* last resort once bins, dv and top could not serve nb */
+
+  postaction:
+    POSTACTION(ms);
+    return mem;
+  }
+
+  return 0; /* PREACTION reported failure */
+}
+
+void mspace_free(mspace msp, void* mem) { /* release mem; a null mem is ignored */
+  if (mem != 0) {
+    mchunkptr p  = mem2chunk(mem);
+#if FOOTERS
+    mstate fm = get_mstate_for(p); /* owning state comes from the chunk, not from msp */
+    (void)msp; /* placate people compiling -Wunused */
+#else /* FOOTERS */
+    mstate fm = (mstate)msp;
+#endif /* FOOTERS */
+    if (!ok_magic(fm)) {
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+    }
+    if (!PREACTION(fm)) {
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+        size_t psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) { /* mmapped chunk: unmap it and adjust footprint */
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+          }
+          else {
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev;
+            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+              if (p != fm->dv) {
+                unlink_chunk(fm, p, prevsize);
+              }
+              else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* prev is dv and next is in use: grow dv */
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+              }
+            }
+            else
+              goto erroraction;
+          }
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+          if (!cinuse(next)) {  /* consolidate forward */
+            if (next == fm->top) { /* merge into top */
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+                fm->dv = 0;
+                fm->dvsize = 0;
+              }
+              if (should_trim(fm, tsize))
+                sys_trim(fm, 0);
+              goto postaction;
+            }
+            else if (next == fm->dv) { /* merge into dv */
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+            }
+            else { /* merge with an ordinary free neighbor */
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) {
+                fm->dvsize = psize;
+                goto postaction;
+              }
+            }
+          }
+          else
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) { /* file the free chunk in a small bin or the tree */
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+          }
+          else {
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0)
+              release_unused_segments(fm);
+          }
+          goto postaction;
+        }
+      }
+    erroraction: /* reached by goto or by falling out of a failed RTCHECK */
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+    }
+  }
+}
+
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+  mstate ms = (mstate)msp;
+  size_t total = 0;
+  void* block;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (n_elements != 0) {
+    /* the division check only runs when an operand has bits above the low 16 */
+    int wide = ((n_elements | elem_size) & ~(size_t)0xffff) != 0;
+    total = n_elements * elem_size;
+    if (wide && (total / n_elements != elem_size))
+      total = MAX_SIZE_T; /* force downstream failure on overflow */
+  }
+  block = internal_malloc(ms, total);
+  if (block != 0 && calloc_must_clear(mem2chunk(block)))
+    memset(block, 0, total);
+  return block;
+}
+
+void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { /* resize oldmem; returns block or 0 */
+  void* mem = 0;
+  if (oldmem == 0) { /* null oldmem: same as mspace_malloc(msp, bytes) */
+    mem = mspace_malloc(msp, bytes);
+  }
+  else if (bytes >= MAX_REQUEST) { /* too large: fail without touching oldmem */
+    MALLOC_FAILURE_ACTION;
+  }
+#ifdef REALLOC_ZERO_BYTES_FREES
+  else if (bytes == 0) { /* frees oldmem and returns 0 */
+    mspace_free(msp, oldmem);
+  }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+  else {
+    size_t nb = request2size(bytes);
+    mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+    mstate m = (mstate)msp;
+#else /* FOOTERS */
+    mstate m = get_mstate_for(oldp); /* owning state comes from the chunk, not from msp */
+    if (!ok_magic(m)) {
+      USAGE_ERROR_ACTION(m, oldmem);
+      return 0;
+    }
+#endif /* FOOTERS */
+    if (!PREACTION(m)) {
+      mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); /* last arg is 0 in mspace_realloc_in_place */
+      POSTACTION(m); /* released before the malloc/copy/free fallback below */
+      if (newp != 0) {
+        check_inuse_chunk(m, newp);
+        mem = chunk2mem(newp);
+      }
+      else { /* fallback: new block from m, copy, free the old one */
+        mem = mspace_malloc(m, bytes);
+        if (mem != 0) {
+          size_t oc = chunksize(oldp) - overhead_for(oldp); /* old chunk size less its overhead */
+          memcpy(mem, oldmem, (oc < bytes)? oc : bytes); /* copy the smaller of the two */
+          mspace_free(m, oldmem);
+        }
+      }
+    }
+  }
+  return mem;
+}
+
+void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { /* oldmem on success, else 0 */
+  void* mem = 0;
+  if (oldmem != 0) { /* null oldmem yields 0 */
+    if (bytes >= MAX_REQUEST) {
+      MALLOC_FAILURE_ACTION;
+    }
+    else {
+      size_t nb = request2size(bytes);
+      mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+      mstate m = (mstate)msp;
+#else /* FOOTERS */
+      mstate m = get_mstate_for(oldp); /* owning state comes from the chunk, not from msp */
+      (void)msp; /* placate people compiling -Wunused */
+      if (!ok_magic(m)) {
+        USAGE_ERROR_ACTION(m, oldmem);
+        return 0;
+      }
+#endif /* FOOTERS */
+      if (!PREACTION(m)) {
+        mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); /* last arg is 1 in mspace_realloc */
+        POSTACTION(m);
+        if (newp == oldp) { /* only an unmoved chunk counts as success */
+          check_inuse_chunk(m, newp);
+          mem = oldmem;
+        }
+      }
+    }
+  }
+  return mem;
+}
+
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  /* only alignments above MALLOC_ALIGNMENT need internal_memalign */
+  return (alignment > MALLOC_ALIGNMENT)? internal_memalign(ms, alignment, bytes)
+                                       : mspace_malloc(msp, bytes);
+}
+
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void* chunks[]) {
+  size_t sz = elem_size; /* serves as 1-element array */
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) { /* bad handle: report it and return 0 */
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  return ialloc(ms, n_elements, &sz, 3, chunks); /* opts 3: same-size elements, zeroed */
+}
+
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void* chunks[]) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) { /* bad handle: report it and return 0 */
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  return ialloc(ms, n_elements, sizes, 0, chunks); /* opts 0: per-element sizes, not zeroed */
+}
+
+size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) {
+  return internal_bulk_free((mstate)msp, array, nelem); /* no ok_magic check is made here */
+}
+
+#if MALLOC_INSPECT_ALL
+void mspace_inspect_all(mspace msp,
+                        void(*handler)(void *start,
+                                       void *end,
+                                       size_t used_bytes,
+                                       void* callback_arg),
+                        void* arg) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return;
+  }
+  /* the traversal runs between PREACTION and POSTACTION on this space */
+  if (PREACTION(ms))
+    return;
+  internal_inspect_all(ms, handler, arg);
+  POSTACTION(ms);
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+int mspace_trim(mspace msp, size_t pad) {
+  mstate ms = (mstate)msp;
+  int released;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (PREACTION(ms))
+    return 0;
+  /* hand back whatever sys_trim reports for this space */
+  released = sys_trim(ms, pad);
+  POSTACTION(ms);
+  return released;
+}
+
+#if !NO_MALLOC_STATS
+void mspace_malloc_stats(mspace msp) {
+  mstate ms = (mstate)msp;
+  /* a handle failing the magic check is reported and no stats are produced */
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return;
+  }
+  internal_malloc_stats(ms);
+}
+#endif /* NO_MALLOC_STATS */
+
+size_t mspace_footprint(mspace msp) {
+  mstate ms = (mstate)msp;
+  /* A handle that fails the magic check is reported through
+     USAGE_ERROR_ACTION; if that returns, the answer is 0. */
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  /* Otherwise hand back the footprint field of this space. */
+  return ms->footprint;
+}
+
+size_t mspace_max_footprint(mspace msp) {
+  mstate ms = (mstate)msp;
+  /* A handle that fails the magic check is reported through
+     USAGE_ERROR_ACTION; if that returns, the answer is 0. */
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  /* Otherwise hand back the max_footprint field of this space. */
+  return ms->max_footprint;
+}
+
+size_t mspace_footprint_limit(mspace msp) {
+  mstate ms = (mstate)msp;
+  size_t limit;
+  /* A handle that fails the magic check is reported through
+     USAGE_ERROR_ACTION; if that returns, the answer is 0. */
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  limit = ms->footprint_limit;
+  return (limit != 0) ? limit : MAX_SIZE_T; /* a stored 0 is reported as MAX_SIZE_T */
+}
+
+size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { /* returns the limit actually stored */
+  size_t result = 0; /* also the return value when the handle is rejected */
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    if (bytes == 0)
+      result = granularity_align(1); /* Use minimal size */
+    else if (bytes == MAX_SIZE_T)    /* chained so the bytes == 0 result is not overwritten */
+      result = 0;                    /* disable */
+    else
+      result = granularity_align(bytes);
+    ms->footprint_limit = result;
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+#if !NO_MALLINFO
+struct mallinfo mspace_mallinfo(mspace msp) { /* result of internal_mallinfo for this space */
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms); /* NOTE(review): if this returns, ms is still passed on below */
+  }
+  return internal_mallinfo(ms);
+}
+#endif /* NO_MALLINFO */
+
+size_t mspace_usable_size(const void* mem) {
+  mchunkptr chunk;
+  if (mem == 0)
+    return 0; /* null has no usable bytes */
+  chunk = mem2chunk(mem);
+  /* chunk size less its overhead; 0 when the chunk is not marked in use */
+  return (is_inuse(chunk)) ? chunksize(chunk) - overhead_for(chunk) : 0;
+}
+
+int mspace_mallopt(int param_number, int value) {
+  return change_mparam(param_number, value); /* takes no mspace argument; same call as dlmallopt */
+}
+
+#endif /* MSPACES */
+
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+  Guidelines for creating a custom version of MORECORE:
+
+  * For best performance, MORECORE should allocate in multiples of pagesize.
+  * MORECORE may allocate more memory than requested. (Or even less,
+      but this will usually result in a malloc failure.)
+  * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call.
+  * For best performance, consecutive calls to MORECORE with positive
+      arguments should return increasing addresses, indicating that
+      space has been contiguously extended.
+  * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+  * MORECORE need not handle negative arguments -- it may instead
+      just return MFAIL when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+  As an example alternative MORECORE, here is a custom allocator
+  kindly contributed for pre-OSX macOS.  It uses virtually but not
+  necessarily physically contiguous non-paged memory (locked in,
+  present and won't get swapped out).  You can use it by uncommenting
+  this section, adding some #includes, and setting up the appropriate
+  defines above:
+
+      #define MORECORE osMoreCore
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
+
+  #define MAX_POOL_ENTRIES 100
+  #define MINIMUM_MORECORE_SIZE  (64 * 1024U)
+  static int next_os_pool;
+  void *our_os_pools[MAX_POOL_ENTRIES];
+
+  void *osMoreCore(int size)
+  {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MFAIL;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MFAIL;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+  }
+
+  // cleanup any allocated memory pools
+  // called as last thing before shutting down driver
+
+  void osCleanupMem(void)
+  {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+  }
+
+*/
+
+
+/* -----------------------------------------------------------------------
+History:
+    v2.8.6 Wed Aug 29 06:57:58 2012  Doug Lea
+      * fix bad comparison in dlposix_memalign
+      * don't reuse adjusted asize in sys_alloc
+      * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion
+      * reduce compiler warnings -- thanks to all who reported/suggested these
+
+    v2.8.5 Sun May 22 10:26:02 2011  Doug Lea  (dl at gee)
+      * Always perform unlink checks unless INSECURE
+      * Add posix_memalign.
+      * Improve realloc to expand in more cases; expose realloc_in_place.
+        Thanks to Peter Buhr for the suggestion.
+      * Add footprint_limit, inspect_all, bulk_free. Thanks
+        to Barry Hayes and others for the suggestions.
+      * Internal refactorings to avoid calls while holding locks
+      * Use non-reentrant locks by default. Thanks to Roland McGrath
+        for the suggestion.
+      * Small fixes to mspace_destroy, reset_on_error.
+      * Various configuration extensions/changes. Thanks
+         to all who contributed these.
+
+    V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu)
+      * Update Creative Commons URL
+
+    V2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
+      * Use zeros instead of prev foot for is_mmapped
+      * Add mspace_track_large_chunks; thanks to Jean Brouwers
+      * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
+      * Fix insufficient sys_alloc padding when using 16byte alignment
+      * Fix bad error check in mspace_footprint
+      * Adaptations for ptmalloc; thanks to Wolfram Gloger.
+      * Reentrant spin locks; thanks to Earl Chew and others
+      * Win32 improvements; thanks to Niall Douglas and Earl Chew
+      * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
+      * Extension hook in malloc_state
+      * Various small adjustments to reduce warnings on some compilers
+      * Various configuration extensions/changes for more platforms. Thanks
+         to all who contributed these.
+
+    V2.8.3 Thu Sep 22 11:16:32 2005  Doug Lea  (dl at gee)
+      * Add max_footprint functions
+      * Ensure all appropriate literals are size_t
+      * Fix conditional compilation problem for some #define settings
+      * Avoid concatenating segments with the one provided
+        in create_mspace_with_base
+      * Rename some variables to avoid compiler shadowing warnings
+      * Use explicit lock initialization.
+      * Better handling of sbrk interference.
+      * Simplify and fix segment insertion, trimming and mspace_destroy
+      * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+      * Thanks especially to Dennis Flanagan for help on these.
+
+    V2.8.2 Sun Jun 12 16:01:10 2005  Doug Lea  (dl at gee)
+      * Fix memalign brace error.
+
+    V2.8.1 Wed Jun  8 16:11:46 2005  Doug Lea  (dl at gee)
+      * Fix improper #endif nesting in C++
+      * Add explicit casts needed for C++
+
+    V2.8.0 Mon May 30 14:09:02 2005  Doug Lea  (dl at gee)
+      * Use trees for large bins
+      * Support mspaces
+      * Use segments to unify sbrk-based and mmap-based system allocation,
+        removing need for emulation on most platforms without sbrk.
+      * Default safety checks
+      * Optional footer checks. Thanks to William Robertson for the idea.
+      * Internal code refactoring
+      * Incorporate suggestions and platform-specific changes.
+        Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+        Aaron Bachmann,  Emery Berger, and others.
+      * Speed up non-fastbin processing enough to remove fastbins.
+      * Remove useless cfree() to avoid conflicts with other apps.
+      * Remove internal memcpy, memset. Compilers handle builtins better.
+      * Remove some options that no one ever used and rename others.
+
+    V2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+      * Fix malloc_state bitmap array misdeclaration
+
+    V2.7.1 Thu Jul 25 10:58:03 2002  Doug Lea  (dl at gee)
+      * Allow tuning of FIRST_SORTED_BIN_SIZE
+      * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+      * Better detection and support for non-contiguousness of MORECORE.
+        Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+      * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk in sysmalloc.
+      * Raised default trim and map thresholds to 256K.
+      * Fix mmap-related #defines. Thanks to Lubos Lunak.
+      * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+      * Branch-free bin calculation
+      * Default trim and mmap thresholds now 256K.
+
+    V2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+      * Introduce independent_comalloc and independent_calloc.
+        Thanks to Michael Pachos for motivation and help.
+      * Make optional .h file available
+      * Allow > 2GB requests on 32bit systems.
+      * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+        Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+        and Anonymous.
+      * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+        helping test this.)
+      * memalign: check alignment arg
+      * realloc: don't try to shift chunks backwards, since this
+        leads to  more fragmentation in some programs and doesn't
+        seem to help in any others.
+      * Collect all cases in malloc requiring system memory into sysmalloc
+      * Use mmap as backup to sbrk
+      * Place all internal state in malloc_state
+      * Introduce fastbins (although similar to 2.5.1)
+      * Many minor tunings and cosmetic improvements
+      * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+      * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+        Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+      * Include errno.h to support default failure action.
+
+    V2.6.6 Sun Dec  5 07:42:19 1999  Doug Lea  (dl at gee)
+      * return null for negative arguments
+      * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+         * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+          (e.g. WIN32 platforms)
+         * Cleanup header file inclusion for WIN32 platforms
+         * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+         * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+           memory allocation routines
+         * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+         * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+           usage of 'assert' in non-WIN32 code
+         * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+           avoid infinite loop
+      * Always call 'fREe()' rather than 'free()'
+
+    V2.6.5 Wed Jun 17 15:57:31 1998  Doug Lea  (dl at gee)
+      * Fixed ordering problem with boundary-stamping
+
+    V2.6.3 Sun May 19 08:17:58 1996  Doug Lea  (dl at gee)
+      * Added pvalloc, as recommended by H.J. Liu
+      * Added 64bit pointer support mainly from Wolfram Gloger
+      * Added anonymously donated WIN32 sbrk emulation
+      * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+      * malloc_extend_top: fix mask error that caused wastage after
+        foreign sbrks
+      * Add linux mremap support code from HJ Liu
+
+    V2.6.2 Tue Dec  5 06:52:55 1995  Doug Lea  (dl at gee)
+      * Integrated most documentation with the code.
+      * Add support for mmap, with help from
+        Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Use last_remainder in more cases.
+      * Pack bins using idea from  colin@nyx10.cs.du.edu
+      * Use ordered bins instead of best-fit threshold
+      * Eliminate block-local decls to simplify tracing and debugging.
+      * Support another case of realloc via move into top
+      * Fix error occurring when initial sbrk_base not word-aligned.
+      * Rely on page size for units instead of SBRK_UNIT to
+        avoid surprises about sbrk alignment conventions.
+      * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+        (raymond@es.ele.tue.nl) for the suggestion.
+      * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+      * More precautions for cases where other routines call sbrk,
+        courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Added macros etc., allowing use in linux libc from
+        H.J. Lu (hjl@gnu.ai.mit.edu)
+      * Inverted this history list
+
+    V2.6.1 Sat Dec  2 14:10:57 1995  Doug Lea  (dl at gee)
+      * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+      * Removed all preallocation code since under current scheme
+        the work required to undo bad preallocations exceeds
+        the work saved in good cases for most test programs.
+      * No longer use return list or unconsolidated bins since
+        no scheme using them consistently outperforms those that don't
+        given above changes.
+      * Use best fit for very large chunks to prevent some worst-cases.
+      * Added some support for debugging
+
+    V2.6.0 Sat Nov  4 07:05:23 1995  Doug Lea  (dl at gee)
+      * Removed footers when chunks are in use. Thanks to
+        Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+    V2.5.4 Wed Nov  1 07:54:51 1995  Doug Lea  (dl at gee)
+      * Added malloc_trim, with help from Wolfram Gloger
+        (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+    V2.5.3 Tue Apr 26 10:16:01 1994  Doug Lea  (dl at g)
+
+    V2.5.2 Tue Apr  5 16:20:40 1994  Doug Lea  (dl at g)
+      * realloc: try to expand in both directions
+      * malloc: swap order of clean-bin strategy;
+      * realloc: only conditionally expand backwards
+      * Try not to scavenge used bins
+      * Use bin counts as a guide to preallocation
+      * Occasionally bin return list chunks in first scan
+      * Add a few optimizations from colin@nyx10.cs.du.edu
+
+    V2.5.1 Sat Aug 14 15:40:43 1993  Doug Lea  (dl at g)
+      * faster bin computation & slightly different binning
+      * merged all consolidations to one part of malloc proper
+         (eliminating old malloc_find_space & malloc_clean_bin)
+      * Scan 2 returns chunks (not just 1)
+      * Propagate failure in realloc if malloc returns 0
+      * Add stuff to allow compilation on non-ANSI compilers
+          from kpv@research.att.com
+
+    V2.5 Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
+      * removed potential for odd address access in prev_chunk
+      * removed dependency on getpagesize.h
+      * misc cosmetics and a bit more internal documentation
+      * anticosmetics: mangled names in macros to evade debugger strangeness
+      * tested on sparc, hp-700, dec-mips, rs6000
+          with gcc & native cc (hp, dec only) allowing
+          Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+    Trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
+      * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+         structure of old version,  but most details differ.)
+
+*/

From 494903c5d49182c658b9e33038951282cdf14003 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sat, 10 Sep 2016 17:47:37 -0700
Subject: [PATCH 19/91] fix compiler warnings (#17)

---
 .travis/check-git-clang-format-output.sh | 2 +-
 Makefile                                 | 4 ++--
 src/event_loop.h                         | 1 +
 src/fling.c                              | 2 ++
 src/malloc.c                             | 2 +-
 src/plasma_client.c                      | 2 +-
 src/plasma_manager.c                     | 2 +-
 {third_party => thirdparty}/dlmalloc.c   | 0
 8 files changed, 9 insertions(+), 6 deletions(-)
 rename {third_party => thirdparty}/dlmalloc.c (100%)

diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
index c3431f68b..d71f78357 100755
--- a/.travis/check-git-clang-format-output.sh
+++ b/.travis/check-git-clang-format-output.sh
@@ -7,7 +7,7 @@ else
   base_commit="$TRAVIS_BRANCH"
   echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
 fi
-output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^third_party/)"
+output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^thirdparty/)"
 if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
   echo "clang-format passed."
   exit 0
diff --git a/Makefile b/Makefile
index c317cc938..0b8566021 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -I.
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I.
 BUILD = build
 
 all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
@@ -7,7 +7,7 @@ all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(B
 clean:
 	rm -r $(BUILD)/*
 
-$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c src/malloc.c src/malloc.h third_party/dlmalloc.c
+$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c src/malloc.c src/malloc.h thirdparty/dlmalloc.c
 	$(CC) $(CFLAGS) src/plasma_store.c src/event_loop.c src/fling.c src/malloc.c -o $(BUILD)/plasma_store
 
 $(BUILD)/plasma_manager: src/plasma_manager.c src/event_loop.h src/event_loop.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
diff --git a/src/event_loop.h b/src/event_loop.h
index b83e1df63..5dee29682 100644
--- a/src/event_loop.h
+++ b/src/event_loop.h
@@ -2,6 +2,7 @@
 #define EVENT_LOOP_H
 
 #include <poll.h>
+#include <string.h>
 
 #include "utarray.h"
 #include "plasma.h"
diff --git a/src/fling.c b/src/fling.c
index 6363fdfb0..e1417aeeb 100644
--- a/src/fling.c
+++ b/src/fling.c
@@ -1,5 +1,7 @@
 #include "fling.h"
 
+#include <string.h>
+
 void init_msg(struct msghdr *msg,
               struct iovec *iov,
               char *buf,
diff --git a/src/malloc.c b/src/malloc.c
index bbfccd462..d83423844 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -19,7 +19,7 @@ int fake_munmap(void *, size_t);
 #define USE_DL_PREFIX
 #define HAVE_MORECORE 0
 
-#include "third_party/dlmalloc.c"
+#include "thirdparty/dlmalloc.c"
 
 #undef MMAP
 #undef MUNMAP
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 4442555c4..af9feeae9 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -116,7 +116,7 @@ int plasma_manager_connect(const char *ip_addr, int port) {
 
   struct sockaddr_in addr;
   addr.sin_family = AF_INET;
-  bcopy(manager->h_addr, &addr.sin_addr.s_addr, manager->h_length);
+  memcpy(&addr.sin_addr.s_addr, manager->h_addr, manager->h_length);
   addr.sin_port = htons(port);
 
   int r = connect(fd, (struct sockaddr *) &addr, sizeof(addr));
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 2e37d5f69..f6c45e099 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -49,7 +49,7 @@ void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
   plasma_buffer buf = {.object_id = req->object_id, .writable = 0};
   plasma_get(store_conn, req->object_id, &buf.size, &buf.data);
 
-  char ip_addr[16];
+  char ip_addr[32];
   snprintf(ip_addr, 32, "%d.%d.%d.%d", req->addr[0], req->addr[1], req->addr[2],
            req->addr[3]);
 
diff --git a/third_party/dlmalloc.c b/thirdparty/dlmalloc.c
similarity index 100%
rename from third_party/dlmalloc.c
rename to thirdparty/dlmalloc.c

From a35de3b2872deb8500e42a1ba0a2e24dff20aa87 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sat, 10 Sep 2016 19:42:08 -0700
Subject: [PATCH 20/91] Initial commit

---
 .gitignore |  33 +++++++++
 LICENSE    | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 234 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..f805e810e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,33 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..8dada3eda
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

From 96a59200d3f59bc10e38c6d7903a940415f843d9 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 13 Sep 2016 16:45:44 -0700
Subject: [PATCH 21/91] Fix some bugs (#22)

* Bug fixes.

* Remove plasma_reply_type.

* Fix formatting.

* Speed up tests a little.

* Small fixes.
---
 src/plasma.h        | 17 ++++++------
 src/plasma_client.c |  7 -----
 src/plasma_store.c  | 68 +++++++++++++++++++++------------------------
 test/test.py        | 57 ++++++++++++++++++-------------------
 4 files changed, 70 insertions(+), 79 deletions(-)

diff --git a/src/plasma.h b/src/plasma.h
index 0bcc2615f..c7efa4c07 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -25,6 +25,15 @@
   fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
           errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
 
+#define PLASMA_CHECK(CONDITION, M, ...)                                \
+  do {                                                                 \
+    if (!(CONDITION)) {                                                \
+      fprintf(stderr, "[FATAL] (%s:%d " #CONDITION ") \n" M, __FILE__, \
+              __LINE__);                                               \
+      exit(-1);                                                        \
+    }                                                                  \
+  } while (0)
+
 typedef struct {
   int64_t size;
   int64_t create_time;
@@ -55,15 +64,7 @@ typedef struct {
   int port;
 } plasma_request;
 
-enum plasma_reply_type {
-  /* the file descriptor represents an object */
-  PLASMA_OBJECT,
-  /* the file descriptor represents a future */
-  PLASMA_FUTURE,
-};
-
 typedef struct {
-  int type;
   ptrdiff_t offset;
   int64_t map_size;
   int64_t object_size;
diff --git a/src/plasma_client.c b/src/plasma_client.c
index af9feeae9..8a8a11b5c 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -30,7 +30,6 @@ void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
   plasma_send(conn, &req);
   plasma_reply reply;
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
-  assert(reply.type == PLASMA_OBJECT);
   assert(reply.object_size == size);
   *data =
       mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) +
@@ -48,12 +47,6 @@ void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
   plasma_reply reply;
   /* The following loop is run at most twice. */
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
-  if (reply.type == PLASMA_FUTURE) {
-    int new_fd = recv_fd(fd, (char *) &reply, sizeof(plasma_reply));
-    close(fd);
-    fd = new_fd;
-  }
-  assert(reply.type == PLASMA_OBJECT);
   *data =
       mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0) + reply.offset;
   if (*data == MAP_FAILED) {
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 386107a6a..3720a46b2 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -27,7 +27,7 @@
 #include "plasma.h"
 #include "event_loop.h"
 
-#define MAX_NUM_CLIENTS 100000
+#define MAX_NUM_CLIENTS 100
 
 void* dlmalloc(size_t);
 
@@ -81,6 +81,10 @@ object_notify_entry* objects_notify = NULL;
 void create_object(int conn, plasma_request* req) {
   LOG_INFO("creating object"); /* TODO(pcm): add object_id here */
 
+  object_table_entry* entry;
+  HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
+  PLASMA_CHECK(entry == NULL, "Cannot create object twice.");
+
   void* pointer = dlmalloc(req->size);
   int fd;
   int64_t map_size;
@@ -88,7 +92,7 @@ void create_object(int conn, plasma_request* req) {
   get_malloc_mapinfo(pointer, &fd, &map_size, &offset);
   assert(fd != -1);
 
-  object_table_entry* entry = malloc(sizeof(object_table_entry));
+  entry = malloc(sizeof(object_table_entry));
   memcpy(&entry->object_id, &req->object_id, 20);
   entry->info.size = req->size;
   /* TODO(pcm): set the other fields */
@@ -98,7 +102,6 @@ void create_object(int conn, plasma_request* req) {
   HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
   plasma_reply reply;
   memset(&reply, 0, sizeof(reply));
-  reply.type = PLASMA_OBJECT;
   reply.offset = offset;
   reply.map_size = map_size;
   reply.object_size = req->size;
@@ -110,21 +113,29 @@ void get_object(int conn, plasma_request* req) {
   object_table_entry* entry;
   HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
   if (entry) {
-    plasma_reply reply = {PLASMA_OBJECT, entry->offset, entry->map_size,
-                          entry->info.size};
+    plasma_reply reply;
+    memset(&reply, 0, sizeof(plasma_reply));
+    reply.offset = entry->offset;
+    reply.map_size = entry->map_size;
+    reply.object_size = entry->info.size;
     send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
   } else {
+    object_notify_entry* notify_entry;
     LOG_INFO("object not in hash table of sealed objects");
-    int fd[2];
-    socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
-    object_notify_entry* notify_entry = malloc(sizeof(object_notify_entry));
-    memcpy(&notify_entry->object_id, &req->object_id, 20);
-    notify_entry->conn[notify_entry->num_waiting] = fd[0];
+    HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id),
+              notify_entry);
+    if (!notify_entry) {
+      notify_entry = malloc(sizeof(object_notify_entry));
+      memset(notify_entry, 0, sizeof(object_notify_entry));
+      notify_entry->num_waiting = 0;
+      memcpy(&notify_entry->object_id, &req->object_id, 20);
+      HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id),
+               notify_entry);
+    }
+    PLASMA_CHECK(notify_entry->num_waiting < MAX_NUM_CLIENTS - 1,
+                 "This exceeds the maximum number of clients.");
+    notify_entry->conn[notify_entry->num_waiting] = conn;
     notify_entry->num_waiting += 1;
-    HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id),
-             notify_entry);
-    plasma_reply reply = {PLASMA_FUTURE, 0, 0, -1};
-    send_fd(conn, fd[1], (char*) &reply, sizeof(plasma_reply));
   }
 }
 
@@ -145,12 +156,12 @@ void seal_object(int conn, plasma_request* req) {
   if (!notify_entry) {
     return;
   }
-  plasma_reply reply = {PLASMA_OBJECT, entry->offset, entry->map_size,
-                        entry->info.size};
+  plasma_reply reply = {.offset = entry->offset,
+                        .map_size = entry->map_size,
+                        .object_size = entry->info.size};
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
     send_fd(notify_entry->conn[i], entry->fd, (char*) &reply,
             sizeof(plasma_reply));
-    close(notify_entry->conn[i]);
   }
   HASH_DELETE(handle, objects_notify, notify_entry);
   free(notify_entry);
@@ -189,19 +200,10 @@ void run_event_loop(int socket) {
       if (waiting->revents == 0)
         continue;
       if (waiting->fd == socket) {
-        while (1) {
-          /* Handle new incoming connections. */
-          int new_socket = accept(socket, NULL, NULL);
-          if (new_socket < 0) {
-            if (errno != EWOULDBLOCK) {
-              LOG_ERR("accept failed");
-              exit(-1);
-            }
-            break;
-          }
-          event_loop_attach(state.loop, 0, NULL, new_socket, POLLIN);
-          LOG_INFO("adding new client");
-        }
+        /* Handle new incoming connections. */
+        int new_socket = accept(socket, NULL, NULL);
+        event_loop_attach(state.loop, 0, NULL, new_socket, POLLIN);
+        LOG_INFO("adding new client");
       } else {
         int r = read(waiting->fd, &req, sizeof(plasma_request));
         if (r == -1) {
@@ -230,12 +232,6 @@ void start_server(char* socket_name) {
     close(fd);
     exit(-1);
   }
-  /* TODO(pcm): http://stackoverflow.com/q/1150635 */
-  if (ioctl(fd, FIONBIO, (char*) &on) < 0) {
-    LOG_ERR("ioctl failed");
-    close(fd);
-    exit(-1);
-  }
   struct sockaddr_un addr;
   memset(&addr, 0, sizeof(addr));
   addr.sun_family = AF_UNIX;
diff --git a/test/test.py b/test/test.py
index 6b1f8c546..a8c4b670d 100644
--- a/test/test.py
+++ b/test/test.py
@@ -94,34 +94,35 @@ class TestPlasmaManager(unittest.TestCase):
     self.p5.kill()
 
   def test_transfer(self):
-    # Create an object id string.
-    object_id1 = random_object_id()
-    # Create a new buffer and write to it.
-    memory_buffer = self.client1.create(object_id1, 20000)
-    for i in range(len(memory_buffer)):
-      memory_buffer[i] = chr(i % 10)
-    # Seal the buffer.
-    self.client1.seal(object_id1)
-    # Transfer the buffer to the the other PlasmaStore.
-    self.client1.transfer("127.0.0.1", self.port2, object_id1)
-    # Compare the two buffers.
-    self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
-    # Transfer the buffer again.
-    self.client1.transfer("127.0.0.1", self.port2, object_id1)
-    # Compare the two buffers.
-    self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
-    # Create a new object id string.
-    object_id2 = random_object_id()
-    # Create a new buffer and write to it.
-    memory_buffer = self.client2.create(object_id2, 20000)
-    for i in range(len(memory_buffer)):
-      memory_buffer[i] = chr(i % 10)
-    # Seal the buffer.
-    self.client2.seal(object_id2)
-    # Transfer the buffer to the the other PlasmaStore.
-    self.client2.transfer("127.0.0.1", self.port1, object_id2)
-    # Compare the two buffers.
-    self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
+    for _ in range(100):
+      # Create an object id string.
+      object_id1 = random_object_id()
+      # Create a new buffer and set the first and last entries.
+      memory_buffer = self.client1.create(object_id1, 20000)
+      memory_buffer[0] = chr(1)
+      memory_buffer[-1] = chr(2)
+      # Seal the buffer.
+      self.client1.seal(object_id1)
+      # Transfer the buffer to the other PlasmaStore.
+      self.client1.transfer("127.0.0.1", self.port2, object_id1)
+      # Compare the two buffers.
+      self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+      # Transfer the buffer again.
+      self.client1.transfer("127.0.0.1", self.port2, object_id1)
+      # Compare the two buffers.
+      self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+      # Create a new object id string.
+      object_id2 = random_object_id()
+      # Create a new buffer and set the first and last entries.
+      memory_buffer = self.client2.create(object_id2, 20000)
+      memory_buffer[0] = chr(3)
+      memory_buffer[-1] = chr(4)
+      # Seal the buffer.
+      self.client2.seal(object_id2)
+      # Transfer the buffer to the other PlasmaStore.
+      self.client2.transfer("127.0.0.1", self.port1, object_id2)
+      # Compare the two buffers.
+      self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
 
   def test_illegal_functionality(self):
     # Create an object id string.

From 7d629d4e489e1ab10bd85b5fc94e861f4ae159cc Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 13 Sep 2016 18:54:26 -0700
Subject: [PATCH 22/91] Adding object table (#1)

* code for maintaining the object table

* Makefile fix

* Clone git submodules.

* directory -> object_table

* Fix Makefile and remove unnecessary files.

* Fix formatting.

* make code more generic
---
 .clang-format                            |    6 +
 .gitignore                               |    2 +
 .gitmodules                              |    3 +
 .travis.yml                              |   26 +
 .travis/check-git-clang-format-output.sh |   18 +
 .travis/git-clang-format                 |  476 ++++++++++
 Makefile                                 |   24 +
 build/.gitkeep                           |    0
 common.c                                 |   15 +
 common.h                                 |   29 +
 event_loop.c                             |   92 ++
 event_loop.h                             |   38 +
 state/db.h                               |   29 +
 state/object_table.h                     |   15 +
 state/redis.c                            |  188 ++++
 state/redis.h                            |   26 +
 test/db_tests.c                          |   69 ++
 thirdparty/build-redis.sh                |    4 +
 thirdparty/greatest.h                    | 1023 ++++++++++++++++++++++
 thirdparty/hiredis                       |    1 +
 thirdparty/utarray.h                     |  238 +++++
 21 files changed, 2322 insertions(+)
 create mode 100644 .clang-format
 create mode 100644 .gitmodules
 create mode 100644 .travis.yml
 create mode 100755 .travis/check-git-clang-format-output.sh
 create mode 100755 .travis/git-clang-format
 create mode 100644 Makefile
 create mode 100644 build/.gitkeep
 create mode 100644 common.c
 create mode 100644 common.h
 create mode 100644 event_loop.c
 create mode 100644 event_loop.h
 create mode 100644 state/db.h
 create mode 100644 state/object_table.h
 create mode 100644 state/redis.c
 create mode 100644 state/redis.h
 create mode 100644 test/db_tests.c
 create mode 100644 thirdparty/build-redis.sh
 create mode 100644 thirdparty/greatest.h
 create mode 160000 thirdparty/hiredis
 create mode 100644 thirdparty/utarray.h

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..90d254290
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,6 @@
+BasedOnStyle: Chromium
+DerivePointerAlignment: true
+IndentCaseLabels: false
+PointerAlignment: Right
+SpaceAfterCStyleCast: true
+
diff --git a/.gitignore b/.gitignore
index f805e810e..2a07abca4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+*~
+
 # Object files
 *.o
 *.ko
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..4026f8268
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "thirdparty/hiredis"]
+	path = thirdparty/hiredis
+	url = https://github.com/redis/hiredis
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..220df4b86
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+sudo: required
+
+language: generic
+
+matrix:
+  include:
+    - os: linux
+      dist: trusty
+    - os: osx
+      osx_image: xcode7
+    - os: linux
+      dist: trusty
+      env: LINT=1
+      before_install:
+        # In case we ever want to use a different version of clang-format:
+        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq clang-format-3.8
+      install: []
+      script:
+        - .travis/check-git-clang-format-output.sh
+
+install:
+  - make
+  - make test
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
new file mode 100755
index 000000000..d71f78357
--- /dev/null
+++ b/.travis/check-git-clang-format-output.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
+  # Not in a pull request, so compare against parent commit
+  base_commit="HEAD^"
+  echo "Running clang-format against parent commit $(git rev-parse $base_commit)"
+else
+  base_commit="$TRAVIS_BRANCH"
+  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
+fi
+output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^thirdparty/)"
+if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
+  echo "clang-format passed."
+  exit 0
+else
+  echo "clang-format failed:"
+  echo "$output"
+  exit 1
+fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
new file mode 100755
index 000000000..37b352835
--- /dev/null
+++ b/.travis/git-clang-format
@@ -0,0 +1,476 @@
+#!/usr/bin/env python
+#
+#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""                                                                             
+clang-format git integration                                                     
+============================                                                     
+                                                                                 
+This file provides a clang-format integration for git. Put it somewhere in your  
+path and ensure that it is executable. Then, "git clang-format" will invoke      
+clang-format on the changes in current files or a specific commit.               
+                                                                                 
+For further details, run:                                                        
+git clang-format -h                                                              
+                                                                                 
+Requires Python 2.7                                                              
+"""               
+
+import argparse
+import collections
+import contextlib
+import errno
+import os
+import re
+import subprocess
+import sys
+
+usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
+
+desc = '''
+Run clang-format on all lines that differ between the working directory
+and <commit>, which defaults to HEAD.  Changes are only applied to the working
+directory.
+The following git-config settings set the default of the corresponding option:
+  clangFormat.binary
+  clangFormat.commit
+  clangFormat.extension
+  clangFormat.style
+'''
+
+# Name of the temporary index file in which save the output of clang-format.
+# This file is created within the .git directory.
+temp_index_basename = 'clang-format-index'
+
+
+Range = collections.namedtuple('Range', 'start, count')
+
+
+def main():
+  config = load_git_config()
+
+  # In order to keep '--' yet allow options after positionals, we need to
+  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
+  # nargs=argparse.REMAINDER disallows options after positionals.)
+  argv = sys.argv[1:]
+  try:
+    idx = argv.index('--')
+  except ValueError:
+    dash_dash = []
+  else:
+    dash_dash = argv[idx:]
+    argv = argv[:idx]
+
+  default_extensions = ','.join([
+      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
+      'c', 'h',  # C
+      'm',  # ObjC
+      'mm',  # ObjC++
+      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
+      # Other languages that clang-format supports
+      'proto', 'protodevel',  # Protocol Buffers
+      'js',  # JavaScript
+      'ts',  # TypeScript
+      ])
+
+  p = argparse.ArgumentParser(
+    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=desc)
+  p.add_argument('--binary',
+                 default=config.get('clangformat.binary', 'clang-format'),
+                 help='path to clang-format'),
+  p.add_argument('--commit',
+                 default=config.get('clangformat.commit', 'HEAD'),
+                 help='default commit to use if none is specified'),
+  p.add_argument('--diff', action='store_true',
+                 help='print a diff instead of applying the changes')
+  p.add_argument('--extensions',
+                 default=config.get('clangformat.extensions',
+                                    default_extensions),
+                 help=('comma-separated list of file extensions to format, '
+                       'excluding the period and case-insensitive')),
+  p.add_argument('--exclude', help='Exclude files matching this regex.')
+  p.add_argument('-f', '--force', action='store_true',
+                 help='allow changes to unstaged files')
+  p.add_argument('-p', '--patch', action='store_true',
+                 help='select hunks interactively')
+  p.add_argument('-q', '--quiet', action='count', default=0,
+                 help='print less information')
+  p.add_argument('--style',
+                 default=config.get('clangformat.style', None),
+                 help='passed to clang-format'),
+  p.add_argument('-v', '--verbose', action='count', default=0,
+                 help='print extra information')
+  # We gather all the remaining positional arguments into 'args' since we need
+  # to use some heuristics to determine whether or not <commit> was present.
+  # However, to print pretty messages, we make use of metavar and help.
+  p.add_argument('args', nargs='*', metavar='<commit>',
+                 help='revision from which to compute the diff')
+  p.add_argument('ignored', nargs='*', metavar='<file>...',
+                 help='if specified, only consider differences in these files')
+  opts = p.parse_args(argv)
+
+  opts.verbose -= opts.quiet
+  del opts.quiet
+
+  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
+  changed_lines = compute_diff_and_extract_lines(commit, files)
+  if opts.verbose >= 1:
+    ignored_files = set(changed_lines)
+  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
+  if opts.exclude:
+    for filename in changed_lines.keys():
+      if re.match(opts.exclude, filename):
+        del changed_lines[filename]
+  if opts.verbose >= 1:
+    ignored_files.difference_update(changed_lines)
+    if ignored_files:
+      print 'Ignoring changes in the following files:'
+      for filename in ignored_files:
+        print '   ', filename
+    if changed_lines:
+      print 'Running clang-format on the following files:'
+      for filename in changed_lines:
+        print '   ', filename
+  if not changed_lines:
+    print 'no modified files to format'
+    return
+  # The computed diff outputs absolute paths, so we must cd before accessing
+  # those files.
+  cd_to_toplevel()
+  old_tree = create_tree_from_workdir(changed_lines)
+  new_tree = run_clang_format_and_save_to_tree(changed_lines,
+                                               binary=opts.binary,
+                                               style=opts.style)
+  if opts.verbose >= 1:
+    print 'old tree:', old_tree
+    print 'new tree:', new_tree
+  if old_tree == new_tree:
+    if opts.verbose >= 0:
+      print 'clang-format did not modify any files'
+  elif opts.diff:
+    print_diff(old_tree, new_tree)
+  else:
+    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
+                                  patch_mode=opts.patch)
+    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
+      print 'changed files:'
+      for filename in changed_files:
+        print '   ', filename
+
+
+def load_git_config(non_string_options=None):
+  """Return the git configuration as a dictionary.
+  All options are assumed to be strings unless in `non_string_options`, which
+  is a dictionary mapping option name (in lower case) to either "--bool" or
+  "--int"."""
+  if non_string_options is None:
+    non_string_options = {}
+  out = {}
+  for entry in run('git', 'config', '--list', '--null').split('\0'):
+    if entry:
+      name, value = entry.split('\n', 1)
+      if name in non_string_options:
+        value = run('git', 'config', non_string_options[name], name)
+      out[name] = value
+  return out
+
+
+def interpret_args(args, dash_dash, default_commit):
+  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
+  It is assumed that "--" and everything that follows has been removed from
+  args and placed in `dash_dash`.
+  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
+  left (if present) is taken as commit.  Otherwise, the first argument is
+  checked if it is a commit or a file.  If commit is not given,
+  `default_commit` is used."""
+  if dash_dash:
+    if len(args) == 0:
+      commit = default_commit
+    elif len(args) > 1:
+      die('at most one commit allowed; %d given' % len(args))
+    else:
+      commit = args[0]
+    object_type = get_object_type(commit)
+    if object_type not in ('commit', 'tag'):
+      if object_type is None:
+        die("'%s' is not a commit" % commit)
+      else:
+        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
+    files = dash_dash[1:]
+  elif args:
+    if disambiguate_revision(args[0]):
+      commit = args[0]
+      files = args[1:]
+    else:
+      commit = default_commit
+      files = args
+  else:
+    commit = default_commit
+    files = []
+  return commit, files
+
+
+def disambiguate_revision(value):
+  """Returns True if `value` is a revision, False if it is a file, or dies."""
+  # If `value` is ambiguous (neither a commit nor a file), the following
+  # command will die with an appropriate error message.
+  run('git', 'rev-parse', value, verbose=False)
+  object_type = get_object_type(value)
+  if object_type is None:
+    return False
+  if object_type in ('commit', 'tag'):
+    return True
+  die('`%s` is a %s, but a commit or filename was expected' %
+      (value, object_type))
+
+
+def get_object_type(value):
+  """Returns a string description of an object's type, or None if it is not
+  a valid git object."""
+  cmd = ['git', 'cat-file', '-t', value]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  stdout, stderr = p.communicate()
+  if p.returncode != 0:
+    return None
+  return stdout.strip()
+
+
+def compute_diff_and_extract_lines(commit, files):
+  """Calls compute_diff() followed by extract_lines()."""
+  diff_process = compute_diff(commit, files)
+  changed_lines = extract_lines(diff_process.stdout)
+  diff_process.stdout.close()
+  diff_process.wait()
+  if diff_process.returncode != 0:
+    # Assume error was already printed to stderr.
+    sys.exit(2)
+  return changed_lines
+
+
+def compute_diff(commit, files):
+  """Return a subprocess object producing the diff from `commit`.
+  The return value's `stdout` file object will produce a patch with the
+  differences between the working directory and `commit`, filtered on `files`
+  (if non-empty).  Zero context lines are used in the patch."""
+  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
+  cmd.extend(files)
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  p.stdin.close()
+  return p
+
+
+def extract_lines(patch_file):
+  """Extract the changed lines in `patch_file`.
+  The return value is a dictionary mapping filename to a list of (start_line,
+  line_count) pairs.
+  The input must have been produced with ``-U0``, meaning unidiff format with
+  zero lines of context.  The return value is a dict mapping filename to a
+  list of line `Range`s."""
+  matches = {}
+  for line in patch_file:
+    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
+    if match:
+      filename = match.group(1).rstrip('\r\n')
+    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
+    if match:
+      start_line = int(match.group(1))
+      line_count = 1
+      if match.group(3):
+        line_count = int(match.group(3))
+      if line_count > 0:
+        matches.setdefault(filename, []).append(Range(start_line, line_count))
+  return matches
+
+
+def filter_by_extension(dictionary, allowed_extensions):
+  """Delete every key in `dictionary` that doesn't have an allowed extension.
+  `allowed_extensions` must be a collection of lowercase file extensions,
+  excluding the period."""
+  allowed_extensions = frozenset(allowed_extensions)
+  for filename in dictionary.keys():
+    base_ext = filename.rsplit('.', 1)
+    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
+      del dictionary[filename]
+
+
+def cd_to_toplevel():
+  """Change to the top level of the git repository."""
+  toplevel = run('git', 'rev-parse', '--show-toplevel')
+  os.chdir(toplevel)
+
+
+def create_tree_from_workdir(filenames):
+  """Create a new git tree with the given files from the working directory.
+  Returns the object ID (SHA-1) of the created tree."""
+  return create_tree(filenames, '--stdin')
+
+
+def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
+                                      style=None):
+  """Run clang-format on each file and save the result to a git tree.
+  Returns the object ID (SHA-1) of the created tree."""
+  def index_info_generator():
+    for filename, line_ranges in changed_lines.iteritems():
+      mode = oct(os.stat(filename).st_mode)
+      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
+                                     style=style)
+      yield '%s %s\t%s' % (mode, blob_id, filename)
+  return create_tree(index_info_generator(), '--index-info')
+
+
+def create_tree(input_lines, mode):
+  """Create a tree object from the given input.
+  If mode is '--stdin', it must be a list of filenames.  If mode is
+  '--index-info' it must be a list of values suitable for "git update-index
+  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
+  is invalid."""
+  assert mode in ('--stdin', '--index-info')
+  cmd = ['git', 'update-index', '--add', '-z', mode]
+  with temporary_index_file():
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+    for line in input_lines:
+      p.stdin.write('%s\0' % line)
+    p.stdin.close()
+    if p.wait() != 0:
+      die('`%s` failed' % ' '.join(cmd))
+    tree_id = run('git', 'write-tree')
+    return tree_id
+
+
+def clang_format_to_blob(filename, line_ranges, binary='clang-format',
+                         style=None):
+  """Run clang-format on the given file and save the result to a git blob.
+  Returns the object ID (SHA-1) of the created blob."""
+  clang_format_cmd = [binary, filename]
+  if style:
+    clang_format_cmd.extend(['-style='+style])
+  clang_format_cmd.extend([
+      '-lines=%s:%s' % (start_line, start_line+line_count-1)
+      for start_line, line_count in line_ranges])
+  try:
+    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE)
+  except OSError as e:
+    if e.errno == errno.ENOENT:
+      die('cannot find executable "%s"' % binary)
+    else:
+      raise
+  clang_format.stdin.close()
+  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
+  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
+                                 stdout=subprocess.PIPE)
+  clang_format.stdout.close()
+  stdout = hash_object.communicate()[0]
+  if hash_object.returncode != 0:
+    die('`%s` failed' % ' '.join(hash_object_cmd))
+  if clang_format.wait() != 0:
+    die('`%s` failed' % ' '.join(clang_format_cmd))
+  return stdout.rstrip('\r\n')
+
+
+@contextlib.contextmanager
+def temporary_index_file(tree=None):
+  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
+  the file afterward."""
+  index_path = create_temporary_index(tree)
+  old_index_path = os.environ.get('GIT_INDEX_FILE')
+  os.environ['GIT_INDEX_FILE'] = index_path
+  try:
+    yield
+  finally:
+    if old_index_path is None:
+      del os.environ['GIT_INDEX_FILE']
+    else:
+      os.environ['GIT_INDEX_FILE'] = old_index_path
+    os.remove(index_path)
+
+
+def create_temporary_index(tree=None):
+  """Create a temporary index file and return the created file's path.
+  If `tree` is not None, use that as the tree to read in.  Otherwise, an
+  empty index is created."""
+  gitdir = run('git', 'rev-parse', '--git-dir')
+  path = os.path.join(gitdir, temp_index_basename)
+  if tree is None:
+    tree = '--empty'
+  run('git', 'read-tree', '--index-output='+path, tree)
+  return path
+
+
+def print_diff(old_tree, new_tree):
+  """Print the diff between the two trees to stdout."""
+  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
+  # is expected to be viewed by the user, and only the former does nice things
+  # like color and pagination.
+  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
+
+
+def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
+  """Apply the changes in `new_tree` to the working directory.
+  Bails if there are local changes in those files and not `force`.  If
+  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
+  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
+                      new_tree).rstrip('\0').split('\0')
+  if not force:
+    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
+    if unstaged_files:
+      print >>sys.stderr, ('The following files would be modified but '
+                           'have unstaged changes:')
+      print >>sys.stderr, unstaged_files
+      print >>sys.stderr, 'Please commit, stage, or stash them first.'
+      sys.exit(2)
+  if patch_mode:
+    # In patch mode, we could just as well create an index from the new tree
+    # and checkout from that, but then the user will be presented with a
+    # message saying "Discard ... from worktree".  Instead, we use the old
+    # tree as the index and checkout from new_tree, which gives the slightly
+    # better message, "Apply ... to index and worktree".  This is not quite
+    # right, since it won't be applied to the user's index, but oh well.
+    with temporary_index_file(old_tree):
+      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
+    index_tree = old_tree
+  else:
+    with temporary_index_file(new_tree):
+      run('git', 'checkout-index', '-a', '-f')
+  return changed_files
+
+
+def run(*args, **kwargs):
+  stdin = kwargs.pop('stdin', '')
+  verbose = kwargs.pop('verbose', True)
+  strip = kwargs.pop('strip', True)
+  for name in kwargs:
+    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
+  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                       stdin=subprocess.PIPE)
+  stdout, stderr = p.communicate(input=stdin)
+  if p.returncode == 0:
+    if stderr:
+      if verbose:
+        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
+      print >>sys.stderr, stderr.rstrip()
+    if strip:
+      stdout = stdout.rstrip('\r\n')
+    return stdout
+  if verbose:
+    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
+  if stderr:
+    print >>sys.stderr, stderr.rstrip()
+  sys.exit(2)
+
+
+def die(message):
+  print >>sys.stderr, 'error:', message
+  sys.exit(2)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..fd9da0b97
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+CC = gcc
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L
+BUILD = build
+
+CFLAGS += -Wmissing-prototypes
+CFLAGS += -Wstrict-prototypes
+CFLAGS += -Wmissing-declarations
+
+$(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c state/redis.c common.c
+	$(CC) -o $@ test/db_tests.c event_loop.c state/redis.c common.c thirdparty/hiredis/libhiredis.a $(CFLAGS) -I. -Ithirdparty
+
+clean:
+	rm -r $(BUILD)/*
+
+redis:
+	cd thirdparty ; bash ./build-redis.sh
+
+hiredis:
+	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
+
+test: hiredis redis $(BUILD)/db_tests FORCE
+	./thirdparty/redis-3.2.3/src/redis-server & sleep 1s ; ./build/db_tests
+
+FORCE:
diff --git a/build/.gitkeep b/build/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/common.c b/common.c
new file mode 100644
index 000000000..e227eb16e
--- /dev/null
+++ b/common.c
@@ -0,0 +1,15 @@
+#include "common.h"
+
+char *sha1_to_hex(const unsigned char *sha1, char *buffer) {
+  /* Maps a 4-bit value to its lowercase hexadecimal digit. */
+  static const char digits[] = "0123456789abcdef";
+
+  for (int i = 0; i < UNIQUE_ID_SIZE; i++) {
+    const unsigned int byte = sha1[i];
+    buffer[2 * i] = digits[byte >> 4];      /* high nibble first */
+    buffer[2 * i + 1] = digits[byte & 0xf]; /* then the low nibble */
+  }
+  /* NUL-terminate right after the 2 * UNIQUE_ID_SIZE digits written above. */
+  buffer[2 * UNIQUE_ID_SIZE] = '\0';
+  return buffer;
+}
diff --git a/common.h b/common.h
new file mode 100644
index 000000000..61abc7b50
--- /dev/null
+++ b/common.h
@@ -0,0 +1,29 @@
+#ifndef COMMON_H
+#define COMMON_H
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef NDEBUG
+#define LOG_DEBUG(M, ...)
+#else
+#define LOG_DEBUG(M, ...) \
+  fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#endif
+
+#define LOG_ERR(M, ...)                                                     \
+  fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+          errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
+
+#define LOG_INFO(M, ...) \
+  fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define UNIQUE_ID_SIZE 20
+
+typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } unique_id;
+
+/* Convert a 20 byte sha1 hash to a hexadecimal string. This function assumes
+ * that buffer points to an already allocated char array of size 2 *
+ * UNIQUE_ID_SIZE + 1 */
+char *sha1_to_hex(const unsigned char *sha1, char *buffer);
+
+#endif
diff --git a/event_loop.c b/event_loop.c
new file mode 100644
index 000000000..d7169f5d8
--- /dev/null
+++ b/event_loop.c
@@ -0,0 +1,92 @@
+#include "event_loop.h"
+
+#include <assert.h>
+#include <unistd.h>
+
+UT_icd item_icd = {sizeof(event_loop_item), NULL, NULL, NULL};
+UT_icd poll_icd = {sizeof(struct pollfd), NULL, NULL, NULL};
+
+/* Initializes the event loop by creating its items and waiting arrays.
+ * This function needs to be called before any other event loop function. */
+void event_loop_init(event_loop *loop) {
+  utarray_new(loop->items, &item_icd);
+  utarray_new(loop->waiting, &poll_icd);
+}
+
+/* Add a new file descriptor fd to the event loop.
+ * This function stores a user defined type and data pointer for the file
+ * descriptor; the data can be accessed using event_loop_get_data and
+ * event_loop_set_data. The parameter events is the same as in
+ * http://linux.die.net/man/2/poll. Returns the index of the new item. */
+int64_t event_loop_attach(event_loop *loop,
+                          int type,
+                          void *data,
+                          int fd,
+                          int events) {
+  assert(utarray_len(loop->items) == utarray_len(loop->waiting));
+  int64_t index = utarray_len(loop->items);
+  event_loop_item item = {.type = type, .data = data};
+  utarray_push_back(loop->items, &item);
+  struct pollfd waiting = {.fd = fd, .events = events};
+  utarray_push_back(loop->waiting, &waiting);
+  return index;
+}
+
+/* Detach a file descriptor from the event loop, closing it if shall_close.
+ * The last item is moved into the freed slot, so the index of that last item
+ * changes to index; the indices of all other remaining items stay valid. */
+void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
+  struct pollfd *waiting_item =
+      (struct pollfd *) utarray_eltptr(loop->waiting, index);
+  struct pollfd *waiting_back = (struct pollfd *) utarray_back(loop->waiting);
+  if (shall_close) {
+    close(waiting_item->fd);
+  }
+  *waiting_item = *waiting_back;
+  utarray_pop_back(loop->waiting);
+
+  event_loop_item *items_item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
+  event_loop_item *items_back = (event_loop_item *) utarray_back(loop->items);
+  *items_item = *items_back;
+  utarray_pop_back(loop->items);
+}
+
+/* Poll the file descriptors associated to this event loop, blocking without
+ * a timeout. Returns poll's result; see http://linux.die.net/man/2/poll */
+int event_loop_poll(event_loop *loop) {
+  return poll((struct pollfd *) utarray_front(loop->waiting),
+              utarray_len(loop->waiting), -1);
+}
+
+/* Get the total number of file descriptors participating in the event loop. */
+int64_t event_loop_size(event_loop *loop) {
+  return utarray_len(loop->waiting);
+}
+
+/* Get a pointer to the pollfd structure stored at the given index of the
+ * event loop's waiting array. */
+struct pollfd *event_loop_get(event_loop *loop, int64_t index) {
+  return (struct pollfd *) utarray_eltptr(loop->waiting, index);
+}
+
+/* Set the user data pointer of the event loop item at the given index. */
+void event_loop_set_data(event_loop *loop, int64_t index, void *data) {
+  event_loop_item *item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
+  item->data = data;
+}
+
+/* Get the user data pointer of the event loop item at the given index. */
+void *event_loop_get_data(event_loop *loop, int64_t index) {
+  event_loop_item *item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
+  return item->data;
+}
+
+/* Free the space associated to the event loop.
+ * Does not free the event_loop datastructure itself. */
+void event_loop_free(event_loop *loop) {
+  utarray_free(loop->items);
+  utarray_free(loop->waiting);
+}
diff --git a/event_loop.h b/event_loop.h
new file mode 100644
index 000000000..0903bb9d4
--- /dev/null
+++ b/event_loop.h
@@ -0,0 +1,38 @@
+#ifndef EVENT_LOOP_H
+#define EVENT_LOOP_H
+
+#include <poll.h>
+#include <stdint.h>
+
+#include "utarray.h"
+
+typedef struct {
+  /* The type of connection (e.g. redis, client, manager, data transfer). */
+  int type;
+  /* Data associated with the connection (managed by the user) */
+  void *data;
+} event_loop_item;
+
+typedef struct {
+  /* Array of event_loop_items that hold information for connections. */
+  UT_array *items;
+  /* Array of file descriptors that are waiting, corresponding to items. */
+  UT_array *waiting;
+} event_loop;
+
+/* Event loop functions. */
+void event_loop_init(event_loop *loop);
+void event_loop_free(event_loop *loop);
+int64_t event_loop_attach(event_loop *loop,
+                          int type,
+                          void *data,
+                          int fd,
+                          int events);
+void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
+int event_loop_poll(event_loop *loop);
+int64_t event_loop_size(event_loop *loop);
+struct pollfd *event_loop_get(event_loop *loop, int64_t index);
+void event_loop_set_data(event_loop *loop, int64_t index, void *data);
+void *event_loop_get_data(event_loop *loop, int64_t index);
+
+#endif
diff --git a/state/db.h b/state/db.h
new file mode 100644
index 000000000..b586f9acf
--- /dev/null
+++ b/state/db.h
@@ -0,0 +1,29 @@
+#ifndef DB_H
+#define DB_H
+
+#include "event_loop.h"
+
+typedef struct db_conn_impl db_conn;
+
+/* Connect to the global system store at address and port. The last
+ * parameter is an output parameter and we assume the memory is
+ * allocated by the caller. */
+void db_connect(const char *db_address,
+                int db_port,
+                const char *client_type,
+                const char *client_addr,
+                int client_port,
+                db_conn *db);
+
+/* Attach global system store connection to event loop. Returns the index of
+ * the connection in the loop. */
+int64_t db_attach(db_conn *db, event_loop *loop, int connection_type);
+
+/* This function will be called by the user if there is a new event in the
+ * event loop associated with the global system store connection. */
+void db_event(db_conn *db);
+
+/* Disconnect from the global system store. */
+void db_disconnect(db_conn *db);
+
+#endif
diff --git a/state/object_table.h b/state/object_table.h
new file mode 100644
index 000000000..6b4d62e4b
--- /dev/null
+++ b/state/object_table.h
@@ -0,0 +1,15 @@
+#include "common.h"
+#include "db.h"
+
+typedef void (*lookup_callback)(void *);
+
+/* Register a new object with the directory. */
+void object_table_add(db_conn *db, unique_id object_id);
+
+/* Remove object from the directory */
+void object_table_remove(db_conn *db, unique_id object_id);
+
+/* Look up entry from the directory */
+void object_table_lookup(db_conn *db,
+                         unique_id object_id,
+                         lookup_callback callback);
diff --git a/state/redis.c b/state/redis.c
new file mode 100644
index 000000000..a8029a063
--- /dev/null
+++ b/state/redis.c
@@ -0,0 +1,188 @@
+/* Redis implementation of the global state store */
+
+#include <assert.h>
+
+#include "common.h"
+#include "db.h"
+#include "object_table.h"
+#include "event_loop.h"
+#include "redis.h"
+
+static void poll_add_read(void *privdata) { /* installed as ev.addRead */
+  db_conn *db = privdata;
+  if (db->reading)
+    return; /* already marked as reading */
+  db->reading = 1;
+  event_loop_get(db->loop, 0)->events |= POLLIN; /* assumes loop slot 0 */
+}
+
+static void poll_del_read(void *privdata) { /* installed as ev.delRead */
+  db_conn *db = privdata;
+  if (!db->reading)
+    return; /* not marked as reading */
+  db->reading = 0;
+  event_loop_get(db->loop, 0)->events &= ~POLLIN; /* assumes loop slot 0 */
+}
+
+static void poll_add_write(void *privdata) { /* installed as ev.addWrite */
+  db_conn *db = privdata;
+  if (db->writing)
+    return; /* already marked as writing */
+  db->writing = 1;
+  event_loop_get(db->loop, 0)->events |= POLLOUT; /* assumes loop slot 0 */
+}
+
+static void poll_del_write(void *privdata) { /* installed as ev.delWrite */
+  db_conn *db = privdata;
+  if (!db->writing)
+    return; /* not marked as writing */
+  db->writing = 0;
+  event_loop_get(db->loop, 0)->events &= ~POLLOUT; /* assumes loop slot 0 */
+}
+
+#define LOG_REDIS_ERR(context, M, ...)                                        \
+  fprintf(stderr, "[ERROR] (%s:%d: message: %s) " M "\n", __FILE__, __LINE__, \
+          context->errstr, ##__VA_ARGS__)
+
+#define CHECK_REDIS_CONNECT(CONTEXT_TYPE, context, M, ...) \
+  do { /* exit(-1) if context is NULL or has err set */    \
+    CONTEXT_TYPE *_context = (context);                    \
+    if (!_context) {                                       \
+      LOG_ERR("could not allocate redis context");         \
+      exit(-1);                                            \
+    }                                                      \
+    if (_context->err) {                                   \
+      LOG_REDIS_ERR(_context, M, ##__VA_ARGS__);           \
+      exit(-1);                                            \
+    }                                                      \
+  } while (0)
+
+void db_connect(const char *address,
+                int port,
+                const char *client_type,
+                const char *client_addr,
+                int client_port,
+                db_conn *db) {
+  /* Sync connection for the initial handshake; exits the process on error. */
+  redisReply *reply;
+  long long num_clients;
+  redisContext *context = redisConnect(address, port);
+  CHECK_REDIS_CONNECT(redisContext, context, "could not connect to redis %s:%d",
+                      address, port);
+  /* Add new client using optimistic locking: retry while EXEC replies nil. */
+  while (1) {
+    freeReplyObject(redisCommand(context, "WATCH %s", client_type));
+    reply = redisCommand(context, "HLEN %s", client_type);
+    if (reply == NULL || reply->type != REDIS_REPLY_INTEGER) {
+      LOG_REDIS_ERR(context, "could not count %s clients", client_type);
+      exit(-1);
+    }
+    num_clients = reply->integer;
+    freeReplyObject(reply);
+    freeReplyObject(redisCommand(context, "MULTI"));
+    freeReplyObject(redisCommand(context, "HSET %s %lld %s:%d", client_type,
+                                 num_clients, client_addr, client_port));
+    reply = redisCommand(context, "EXEC");
+    int committed = reply != NULL && reply->type != REDIS_REPLY_NIL;
+    freeReplyObject(reply);
+    if (committed)
+      break;
+  }
+  redisFree(context);
+
+  db->client_type = strdup(client_type);
+  db->client_id = num_clients;
+  db->reading = 0;
+  db->writing = 0;
+
+  /* Establish async connection */
+  db->context = redisAsyncConnect(address, port);
+  CHECK_REDIS_CONNECT(redisAsyncContext, db->context,
+                      "could not connect to redis %s:%d", address, port);
+  db->context->data = (void *) db;
+}
+
+void db_event(db_conn *db) {
+  if (db->reading) { /* set by poll_add_read, cleared by poll_del_read */
+    redisAsyncHandleRead(db->context);
+  }
+  if (db->writing) { /* set by poll_add_write, cleared by poll_del_write */
+    redisAsyncHandleWrite(db->context);
+  }
+}
+
+int64_t db_attach(db_conn *db, event_loop *loop, int connection_type) {
+  db->loop = loop; /* NOTE(review): set even when the check below fails */
+
+  redisAsyncContext *ac = db->context;
+  redisContext *c = &(ac->c);
+
+  if (ac->ev.data != NULL) { /* ev.data is set below on the first attach */
+    return REDIS_ERR;
+  }
+
+  ac->ev.addRead = poll_add_read;   /* these hooks toggle POLLIN/POLLOUT */
+  ac->ev.delRead = poll_del_read;   /* on the pollfd at loop index 0 and */
+  ac->ev.addWrite = poll_add_write; /* the reading/writing flags of db */
+  ac->ev.delWrite = poll_del_write;
+  // TODO(pcm): Implement cleanup function
+
+  ac->ev.data = db; /* the hooks above cast their privdata to db_conn */
+
+  return event_loop_attach(loop, connection_type, NULL, c->fd,
+                           POLLIN | POLLOUT);
+}
+
+void object_table_add(db_conn *db, unique_id object_id) {
+  /* Asynchronously add this client's id to the redis set "obj:<hex id>". */
+  char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
+  sha1_to_hex(object_id.id, hex_object_id);
+  redisAsyncCommand(db->context, NULL, NULL, "SADD obj:%s %lld", hex_object_id,
+                    (long long) db->client_id); /* was a hard-coded 0 */
+  if (db->context->err)
+    LOG_REDIS_ERR(db->context, "could not add object_table entry");
+}
+
+void object_table_lookup_callback(redisAsyncContext *c,
+                                  void *r,
+                                  void *privdata) {
+  redisReply *reply = r; /* reply to the HGET of object_table_fetch_addr_port */
+  if (reply == NULL)
+    return;
+  lookup_callback callback = privdata;
+  /* Pass an owned, NUL-terminated copy; NULL if the reply holds no string. */
+  char *str = reply->str ? strndup(reply->str, reply->len) : NULL;
+  callback(str);
+}
+
+void object_table_fetch_addr_port(redisAsyncContext *c,
+                                  void *r,
+                                  void *privdata) {
+  redisReply *reply = r; /* reply to the SRANDMEMBER of object_table_lookup */
+  if (reply == NULL)
+    return;
+  long long manager_id = -1;
+  if (reply->type == REDIS_REPLY_STRING) {
+    manager_id = strtoll(reply->str, NULL, 10);
+  } else if (reply->type == REDIS_REPLY_INTEGER) { /* was inverted (!=) */
+    manager_id = reply->integer;
+  } else { /* any other reply type, e.g. nil for an unknown object */
+    LOG_ERR("expected integer or string, received type %d", reply->type);
+    exit(-1);
+  }
+  db_conn *db = c->data; /* stored in the context by db_connect */
+  redisAsyncCommand(db->context, object_table_lookup_callback, privdata,
+                    "HGET %s %lld", db->client_type, manager_id);
+}
+
+void object_table_lookup(db_conn *db,
+                         unique_id object_id,
+                         lookup_callback callback) {
+  static char hex_id[2 * UNIQUE_ID_SIZE + 1]; /* hex digits plus NUL */
+  const char *key = sha1_to_hex(object_id.id, hex_id);
+  /* Pick a random holder; object_table_fetch_addr_port handles the reply. */
+  redisAsyncCommand(db->context, object_table_fetch_addr_port, callback,
+                    "SRANDMEMBER obj:%s", key);
+  if (db->context->err)
+    LOG_REDIS_ERR(db->context, "error in object_table lookup");
+}
diff --git a/state/redis.h b/state/redis.h
new file mode 100644
index 000000000..471044b06
--- /dev/null
+++ b/state/redis.h
@@ -0,0 +1,26 @@
+#include "db.h"
+#include "object_table.h"
+
+#include "hiredis/hiredis.h"
+#include "hiredis/async.h"
+
+struct db_conn_impl {
+  /* String that identifies this client type. */
+  char *client_type;
+  /* Unique ID for this client within the type. */
+  int64_t client_id;
+  /* Redis context for this global state store connection. */
+  redisAsyncContext *context;
+  /* Which events are we processing (read, write)? */
+  int reading, writing;
+  /* The event loop this global state store connection is part of. */
+  event_loop *loop;
+};
+
+void object_table_fetch_addr_port(redisAsyncContext *c,
+                                  void *r,
+                                  void *privdata);
+
+void object_table_lookup_callback(redisAsyncContext *c,
+                                  void *r,
+                                  void *privdata);
diff --git a/test/db_tests.c b/test/db_tests.c
new file mode 100644
index 000000000..b3a0d582e
--- /dev/null
+++ b/test/db_tests.c
@@ -0,0 +1,69 @@
+#include "greatest.h"
+
+#include <assert.h>
+
+#include "event_loop.h"
+#include "state/db.h"
+#include "state/object_table.h"
+#include "state/redis.h"
+
+SUITE(db_tests);
+
+int lookup_successful = 0;
+const char *manager_addr = "127.0.0.1";
+int manager_port = 12345;
+char received_addr[16] = {0};
+char received_port[6] = {0};
+
+void test_callback(void *userdata);
+
+void test_callback(void *userdata) {
+  char *addr_port = userdata; /* expected to look like "<addr>:<port>" */
+  lookup_successful = 1;      /* lets the polling loop in the test finish */
+  int parsed = addr_port ? sscanf(addr_port, "%15[0-9.]:%5[0-9]",
+                                  received_addr, received_port) : 0;
+  if (parsed != 2)
+    assert(0);
+  free(addr_port);
+}
+
+TEST object_table_lookup_test(void) {
+  event_loop loop;
+  event_loop_init(&loop);
+  db_conn conn;
+  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port,
+             &conn);
+  int64_t index = db_attach(&conn, &loop, 0);
+  unique_id id = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
+  object_table_add(&conn, id);
+  object_table_lookup(&conn, id, test_callback);
+  while (!lookup_successful) { /* flag is set by test_callback */
+    int num_ready = event_loop_poll(&loop);
+    if (num_ready < 0) {
+      exit(-1);
+    }
+    for (int i = 0; i < event_loop_size(&loop); ++i) {
+      struct pollfd *waiting = event_loop_get(&loop, i);
+      if (waiting->revents == 0)
+        continue;
+      if (i == index) { /* only the db connection is attached here */
+        db_event(&conn);
+      }
+    }
+  }
+  ASSERT_STR_EQ(manager_addr, &received_addr[0]); /* (expected, got) */
+  ASSERT_EQ(manager_port, atoi(received_port));   /* (expected, got) */
+  PASS();
+}
+
+SUITE(db_tests) {
+  RUN_TEST(object_table_lookup_test);
+}
+
+GREATEST_MAIN_DEFS();
+
+int main(int argc, char **argv) {
+  GREATEST_MAIN_BEGIN();
+  RUN_SUITE(db_tests);
+  GREATEST_MAIN_END();
+}
diff --git a/thirdparty/build-redis.sh b/thirdparty/build-redis.sh
new file mode 100644
index 000000000..57c68c97b
--- /dev/null
+++ b/thirdparty/build-redis.sh
@@ -0,0 +1,4 @@
+set -e  # abort on the first failing step
+[ -f redis-3.2.3.tar.gz ] || wget http://download.redis.io/releases/redis-3.2.3.tar.gz
+tar xvfz redis-3.2.3.tar.gz
+cd redis-3.2.3 && make
diff --git a/thirdparty/greatest.h b/thirdparty/greatest.h
new file mode 100644
index 000000000..eb34ff426
--- /dev/null
+++ b/thirdparty/greatest.h
@@ -0,0 +1,1023 @@
+/*
+ * Copyright (c) 2011-2016 Scott Vokes <vokes.s@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef GREATEST_H
+#define GREATEST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* 1.2.1 */
+#define GREATEST_VERSION_MAJOR 1
+#define GREATEST_VERSION_MINOR 2
+#define GREATEST_VERSION_PATCH 1
+
+/* A unit testing system for C, contained in 1 file.
+ * It doesn't use dynamic allocation or depend on anything
+ * beyond ANSI C89.
+ *
+ * An up-to-date version can be found at:
+ *     https://github.com/silentbicycle/greatest/
+ */
+
+
+/*********************************************************************
+ * Minimal test runner template
+ *********************************************************************/
+#if 0
+#include "greatest.h"
+TEST foo_should_foo(void) {
+    PASS();
+}
+static void setup_cb(void *data) {
+    printf("setup callback for each test case\n");
+}
+static void teardown_cb(void *data) {
+    printf("teardown callback for each test case\n");
+}
+SUITE(suite) {
+    /* Optional setup/teardown callbacks which will be run before/after
+     * every test case. If using a test suite, they will be cleared when
+     * the suite finishes. */
+    SET_SETUP(setup_cb, voidp_to_callback_data);
+    SET_TEARDOWN(teardown_cb, voidp_to_callback_data);
+    RUN_TEST(foo_should_foo);
+}
+/* Add definitions that need to be in the test runner's main file. */
+GREATEST_MAIN_DEFS();
+/* Set up, run suite(s) of tests, report pass/fail/skip stats. */
+int run_tests(void) {
+    GREATEST_INIT();            /* init. greatest internals */
+    /* List of suites to run (if any). */
+    RUN_SUITE(suite);
+    /* Tests can also be run directly, without using test suites. */
+    RUN_TEST(foo_should_foo);
+    GREATEST_PRINT_REPORT();          /* display results */
+    return greatest_all_passed();
+}
+/* main(), for a standalone command-line test runner.
+ * This replaces run_tests above, and adds command line option
+ * handling and exiting with a pass/fail status. */
+int main(int argc, char **argv) {
+    GREATEST_MAIN_BEGIN();      /* init & parse command-line args */
+    RUN_SUITE(suite);
+    GREATEST_MAIN_END();        /* display results */
+}
+#endif
+/*********************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/***********
+ * Options *
+ ***********/
+
+/* Default column width for non-verbose output. */
+#ifndef GREATEST_DEFAULT_WIDTH
+#define GREATEST_DEFAULT_WIDTH 72
+#endif
+
+/* FILE *, for test logging. */
+#ifndef GREATEST_STDOUT
+#define GREATEST_STDOUT stdout
+#endif
+
+/* Remove GREATEST_ prefix from most commonly used symbols? */
+#ifndef GREATEST_USE_ABBREVS
+#define GREATEST_USE_ABBREVS 1
+#endif
+
+/* Set to 0 to disable all use of setjmp/longjmp. */
+#ifndef GREATEST_USE_LONGJMP
+#define GREATEST_USE_LONGJMP 1
+#endif
+
+#if GREATEST_USE_LONGJMP
+#include <setjmp.h>
+#endif
+
+/* Set to 0 to disable all use of time.h / clock(). */
+#ifndef GREATEST_USE_TIME
+#define GREATEST_USE_TIME 1
+#endif
+
+#if GREATEST_USE_TIME
+#include <time.h>
+#endif
+
+/* Floating point type, for ASSERT_IN_RANGE. */
+#ifndef GREATEST_FLOAT
+#define GREATEST_FLOAT double
+#define GREATEST_FLOAT_FMT "%g"
+#endif
+
+/*********
+ * Types *
+ *********/
+
+/* Info for the current running suite. */
+typedef struct greatest_suite_info {
+    unsigned int tests_run;
+    unsigned int passed;
+    unsigned int failed;
+    unsigned int skipped;
+
+#if GREATEST_USE_TIME
+    /* timers, pre/post running suite and individual tests */
+    clock_t pre_suite;
+    clock_t post_suite;
+    clock_t pre_test;
+    clock_t post_test;
+#endif
+} greatest_suite_info;
+
+/* Type for a suite function. */
+typedef void (greatest_suite_cb)(void);
+
+/* Types for setup/teardown callbacks. If non-NULL, these will be run
+ * and passed the pointer to their additional data. */
+typedef void (greatest_setup_cb)(void *udata);
+typedef void (greatest_teardown_cb)(void *udata);
+
+/* Type for an equality comparison between two pointers of the same type.
+ * Should return non-0 if equal, otherwise 0.
+ * UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
+typedef int greatest_equal_cb(const void *exp, const void *got, void *udata);
+
+/* Type for a callback that prints a value pointed to by T.
+ * Return value has the same meaning as printf's.
+ * UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */
+typedef int greatest_printf_cb(const void *t, void *udata);
+
+/* Callbacks for an arbitrary type; needed for type-specific
+ * comparisons via GREATEST_ASSERT_EQUAL_T[m].*/
+typedef struct greatest_type_info {
+    greatest_equal_cb *equal;
+    greatest_printf_cb *print;
+} greatest_type_info;
+
+typedef struct greatest_memory_cmp_env {
+    const unsigned char *exp;
+    const unsigned char *got;
+    size_t size;
+} greatest_memory_cmp_env;
+
+/* Callbacks for string and raw memory types. */
+extern greatest_type_info greatest_type_info_string;
+extern greatest_type_info greatest_type_info_memory;
+
+typedef enum {
+    GREATEST_FLAG_FIRST_FAIL = 0x01,
+    GREATEST_FLAG_LIST_ONLY = 0x02
+} greatest_flag_t;
+
+/* Struct containing all test runner state. */
+typedef struct greatest_run_info {
+    unsigned char flags;
+    unsigned char verbosity;
+    unsigned int tests_run;     /* total test count */
+
+    /* overall pass/fail/skip counts */
+    unsigned int passed;
+    unsigned int failed;
+    unsigned int skipped;
+    unsigned int assertions;
+
+    /* currently running test suite */
+    greatest_suite_info suite;
+
+    /* info to print about the most recent failure */
+    const char *fail_file;
+    unsigned int fail_line;
+    const char *msg;
+
+    /* current setup/teardown hooks and userdata */
+    greatest_setup_cb *setup;
+    void *setup_udata;
+    greatest_teardown_cb *teardown;
+    void *teardown_udata;
+
+    /* formatting info for ".....s...F"-style output */
+    unsigned int col;
+    unsigned int width;
+
+    /* only run a specific suite or test */
+    const char *suite_filter;
+    const char *test_filter;
+
+#if GREATEST_USE_TIME
+    /* overall timers */
+    clock_t begin;
+    clock_t end;
+#endif
+
+#if GREATEST_USE_LONGJMP
+    jmp_buf jump_dest;
+#endif
+} greatest_run_info;
+
+struct greatest_report_t {
+    /* overall pass/fail/skip counts */
+    unsigned int passed;
+    unsigned int failed;
+    unsigned int skipped;
+    unsigned int assertions;
+};
+
+/* Global var for the current testing context.
+ * Initialized by GREATEST_MAIN_DEFS(). */
+extern greatest_run_info greatest_info;
+
+/* Type for ASSERT_ENUM_EQ's ENUM_STR argument. */
+typedef const char *greatest_enum_str_fun(int value);
+
+/**********************
+ * Exported functions *
+ **********************/
+
+/* These are used internally by greatest. */
+void greatest_do_pass(const char *name);
+void greatest_do_fail(const char *name);
+void greatest_do_skip(const char *name);
+int greatest_pre_test(const char *name);
+void greatest_post_test(const char *name, int res);
+void greatest_usage(const char *name);
+int greatest_do_assert_equal_t(const void *exp, const void *got,
+    greatest_type_info *type_info, void *udata);
+
+/* These are part of the public greatest API. */
+void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata);
+void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, void *udata);
+int greatest_all_passed(void);
+void greatest_set_test_filter(const char *name);
+void greatest_set_suite_filter(const char *name);
+void greatest_get_report(struct greatest_report_t *report);
+unsigned int greatest_get_verbosity(void);
+void greatest_set_verbosity(unsigned int verbosity);
+void greatest_set_flag(greatest_flag_t flag);
+
+
+/********************
+* Language Support *
+********************/
+
+/* If __VA_ARGS__ (C99) is supported, allow parametric testing
+* without needing to manually manage the argument struct. */
+#if __STDC_VERSION__ >= 19901L || _MSC_VER >= 1800
+#define GREATEST_VA_ARGS
+#endif
+
+
+/**********
+ * Macros *
+ **********/
+
+/* Define a suite. */
+#define GREATEST_SUITE(NAME) void NAME(void); void NAME(void)
+
+/* Declare a suite, provided by another compilation unit. */
+#define GREATEST_SUITE_EXTERN(NAME) void NAME(void)
+
+/* Start defining a test function.
+ * The arguments are not included, to allow parametric testing. */
+#define GREATEST_TEST static enum greatest_test_res
+
+/* PASS/FAIL/SKIP result from a test. Used internally. */
+typedef enum greatest_test_res {
+    GREATEST_TEST_RES_PASS = 0,
+    GREATEST_TEST_RES_FAIL = -1,
+    GREATEST_TEST_RES_SKIP = 1
+} greatest_test_res;
+
+/* Run a suite. */
+#define GREATEST_RUN_SUITE(S_NAME) greatest_run_suite(S_NAME, #S_NAME)
+
+/* Run a test in the current suite. */
+#define GREATEST_RUN_TEST(TEST)                                         \
+    do {                                                                \
+        if (greatest_pre_test(#TEST) == 1) {                            \
+            enum greatest_test_res res = GREATEST_SAVE_CONTEXT();       \
+            if (res == GREATEST_TEST_RES_PASS) {                        \
+                res = TEST();                                           \
+            }                                                           \
+            greatest_post_test(#TEST, res);                             \
+        } else if (GREATEST_LIST_ONLY()) {                              \
+            fprintf(GREATEST_STDOUT, "  %s\n", #TEST);                  \
+        }                                                               \
+    } while (0)
+
+/* Ignore a test, don't warn about it being unused. */
+#define GREATEST_IGNORE_TEST(TEST) (void)TEST
+
+/* Run a test in the current suite with one void * argument,
+ * which can be a pointer to a struct with multiple arguments. */
+#define GREATEST_RUN_TEST1(TEST, ENV)                                   \
+    do {                                                                \
+        if (greatest_pre_test(#TEST) == 1) {                            \
+            int res = TEST(ENV);                                        \
+            greatest_post_test(#TEST, res);                             \
+        } else if (GREATEST_LIST_ONLY()) {                              \
+            fprintf(GREATEST_STDOUT, "  %s\n", #TEST);                  \
+        }                                                               \
+    } while (0)
+
+#ifdef GREATEST_VA_ARGS
+#define GREATEST_RUN_TESTp(TEST, ...)                                   \
+    do {                                                                \
+        if (greatest_pre_test(#TEST) == 1) {                            \
+            int res = TEST(__VA_ARGS__);                                \
+            greatest_post_test(#TEST, res);                             \
+        } else if (GREATEST_LIST_ONLY()) {                              \
+            fprintf(GREATEST_STDOUT, "  %s\n", #TEST);                  \
+        }                                                               \
+    } while (0)
+#endif
+
+
+/* Check if the test runner is in verbose mode. */
+#define GREATEST_IS_VERBOSE() ((greatest_info.verbosity) > 0)
+#define GREATEST_LIST_ONLY()                                            \
+    (greatest_info.flags & GREATEST_FLAG_LIST_ONLY)
+#define GREATEST_FIRST_FAIL()                                           \
+    (greatest_info.flags & GREATEST_FLAG_FIRST_FAIL)
+#define GREATEST_FAILURE_ABORT()                                        \
+    (greatest_info.suite.failed > 0 && GREATEST_FIRST_FAIL())
+
+/* Message-less forms of tests defined below. */
+#define GREATEST_PASS() GREATEST_PASSm(NULL)
+#define GREATEST_FAIL() GREATEST_FAILm(NULL)
+#define GREATEST_SKIP() GREATEST_SKIPm(NULL)
+#define GREATEST_ASSERT(COND)                                           \
+    GREATEST_ASSERTm(#COND, COND)
+#define GREATEST_ASSERT_OR_LONGJMP(COND)                                \
+    GREATEST_ASSERT_OR_LONGJMPm(#COND, COND)
+#define GREATEST_ASSERT_FALSE(COND)                                     \
+    GREATEST_ASSERT_FALSEm(#COND, COND)
+#define GREATEST_ASSERT_EQ(EXP, GOT)                                    \
+    GREATEST_ASSERT_EQm(#EXP " != " #GOT, EXP, GOT)
+#define GREATEST_ASSERT_EQ_FMT(EXP, GOT, FMT)                           \
+    GREATEST_ASSERT_EQ_FMTm(#EXP " != " #GOT, EXP, GOT, FMT)
+#define GREATEST_ASSERT_IN_RANGE(EXP, GOT, TOL)                         \
+    GREATEST_ASSERT_IN_RANGEm(#EXP " != " #GOT " +/- " #TOL, EXP, GOT, TOL)
+#define GREATEST_ASSERT_EQUAL_T(EXP, GOT, TYPE_INFO, UDATA)             \
+    GREATEST_ASSERT_EQUAL_Tm(#EXP " != " #GOT, EXP, GOT, TYPE_INFO, UDATA)
+#define GREATEST_ASSERT_STR_EQ(EXP, GOT)                                \
+    GREATEST_ASSERT_STR_EQm(#EXP " != " #GOT, EXP, GOT)
+#define GREATEST_ASSERT_STRN_EQ(EXP, GOT, SIZE)                         \
+    GREATEST_ASSERT_STRN_EQm(#EXP " != " #GOT, EXP, GOT, SIZE)
+#define GREATEST_ASSERT_MEM_EQ(EXP, GOT, SIZE)                          \
+    GREATEST_ASSERT_MEM_EQm(#EXP " != " #GOT, EXP, GOT, SIZE)
+#define GREATEST_ASSERT_ENUM_EQ(EXP, GOT, ENUM_STR)                     \
+    GREATEST_ASSERT_ENUM_EQm(#EXP " != " #GOT, EXP, GOT, ENUM_STR)
+
+/* The following forms take an additional message argument first,
+ * to be displayed by the test runner. */
+
+/* Count an assertion; if COND is false, fail the enclosing test with MSG. */
+#define GREATEST_ASSERTm(MSG, COND)                                     \
+    do {                                                                \
+        greatest_info.assertions++;                                     \
+        if (!(COND)) { GREATEST_FAILm(MSG); }                           \
+    } while (0)
+
+/* As GREATEST_ASSERTm, but fails via longjmp (needs GREATEST_USE_LONGJMP). */
+#define GREATEST_ASSERT_OR_LONGJMPm(MSG, COND)                          \
+    do {                                                                \
+        greatest_info.assertions++;                                     \
+        if (!(COND)) { GREATEST_FAIL_WITH_LONGJMPm(MSG); }              \
+    } while (0)
+
+/* Count an assertion; if COND is true, fail the enclosing test with MSG. */
+#define GREATEST_ASSERT_FALSEm(MSG, COND)                               \
+    do {                                                                \
+        greatest_info.assertions++;                                     \
+        if ((COND)) { GREATEST_FAILm(MSG); }                            \
+    } while (0)
+
+/* Count an assertion; fail with MSG if (EXP) != (GOT). No values printed. */
+#define GREATEST_ASSERT_EQm(MSG, EXP, GOT)                              \
+    do {                                                                \
+        greatest_info.assertions++;                                     \
+        if ((EXP) != (GOT)) { GREATEST_FAILm(MSG); }                    \
+    } while (0)
+
+/* Fail if EXP != GOT, first printing both values with printf format FMT.
+ * Warning: EXP and GOT will be evaluated more than once on failure. */
+#define GREATEST_ASSERT_EQ_FMTm(MSG, EXP, GOT, FMT)                     \
+    do {                                                                \
+        const char *greatest_FMT = ( FMT );                             \
+        greatest_info.assertions++;                                     \
+        if ((EXP) != (GOT)) {                                           \
+            fprintf(GREATEST_STDOUT, "\nExpected: ");                   \
+            fprintf(GREATEST_STDOUT, greatest_FMT, EXP);                \
+            fprintf(GREATEST_STDOUT, "\n     Got: ");                   \
+            fprintf(GREATEST_STDOUT, greatest_FMT, GOT);                \
+            fprintf(GREATEST_STDOUT, "\n");                             \
+            GREATEST_FAILm(MSG);                                        \
+        }                                                               \
+    } while (0)
+
+/* Fail if EXP is not equal to GOT (compared as int), printing enum IDs. */
+#define GREATEST_ASSERT_ENUM_EQm(MSG, EXP, GOT, ENUM_STR)               \
+    do {                                                                \
+        int greatest_EXP = (int)(EXP), greatest_GOT = (int)(GOT);       \
+        greatest_enum_str_fun *greatest_ENUM_STR = ENUM_STR;            \
+        greatest_info.assertions++; /* count like the other ASSERTs */  \
+        if (greatest_EXP != greatest_GOT) {                             \
+            fprintf(GREATEST_STDOUT, "\nExpected: %s",                  \
+                greatest_ENUM_STR(greatest_EXP));                       \
+            fprintf(GREATEST_STDOUT, "\n     Got: %s\n",                \
+                greatest_ENUM_STR(greatest_GOT));                       \
+            GREATEST_FAILm(MSG);                                        \
+        }                                                               \
+    } while (0)
+
+/* Fail if |EXP - GOT| > TOL. NOTE(review): a NaN operand seems to pass. */
+#define GREATEST_ASSERT_IN_RANGEm(MSG, EXP, GOT, TOL)                   \
+    do {                                                                \
+        GREATEST_FLOAT greatest_EXP = (EXP);                            \
+        GREATEST_FLOAT greatest_GOT = (GOT);                            \
+        GREATEST_FLOAT greatest_TOL = (TOL);                            \
+        greatest_info.assertions++;                                     \
+        if ((greatest_EXP > greatest_GOT &&                             \
+                greatest_EXP - greatest_GOT > greatest_TOL) ||          \
+            (greatest_EXP < greatest_GOT &&                             \
+                greatest_GOT - greatest_EXP > greatest_TOL)) {          \
+            fprintf(GREATEST_STDOUT,                                    \
+                "\nExpected: " GREATEST_FLOAT_FMT                       \
+                " +/- " GREATEST_FLOAT_FMT                              \
+                "\n     Got: " GREATEST_FLOAT_FMT                       \
+                "\n",                                                   \
+                greatest_EXP, greatest_TOL, greatest_GOT);              \
+            GREATEST_FAILm(MSG);                                        \
+        }                                                               \
+    } while (0)
+
+/* Fail if the strings EXP and GOT differ according to strcmp. */
+#define GREATEST_ASSERT_STR_EQm(MSG, EXP, GOT)                          \
+    do {                                                                \
+        GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT,                         \
+            &greatest_type_info_string, NULL);                          \
+    } while (0)
+
+/* Fail if EXP and GOT differ within their first SIZE chars (strncmp). */
+#define GREATEST_ASSERT_STRN_EQm(MSG, EXP, GOT, SIZE)                   \
+    do {                                                                \
+        size_t greatest_SIZE = (SIZE); /* prefixed: no caller clash */  \
+        GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT,                         \
+            &greatest_type_info_string, &greatest_SIZE);                \
+    } while (0)
+
+/* Fail if EXP and GOT differ in their first SIZE bytes (memcmp). */
+#define GREATEST_ASSERT_MEM_EQm(MSG, EXP, GOT, SIZE)                    \
+    do {                                                                \
+        greatest_memory_cmp_env greatest_ENV;                           \
+        greatest_ENV.exp = (const unsigned char *)(EXP);                \
+        greatest_ENV.got = (const unsigned char *)(GOT);                \
+        greatest_ENV.size = (SIZE);                                     \
+        GREATEST_ASSERT_EQUAL_Tm(MSG, greatest_ENV.exp,                 \
+            greatest_ENV.got, &greatest_type_info_memory, &greatest_ENV); \
+    } while (0)
+
+/* Fail if EXP is not equal to GOT, according to the `equal` callback in
+ * TYPE_INFO (a missing callback also fails); on mismatch the optional
+ * `print` callback shows both. TYPE_INFO is evaluated exactly once. */
+#define GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT, TYPE_INFO, UDATA)       \
+    do {                                                                \
+        greatest_type_info *greatest_TI = (TYPE_INFO);                  \
+        greatest_info.assertions++;                                     \
+        if (!greatest_do_assert_equal_t(EXP, GOT,                       \
+                greatest_TI, UDATA)) {                                  \
+            if (greatest_TI == NULL || greatest_TI->equal == NULL) {    \
+                GREATEST_FAILm("type_info->equal callback missing!");   \
+            } else {                                                    \
+                GREATEST_FAILm(MSG);                                    \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+/* Pass: store MSG and return GREATEST_TEST_RES_PASS from the caller. */
+#define GREATEST_PASSm(MSG)                                             \
+    do {                                                                \
+        greatest_info.msg = MSG;                                        \
+        return GREATEST_TEST_RES_PASS;                                  \
+    } while (0)
+
+/* Fail: record call-site __FILE__/__LINE__ and MSG, then return RES_FAIL. */
+#define GREATEST_FAILm(MSG)                                             \
+    do {                                                                \
+        greatest_info.fail_file = __FILE__;                             \
+        greatest_info.fail_line = __LINE__;                             \
+        greatest_info.msg = MSG;                                        \
+        return GREATEST_TEST_RES_FAIL;                                  \
+    } while (0)
+
+/* Optional GREATEST_FAILm variant that longjmps to greatest_info.jump_dest. */
+#if GREATEST_USE_LONGJMP
+#define GREATEST_FAIL_WITH_LONGJMP() GREATEST_FAIL_WITH_LONGJMPm(NULL)
+#define GREATEST_FAIL_WITH_LONGJMPm(MSG)                                \
+    do {                                                                \
+        greatest_info.fail_file = __FILE__;                             \
+        greatest_info.fail_line = __LINE__;                             \
+        greatest_info.msg = MSG;                                        \
+        longjmp(greatest_info.jump_dest, GREATEST_TEST_RES_FAIL);       \
+    } while (0)
+#endif
+
+/* Skip the current test: store MSG and return GREATEST_TEST_RES_SKIP. */
+#define GREATEST_SKIPm(MSG)                                             \
+    do {                                                                \
+        greatest_info.msg = MSG;                                        \
+        return GREATEST_TEST_RES_SKIP;                                  \
+    } while (0)
+
+/* Propagate a helper's non-PASS result RES (evaluated once) to the caller. */
+#define GREATEST_CHECK_CALL(RES)                                        \
+    do {                                                                \
+        enum greatest_test_res greatest_RES = RES;                      \
+        if (greatest_RES != GREATEST_TEST_RES_PASS) {                   \
+            return greatest_RES;                                        \
+        }                                                               \
+    } while (0)
+
+#if GREATEST_USE_TIME
+#define GREATEST_SET_TIME(NAME)                                         \
+    NAME = clock();                                                     \
+    if (NAME == (clock_t) -1) {                                         \
+        fprintf(GREATEST_STDOUT,                                        \
+            "clock error: %s\n", #NAME);                                \
+        exit(EXIT_FAILURE);                                             \
+    }
+
+#define GREATEST_CLOCK_DIFF(C1, C2)                                     \
+    fprintf(GREATEST_STDOUT, " (%lu ticks, %.3f sec)",                  \
+        (long unsigned int) (C2) - (long unsigned int)(C1),             \
+        (double)((C2) - (C1)) / (1.0 * (double)CLOCKS_PER_SEC))
+#else
+#define GREATEST_SET_TIME(UNUSED)
+#define GREATEST_CLOCK_DIFF(UNUSED1, UNUSED2)
+#endif
+
+#if GREATEST_USE_LONGJMP
+#define GREATEST_SAVE_CONTEXT()                                         \
+        /* setjmp yields 0 (GREATEST_TEST_RES_PASS) on first call, */   \
+        /* so the test runs, then RES_FAIL from FAIL_WITH_LONGJMP. */   \
+        ((enum greatest_test_res)(setjmp(greatest_info.jump_dest)))
+#else
+#define GREATEST_SAVE_CONTEXT()                                         \
+    /* a no-op, since setjmp/longjmp aren't being used */               \
+    GREATEST_TEST_RES_PASS
+#endif
+
+/* Include several function definitions in the main test file. */
+#define GREATEST_MAIN_DEFS()                                            \
+                                                                        \
+/* Return 1 if FILTER occurs anywhere within NAME, otherwise 0.         \
+ * Backs the test (-t) and suite (-s) name filters.                     \
+ *                                                                      \
+ *   name:   test or suite name; non-NULL, NUL-terminated               \
+ *   filter: substring to look for; non-NULL, NUL-terminated            \
+ *                                                                      \
+ * The search is a plain, case-sensitive substring match. An empty      \
+ * FILTER matches every NAME: the hand-rolled scan this replaces        \
+ * first compared filter[0] (here the terminator) with each NAME        \
+ * character, so an empty FILTER could never match anything.            \
+ * Needs <string.h>, as the strlen/strncmp version already did. */      \
+static int greatest_name_match(const char *name,                        \
+    const char *filter) {                                               \
+    /* strstr() returns NAME itself when FILTER is empty. */            \
+    return strstr(name, filter) != NULL;                                \
+}                                                                       \
+                                                                        \
+/* Gate for test NAME: 1 = run it (timer + setup done), 0 = skip it. */ \
+int greatest_pre_test(const char *name) {                               \
+    const char *filter = greatest_info.test_filter;                     \
+    if (GREATEST_LIST_ONLY()                                            \
+        || (GREATEST_FIRST_FAIL() && greatest_info.suite.failed != 0)   \
+        || (filter != NULL && !greatest_name_match(name, filter))) {    \
+        return 0;               /* skipped */                           \
+    }                                                                   \
+    GREATEST_SET_TIME(greatest_info.suite.pre_test);                    \
+    if (greatest_info.setup) {                                          \
+        greatest_info.setup(greatest_info.setup_udata);                 \
+    }                                                                   \
+    return 1;                   /* test should be run */                \
+}                                                                       \
+                                                                        \
+/* Wrap up test NAME: timer, teardown, tally RES, then line layout. */  \
+void greatest_post_test(const char *name, int res) {                    \
+    GREATEST_SET_TIME(greatest_info.suite.post_test);                   \
+    if (greatest_info.teardown) {                                       \
+        greatest_info.teardown(greatest_info.teardown_udata);           \
+    }                                                                   \
+                                                                        \
+    if (res <= GREATEST_TEST_RES_FAIL) {                                \
+        greatest_do_fail(name);                                         \
+    } else if (res >= GREATEST_TEST_RES_SKIP) {                         \
+        greatest_do_skip(name);                                         \
+    } else if (res == GREATEST_TEST_RES_PASS) {                         \
+        greatest_do_pass(name);                                         \
+    }                                                                   \
+    greatest_info.col++;                                                \
+    greatest_info.suite.tests_run++;                                    \
+    if (GREATEST_IS_VERBOSE()) {                                        \
+        GREATEST_CLOCK_DIFF(greatest_info.suite.pre_test,               \
+            greatest_info.suite.post_test);                             \
+        fprintf(GREATEST_STDOUT, "\n");                                 \
+    } else if (greatest_info.col % greatest_info.width == 0) {          \
+        fprintf(GREATEST_STDOUT, "\n");                                 \
+        greatest_info.col = 0;                                          \
+    }                                                                   \
+    if (GREATEST_STDOUT == stdout) fflush(stdout);                      \
+}                                                                       \
+                                                                        \
+/* Print the suite's totals (and timing); silent if no tests ran. */    \
+static void report_suite(void) {                                        \
+    if (!(greatest_info.suite.tests_run > 0)) { return; }               \
+    fprintf(GREATEST_STDOUT,                                            \
+        "\n%u test%s - %u passed, %u failed, %u skipped",               \
+        greatest_info.suite.tests_run,                                  \
+        greatest_info.suite.tests_run == 1 ? "" : "s",                  \
+        greatest_info.suite.passed,                                     \
+        greatest_info.suite.failed,                                     \
+        greatest_info.suite.skipped);                                   \
+    GREATEST_CLOCK_DIFF(greatest_info.suite.pre_suite,                  \
+        greatest_info.suite.post_suite);                                \
+    fprintf(GREATEST_STDOUT, "\n");                                     \
+}                                                                       \
+                                                                        \
+/* Fold the finished suite's counters into the run totals and reset. */ \
+static void update_counts_and_reset_suite(void) {                       \
+    greatest_info.tests_run += greatest_info.suite.tests_run;           \
+    greatest_info.passed += greatest_info.suite.passed;                 \
+    greatest_info.failed += greatest_info.suite.failed;                 \
+    greatest_info.skipped += greatest_info.suite.skipped;               \
+    memset(&greatest_info.suite, 0, sizeof(greatest_info.suite));       \
+    greatest_info.col = 0;                                              \
+    greatest_info.setup = NULL;                                         \
+    greatest_info.teardown = NULL;                                      \
+    greatest_info.setup_udata = greatest_info.teardown_udata = NULL;    \
+}                                                                       \
+                                                                        \
+/* Run SUITE_CB as suite SUITE_NAME, unless the suite filter rejects    \
+ * it or first-fail mode has already seen a failure in the run. */      \
+static void greatest_run_suite(greatest_suite_cb *suite_cb,             \
+                               const char *suite_name) {                \
+    const char *filter = greatest_info.suite_filter;                    \
+    if (filter && !greatest_name_match(suite_name, filter)) { return; } \
+    update_counts_and_reset_suite();                                    \
+    if (GREATEST_FIRST_FAIL() && greatest_info.failed > 0) { return; }  \
+    fprintf(GREATEST_STDOUT, "\n* Suite %s:\n", suite_name);            \
+    GREATEST_SET_TIME(greatest_info.suite.pre_suite);                   \
+    suite_cb();                                                         \
+    GREATEST_SET_TIME(greatest_info.suite.post_suite);                  \
+    report_suite();                                                     \
+}                                                                       \
+                                                                        \
+/* Count a pass for NAME; print '.' or, if verbose, a PASS line. */     \
+void greatest_do_pass(const char *name) {                               \
+    const char *msg = greatest_info.msg ? greatest_info.msg : "";       \
+    greatest_info.suite.passed++;                                       \
+    if (!GREATEST_IS_VERBOSE()) {                                       \
+        fprintf(GREATEST_STDOUT, ".");                                  \
+        return;                                                         \
+    }                                                                   \
+    fprintf(GREATEST_STDOUT, "PASS %s: %s", name, msg);                 \
+}                                                                       \
+/* Count a failure for NAME and print "FAIL name: msg (file:line)".     \
+ * Terse mode first prints 'F' and moves the report to a new line. */   \
+void greatest_do_fail(const char *name) {                               \
+    const char *msg = greatest_info.msg ? greatest_info.msg : "";       \
+    greatest_info.suite.failed++;                                       \
+    if (GREATEST_IS_VERBOSE()) {                                        \
+        fprintf(GREATEST_STDOUT, "FAIL %s: %s (%s:%u)", name, msg,      \
+            greatest_info.fail_file, greatest_info.fail_line);          \
+        return;                                                         \
+    }                                                                   \
+                                                                        \
+    fprintf(GREATEST_STDOUT, "F");                                      \
+    greatest_info.col++;                                                \
+    /* add linebreak if in line of '.'s */                              \
+    if (greatest_info.col != 0) {                                       \
+        fprintf(GREATEST_STDOUT, "\n");                                 \
+        greatest_info.col = 0;                                          \
+    }                                                                   \
+    fprintf(GREATEST_STDOUT, "FAIL %s: %s (%s:%u)\n", name, msg,        \
+        greatest_info.fail_file, greatest_info.fail_line);              \
+}                                                                       \
+                                                                        \
+/* Count a skip for NAME; print 's' or, if verbose, a SKIP line         \
+ * carrying the stored message (or nothing if it is NULL). */           \
+void greatest_do_skip(const char *name) {                               \
+    const char *msg = greatest_info.msg ? greatest_info.msg : "";       \
+    greatest_info.suite.skipped++;                                      \
+    if (!GREATEST_IS_VERBOSE()) {                                       \
+        fprintf(GREATEST_STDOUT, "s");                                  \
+        return;                                                         \
+    }                                                                   \
+    fprintf(GREATEST_STDOUT, "SKIP %s: %s", name, msg);                 \
+}                                                                       \
+                                                                        \
+/* Compare EXP and GOT via type_info->equal and return its result; a    \
+ * missing type_info/equal callback yields 0. On mismatch, print both   \
+ * values via type_info->print if set, else a generic failure. */       \
+int greatest_do_assert_equal_t(const void *exp, const void *got,        \
+        greatest_type_info *type_info, void *udata) {                   \
+    int eq = 0;                                                         \
+    if (type_info == NULL || type_info->equal == NULL) { return 0; }    \
+    eq = type_info->equal(exp, got, udata);                             \
+    if (eq) { return eq; }                                              \
+    if (type_info->print == NULL) {                                     \
+        fprintf(GREATEST_STDOUT,                                        \
+            "GREATEST_ASSERT_EQUAL_T failure at %s:%u\n",               \
+            greatest_info.fail_file,                                    \
+            greatest_info.fail_line);                                   \
+        return eq;                                                      \
+    }                                                                   \
+    fprintf(GREATEST_STDOUT, "\nExpected: ");                           \
+    (void)type_info->print(exp, udata);                                 \
+    fprintf(GREATEST_STDOUT, "\n     Got: ");                           \
+    (void)type_info->print(got, udata);                                 \
+    fprintf(GREATEST_STDOUT, "\n");                                     \
+    return eq;                                                          \
+}                                                                       \
+                                                                        \
+/* Print the option summary for program NAME to GREATEST_STDOUT. */     \
+void greatest_usage(const char *name) {                                 \
+    fprintf(GREATEST_STDOUT,                                            \
+        "Usage: %s [-hlfv] [-s SUITE] [-t TEST]\n"                      \
+        "  -h, --help  print this Help\n"                               \
+        "  -l          List suites and their tests, then exit\n"        \
+        "  -f          Stop runner after first failure\n"               \
+        "  -v          Verbose output\n"                                \
+        "  -s SUITE    only run suites containing string SUITE\n"       \
+        "  -t TEST     only run tests containing string TEST\n",        \
+        name);                                                          \
+}                                                                       \
+/* Parse the runner's command-line options into greatest_info. Bad or   \
+ * incomplete ones print usage and exit; a bare "--" ends parsing. */   \
+static void greatest_parse_args(int argc, char **argv) {                \
+    int i = 0;                                                          \
+    for (i = 1; i < argc; i++) {                                        \
+        if (0 == strncmp("-t", argv[i], 2)) {                           \
+            if (argc <= i + 1) {                                        \
+                greatest_usage(argv[0]);                                \
+                exit(EXIT_FAILURE);                                     \
+            }                                                           \
+            greatest_info.test_filter = argv[++i];                      \
+        } else if (0 == strncmp("-s", argv[i], 2)) {                    \
+            if (argc <= i + 1) {                                        \
+                greatest_usage(argv[0]);                                \
+                exit(EXIT_FAILURE);                                     \
+            }                                                           \
+            greatest_info.suite_filter = argv[++i];                     \
+        } else if (0 == strncmp("-f", argv[i], 2)) {                    \
+            greatest_info.flags |= GREATEST_FLAG_FIRST_FAIL;            \
+        } else if (0 == strncmp("-v", argv[i], 2)) {                    \
+            greatest_info.verbosity++;                                  \
+        } else if (0 == strncmp("-l", argv[i], 2)) {                    \
+            greatest_info.flags |= GREATEST_FLAG_LIST_ONLY;             \
+        } else if (0 == strncmp("-h", argv[i], 2) ||                    \
+                   0 == strncmp("--help", argv[i], 6)) {                \
+            greatest_usage(argv[0]);                                    \
+            exit(EXIT_SUCCESS);                                         \
+        } else if (0 == strcmp("--", argv[i])) { /* exact match only */ \
+            break;                                                      \
+        } else {                                                        \
+            fprintf(GREATEST_STDOUT,                                    \
+                "Unknown argument '%s'\n", argv[i]);                    \
+            greatest_usage(argv[0]);                                    \
+            exit(EXIT_FAILURE);                                         \
+        }                                                               \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+/* Return 1 if greatest_info.failed is zero, else 0. */                 \
+int greatest_all_passed(void) { return (greatest_info.failed == 0); }   \
+/* Set the test-name filter; NAME is stored by pointer, not copied. */  \
+void greatest_set_test_filter(const char *name) {                       \
+    greatest_info.test_filter = name;                                   \
+}                                                                       \
+/* Set the suite-name filter; NAME is stored by pointer, not copied. */ \
+void greatest_set_suite_filter(const char *name) {                      \
+    greatest_info.suite_filter = name;                                  \
+}                                                                       \
+/* Copy run totals and assertion count into *REPORT (NULL: no-op). */   \
+void greatest_get_report(struct greatest_report_t *report) {            \
+    if (report == NULL) { return; }                                     \
+    report->assertions = greatest_info.assertions;                      \
+    report->skipped = greatest_info.skipped;                            \
+    report->failed = greatest_info.failed;                              \
+    report->passed = greatest_info.passed;                              \
+}                                                                       \
+                                                                        \
+/* Return the current verbosity setting. */                             \
+unsigned int greatest_get_verbosity(void) {                             \
+    return greatest_info.verbosity;                                     \
+}                                                                       \
+/* Set the verbosity; the value is narrowed to unsigned char. */        \
+void greatest_set_verbosity(unsigned int verbosity) {                   \
+    greatest_info.verbosity = (unsigned char)verbosity;                 \
+}                                                                       \
+/* Turn on FLAG by OR-ing it into greatest_info.flags. */               \
+void greatest_set_flag(greatest_flag_t flag) {                          \
+    greatest_info.flags |= flag;                                        \
+}                                                                       \
+/* Install CB (called with UDATA) as the per-test setup hook. */        \
+void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata) {        \
+    greatest_info.setup = cb;                                           \
+    greatest_info.setup_udata = udata;                                  \
+}                                                                       \
+/* Install CB (called with UDATA) as the per-test teardown hook. */     \
+void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb,                 \
+                                    void *udata) {                      \
+    greatest_info.teardown = cb;                                        \
+    greatest_info.teardown_udata = udata;                               \
+}                                                                       \
+/* String equal callback: strcmp, or strncmp to *UDATA if non-NULL. */  \
+static int greatest_string_equal_cb(const void *exp, const void *got,   \
+    void *udata) {                                                      \
+    const char *lhs = (const char *)exp;                                \
+    const char *rhs = (const char *)got;                                \
+    if (udata == NULL) { return (0 == strcmp(lhs, rhs)); }              \
+    return (0 == strncmp(lhs, rhs, *(size_t *)udata));                  \
+}                                                                       \
+/* Print string T to GREATEST_STDOUT; returns fprintf's result. */      \
+static int greatest_string_printf_cb(const void *t, void *udata) {      \
+    (void)udata; /* note: does not check \0 termination. */             \
+    return fprintf(GREATEST_STDOUT, "%s", (const char *)t);             \
+}                                                                       \
+greatest_type_info greatest_type_info_string = {                        \
+    greatest_string_equal_cb,                                           \
+    greatest_string_printf_cb,                                          \
+};                                                                      \
+                                                                        \
+static int greatest_memory_equal_cb(const void *exp, const void *got,   \
+    void *udata) {                                                      \
+    greatest_memory_cmp_env *env = (greatest_memory_cmp_env *)udata;    \
+    return (0 == memcmp(exp, got, env->size));                          \
+}                                                                       \
+                                                                        \
+static int greatest_memory_printf_cb(const void *t, void *udata) {      \
+    greatest_memory_cmp_env *env = (greatest_memory_cmp_env *)udata;    \
+    unsigned char *buf = (unsigned char *)t, diff_mark = ' ';           \
+    FILE *out = GREATEST_STDOUT;                                        \
+    size_t i, line_i, line_len = 0;                                     \
+    int len = 0;   /* format hexdump with differences highlighted */    \
+    for (i = 0; i < env->size; i+= line_len) {                          \
+        diff_mark = ' ';                                                \
+        line_len = env->size - i;                                       \
+        if (line_len > 16) { line_len = 16; }                           \
+        for (line_i = i; line_i < i + line_len; line_i++) {             \
+            if (env->exp[line_i] != env->got[line_i]) diff_mark = 'X';  \
+        }                                                               \
+        len += fprintf(out, "\n%04x %c ", (unsigned int)i, diff_mark);  \
+        for (line_i = i; line_i < i + line_len; line_i++) {             \
+            int m = env->exp[line_i] == env->got[line_i]; /* match? */  \
+            len += fprintf(out, "%02x%c", buf[line_i], m ? ' ' : '<');  \
+        }                                                               \
+        for (line_i = 0; line_i < 16 - line_len; line_i++) {            \
+            len += fprintf(out, "   ");                                 \
+        }                                                               \
+        fprintf(out, " ");                                              \
+        for (line_i = i; line_i < i + line_len; line_i++) {             \
+            unsigned char c = buf[line_i];                              \
+            len += fprintf(out, "%c", isprint(c) ? c : '.');            \
+        }                                                               \
+    }                                                                   \
+    len += fprintf(out, "\n");                                          \
+    return len;                                                         \
+}                                                                       \
+                                                                        \
+greatest_type_info greatest_type_info_memory = {                        \
+    greatest_memory_equal_cb,                                           \
+    greatest_memory_printf_cb,                                          \
+};                                                                      \
+                                                                        \
+greatest_run_info greatest_info
+
+/* Init internals. */
+#define GREATEST_INIT()                                                 \
+    do {                                                                \
+        /* Suppress unused function warning if features aren't used */  \
+        (void)greatest_run_suite;                                       \
+        (void)greatest_parse_args;                                      \
+                                                                        \
+        memset(&greatest_info, 0, sizeof(greatest_info));               \
+        greatest_info.width = GREATEST_DEFAULT_WIDTH;                   \
+        GREATEST_SET_TIME(greatest_info.begin);                         \
+    } while (0)                                                         \
+
+/* Handle command-line arguments, etc. */
+#define GREATEST_MAIN_BEGIN()                                           \
+    do {                                                                \
+        GREATEST_INIT();                                                \
+        greatest_parse_args(argc, argv);                                \
+    } while (0)
+
+/* Report passes, failures, skipped tests, the number of
+ * assertions, and the overall run time. */
+#define GREATEST_PRINT_REPORT()                                         \
+    do {                                                                \
+        if (!GREATEST_LIST_ONLY()) {                                    \
+            update_counts_and_reset_suite();                            \
+            GREATEST_SET_TIME(greatest_info.end);                       \
+            fprintf(GREATEST_STDOUT,                                    \
+                "\nTotal: %u test%s",                                   \
+                greatest_info.tests_run,                                \
+                greatest_info.tests_run == 1 ? "" : "s");               \
+            GREATEST_CLOCK_DIFF(greatest_info.begin,                    \
+                greatest_info.end);                                     \
+            fprintf(GREATEST_STDOUT, ", %u assertion%s\n",              \
+                greatest_info.assertions,                               \
+                greatest_info.assertions == 1 ? "" : "s");              \
+            fprintf(GREATEST_STDOUT,                                    \
+                "Pass: %u, fail: %u, skip: %u.\n",                      \
+                greatest_info.passed,                                   \
+                greatest_info.failed, greatest_info.skipped);           \
+        }                                                               \
+    } while (0)
+
+/* Report results, exit with exit status based on results. */
+#define GREATEST_MAIN_END()                                             \
+    do {                                                                \
+        GREATEST_PRINT_REPORT();                                        \
+        return (greatest_all_passed() ? EXIT_SUCCESS : EXIT_FAILURE);   \
+    } while (0)
+
+/* Make abbreviations without the GREATEST_ prefix for the
+ * most commonly used symbols. */
+#if GREATEST_USE_ABBREVS
+#define TEST           GREATEST_TEST
+#define SUITE          GREATEST_SUITE
+#define SUITE_EXTERN   GREATEST_SUITE_EXTERN
+#define RUN_TEST       GREATEST_RUN_TEST
+#define RUN_TEST1      GREATEST_RUN_TEST1
+#define RUN_SUITE      GREATEST_RUN_SUITE
+#define IGNORE_TEST    GREATEST_IGNORE_TEST
+#define ASSERT         GREATEST_ASSERT
+#define ASSERTm        GREATEST_ASSERTm
+#define ASSERT_FALSE   GREATEST_ASSERT_FALSE
+#define ASSERT_EQ      GREATEST_ASSERT_EQ
+#define ASSERT_EQ_FMT  GREATEST_ASSERT_EQ_FMT
+#define ASSERT_IN_RANGE GREATEST_ASSERT_IN_RANGE
+#define ASSERT_EQUAL_T GREATEST_ASSERT_EQUAL_T
+#define ASSERT_STR_EQ  GREATEST_ASSERT_STR_EQ
+#define ASSERT_STRN_EQ GREATEST_ASSERT_STRN_EQ
+#define ASSERT_MEM_EQ  GREATEST_ASSERT_MEM_EQ
+#define ASSERT_ENUM_EQ GREATEST_ASSERT_ENUM_EQ
+#define ASSERT_FALSEm  GREATEST_ASSERT_FALSEm
+#define ASSERT_EQm     GREATEST_ASSERT_EQm
+#define ASSERT_EQ_FMTm GREATEST_ASSERT_EQ_FMTm
+#define ASSERT_IN_RANGEm GREATEST_ASSERT_IN_RANGEm
+#define ASSERT_EQUAL_Tm GREATEST_ASSERT_EQUAL_Tm
+#define ASSERT_STR_EQm GREATEST_ASSERT_STR_EQm
+#define ASSERT_STRN_EQm GREATEST_ASSERT_STRN_EQm
+#define ASSERT_MEM_EQm GREATEST_ASSERT_MEM_EQm
+#define ASSERT_ENUM_EQm GREATEST_ASSERT_ENUM_EQm
+#define PASS           GREATEST_PASS
+#define FAIL           GREATEST_FAIL
+#define SKIP           GREATEST_SKIP
+#define PASSm          GREATEST_PASSm
+#define FAILm          GREATEST_FAILm
+#define SKIPm          GREATEST_SKIPm
+#define SET_SETUP      GREATEST_SET_SETUP_CB
+#define SET_TEARDOWN   GREATEST_SET_TEARDOWN_CB
+#define CHECK_CALL     GREATEST_CHECK_CALL
+
+#ifdef GREATEST_VA_ARGS
+#define RUN_TESTp      GREATEST_RUN_TESTp
+#endif
+
+#if GREATEST_USE_LONGJMP
+#define ASSERT_OR_LONGJMP  GREATEST_ASSERT_OR_LONGJMP
+#define ASSERT_OR_LONGJMPm GREATEST_ASSERT_OR_LONGJMPm
+#define FAIL_WITH_LONGJMP  GREATEST_FAIL_WITH_LONGJMP
+#define FAIL_WITH_LONGJMPm GREATEST_FAIL_WITH_LONGJMPm
+#endif
+
+#endif /* USE_ABBREVS */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/thirdparty/hiredis b/thirdparty/hiredis
new file mode 160000
index 000000000..5f98e1d35
--- /dev/null
+++ b/thirdparty/hiredis
@@ -0,0 +1 @@
+Subproject commit 5f98e1d35dcf00a026793ada2662f6e1ba77eb17
diff --git a/thirdparty/utarray.h b/thirdparty/utarray.h
new file mode 100644
index 000000000..979e99e98
--- /dev/null
+++ b/thirdparty/utarray.h
@@ -0,0 +1,238 @@
+/*
+Copyright (c) 2008-2016, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* a dynamic array implementation using macros
+ */
+#ifndef UTARRAY_H
+#define UTARRAY_H
+
+#define UTARRAY_VERSION 2.0.1
+
+#ifdef __GNUC__
+#define _UNUSED_ __attribute__ ((__unused__))
+#else
+#define _UNUSED_
+#endif
+
+#include <stddef.h>  /* size_t */
+#include <string.h>  /* memset, etc */
+#include <stdlib.h>  /* exit */
+
+#ifndef oom
+#define oom() exit(-1)
+#endif
+
+typedef void (ctor_f)(void *dst, const void *src);
+typedef void (dtor_f)(void *elt);
+typedef void (init_f)(void *elt);
+typedef struct {
+    size_t sz;
+    init_f *init;
+    ctor_f *copy;
+    dtor_f *dtor;
+} UT_icd;
+
+typedef struct {
+    unsigned i,n;/* i: index of next available slot, n: num slots */
+    UT_icd icd;  /* initializer, copy and destructor functions */
+    char *d;     /* n slots of size icd->sz*/
+} UT_array;
+
+#define utarray_init(a,_icd) do {                                             \
+  memset(a,0,sizeof(UT_array));                                               \
+  (a)->icd = *(_icd);                                                         \
+} while(0)
+
+#define utarray_done(a) do {                                                  \
+  if ((a)->n) {                                                               \
+    if ((a)->icd.dtor) {                                                      \
+      unsigned _ut_i;                                                         \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(utarray_eltptr(a,_ut_i));                               \
+      }                                                                       \
+    }                                                                         \
+    free((a)->d);                                                             \
+  }                                                                           \
+  (a)->n=0;                                                                   \
+} while(0)
+
+#define utarray_new(a,_icd) do {                                              \
+  (a) = (UT_array*)malloc(sizeof(UT_array));                                  \
+  if ((a) == NULL) oom();                                                     \
+  utarray_init(a,_icd);                                                       \
+} while(0)
+
+#define utarray_free(a) do {                                                  \
+  utarray_done(a);                                                            \
+  free(a);                                                                    \
+} while(0)
+
+#define utarray_reserve(a,by) do {                                            \
+  if (((a)->i+(by)) > (a)->n) {                                               \
+    char *utarray_tmp;                                                        \
+    while (((a)->i+(by)) > (a)->n) { (a)->n = ((a)->n ? (2*(a)->n) : 8); }    \
+    utarray_tmp=(char*)realloc((a)->d, (a)->n*(a)->icd.sz);                   \
+    if (utarray_tmp == NULL) oom();                                           \
+    (a)->d=utarray_tmp;                                                       \
+  }                                                                           \
+} while(0)
+
+#define utarray_push_back(a,p) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,(a)->i++), p); }      \
+  else { memcpy(_utarray_eltptr(a,(a)->i++), p, (a)->icd.sz); };              \
+} while(0)
+
+#define utarray_pop_back(a) do {                                              \
+  if ((a)->icd.dtor) { (a)->icd.dtor( _utarray_eltptr(a,--((a)->i))); }       \
+  else { (a)->i--; }                                                          \
+} while(0)
+
+#define utarray_extend_back(a) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.init) { (a)->icd.init(_utarray_eltptr(a,(a)->i)); }            \
+  else { memset(_utarray_eltptr(a,(a)->i),0,(a)->icd.sz); }                   \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_len(a) ((a)->i)
+
+#define utarray_eltptr(a,j) (((j) < (a)->i) ? _utarray_eltptr(a,j) : NULL)
+#define _utarray_eltptr(a,j) ((a)->d + ((a)->icd.sz * (j)))
+
+#define utarray_insert(a,p,j) do {                                            \
+  if ((j) > (a)->i) utarray_resize(a,j);                                      \
+  utarray_reserve(a,1);                                                       \
+  if ((j) < (a)->i) {                                                         \
+    memmove( _utarray_eltptr(a,(j)+1), _utarray_eltptr(a,j),                  \
+             ((a)->i - (j))*((a)->icd.sz));                                   \
+  }                                                                           \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,j), p); }             \
+  else { memcpy(_utarray_eltptr(a,j), p, (a)->icd.sz); };                     \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_inserta(a,w,j) do {                                           \
+  if (utarray_len(w) == 0) break;                                             \
+  if ((j) > (a)->i) utarray_resize(a,j);                                      \
+  utarray_reserve(a,utarray_len(w));                                          \
+  if ((j) < (a)->i) {                                                         \
+    memmove(_utarray_eltptr(a,(j)+utarray_len(w)),                            \
+            _utarray_eltptr(a,j),                                             \
+            ((a)->i - (j))*((a)->icd.sz));                                    \
+  }                                                                           \
+  if ((a)->icd.copy) {                                                        \
+    unsigned _ut_i;                                                           \
+    for(_ut_i=0;_ut_i<(w)->i;_ut_i++) {                                       \
+      (a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), _utarray_eltptr(w, _ut_i)); \
+    }                                                                         \
+  } else {                                                                    \
+    memcpy(_utarray_eltptr(a,j), _utarray_eltptr(w,0),                        \
+           utarray_len(w)*((a)->icd.sz));                                     \
+  }                                                                           \
+  (a)->i += utarray_len(w);                                                   \
+} while(0)
+
+#define utarray_resize(dst,num) do {                                          \
+  unsigned _ut_i;                                                             \
+  if ((dst)->i > (unsigned)(num)) {                                           \
+    if ((dst)->icd.dtor) {                                                    \
+      for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) {                        \
+        (dst)->icd.dtor(_utarray_eltptr(dst, _ut_i));                         \
+      }                                                                       \
+    }                                                                         \
+  } else if ((dst)->i < (unsigned)(num)) {                                    \
+    utarray_reserve(dst, (num) - (dst)->i);                                   \
+    if ((dst)->icd.init) {                                                    \
+      for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) {              \
+        (dst)->icd.init(_utarray_eltptr(dst, _ut_i));                         \
+      }                                                                       \
+    } else {                                                                  \
+      memset(_utarray_eltptr(dst, (dst)->i), 0, (dst)->icd.sz*((num) - (dst)->i)); \
+    }                                                                         \
+  }                                                                           \
+  (dst)->i = (num);                                                           \
+} while(0)
+
+#define utarray_concat(dst,src) do {                                          \
+  utarray_inserta(dst, src, utarray_len(dst));                                \
+} while(0)
+
+#define utarray_erase(a,pos,len) do {                                         \
+  if ((a)->icd.dtor) {                                                        \
+    unsigned _ut_i;                                                           \
+    for (_ut_i = 0; _ut_i < (len); _ut_i++) {                                 \
+      (a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i));                        \
+    }                                                                         \
+  }                                                                           \
+  if ((a)->i > ((pos) + (len))) {                                             \
+    memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)),       \
+            ((a)->i - ((pos) + (len))) * (a)->icd.sz);                        \
+  }                                                                           \
+  (a)->i -= (len);                                                            \
+} while(0)
+
+#define utarray_renew(a,u) do {                                               \
+  if (a) utarray_clear(a);                                                    \
+  else utarray_new(a, u);                                                     \
+} while(0)
+
+#define utarray_clear(a) do {                                                 \
+  if ((a)->i > 0) {                                                           \
+    if ((a)->icd.dtor) {                                                      \
+      unsigned _ut_i;                                                         \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(_utarray_eltptr(a, _ut_i));                             \
+      }                                                                       \
+    }                                                                         \
+    (a)->i = 0;                                                               \
+  }                                                                           \
+} while(0)
+
+#define utarray_sort(a,cmp) do {                                              \
+  qsort((a)->d, (a)->i, (a)->icd.sz, cmp);                                    \
+} while(0)
+
+#define utarray_find(a,v,cmp) bsearch((v),(a)->d,(a)->i,(a)->icd.sz,cmp)
+
+#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a,0)) : NULL)
+#define utarray_next(a,e) (((e)==NULL) ? utarray_front(a) : ((((a)->i) > (utarray_eltidx(a,e)+1)) ? _utarray_eltptr(a,utarray_eltidx(a,e)+1) : NULL))
+#define utarray_prev(a,e) (((e)==NULL) ? utarray_back(a) : ((utarray_eltidx(a,e) > 0) ? _utarray_eltptr(a,utarray_eltidx(a,e)-1) : NULL))
+#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a,(a)->i-1)) : NULL)
+#define utarray_eltidx(a,e) (((char*)(e) >= (a)->d) ? (((char*)(e) - (a)->d)/(a)->icd.sz) : -1)
+
+/* last we pre-define a few icd for common utarrays of ints and strings */
+static void utarray_str_cpy(void *dst, const void *src) {
+  char **_src = (char**)src, **_dst = (char**)dst;
+  *_dst = (*_src == NULL) ? NULL : strdup(*_src);
+}
+static void utarray_str_dtor(void *elt) {
+  char **eltc = (char**)elt;
+  if (*eltc != NULL) free(*eltc);
+}
+static const UT_icd ut_str_icd _UNUSED_ = {sizeof(char*),NULL,utarray_str_cpy,utarray_str_dtor};
+static const UT_icd ut_int_icd _UNUSED_ = {sizeof(int),NULL,NULL,NULL};
+static const UT_icd ut_ptr_icd _UNUSED_ = {sizeof(void*),NULL,NULL,NULL};
+
+
+#endif /* UTARRAY_H */

From 72361c9b44933591c415dd198644e1c61ec9f617 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 14 Sep 2016 14:20:34 -0700
Subject: [PATCH 23/91] Add metadata handling (#23)

* Automatic whitespace fixes.

* Add metadata handling.

* Make create take a buffer instead of a string for the metadata.

* Small fixes.
---
 lib/python/plasma.py | 43 +++++++++++++++++++------
 src/example.c        |  6 ++--
 src/plasma.h         | 39 +++++++++++++++++-----
 src/plasma_client.c  | 54 ++++++++++++++++++++++++-------
 src/plasma_manager.c | 34 +++++++++++++------
 src/plasma_store.c   | 20 +++++++-----
 test/test.py         | 77 ++++++++++++++++++++++++++++++++++----------
 7 files changed, 206 insertions(+), 67 deletions(-)

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 5d18b06e4..b3b772f13 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -25,7 +25,7 @@ class PlasmaClient(object):
 
   def __init__(self, socket_name, addr=None, port=None):
     """Initialize the PlasmaClient.
-    
+
     Args:
       socket_name (str): Name of the socket the plasma store is listening at.
       addr (str): IPv4 address of plasma manager attached to the plasma store.
@@ -36,10 +36,10 @@ class PlasmaClient(object):
 
     self.client.plasma_store_connect.restype = ctypes.c_int
 
-    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64, ctypes.POINTER(ctypes.c_void_p)]
+    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64, ctypes.POINTER(ctypes.c_uint8), ctypes.c_int64, ctypes.POINTER(ctypes.c_void_p)]
     self.client.plasma_create.restype = None
 
-    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID, ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p)]
+    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID, ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p)]
     self.client.plasma_get.restype = None
 
     self.client.plasma_seal.argtypes = [ctypes.c_int, PlasmaID]
@@ -60,7 +60,7 @@ class PlasmaClient(object):
     else:
       self.manager_conn = -1 # not connected
 
-  def create(self, object_id, size):
+  def create(self, object_id, size, metadata=None):
     """Create a new buffer in the PlasmaStore for a particular object ID.
 
     The returned buffer is mutable until seal is called.
@@ -68,25 +68,49 @@ class PlasmaClient(object):
     Args:
       object_id (str): A string used to identify an object.
       size (int): The size in bytes of the created buffer.
+      metadata (buffer): An optional buffer encoding whatever metadata the user
+        wishes to encode.
     """
+    # This is used to hold the address of the buffer.
     data = ctypes.c_void_p()
-    self.client.plasma_create(self.sock, make_plasma_id(object_id), size, ctypes.byref(data))
+    # Turn the metadata into the right type.
+    metadata = buffer("") if metadata is None else metadata
+    metadata = (ctypes.c_ubyte * len(metadata)).from_buffer_copy(metadata)
+    self.client.plasma_create(self.sock, make_plasma_id(object_id), size, metadata, len(metadata), ctypes.byref(data))
     return self.buffer_from_read_write_memory(data, size)
 
   def get(self, object_id):
     """Create a buffer from the PlasmaStore based on object ID.
 
-    This method can only be called after the buffer has been sealed. The
-    retrieved buffer is immutable.
+    If the object has not been sealed yet, this call will block. The retrieved
+    buffer is immutable.
 
     Args:
       object_id (str): A string used to identify an object.
     """
     size = ctypes.c_int64()
     data = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data))
+    metadata_size = ctypes.c_int64()
+    metadata = ctypes.c_void_p()
+    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(data, size)
 
+  def get_metadata(self, object_id):
+    """Create a buffer over an object's metadata, looked up by object ID.
+
+    If the object has not been sealed yet, this call will block until the object
+    has been sealed. The retrieved buffer is immutable.
+
+    Args:
+      object_id (str): A string used to identify an object.
+    """
+    size = ctypes.c_int64()  # passed to plasma_get, not used afterwards
+    data = ctypes.c_void_p()  # passed to plasma_get, not used afterwards
+    metadata_size = ctypes.c_int64()
+    metadata = ctypes.c_void_p()
+    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    return self.buffer_from_memory(metadata, metadata_size)
+
   def seal(self, object_id):
     """Seal the buffer in the PlasmaStore for a particular object ID.
 
@@ -100,7 +124,7 @@ class PlasmaClient(object):
 
   def transfer(self, addr, port, object_id):
     """Transfer local object with id object_id to another plasma instance
-    
+
     Args:
       addr (str): IPv4 address of the plasma instance the object is sent to.
       port (int): Port number of the plasma instance the object is sent to.
@@ -109,4 +133,3 @@ class PlasmaClient(object):
     if self.manager_conn == -1:
       raise Exception("Not connected to the plasma manager socket")
     self.client.plasma_transfer(self.manager_conn, addr, port, make_plasma_id(object_id))
-    
diff --git a/src/example.c b/src/example.c
index f2b445675..7d0a9ac14 100644
--- a/src/example.c
+++ b/src/example.c
@@ -17,7 +17,7 @@
 int main(int argc, char *argv[]) {
   int conn = -1;
   int64_t size;
-  void *data;
+  uint8_t *data;
   int c;
   plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255}};
@@ -28,14 +28,14 @@ int main(int argc, char *argv[]) {
       break;
     case 'c':
       assert(conn != -1);
-      plasma_create(conn, id, 100, &data);
+      plasma_create(conn, id, 100, NULL, 0, &data);
       break;
     case 'f':
       assert(conn != -1);
       plasma_seal(conn, id);
       break;
     case 'g':
-      plasma_get(conn, id, &size, &data);
+      plasma_get(conn, id, &size, &data, NULL, NULL);
       break;
     default:
       abort();
diff --git a/src/plasma.h b/src/plasma.h
index c7efa4c07..3295d9a89 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -35,7 +35,8 @@
   } while (0)
 
 typedef struct {
-  int64_t size;
+  int64_t data_size;
+  int64_t metadata_size;
   int64_t create_time;
   int64_t construct_duration;
 } plasma_object_info;
@@ -59,21 +60,33 @@ enum plasma_request_type {
 typedef struct {
   int type;
   plasma_id object_id;
-  int64_t size;
+  /* The size of the data. */
+  int64_t data_size;
+  /* The size of the metadata. */
+  int64_t metadata_size;
   uint8_t addr[4];
   int port;
 } plasma_request;
 
 typedef struct {
-  ptrdiff_t offset;
+  /* The offset in the memory mapped file of the data. */
+  ptrdiff_t data_offset;
+  /* The offset in the memory mapped file of the metadata. */
+  ptrdiff_t metadata_offset;
+  /* The size of the memory mapped file. */
   int64_t map_size;
-  int64_t object_size;
+  /* The size of the data. */
+  int64_t data_size;
+  /* The size of the metadata. */
+  int64_t metadata_size;
 } plasma_reply;
 
 typedef struct {
   plasma_id object_id;
-  void *data;
-  int64_t size;
+  uint8_t *data;
+  int64_t data_size;
+  uint8_t *metadata;
+  int64_t metadata_size;
   int writable;
 } plasma_buffer;
 
@@ -83,8 +96,18 @@ int plasma_store_connect(const char *socket_name);
 /* Connect to a possibly remote plasma manager */
 int plasma_manager_connect(const char *addr, int port);
 
-void plasma_create(int store, plasma_id object_id, int64_t size, void **data);
-void plasma_get(int store, plasma_id object_id, int64_t *size, void **data);
+void plasma_create(int conn,
+                   plasma_id object_id,
+                   int64_t size,
+                   uint8_t *metadata,
+                   int64_t metadata_size,
+                   uint8_t **data);
+void plasma_get(int conn,
+                plasma_id object_id,
+                int64_t *size,
+                uint8_t **data,
+                int64_t *metadata_size,
+                uint8_t **metadata);
 void plasma_seal(int store, plasma_id object_id);
 
 void plasma_send(int conn, plasma_request *req);
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 8a8a11b5c..e236f3378 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -23,38 +23,70 @@ void plasma_send(int fd, plasma_request *req) {
   }
 }
 
-void plasma_create(int conn, plasma_id object_id, int64_t size, void **data) {
-  LOG_INFO("called plasma_create on conn %d with size %" PRId64, conn, size);
-  plasma_request req = {
-      .type = PLASMA_CREATE, .object_id = object_id, .size = size};
+void plasma_create(int conn,
+                   plasma_id object_id,
+                   int64_t data_size,
+                   uint8_t *metadata,
+                   int64_t metadata_size,
+                   uint8_t **data) {
+  LOG_INFO(
+      "called plasma_create on conn %d with size %d and metadata size "
+      "%d" PRId64,
+      conn, size, metadata_size);
+  plasma_request req = {.type = PLASMA_CREATE,
+                        .object_id = object_id,
+                        .data_size = data_size,
+                        .metadata_size = metadata_size};
   plasma_send(conn, &req);
   plasma_reply reply;
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
-  assert(reply.object_size == size);
-  *data =
-      mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) +
-      reply.offset;
+  assert(reply.data_size == data_size);
+  assert(reply.metadata_size == metadata_size);
+  /* The metadata should come right after the data. */
+  assert(reply.metadata_offset == reply.data_offset + data_size);
+  *data = ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE,
+                            MAP_SHARED, fd, 0)) +
+          reply.data_offset;
   if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
+  /* If plasma_create is being called from a transfer, then we will not copy the
+   * metadata here. The metadata will be written along with the data streamed
+   * from the transfer. */
+  if (metadata != NULL) {
+    /* Copy the metadata to the buffer. */
+    memcpy(*data + reply.data_size, metadata, metadata_size);
+  }
   close(fd);
 }
 
-void plasma_get(int conn, plasma_id object_id, int64_t *size, void **data) {
+/* This method is used to get both the data and the metadata. */
+void plasma_get(int conn,
+                plasma_id object_id,
+                int64_t *size,
+                uint8_t **data,
+                int64_t *metadata_size,
+                uint8_t **metadata) {
   plasma_request req = {.type = PLASMA_GET, .object_id = object_id};
   plasma_send(conn, &req);
   plasma_reply reply;
   /* The following loop is run at most twice. */
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
   *data =
-      mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0) + reply.offset;
+      ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0)) +
+      reply.data_offset;
   if (*data == MAP_FAILED) {
     LOG_ERR("mmap failed");
     exit(-1);
   }
   close(fd);
-  *size = reply.object_size;
+  *size = reply.data_size;
+  /* If requested, return the metadata as well. */
+  if (metadata != NULL) {
+    *metadata = *data + reply.data_size;
+    *metadata_size = reply.metadata_size;
+  }
 }
 
 void plasma_seal(int fd, plasma_id object_id) {
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index f6c45e099..15da6ecc6 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -46,9 +46,19 @@ void init_plasma_manager(plasma_manager_state* s,
  * the data header to the other object manager. */
 void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
   int store_conn = plasma_store_connect(s->store_socket_name);
-  plasma_buffer buf = {.object_id = req->object_id, .writable = 0};
-  plasma_get(store_conn, req->object_id, &buf.size, &buf.data);
-
+  uint8_t* data;
+  int64_t data_size;
+  uint8_t* metadata;
+  int64_t metadata_size;
+  plasma_get(store_conn, req->object_id, &data_size, &data, &metadata_size,
+             &metadata);
+  assert(metadata == data + data_size);
+  plasma_buffer buf = {.object_id = req->object_id,
+                       .data = data, /* We treat this as a pointer to the
+                                        concatenated data and metadata. */
+                       .data_size = data_size,
+                       .metadata_size = metadata_size,
+                       .writable = 0};
   char ip_addr[32];
   snprintf(ip_addr, 32, "%d.%d.%d.%d", req->addr[0], req->addr[1], req->addr[2],
            req->addr[3]);
@@ -59,9 +69,10 @@ void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
                           .buf = buf,
                           .cursor = 0};
   event_loop_attach(s->loop, CONNECTION_DATA, &conn, fd, POLLOUT);
-
-  plasma_request manager_req = {
-      .type = PLASMA_DATA, .object_id = req->object_id, .size = buf.size};
+  plasma_request manager_req = {.type = PLASMA_DATA,
+                                .object_id = req->object_id,
+                                .data_size = buf.data_size,
+                                .metadata_size = buf.metadata_size};
   plasma_send(fd, &manager_req);
 }
 
@@ -72,9 +83,12 @@ void start_reading_data(int64_t index,
                         plasma_manager_state* s,
                         plasma_request* req) {
   int store_conn = plasma_store_connect(s->store_socket_name);
-  plasma_buffer buf = {
-      .object_id = req->object_id, .size = req->size, .writable = 1};
-  plasma_create(store_conn, req->object_id, req->size, &buf.data);
+  plasma_buffer buf = {.object_id = req->object_id,
+                       .data_size = req->data_size,
+                       .metadata_size = req->metadata_size,
+                       .writable = 1};
+  plasma_create(store_conn, req->object_id, req->data_size, NULL,
+                req->metadata_size, &buf.data);
   data_connection conn = {.type = DATA_CONNECTION_READ,
                           .store_conn = store_conn,
                           .buf = buf,
@@ -140,7 +154,7 @@ void read_from_socket(plasma_manager_state* state,
     break;
   case DATA_CONNECTION_WRITE:
     LOG_DEBUG("polled DATA_CONNECTION_WRITE");
-    s = conn->buf.size - conn->cursor;
+    s = conn->buf.data_size + conn->buf.metadata_size - conn->cursor;
     if (s > BUFSIZE)
       s = BUFSIZE;
     r = write(waiting->fd, conn->buf.data + conn->cursor, s);
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 3720a46b2..c2a082634 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -85,7 +85,7 @@ void create_object(int conn, plasma_request* req) {
   HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
   PLASMA_CHECK(entry == NULL, "Cannot create object twice.");
 
-  void* pointer = dlmalloc(req->size);
+  uint8_t* pointer = dlmalloc(req->data_size + req->metadata_size);
   int fd;
   int64_t map_size;
   ptrdiff_t offset;
@@ -94,7 +94,8 @@ void create_object(int conn, plasma_request* req) {
 
   entry = malloc(sizeof(object_table_entry));
   memcpy(&entry->object_id, &req->object_id, 20);
-  entry->info.size = req->size;
+  entry->info.data_size = req->data_size;
+  entry->info.metadata_size = req->metadata_size;
   /* TODO(pcm): set the other fields */
   entry->fd = fd;
   entry->map_size = map_size;
@@ -102,9 +103,11 @@ void create_object(int conn, plasma_request* req) {
   HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
   plasma_reply reply;
   memset(&reply, 0, sizeof(reply));
-  reply.offset = offset;
+  reply.data_offset = offset;
+  reply.metadata_offset = offset + req->data_size;
   reply.map_size = map_size;
-  reply.object_size = req->size;
+  reply.data_size = req->data_size;
+  reply.metadata_size = req->metadata_size;
   send_fd(conn, fd, (char*) &reply, sizeof(reply));
 }
 
@@ -115,9 +118,10 @@ void get_object(int conn, plasma_request* req) {
   if (entry) {
     plasma_reply reply;
     memset(&reply, 0, sizeof(plasma_reply));
-    reply.offset = entry->offset;
+    reply.data_offset = entry->offset;
     reply.map_size = entry->map_size;
-    reply.object_size = entry->info.size;
+    reply.data_size = entry->info.data_size;
+    reply.metadata_size = entry->info.metadata_size;
     send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
   } else {
     object_notify_entry* notify_entry;
@@ -156,9 +160,9 @@ void seal_object(int conn, plasma_request* req) {
   if (!notify_entry) {
     return;
   }
-  plasma_reply reply = {.offset = entry->offset,
+  plasma_reply reply = {.data_offset = entry->offset,
                         .map_size = entry->map_size,
-                        .object_size = entry->info.size};
+                        .data_size = entry->info.data_size};
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
     send_fd(notify_entry->conn[i], entry->fd, (char*) &reply,
             sizeof(plasma_reply));
diff --git a/test/test.py b/test/test.py
index a8c4b670d..aae9f2b9e 100644
--- a/test/test.py
+++ b/test/test.py
@@ -14,6 +14,31 @@ import plasma
 def random_object_id():
   return "".join([chr(random.randint(0, 255)) for _ in range(20)])
 
+def generate_metadata(length):
+  metadata = length * ["\x00"]
+  if length > 0:
+    metadata[0] = chr(random.randint(0, 255))
+    metadata[-1] = chr(random.randint(0, 255))
+    for _ in range(100):
+      metadata[random.randint(0, length - 1)] = chr(random.randint(0, 255))
+  return buffer("".join(metadata))
+
+def write_to_data_buffer(buff, length):
+  if length > 0:
+    buff[0] = chr(random.randint(0, 255))
+    buff[-1] = chr(random.randint(0, 255))
+    for _ in range(100):
+      buff[random.randint(0, length - 1)] = chr(random.randint(0, 255))
+
+def create_object(client, data_size, metadata_size, seal=True):
+  object_id = random_object_id()
+  metadata = generate_metadata(metadata_size)
+  memory_buffer = client.create(object_id, data_size, metadata)
+  write_to_data_buffer(memory_buffer, data_size)
+  if seal:
+    client.seal(object_id)
+  return object_id, memory_buffer, metadata
+
 class TestPlasmaClient(unittest.TestCase):
 
   def setUp(self):
@@ -32,7 +57,7 @@ class TestPlasmaClient(unittest.TestCase):
     # Create an object id string.
     object_id = random_object_id()
     # Create a new buffer and write to it.
-    length = 1000
+    length = 50
     memory_buffer = self.plasma_client.create(object_id, length)
     for i in range(length):
       memory_buffer[i] = chr(i % 256)
@@ -43,6 +68,28 @@ class TestPlasmaClient(unittest.TestCase):
     for i in range(length):
       self.assertEqual(memory_buffer[i], chr(i % 256))
 
+  def test_create_with_metadata(self):
+    for length in range(1000):
+      # Create an object id string.
+      object_id = random_object_id()
+      # Create a random metadata string.
+      metadata = generate_metadata(length)
+      # Create a new buffer and write to it.
+      memory_buffer = self.plasma_client.create(object_id, length, metadata)
+      for i in range(length):
+        memory_buffer[i] = chr(i % 256)
+      # Seal the object.
+      self.plasma_client.seal(object_id)
+      # Get the object.
+      memory_buffer = self.plasma_client.get(object_id)
+      for i in range(length):
+        self.assertEqual(memory_buffer[i], chr(i % 256))
+      # Get the metadata.
+      metadata_buffer = self.plasma_client.get_metadata(object_id)
+      self.assertEqual(len(metadata), len(metadata_buffer))
+      for i in range(len(metadata)):
+        self.assertEqual(metadata[i], metadata_buffer[i])
+
   def test_illegal_functionality(self):
     # Create an object id string.
     object_id = random_object_id()
@@ -95,34 +142,30 @@ class TestPlasmaManager(unittest.TestCase):
 
   def test_transfer(self):
     for _ in range(100):
-      # Create an object id string.
-      object_id1 = random_object_id()
-      # Create a new buffer and set the first and last entries.
-      memory_buffer = self.client1.create(object_id1, 20000)
-      memory_buffer[0] = chr(1)
-      memory_buffer[-1] = chr(2)
-      # Seal the buffer.
-      self.client1.seal(object_id1)
+      # Create an object.
+      object_id1, memory_buffer1, metadata1 = create_object(self.client1, 2000, 2000)
       # Transfer the buffer to the the other PlasmaStore.
       self.client1.transfer("127.0.0.1", self.port2, object_id1)
       # Compare the two buffers.
+      self.assertEqual(memory_buffer1[:], self.client2.get(object_id1)[:])
       self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+      self.assertEqual(metadata1[:], self.client2.get_metadata(object_id1)[:])
+      self.assertEqual(self.client1.get_metadata(object_id1)[:], self.client2.get_metadata(object_id1)[:])
       # Transfer the buffer again.
       self.client1.transfer("127.0.0.1", self.port2, object_id1)
+      self.assertEqual(metadata1[:], self.client2.get_metadata(object_id1)[:])
       # Compare the two buffers.
       self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
-      # Create a new object id string.
-      object_id2 = random_object_id()
-      # Create a new buffer and set the first and last entries.
-      memory_buffer = self.client2.create(object_id2, 20000)
-      memory_buffer[0] = chr(3)
-      memory_buffer[-1] = chr(4)
-      # Seal the buffer.
-      self.client2.seal(object_id2)
+
+      # Create an object.
+      object_id2, memory_buffer2, metadata2 = create_object(self.client2, 20000, 20000)
       # Transfer the buffer to the the other PlasmaStore.
       self.client2.transfer("127.0.0.1", self.port1, object_id2)
       # Compare the two buffers.
+      self.assertEqual(memory_buffer2[:], self.client2.get(object_id2)[:])
       self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
+      self.assertEqual(metadata2[:], self.client2.get_metadata(object_id2)[:])
+      self.assertEqual(self.client1.get_metadata(object_id2)[:], self.client2.get_metadata(object_id2)[:])
 
   def test_illegal_functionality(self):
     # Create an object id string.

From 28c19a38c9dca7ebdd56c80b7a6233f2b476c58b Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 14 Sep 2016 14:21:24 -0700
Subject: [PATCH 24/91] Catch incorrect arguments to PlasmaClient constructor.
 (#18)

---
 lib/python/plasma.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index b3b772f13..8a114f294 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -31,6 +31,12 @@ class PlasmaClient(object):
       addr (str): IPv4 address of plasma manager attached to the plasma store.
       port (int): Port number of the plasma manager attached to the plasma store.
     """
+    if port is not None:
+      if not isinstance(port, int):
+        raise Exception("The 'port' argument must be an integer. The given argument has type {}.".format(type(port)))
+      if not 0 < port < 65536:
+        raise Exception("The 'port' argument must be greater than 0 and less than 65536. The given value is {}.".format(port))
+
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
     self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
 

From 13560bdb6b1a360c1bb9329e4a08ae70d3522f41 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 14 Sep 2016 17:45:18 -0700
Subject: [PATCH 25/91] Fix offset in get_malloc_mapinfo. (#24)

* Fix offset in get_malloc_mapinfo.

* Don't add offset inside mmap_record

* make clang-format happy
---
 src/malloc.c        | 24 ++++++++++++++----------
 src/malloc.h        |  2 +-
 src/plasma_client.c |  4 ++++
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/malloc.c b/src/malloc.c
index d83423844..f18535437 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -36,6 +36,7 @@ struct mmap_record {
   UT_hash_handle hh_pointer;
 };
 
+/* TODO(rshin): Don't have two hash tables. */
 struct mmap_record *records_by_fd = NULL;
 struct mmap_record *records_by_pointer = NULL;
 
@@ -65,16 +66,16 @@ int create_buffer(int64_t size) {
 }
 
 void *fake_mmap(size_t size) {
-  // Add sizeof(size_t) so that the returned pointer is deliberately not
-  // page-aligned. This ensures that the segments of memory returned by
-  // fake_mmap are never contiguous.
-  int fd = create_buffer(size + sizeof(size_t));
-  void *pointer = mmap(NULL, size + sizeof(size_t), PROT_READ | PROT_WRITE,
-                       MAP_SHARED, fd, 0);
+  /* Add sizeof(size_t) so that the returned pointer is deliberately not
+   * page-aligned. This ensures that the segments of memory returned by
+   * fake_mmap are never contiguous. */
+  size += sizeof(size_t);
+
+  int fd = create_buffer(size);
+  void *pointer = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   if (pointer == MAP_FAILED) {
     return pointer;
   }
-  pointer += sizeof(size_t);
 
   struct mmap_record *record = malloc(sizeof(struct mmap_record));
   record->fd = fd;
@@ -83,16 +84,19 @@ void *fake_mmap(size_t size) {
   HASH_ADD(hh_fd, records_by_fd, fd, sizeof(fd), record);
   HASH_ADD(hh_pointer, records_by_pointer, pointer, sizeof(pointer), record);
 
+  /* We lie to dlmalloc about where mapped memory actually lives. */
+  pointer += sizeof(size_t);
   LOG_DEBUG("%p = fake_mmap(%lu)", pointer, size);
   return pointer;
 }
 
 int fake_munmap(void *addr, size_t size) {
   LOG_DEBUG("fake_munmap(%p, %lu)", addr, size);
+  addr -= sizeof(size_t);
+  size += sizeof(size_t);
 
   struct mmap_record *record;
 
-  addr -= sizeof(size_t);
   HASH_FIND(hh_pointer, records_by_pointer, &addr, sizeof(addr), record);
   assert(record != NULL);
   close(record->fd);
@@ -100,7 +104,7 @@ int fake_munmap(void *addr, size_t size) {
   HASH_DELETE(hh_fd, records_by_fd, record);
   HASH_DELETE(hh_pointer, records_by_pointer, record);
 
-  return munmap(addr, size + sizeof(size_t));
+  return munmap(addr, size);
 }
 
 void get_malloc_mapinfo(void *addr,
@@ -108,7 +112,7 @@ void get_malloc_mapinfo(void *addr,
                         int64_t *map_size,
                         ptrdiff_t *offset) {
   struct mmap_record *record;
-  // TODO(rshin): Implement a more efficient search through records_by_fd.
+  /* TODO(rshin): Implement a more efficient search through records_by_fd. */
   for (record = records_by_fd; record != NULL; record = record->hh_fd.next) {
     if (addr >= record->pointer && addr < record->pointer + record->size) {
       *fd = record->fd;
diff --git a/src/malloc.h b/src/malloc.h
index 2b7395eba..9fc1f48bb 100644
--- a/src/malloc.h
+++ b/src/malloc.h
@@ -6,4 +6,4 @@ void get_malloc_mapinfo(void *addr,
                         int64_t *map_length,
                         ptrdiff_t *offset);
 
-#endif  // MALLOC_H
+#endif /* MALLOC_H */
diff --git a/src/plasma_client.c b/src/plasma_client.c
index e236f3378..20e5e087f 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -44,6 +44,8 @@ void plasma_create(int conn,
   assert(reply.metadata_size == metadata_size);
   /* The metadata should come right after the data. */
   assert(reply.metadata_offset == reply.data_offset + data_size);
+
+  // TOOD(rshin): Don't call mmap if this fd has already been mapepd.
   *data = ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE,
                             MAP_SHARED, fd, 0)) +
           reply.data_offset;
@@ -73,6 +75,8 @@ void plasma_get(int conn,
   plasma_reply reply;
   /* The following loop is run at most twice. */
   int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
+
+  // TOOD(rshin): Don't call mmap if this fd has already been mapepd.
   *data =
       ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0)) +
       reply.data_offset;

From 0198a0d2993bd5007c2758764b8955dac74e0e80 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Thu, 15 Sep 2016 15:39:33 -0700
Subject: [PATCH 26/91] make plasma robust with respect to the number of
 open file descriptors (#25)

---
 lib/python/plasma.py | 18 ++++------
 src/plasma.h         | 38 +++++++++++----------
 src/plasma_client.c  | 78 ++++++++++++++++++++++++++------------------
 src/plasma_client.h  | 27 +++++++++++++++
 src/plasma_manager.c | 68 +++++++++++++++++++-------------------
 src/plasma_store.c   |  7 +++-
 6 files changed, 139 insertions(+), 97 deletions(-)
 create mode 100644 src/plasma_client.h

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 8a114f294..2bbcd6d49 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -40,15 +40,9 @@ class PlasmaClient(object):
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
     self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
 
-    self.client.plasma_store_connect.restype = ctypes.c_int
-
-    self.client.plasma_create.argtypes = [ctypes.c_int, PlasmaID, ctypes.c_int64, ctypes.POINTER(ctypes.c_uint8), ctypes.c_int64, ctypes.POINTER(ctypes.c_void_p)]
+    self.client.plasma_store_connect.restype = ctypes.c_void_p
     self.client.plasma_create.restype = None
-
-    self.client.plasma_get.argtypes = [ctypes.c_int, PlasmaID, ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.c_void_p)]
     self.client.plasma_get.restype = None
-
-    self.client.plasma_seal.argtypes = [ctypes.c_int, PlasmaID]
     self.client.plasma_seal.restype = None
 
     self.buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
@@ -59,7 +53,7 @@ class PlasmaClient(object):
     self.buffer_from_read_write_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
     self.buffer_from_read_write_memory.restype = ctypes.py_object
 
-    self.sock = self.client.plasma_store_connect(socket_name)
+    self.store_conn = ctypes.c_void_p(self.client.plasma_store_connect(socket_name))
 
     if addr is not None and port is not None:
       self.manager_conn = self.client.plasma_manager_connect(addr, port)
@@ -82,7 +76,7 @@ class PlasmaClient(object):
     # Turn the metadata into the right type.
     metadata = buffer("") if metadata is None else metadata
     metadata = (ctypes.c_ubyte * len(metadata)).from_buffer_copy(metadata)
-    self.client.plasma_create(self.sock, make_plasma_id(object_id), size, metadata, len(metadata), ctypes.byref(data))
+    self.client.plasma_create(self.store_conn, make_plasma_id(object_id), size, ctypes.cast(metadata, ctypes.POINTER(ctypes.c_ubyte * len(metadata))), len(metadata), ctypes.byref(data))
     return self.buffer_from_read_write_memory(data, size)
 
   def get(self, object_id):
@@ -98,7 +92,7 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    buf = self.client.plasma_get(self.store_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(data, size)
 
   def get_metadata(self, object_id):
@@ -114,7 +108,7 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.sock, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    buf = self.client.plasma_get(self.store_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(metadata, metadata_size)
 
   def seal(self, object_id):
@@ -126,7 +120,7 @@ class PlasmaClient(object):
     Args:
       object_id (str): A string used to identify an object.
     """
-    self.client.plasma_seal(self.sock, make_plasma_id(object_id))
+    self.client.plasma_seal(self.store_conn, make_plasma_id(object_id))
 
   def transfer(self, addr, port, object_id):
     """Transfer local object with id object_id to another plasma instance
diff --git a/src/plasma.h b/src/plasma.h
index 3295d9a89..ddf89ad34 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -7,6 +7,8 @@
 #include <stddef.h>
 #include <string.h>
 
+#include "uthash.h"
+
 #ifdef NDEBUG
 #define LOG_DEBUG(M, ...)
 #else
@@ -79,6 +81,8 @@ typedef struct {
   int64_t data_size;
   /* The size of the metadata. */
   int64_t metadata_size;
+  /* Numerical value of the fd of the memory mapped file in the store. */
+  int store_fd_val;
 } plasma_reply;
 
 typedef struct {
@@ -90,25 +94,23 @@ typedef struct {
   int writable;
 } plasma_buffer;
 
-/* Connect to the local plasma store UNIX domain socket */
-int plasma_store_connect(const char *socket_name);
+typedef struct {
+  /* Key that uniquely identifies the  memory mapped file. In practice, we
+   * take the numerical value of the file descriptor in the object store. */
+  int key;
+  /* The result of mmap for this file descriptor. */
+  uint8_t *pointer;
+  /* Handle for the uthash table. */
+  UT_hash_handle hh;
+} client_mmap_table_entry;
 
-/* Connect to a possibly remote plasma manager */
-int plasma_manager_connect(const char *addr, int port);
-
-void plasma_create(int conn,
-                   plasma_id object_id,
-                   int64_t size,
-                   uint8_t *metadata,
-                   int64_t metadata_size,
-                   uint8_t **data);
-void plasma_get(int conn,
-                plasma_id object_id,
-                int64_t *size,
-                uint8_t **data,
-                int64_t *metadata_size,
-                uint8_t **metadata);
-void plasma_seal(int store, plasma_id object_id);
+/* A client connection with a plasma store */
+typedef struct {
+  /* File descriptor of the Unix domain socket that connects to the store. */
+  int conn;
+  /* Table of dlmalloc buffer files that have been memory mapped so far. */
+  client_mmap_table_entry *mmap_table;
+} plasma_store_conn;
 
 void plasma_send(int conn, plasma_request *req);
 
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 20e5e087f..3d94e503b 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -13,17 +13,46 @@
 #include <netdb.h>
 
 #include "plasma.h"
+#include "plasma_client.h"
 #include "fling.h"
 
 void plasma_send(int fd, plasma_request *req) {
   int req_count = sizeof(plasma_request);
   if (write(fd, req, req_count) != req_count) {
-    LOG_ERR("write error");
+    LOG_ERR("write error, fd = %d", fd);
     exit(-1);
   }
 }
 
-void plasma_create(int conn,
+/* If the file descriptor fd has been mmapped in this client process before,
+ * return the pointer that was returned by mmap, otherwise mmap it and store the
+ * pointer in a hash table. */
+uint8_t *lookup_or_mmap(plasma_store_conn *conn,
+                        int fd,
+                        int store_fd_val,
+                        int64_t map_size) {
+  client_mmap_table_entry *entry;
+  HASH_FIND_INT(conn->mmap_table, &store_fd_val, entry);
+  if (entry) {
+    close(fd);
+    return entry->pointer;
+  } else {
+    uint8_t *result =
+        mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (result == MAP_FAILED) {
+      LOG_ERR("mmap failed");
+      exit(-1);
+    }
+    close(fd);
+    entry = malloc(sizeof(client_mmap_table_entry));
+    entry->key = store_fd_val;
+    entry->pointer = result;
+    HASH_ADD_INT(conn->mmap_table, key, entry);
+    return result;
+  }
+}
+
+void plasma_create(plasma_store_conn *conn,
                    plasma_id object_id,
                    int64_t data_size,
                    uint8_t *metadata,
@@ -37,22 +66,15 @@ void plasma_create(int conn,
                         .object_id = object_id,
                         .data_size = data_size,
                         .metadata_size = metadata_size};
-  plasma_send(conn, &req);
+  plasma_send(conn->conn, &req);
   plasma_reply reply;
-  int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
+  int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
   assert(reply.data_size == data_size);
   assert(reply.metadata_size == metadata_size);
   /* The metadata should come right after the data. */
   assert(reply.metadata_offset == reply.data_offset + data_size);
-
-  // TOOD(rshin): Don't call mmap if this fd has already been mapepd.
-  *data = ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ | PROT_WRITE,
-                            MAP_SHARED, fd, 0)) +
+  *data = lookup_or_mmap(conn, fd, reply.store_fd_val, reply.map_size) +
           reply.data_offset;
-  if (*data == MAP_FAILED) {
-    LOG_ERR("mmap failed");
-    exit(-1);
-  }
   /* If plasma_create is being called from a transfer, then we will not copy the
    * metadata here. The metadata will be written along with the data streamed
    * from the transfer. */
@@ -60,31 +82,21 @@ void plasma_create(int conn,
     /* Copy the metadata to the buffer. */
     memcpy(*data + reply.data_size, metadata, metadata_size);
   }
-  close(fd);
 }
 
 /* This method is used to get both the data and the metadata. */
-void plasma_get(int conn,
+void plasma_get(plasma_store_conn *conn,
                 plasma_id object_id,
                 int64_t *size,
                 uint8_t **data,
                 int64_t *metadata_size,
                 uint8_t **metadata) {
   plasma_request req = {.type = PLASMA_GET, .object_id = object_id};
-  plasma_send(conn, &req);
+  plasma_send(conn->conn, &req);
   plasma_reply reply;
-  /* The following loop is run at most twice. */
-  int fd = recv_fd(conn, (char *) &reply, sizeof(plasma_reply));
-
-  // TOOD(rshin): Don't call mmap if this fd has already been mapepd.
-  *data =
-      ((uint8_t *) mmap(NULL, reply.map_size, PROT_READ, MAP_SHARED, fd, 0)) +
-      reply.data_offset;
-  if (*data == MAP_FAILED) {
-    LOG_ERR("mmap failed");
-    exit(-1);
-  }
-  close(fd);
+  int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
+  *data = lookup_or_mmap(conn, fd, reply.store_fd_val, reply.map_size) +
+          reply.data_offset;
   *size = reply.data_size;
   /* If requested, return the metadata as well. */
   if (metadata != NULL) {
@@ -93,12 +105,12 @@ void plasma_get(int conn,
   }
 }
 
-void plasma_seal(int fd, plasma_id object_id) {
+void plasma_seal(plasma_store_conn *conn, plasma_id object_id) {
   plasma_request req = {.type = PLASMA_SEAL, .object_id = object_id};
-  plasma_send(fd, &req);
+  plasma_send(conn->conn, &req);
 }
 
-int plasma_store_connect(const char *socket_name) {
+plasma_store_conn *plasma_store_connect(const char *socket_name) {
   assert(socket_name);
   struct sockaddr_un addr;
   int fd;
@@ -125,7 +137,11 @@ int plasma_store_connect(const char *socket_name) {
     LOG_ERR("could not connect to store %s", socket_name);
     exit(-1);
   }
-  return fd;
+  /* Initialize the store connection struct */
+  plasma_store_conn *result = malloc(sizeof(plasma_store_conn));
+  result->conn = fd;
+  result->mmap_table = NULL;
+  return result;
 }
 
 #define h_addr h_addr_list[0]
diff --git a/src/plasma_client.h b/src/plasma_client.h
new file mode 100644
index 000000000..4c7cc008e
--- /dev/null
+++ b/src/plasma_client.h
@@ -0,0 +1,27 @@
+#ifndef PLASMA_CLIENT_H
+#define PLASMA_CLIENT_H
+
+/* Connect to the local plasma store UNIX domain socket with path socket_name
+ * and return the resulting connection. */
+plasma_store_conn *plasma_store_connect(const char *socket_name);
+
+/* Connect to a possibly remote plasma manager */
+int plasma_manager_connect(const char *addr, int port);
+
+void plasma_create(plasma_store_conn *conn,
+                   plasma_id object_id,
+                   int64_t size,
+                   uint8_t *metadata,
+                   int64_t metadata_size,
+                   uint8_t **data);
+
+void plasma_get(plasma_store_conn *conn,
+                plasma_id object_id,
+                int64_t *size,
+                uint8_t **data,
+                int64_t *metadata_size,
+                uint8_t **metadata);
+
+void plasma_seal(plasma_store_conn *conn, plasma_id object_id);
+
+#endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 15da6ecc6..7cad02504 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -22,35 +22,36 @@
 
 #include "event_loop.h"
 #include "plasma.h"
+#include "plasma_client.h"
 #include "plasma_manager.h"
 
 typedef struct {
-  /* Name of the socket connecting to local plasma store. */
-  const char* store_socket_name;
+  /* Connection to local plasma store. */
+  plasma_store_conn *conn;
   /* Event loop. */
-  event_loop* loop;
+  event_loop *loop;
 } plasma_manager_state;
 
 /* Initialize the plasma manager. This function initializes the event loop
  * of the plasma manager, and stores the address 'store_socket_name' of
  * the local plasma store socket. */
-void init_plasma_manager(plasma_manager_state* s,
-                         const char* store_socket_name) {
+void init_plasma_manager(plasma_manager_state *s,
+                         const char *store_socket_name) {
   s->loop = malloc(sizeof(event_loop));
   event_loop_init(s->loop);
-  s->store_socket_name = store_socket_name;
+  s->conn = plasma_store_connect(store_socket_name);
+  LOG_INFO("Connected to object store %s", store_socket_name);
 }
 
 /* Start transfering data to another object store manager. This establishes
- * a connection to both the manager and the local object store and sends
- * the data header to the other object manager. */
-void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
-  int store_conn = plasma_store_connect(s->store_socket_name);
-  uint8_t* data;
+ * a connection to the remote manager and sends the data header to the other
+ * object manager. */
+void initiate_transfer(plasma_manager_state *s, plasma_request *req) {
+  uint8_t *data;
   int64_t data_size;
-  uint8_t* metadata;
+  uint8_t *metadata;
   int64_t metadata_size;
-  plasma_get(store_conn, req->object_id, &data_size, &data, &metadata_size,
+  plasma_get(s->conn, req->object_id, &data_size, &data, &metadata_size,
              &metadata);
   assert(metadata == data + data_size);
   plasma_buffer buf = {.object_id = req->object_id,
@@ -65,7 +66,7 @@ void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
 
   int fd = plasma_manager_connect(&ip_addr[0], req->port);
   data_connection conn = {.type = DATA_CONNECTION_WRITE,
-                          .store_conn = store_conn,
+                          .store_conn = s->conn->conn,
                           .buf = buf,
                           .cursor = 0};
   event_loop_attach(s->loop, CONNECTION_DATA, &conn, fd, POLLOUT);
@@ -80,17 +81,16 @@ void initiate_transfer(plasma_manager_state* s, plasma_request* req) {
  * Initializes the object we are going to write to in the
  * local plasma store and then switches the data socket to reading mode. */
 void start_reading_data(int64_t index,
-                        plasma_manager_state* s,
-                        plasma_request* req) {
-  int store_conn = plasma_store_connect(s->store_socket_name);
+                        plasma_manager_state *s,
+                        plasma_request *req) {
   plasma_buffer buf = {.object_id = req->object_id,
                        .data_size = req->data_size,
                        .metadata_size = req->metadata_size,
                        .writable = 1};
-  plasma_create(store_conn, req->object_id, req->data_size, NULL,
+  plasma_create(s->conn, req->object_id, req->data_size, NULL,
                 req->metadata_size, &buf.data);
   data_connection conn = {.type = DATA_CONNECTION_READ,
-                          .store_conn = store_conn,
+                          .store_conn = s->conn->conn,
                           .buf = buf,
                           .cursor = 0};
   event_loop_set_connection(s->loop, index, &conn);
@@ -99,8 +99,8 @@ void start_reading_data(int64_t index,
 /* Handle a command request that came in through a socket (transfering data,
  * or accepting incoming data). */
 void process_command(int64_t id,
-                     plasma_manager_state* state,
-                     plasma_request* req) {
+                     plasma_manager_state *state,
+                     plasma_request *req) {
   switch (req->type) {
   case PLASMA_TRANSFER:
     LOG_INFO("transfering object to manager with port %d", req->port);
@@ -117,12 +117,12 @@ void process_command(int64_t id,
 }
 
 /* Handle data or command event incoming on socket with index "index". */
-void read_from_socket(plasma_manager_state* state,
-                      struct pollfd* waiting,
+void read_from_socket(plasma_manager_state *state,
+                      struct pollfd *waiting,
                       int64_t index,
-                      plasma_request* req) {
+                      plasma_request *req) {
   ssize_t r, s;
-  data_connection* conn = event_loop_get_connection(state->loop, index);
+  data_connection *conn = event_loop_get_connection(state->loop, index);
   switch (conn->type) {
   case DATA_CONNECTION_HEADER:
     r = read(waiting->fd, req, sizeof(plasma_request));
@@ -147,8 +147,7 @@ void read_from_socket(plasma_manager_state* state,
     }
     if (r == 0) {
       LOG_DEBUG("reading on channel %" PRId64 " finished", index);
-      plasma_seal(conn->store_conn, conn->buf.object_id);
-      close(conn->store_conn);
+      plasma_seal(state->conn, conn->buf.object_id);
       event_loop_detach(state->loop, index, 1);
     }
     break;
@@ -170,7 +169,6 @@ void read_from_socket(plasma_manager_state* state,
     }
     if (r == 0) {
       LOG_DEBUG("writing on channel %" PRId64 " finished", index);
-      close(conn->store_conn);
       event_loop_detach(state->loop, index, 1);
     }
     break;
@@ -181,7 +179,7 @@ void read_from_socket(plasma_manager_state* state,
 }
 
 /* Main event loop of the plasma manager. */
-void run_event_loop(int sock, plasma_manager_state* s) {
+void run_event_loop(int sock, plasma_manager_state *s) {
   /* Add listening socket. */
   event_loop_attach(s->loop, CONNECTION_LISTENER, NULL, sock, POLLIN);
   plasma_request req;
@@ -192,7 +190,7 @@ void run_event_loop(int sock, plasma_manager_state* s) {
       exit(-1);
     }
     for (int i = 0; i < event_loop_size(s->loop); ++i) {
-      struct pollfd* waiting = event_loop_get(s->loop, i);
+      struct pollfd *waiting = event_loop_get(s->loop, i);
       if (waiting->revents == 0)
         continue;
       if (waiting->fd == sock) {
@@ -215,8 +213,8 @@ void run_event_loop(int sock, plasma_manager_state* s) {
   }
 }
 
-void start_server(const char* store_socket_name,
-                  const char* master_addr,
+void start_server(const char *store_socket_name,
+                  const char *master_addr,
                   int port) {
   struct sockaddr_in name;
   int sock = socket(PF_INET, SOCK_STREAM, 0);
@@ -249,11 +247,11 @@ void start_server(const char* store_socket_name,
   run_event_loop(sock, &state);
 }
 
-int main(int argc, char* argv[]) {
+int main(int argc, char *argv[]) {
   /* Socket name of the plasma store this manager is connected to. */
-  char* store_socket_name = NULL;
+  char *store_socket_name = NULL;
   /* IP address of this node. */
-  char* master_addr = NULL;
+  char *master_addr = NULL;
   /* Port number the manager should use. */
   int port;
   int c;
diff --git a/src/plasma_store.c b/src/plasma_store.c
index c2a082634..a7aef2c00 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -108,6 +108,7 @@ void create_object(int conn, plasma_request* req) {
   reply.map_size = map_size;
   reply.data_size = req->data_size;
   reply.metadata_size = req->metadata_size;
+  reply.store_fd_val = fd;
   send_fd(conn, fd, (char*) &reply, sizeof(reply));
 }
 
@@ -122,6 +123,7 @@ void get_object(int conn, plasma_request* req) {
     reply.map_size = entry->map_size;
     reply.data_size = entry->info.data_size;
     reply.metadata_size = entry->info.metadata_size;
+    reply.store_fd_val = entry->fd;
     send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
   } else {
     object_notify_entry* notify_entry;
@@ -151,6 +153,7 @@ void seal_object(int conn, plasma_request* req) {
   if (!entry) {
     return; /* TODO(pcm): return error */
   }
+  int fd = entry->fd;
   HASH_DELETE(handle, open_objects, entry);
   HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
   /* Inform processes that the object is ready now. */
@@ -162,7 +165,9 @@ void seal_object(int conn, plasma_request* req) {
   }
   plasma_reply reply = {.data_offset = entry->offset,
                         .map_size = entry->map_size,
-                        .data_size = entry->info.data_size};
+                        .data_size = entry->info.data_size,
+                        .metadata_size = entry->info.metadata_size,
+                        .store_fd_val = fd};
   for (int i = 0; i < notify_entry->num_waiting; ++i) {
     send_fd(notify_entry->conn[i], entry->fd, (char*) &reply,
             sizeof(plasma_reply));

From 73f4b962535bc0573f5c8c7159f4b20b0324213f Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Thu, 15 Sep 2016 16:28:52 -0700
Subject: [PATCH 27/91] Sockets (#3)

* Socket methods to be used by an event loop

* Git ignore build files

* File renames

* Some fixes

* Fixes

* Fixes

* Memory leakage fix
---
 .gitignore          |   3 ++
 Makefile            |   8 +++-
 event_loop.c        |   1 -
 sockets.c           | 111 ++++++++++++++++++++++++++++++++++++++++++++
 sockets.h           |  10 ++++
 test/socket_tests.c |  45 ++++++++++++++++++
 6 files changed, 175 insertions(+), 3 deletions(-)
 create mode 100644 sockets.c
 create mode 100644 sockets.h
 create mode 100644 test/socket_tests.c

diff --git a/.gitignore b/.gitignore
index 2a07abca4..fff8ef269 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,6 @@
 # Debug files
 *.dSYM/
 *.su
+
+# Build files
+build/*
diff --git a/Makefile b/Makefile
index fd9da0b97..8ca748e1d 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,9 @@ CFLAGS += -Wmissing-declarations
 $(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c state/redis.c common.c
 	$(CC) -o $@ test/db_tests.c event_loop.c state/redis.c common.c thirdparty/hiredis/libhiredis.a $(CFLAGS) -I. -Ithirdparty
 
+$(BUILD)/socket_tests: test/socket_tests.c thirdparty/greatest.h sockets.c
+	$(CC) -o $@ test/socket_tests.c sockets.c $(CFLAGS) -I. -Ithirdparty
+
 clean:
 	rm -r $(BUILD)/*
 
@@ -18,7 +21,8 @@ redis:
 hiredis:
 	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
 
-test: hiredis redis $(BUILD)/db_tests FORCE
-	./thirdparty/redis-3.2.3/src/redis-server & sleep 1s ; ./build/db_tests
+test: hiredis redis $(BUILD)/db_tests $(BUILD)/socket_tests FORCE
+	./thirdparty/redis-3.2.3/src/redis-server &
+	sleep 1s ; ./build/db_tests ; ./build/socket_tests
 
 FORCE:
diff --git a/event_loop.c b/event_loop.c
index d7169f5d8..ebc6ebc13 100644
--- a/event_loop.c
+++ b/event_loop.c
@@ -1,7 +1,6 @@
 #include "event_loop.h"
 
 #include <assert.h>
-#include <unistd.h>
 
 UT_icd item_icd = {sizeof(event_loop_item), NULL, NULL, NULL};
 UT_icd poll_icd = {sizeof(struct pollfd), NULL, NULL, NULL};
diff --git a/sockets.c b/sockets.c
new file mode 100644
index 000000000..6fd41e476
--- /dev/null
+++ b/sockets.c
@@ -0,0 +1,111 @@
+#include "sockets.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "common.h"
+
+/* Creates a Unix domain datagram socket and binds it to socket_pathname.
+ * Any existing file at that pathname is removed first. Returns the file
+ * descriptor for the socket, or -1 if an error occurred; on every error
+ * path the descriptor is closed so that a failed call does not leak it. */
+int bind_ipc_sock(const char *socket_pathname) {
+  struct sockaddr_un socket_address;
+  int socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+  if (socket_fd < 0) {
+    LOG_ERR("socket() failed for pathname %s.", socket_pathname);
+    return -1;
+  }
+
+  unlink(socket_pathname);
+  memset(&socket_address, 0, sizeof(struct sockaddr_un));
+  socket_address.sun_family = AF_UNIX;
+  if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
+    LOG_ERR("Socket pathname is too long.");
+    close(socket_fd);
+    return -1;
+  }
+  strncpy(socket_address.sun_path, socket_pathname,
+          strlen(socket_pathname) + 1);
+
+  if (bind(socket_fd, (struct sockaddr *) &socket_address,
+           sizeof(struct sockaddr_un)) != 0) {
+    LOG_ERR("Bind failed for pathname %s.", socket_pathname);
+    close(socket_fd);
+    return -1;
+  }
+
+  return socket_fd;
+}
+
+/* Creates a Unix domain datagram socket and connects it to the socket at
+ * socket_pathname. Returns the file descriptor, or -1 if an error occurred;
+ * on every error path the descriptor is closed so it does not leak. */
+int connect_ipc_sock(const char *socket_pathname) {
+  struct sockaddr_un socket_address;
+  int socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+  if (socket_fd < 0) {
+    LOG_ERR("socket() failed for pathname %s.", socket_pathname);
+    return -1;
+  }
+
+  memset(&socket_address, 0, sizeof(struct sockaddr_un));
+  socket_address.sun_family = AF_UNIX;
+  if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
+    LOG_ERR("Socket pathname is too long.");
+    close(socket_fd);
+    return -1;
+  }
+  strncpy(socket_address.sun_path, socket_pathname,
+          strlen(socket_pathname) + 1);
+
+  if (connect(socket_fd, (struct sockaddr *) &socket_address,
+              sizeof(struct sockaddr_un)) != 0) {
+    LOG_ERR("Connection to socket failed for pathname %s.", socket_pathname);
+    close(socket_fd);
+    return -1;
+  }
+
+  return socket_fd;
+}
+
+/* Sends message on the given socket file descriptor as two consecutive
+ * sends: first the message length as an int (the terminating NUL is not
+ * counted), then the message bytes themselves. If either send fails, an
+ * error is printed to stderr and the function returns without sending
+ * anything further. */
+void send_ipc_sock(int socket_fd, char *message) {
+  int msg_len = strlen(message);
+  if (send(socket_fd, (char *) &msg_len, sizeof(msg_len), 0) == -1) {
+    fprintf(stderr, "Error sending to socket.\n");
+    return;
+  }
+  if (send(socket_fd, (char *) message, msg_len * sizeof(char), 0) == -1) {
+    fprintf(stderr, "Error sending to socket.\n");
+  }
+}
+
+/* Receives a length-prefixed message on the given socket file descriptor.
+ * Returns a newly allocated, NUL-terminated copy, or NULL if the length is
+ * not read in full, is negative, or the allocation or second recv fails.
+ * NOTE: Caller must free the message! */
+char *recv_ipc_sock(int socket_fd) {
+  int length;
+  ssize_t nbytes = recv(socket_fd, &length, sizeof(length), 0);
+  if (nbytes != (ssize_t) sizeof(length) || length < 0) {
+    fprintf(stderr, "Error receiving from socket.\n");
+    return NULL;
+  }
+  char *message = malloc((size_t) length + 1);
+  if (message == NULL || recv(socket_fd, message, (size_t) length, 0) == -1) {
+    fprintf(stderr, "Error receiving from socket.\n");
+    free(message); /* Avoid leaking the buffer; no-op if malloc failed. */
+    return NULL;
+  }
+  message[length] = '\0';
+  return message;
+}
diff --git a/sockets.h b/sockets.h
new file mode 100644
index 000000000..7ad0c7141
--- /dev/null
+++ b/sockets.h
@@ -0,0 +1,10 @@
+#ifndef SOCKETS_H
+#define SOCKETS_H
+
+/* Helper functions for socket communication. */
+int bind_ipc_sock(const char* socket_pathname);
+int connect_ipc_sock(const char* socket_pathname);
+void send_ipc_sock(int socket_fd, char* message);
+char* recv_ipc_sock(int socket_fd);
+
+#endif
diff --git a/test/socket_tests.c b/test/socket_tests.c
new file mode 100644
index 000000000..1fa352246
--- /dev/null
+++ b/test/socket_tests.c
@@ -0,0 +1,45 @@
+#include "greatest.h"
+
+#include <assert.h>
+#include <unistd.h>
+
+#include "sockets.h"
+
+SUITE(event_loop_tests);
+
+/* Sends a string from a forked child to the parent over an IPC socket. The
+ * child _exit()s so it never returns into the test runner and re-reports. */
+TEST ipc_socket_test(void) {
+  const char* socket_pathname = "test-socket";
+  int socket_fd = bind_ipc_sock(socket_pathname);
+  ASSERT(socket_fd >= 0);
+  char* test_string = "hello world";
+  pid_t pid = fork();
+  if (pid == 0) {
+    close(socket_fd);
+    socket_fd = connect_ipc_sock(socket_pathname);
+    if (socket_fd >= 0) {
+      send_ipc_sock(socket_fd, test_string);
+    }
+    _exit(socket_fd >= 0 ? 0 : 1);
+  }
+  char* message = recv_ipc_sock(socket_fd);
+  ASSERT(message != NULL);
+  ASSERT_STR_EQ(test_string, message);
+  free(message);
+  close(socket_fd);
+  unlink(socket_pathname);
+  PASS();
+}
+
+SUITE(event_loop_tests) {
+  RUN_TEST(ipc_socket_test);
+}
+
+GREATEST_MAIN_DEFS();
+
+int main(int argc, char** argv) {
+  GREATEST_MAIN_BEGIN();
+  RUN_SUITE(event_loop_tests);
+  GREATEST_MAIN_END();
+}

From 4521e5f1830687d043c4495bac673798e73e2e94 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Fri, 16 Sep 2016 18:26:57 -0700
Subject: [PATCH 28/91] Initial commit

---
 .gitignore |  33 +++++++++
 LICENSE    | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md  |   2 +
 3 files changed, 236 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..f805e810e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,33 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..8dada3eda
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..fb354f8ea
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# yokoh
+A local scheduler and node manager for Ray

From 0b7d81cae682b7445c4a38c8979ab8b835b8f54b Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sat, 17 Sep 2016 00:03:10 -0700
Subject: [PATCH 29/91] API for creating task specifications (#5)

* API for creating task specifications

* fixes

* add more checks and improve comments
---
 Makefile          |   7 ++-
 common.h          |   8 +++
 task.c            | 133 ++++++++++++++++++++++++++++++++++++++++++++++
 task.h            |  53 ++++++++++++++++++
 test/task_tests.c |  54 +++++++++++++++++++
 5 files changed, 253 insertions(+), 2 deletions(-)
 create mode 100644 task.c
 create mode 100644 task.h
 create mode 100644 test/task_tests.c

diff --git a/Makefile b/Makefile
index 8ca748e1d..d02a9cb53 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,9 @@ $(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c st
 $(BUILD)/socket_tests: test/socket_tests.c thirdparty/greatest.h sockets.c
 	$(CC) -o $@ test/socket_tests.c sockets.c $(CFLAGS) -I. -Ithirdparty
 
+$(BUILD)/task_tests: test/task_tests.c task.c sockets.c common.h
+	$(CC) -o $@ test/task_tests.c task.c sockets.c $(CFLAGS) -I. -Ithirdparty
+
 clean:
 	rm -r $(BUILD)/*
 
@@ -21,8 +24,8 @@ redis:
 hiredis:
 	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
 
-test: hiredis redis $(BUILD)/db_tests $(BUILD)/socket_tests FORCE
+test: hiredis redis $(BUILD)/db_tests $(BUILD)/socket_tests $(BUILD)/task_tests FORCE
 	./thirdparty/redis-3.2.3/src/redis-server &
-	sleep 1s ; ./build/db_tests ; ./build/socket_tests
+	sleep 1s ; ./build/db_tests ; ./build/socket_tests ; ./build/task_tests
 
 FORCE:
diff --git a/common.h b/common.h
index 61abc7b50..d38556dcf 100644
--- a/common.h
+++ b/common.h
@@ -17,6 +17,14 @@
 #define LOG_INFO(M, ...) \
   fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
 
+#define CHECK(COND) /* Log and exit(-1) if COND is false. */ \
+  do {                                     \
+    if (!(COND)) {                         \
+      LOG_ERR("Check failure: %s", #COND); \
+      exit(-1);                            \
+    }                                      \
+  } while (0) /* No trailing ';' here: the call site supplies it. */
+
 #define UNIQUE_ID_SIZE 20
 
 typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } unique_id;
diff --git a/task.c b/task.c
new file mode 100644
index 000000000..c337cc1a3
--- /dev/null
+++ b/task.c
@@ -0,0 +1,133 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "task.h"
+#include "common.h"
+#include "sockets.h"
+
+/* Tasks are stored in a consecutive chunk of memory, the first
+ * sizeof(task_spec) bytes are arranged according to the struct
+ * task_spec. Then there is an array of task_args of length
+ * (num_args + num_returns), and then follows the data of
+ * pass-by-value arguments of size args_value_size. The offsets in the
+ * task_arg.val are with respect to the end of the augmented structure,
+ * i.e. with respect to the address &task_spec.args_and_returns[0] +
+ * (task_spec->num_args + task_spec->num_returns) * sizeof(task_arg). */
+
+typedef struct {
+  /* Either ARG_BY_REF or ARG_BY_VAL. */
+  int8_t type;
+  union {
+    object_id obj_id;
+    struct {
+      /* Offset where the data associated to this arg is located relative
+       * to the end of the args_and_returns array (start of the value data). */
+      ptrdiff_t offset;
+      int64_t length;
+    } value;
+  };
+} task_arg;
+
+struct task_spec_impl {
+  function_id func_id;
+  /* Total number of arguments. */
+  int64_t num_args;
+  /* Index of the next argument to be constructed (starts at 0). */
+  int64_t arg_index;
+  /* Number of return values. */
+  int64_t num_returns;
+  /* Number of bytes the pass-by-value arguments are occupying. */
+  int64_t args_value_size;
+  /* The offset of the number of bytes of pass-by-value data that
+   * has been written so far, relative to &task_spec->args_and_returns[0] +
+   * (task_spec->num_args + task_spec->num_returns) * sizeof(task_arg) */
+  int64_t args_value_offset;
+  /* Argument and return IDs as well as offsets for pass-by-value args. */
+  task_arg args_and_returns[0];
+};
+
+task_spec *alloc_task_spec(function_id func_id,
+                           int64_t num_args,
+                           int64_t num_returns,
+                           int64_t args_value_size) {
+  int64_t size = sizeof(task_spec) +
+                 (num_args + num_returns) * sizeof(task_arg) + args_value_size;
+  task_spec *task = malloc(size);
+  memset(task, 0, size);
+  task->func_id = func_id;
+  task->num_args = num_args;
+  task->arg_index = 0;
+  task->num_returns = num_returns;
+  task->args_value_size = args_value_size;
+  return task;
+}
+
+int64_t task_num_args(task_spec *spec) {
+  return spec->num_args;
+}
+
+int64_t task_num_returns(task_spec *spec) {
+  return spec->num_returns;
+}
+
+int8_t task_arg_type(task_spec *spec, int64_t arg_index) {
+  CHECK(0 <= arg_index && arg_index < spec->num_args);
+  return spec->args_and_returns[arg_index].type;
+}
+
+object_id *task_arg_id(task_spec *spec, int64_t arg_index) {
+  CHECK(0 <= arg_index && arg_index < spec->num_args);
+  task_arg *arg = &spec->args_and_returns[arg_index];
+  CHECK(arg->type == ARG_BY_REF); /* Only by-reference args carry an ID. */
+  return &arg->obj_id;
+}
+
+uint8_t *task_arg_val(task_spec *spec, int64_t arg_index) {
+  CHECK(0 <= arg_index && arg_index < spec->num_args);
+  task_arg *arg = &spec->args_and_returns[arg_index];
+  CHECK(arg->type == ARG_BY_VAL);
+  uint8_t *data = (uint8_t *) &spec->args_and_returns[0];
+  data += (spec->num_args + spec->num_returns) * sizeof(task_arg);
+  return data + arg->value.offset;
+}
+
+int64_t task_arg_length(task_spec *spec, int64_t arg_index) {
+  CHECK(0 <= arg_index && arg_index < spec->num_args);
+  task_arg *arg = &spec->args_and_returns[arg_index];
+  CHECK(arg->type == ARG_BY_VAL);
+  return arg->value.length;
+}
+
+int64_t task_args_add_ref(task_spec *spec, object_id obj_id) {
+  task_arg *arg = &spec->args_and_returns[spec->arg_index];
+  arg->type = ARG_BY_REF;
+  arg->obj_id = obj_id;
+  return spec->arg_index++;
+}
+
+int64_t task_args_add_val(task_spec *spec, uint8_t *data, int64_t length) {
+  task_arg *arg = &spec->args_and_returns[spec->arg_index];
+  arg->type = ARG_BY_VAL;
+  arg->value.offset = spec->args_value_offset;
+  arg->value.length = length;
+  uint8_t *addr = task_arg_val(spec, spec->arg_index);
+  CHECK(spec->args_value_offset + length <= spec->args_value_size);
+  CHECK(spec->arg_index != spec->num_args - 1 ||
+        spec->args_value_offset + length == spec->args_value_size);
+  memcpy(addr, data, length);
+  spec->args_value_offset += length;
+  return spec->arg_index++;
+}
+
+object_id *task_return(task_spec *spec, int64_t ret_index) {
+  CHECK(0 <= ret_index && ret_index < spec->num_returns);
+  task_arg *ret = &spec->args_and_returns[spec->num_args + ret_index];
+  CHECK(ret->type == ARG_BY_REF); /* No memory corruption. */
+  return &ret->obj_id;
+}
+
+void free_task_spec(task_spec *spec) {
+  CHECK(spec->arg_index == spec->num_args); /* Task was fully constructed */
+  free(spec);
+}
diff --git a/task.h b/task.h
new file mode 100644
index 000000000..fbf557fe0
--- /dev/null
+++ b/task.h
@@ -0,0 +1,53 @@
+/* This API specifies the task data structure. It is in C so we can
+ * easily construct tasks from other languages like Python. The datastructures
+ * are also defined in such a way that memory is contiguous and all pointers
+ * are relative, so that we can memcpy the datastructure and ship it over the
+ * network without serialization and deserialization. */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "common.h"
+
+typedef unique_id function_id;
+typedef unique_id object_id;
+
+typedef struct task_spec_impl task_spec;
+
+/* If argument is passed by value or reference. */
+enum arg_type { ARG_BY_REF, ARG_BY_VAL };
+
+/* Construct and modify task specifications. */
+
+/* Allocating and initializing a task. */
+task_spec *alloc_task_spec(function_id func_id,
+                           int64_t num_args,
+                           int64_t num_returns,
+                           int64_t args_value_size);
+
+/* Getting the number of arguments and returns. */
+int64_t task_num_args(task_spec *spec);
+int64_t task_num_returns(task_spec *spec);
+
+/* Getting task arguments. */
+int8_t task_arg_type(task_spec *spec, int64_t arg_index);
+unique_id *task_arg_id(task_spec *spec, int64_t arg_index);
+uint8_t *task_arg_val(task_spec *spec, int64_t arg_index);
+int64_t task_arg_length(task_spec *spec, int64_t arg_index);
+
+/* Setting task arguments. Note that this API only allows you to set the
+ * arguments in their order of appearance. */
+int64_t task_args_add_ref(task_spec *spec, object_id obj_id);
+int64_t task_args_add_val(task_spec *spec, uint8_t *data, int64_t length);
+
+/* Getting and setting return arguments. Tasks return by reference for now. */
+unique_id *task_return(task_spec *spec, int64_t ret_index);
+
+/* Freeing the task datastructure. */
+void free_task_spec(task_spec *spec);
+
+/* Write the task specification to a file or socket. */
+int send_task(int fd, task_spec *spec);
+
+/* Read the task specification from a file or socket. It is the user's
+ * responsibility to free the task after it has been used. */
+task_spec *recv_task(int fd);
diff --git a/test/task_tests.c b/test/task_tests.c
new file mode 100644
index 000000000..d8443e68c
--- /dev/null
+++ b/test/task_tests.c
@@ -0,0 +1,54 @@
+#include "greatest.h"
+
+#include "task.h"
+
+SUITE(task_tests);
+
+TEST task_test(void) {
+  function_id func_id = {
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
+  task_spec* task = alloc_task_spec(func_id, 4, 2, 10);
+  ASSERT(task_num_args(task) == 4);
+  ASSERT(task_num_returns(task) == 2);
+
+  unique_id arg1 = {
+      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}};
+  ASSERT(task_args_add_ref(task, arg1) == 0);
+  ASSERT(task_args_add_val(task, (uint8_t*) "hello", 5) == 1);
+  unique_id arg2 = {
+      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
+  ASSERT(task_args_add_ref(task, arg2) == 2);
+  ASSERT(task_args_add_val(task, (uint8_t*) "world", 5) == 3);
+
+  unique_id ret0 = {
+      {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}};
+  unique_id ret1 = {
+      {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}};
+  memcpy(task_return(task, 0), &ret0, sizeof(ret0));
+  memcpy(task_return(task, 1), &ret1, sizeof(ret1));
+
+  ASSERT(memcmp(task_arg_id(task, 0), &arg1, sizeof(arg1)) == 0);
+  ASSERT(memcmp(task_arg_val(task, 1), (uint8_t*) "hello",
+                task_arg_length(task, 1)) == 0);
+  ASSERT(memcmp(task_arg_id(task, 2), &arg2, sizeof(arg2)) == 0);
+  ASSERT(memcmp(task_arg_val(task, 3), (uint8_t*) "world",
+                task_arg_length(task, 3)) == 0);
+
+  ASSERT(memcmp(task_return(task, 0), &ret0, sizeof(unique_id)) == 0);
+  ASSERT(memcmp(task_return(task, 1), &ret1, sizeof(unique_id)) == 0);
+
+  free_task_spec(task);
+  PASS();
+}
+
+SUITE(task_tests) {
+  RUN_TEST(task_test);
+}
+
+GREATEST_MAIN_DEFS();
+
+int main(int argc, char** argv) {
+  GREATEST_MAIN_BEGIN();
+  RUN_SUITE(task_tests);
+  GREATEST_MAIN_END();
+}

From 9e4203c18c4ee56f881e0165d0d4a1d07b0b4644 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sat, 17 Sep 2016 00:30:31 -0700
Subject: [PATCH 30/91] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fb354f8ea..b95056c2f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,2 @@
-# yokoh
-A local scheduler and node manager for Ray
+# Halo
+A local scheduler and node manager for Ray.

From ff11ee21efc11b341e3300d5140509e2088a992d Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Sat, 17 Sep 2016 15:15:18 -0700
Subject: [PATCH 31/91] Convert to streaming sockets (#7)

* Convert to streaming sockets

* Formatting
---
 sockets.c           | 42 ++++++++++++++++++++++++++++++------------
 sockets.h           |  1 +
 test/socket_tests.c | 12 ++++++++----
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/sockets.c b/sockets.c
index 6fd41e476..82ca490f7 100644
--- a/sockets.c
+++ b/sockets.c
@@ -17,7 +17,7 @@ int bind_ipc_sock(const char *socket_pathname) {
   struct sockaddr_un socket_address;
   int socket_fd;
 
-  socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+  socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (socket_fd < 0) {
     LOG_ERR("socket() failed for pathname %s.", socket_pathname);
     return -1;
@@ -38,6 +38,7 @@ int bind_ipc_sock(const char *socket_pathname) {
     LOG_ERR("Bind failed for pathname %s.", socket_pathname);
     return -1;
   }
+  listen(socket_fd, 5);
 
   return socket_fd;
 }
@@ -49,7 +50,7 @@ int connect_ipc_sock(const char *socket_pathname) {
   struct sockaddr_un socket_address;
   int socket_fd;
 
-  socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+  socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (socket_fd < 0) {
     LOG_ERR("socket() failed for pathname %s.", socket_pathname);
     return -1;
@@ -77,33 +78,50 @@ int connect_ipc_sock(const char *socket_pathname) {
 void send_ipc_sock(int socket_fd, char *message) {
   int length = strlen(message);
   int nbytes;
-  nbytes = send(socket_fd, (char *) &length, sizeof(length), 0);
+  nbytes = write(socket_fd, (char *) &length, sizeof(length));
   if (nbytes == -1) {
-    fprintf(stderr, "Error sending to socket.\n");
+    LOG_ERR("Error sending to socket.\n");
     return;
   }
-  nbytes = send(socket_fd, (char *) message, length * sizeof(char), 0);
+  nbytes = write(socket_fd, (char *) message, length * sizeof(char));
   if (nbytes == -1) {
-    fprintf(stderr, "Error sending to socket.\n");
+    LOG_ERR("Error sending to socket.\n");
     return;
   }
 }
 
+/* Accept a new client connection on the given socket
+ * descriptor. Returns a descriptor for the new socket. */
+int accept_client(int socket_fd) {
+  struct sockaddr_un client_addr;
+  /* accept() takes an in/out socklen_t length; do not alias an int to it. */
+  socklen_t client_len = sizeof(client_addr);
+  int client_fd = accept(socket_fd, (struct sockaddr *) &client_addr,
+                         &client_len);
+  if (client_fd < 0) {
+    LOG_ERR("Error accepting client connection.");
+    return -1;
+  }
+  return client_fd;
+}
+
 /* Receives a message on the given socket file descriptor. Allocates and
  * returns a pointer to the message.
  * NOTE: Caller must free the message! */
 char *recv_ipc_sock(int socket_fd) {
   int length;
   int nbytes;
-  nbytes = recv(socket_fd, &length, sizeof(length), 0);
-  if (nbytes == -1) {
-    fprintf(stderr, "Error receiving from socket.\n");
+  nbytes = read(socket_fd, &length, sizeof(length));
+  if (nbytes < 0) {
+    LOG_ERR("Error reading length of message from socket.");
     return NULL;
   }
+
   char *message = malloc((length + 1) * sizeof(char));
-  nbytes = recv(socket_fd, message, length * sizeof(char), 0);
-  if (nbytes == -1) {
-    fprintf(stderr, "Error receiving from socket.\n");
+  nbytes = read(socket_fd, message, length);
+  if (nbytes < 0) {
+    LOG_ERR("Error reading message from socket.");
+    free(message);
     return NULL;
   }
   message[length] = '\0';
diff --git a/sockets.h b/sockets.h
index 7ad0c7141..a563470c9 100644
--- a/sockets.h
+++ b/sockets.h
@@ -5,6 +5,7 @@
 int bind_ipc_sock(const char* socket_pathname);
 int connect_ipc_sock(const char* socket_pathname);
 void send_ipc_sock(int socket_fd, char* message);
+int accept_client(int socket_fd);
 char* recv_ipc_sock(int socket_fd);
 
 #endif
diff --git a/test/socket_tests.c b/test/socket_tests.c
index 1fa352246..b57ab9000 100644
--- a/test/socket_tests.c
+++ b/test/socket_tests.c
@@ -5,7 +5,7 @@
 
 #include "sockets.h"
 
-SUITE(event_loop_tests);
+SUITE(socket_tests);
 
 TEST ipc_socket_test(void) {
   const char* socket_pathname = "test-socket";
@@ -20,11 +20,15 @@ TEST ipc_socket_test(void) {
     ASSERT(socket_fd >= 0);
     send_ipc_sock(socket_fd, test_string);
     close(socket_fd);
+    exit(0);
   } else {
-    char* message = recv_ipc_sock(socket_fd);
+    int client_fd = accept_client(socket_fd);
+    ASSERT(client_fd >= 0);
+    char* message = recv_ipc_sock(client_fd);
     ASSERT(message != NULL);
     ASSERT_STR_EQ(test_string, message);
     free(message);
+    close(client_fd);
     close(socket_fd);
     unlink(socket_pathname);
   }
@@ -32,7 +36,7 @@ TEST ipc_socket_test(void) {
   PASS();
 }
 
-SUITE(event_loop_tests) {
+SUITE(socket_tests) {
   RUN_TEST(ipc_socket_test);
 }
 
@@ -40,6 +44,6 @@ GREATEST_MAIN_DEFS();
 
 int main(int argc, char** argv) {
   GREATEST_MAIN_BEGIN();
-  RUN_SUITE(event_loop_tests);
+  RUN_SUITE(socket_tests);
   GREATEST_MAIN_END();
 }

From b18f214d555d353daee304b69718e0acc2e8440d Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 18 Sep 2016 13:35:43 -0700
Subject: [PATCH 32/91] Make it possible to read and write data that is not
 null-terminated (#9)

* Make it possible to read and write data that is not null-terminated

* formatting
---
 Makefile                            | 12 ++---
 sockets.c => io.c                   | 74 +++++++++++++++++------------
 io.h                                | 21 ++++++++
 sockets.h                           | 11 -----
 task.c                              |  2 +-
 test/{socket_tests.c => io_tests.c} | 25 ++++++----
 6 files changed, 87 insertions(+), 58 deletions(-)
 rename sockets.c => io.c (70%)
 create mode 100644 io.h
 delete mode 100644 sockets.h
 rename test/{socket_tests.c => io_tests.c} (57%)

diff --git a/Makefile b/Makefile
index d02a9cb53..e1c894737 100644
--- a/Makefile
+++ b/Makefile
@@ -9,11 +9,11 @@ CFLAGS += -Wmissing-declarations
 $(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c state/redis.c common.c
 	$(CC) -o $@ test/db_tests.c event_loop.c state/redis.c common.c thirdparty/hiredis/libhiredis.a $(CFLAGS) -I. -Ithirdparty
 
-$(BUILD)/socket_tests: test/socket_tests.c thirdparty/greatest.h sockets.c
-	$(CC) -o $@ test/socket_tests.c sockets.c $(CFLAGS) -I. -Ithirdparty
+$(BUILD)/io_tests: test/io_tests.c thirdparty/greatest.h io.c
+	$(CC) -o $@ test/io_tests.c io.c $(CFLAGS) -I. -Ithirdparty
 
-$(BUILD)/task_tests: test/task_tests.c task.c sockets.c common.h
-	$(CC) -o $@ test/task_tests.c task.c sockets.c $(CFLAGS) -I. -Ithirdparty
+$(BUILD)/task_tests: test/task_tests.c task.c io.c common.h
+	$(CC) -o $@ test/task_tests.c task.c io.c $(CFLAGS) -I. -Ithirdparty
 
 clean:
 	rm -r $(BUILD)/*
@@ -24,8 +24,8 @@ redis:
 hiredis:
 	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
 
-test: hiredis redis $(BUILD)/db_tests $(BUILD)/socket_tests $(BUILD)/task_tests FORCE
+test: hiredis redis $(BUILD)/db_tests $(BUILD)/io_tests $(BUILD)/task_tests FORCE
 	./thirdparty/redis-3.2.3/src/redis-server &
-	sleep 1s ; ./build/db_tests ; ./build/socket_tests ; ./build/task_tests
+	sleep 1s ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests
 
 FORCE:
diff --git a/sockets.c b/io.c
similarity index 70%
rename from sockets.c
rename to io.c
index 82ca490f7..1d16a78e0 100644
--- a/sockets.c
+++ b/io.c
@@ -1,4 +1,4 @@
-#include "sockets.h"
+#include "io.h"
 
 #include <stdlib.h>
 #include <unistd.h>
@@ -6,6 +6,7 @@
 #include <sys/un.h>
 #include <string.h>
 #include <stdio.h>
+#include <inttypes.h>
 
 #include "common.h"
 
@@ -74,22 +75,6 @@ int connect_ipc_sock(const char *socket_pathname) {
   return socket_fd;
 }
 
-/* Sends a message on the given socket file descriptor. */
-void send_ipc_sock(int socket_fd, char *message) {
-  int length = strlen(message);
-  int nbytes;
-  nbytes = write(socket_fd, (char *) &length, sizeof(length));
-  if (nbytes == -1) {
-    LOG_ERR("Error sending to socket.\n");
-    return;
-  }
-  nbytes = write(socket_fd, (char *) message, length * sizeof(char));
-  if (nbytes == -1) {
-    LOG_ERR("Error sending to socket.\n");
-    return;
-  }
-}
-
 /* Accept a new client connection on the given socket
  * descriptor. Returns a descriptor for the new socket. */
 int accept_client(int socket_fd) {
@@ -105,25 +90,52 @@ int accept_client(int socket_fd) {
   return client_fd;
 }
 
-/* Receives a message on the given socket file descriptor. Allocates and
- * returns a pointer to the message.
- * NOTE: Caller must free the message! */
-char *recv_ipc_sock(int socket_fd) {
-  int length;
-  int nbytes;
-  nbytes = read(socket_fd, &length, sizeof(length));
+/* Write a sequence of bytes on a file descriptor. */
+void write_bytes(int fd, uint8_t *bytes, int64_t length) {
+  ssize_t nbytes = write(fd, (char *) &length, sizeof(length));
+  if (nbytes == -1) {
+    LOG_ERR("Error sending to socket.\n");
+    return;
+  }
+  nbytes = write(fd, (char *) bytes, length * sizeof(char));
+  if (nbytes == -1) {
+    LOG_ERR("Error sending to socket.\n");
+    return;
+  }
+}
+
+/* Read a sequence of bytes written by write_bytes from a file descriptor.
+ * Allocates and returns a pointer to the bytes.
+ * NOTE: Caller must free the memory! */
+void read_bytes(int fd, uint8_t **bytes, int64_t *length) {
+  ssize_t nbytes = read(fd, length, sizeof(int64_t));
   if (nbytes < 0) {
     LOG_ERR("Error reading length of message from socket.");
-    return NULL;
+    *bytes = NULL;
+    return;
   }
 
-  char *message = malloc((length + 1) * sizeof(char));
-  nbytes = read(socket_fd, message, length);
+  *bytes = malloc(*length * sizeof(uint8_t));
+  nbytes = read(fd, *bytes, *length);
   if (nbytes < 0) {
     LOG_ERR("Error reading message from socket.");
-    free(message);
-    return NULL;
+    free(*bytes);
+    *bytes = NULL;
   }
-  message[length] = '\0';
-  return message;
+}
+
+/* Write a null-terminated string to a file descriptor. */
+void write_string(int fd, char *message) {
+  /* Account for the \0 at the end of the string. */
+  write_bytes(fd, (uint8_t *) message, strlen(message) + 1);
+}
+
+/* Reads a null-terminated string from the file descriptor that has been
+ * written by write_string. Allocates and returns a pointer to the string.
+ * NOTE: Caller must free the memory! */
+char *read_string(int fd) {
+  uint8_t *bytes;
+  int64_t length;
+  read_bytes(fd, &bytes, &length);
+  return (char *) bytes;
 }
diff --git a/io.h b/io.h
new file mode 100644
index 000000000..c6dd3bb30
--- /dev/null
+++ b/io.h
@@ -0,0 +1,21 @@
+#ifndef IO_H
+#define IO_H
+
+#include <stdint.h>
+
+/* Helper functions for socket communication. */
+
+int bind_ipc_sock(const char *socket_pathname);
+int connect_ipc_sock(const char *socket_pathname);
+
+int accept_client(int socket_fd);
+
+/* Reading and writing data */
+
+void write_bytes(int fd, uint8_t *bytes, int64_t length);
+void read_bytes(int fd, uint8_t **bytes, int64_t *length);
+
+void write_string(int fd, char *message);
+char *read_string(int fd);
+
+#endif
diff --git a/sockets.h b/sockets.h
deleted file mode 100644
index a563470c9..000000000
--- a/sockets.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef SOCKETS_H
-#define SOCKETS_H
-
-/* Helper functions for socket communication. */
-int bind_ipc_sock(const char* socket_pathname);
-int connect_ipc_sock(const char* socket_pathname);
-void send_ipc_sock(int socket_fd, char* message);
-int accept_client(int socket_fd);
-char* recv_ipc_sock(int socket_fd);
-
-#endif
diff --git a/task.c b/task.c
index c337cc1a3..3f3c01661 100644
--- a/task.c
+++ b/task.c
@@ -4,7 +4,7 @@
 
 #include "task.h"
 #include "common.h"
-#include "sockets.h"
+#include "io.h"
 
 /* Tasks are stored in a consecutive chunk of memory, the first
  * sizeof(task_spec) bytes are arranged according to the struct
diff --git a/test/socket_tests.c b/test/io_tests.c
similarity index 57%
rename from test/socket_tests.c
rename to test/io_tests.c
index b57ab9000..9216aa56e 100644
--- a/test/socket_tests.c
+++ b/test/io_tests.c
@@ -2,32 +2,39 @@
 
 #include <assert.h>
 #include <unistd.h>
+#include <inttypes.h>
 
-#include "sockets.h"
+#include "io.h"
 
-SUITE(socket_tests);
+SUITE(io_tests);
 
 TEST ipc_socket_test(void) {
-  const char* socket_pathname = "test-socket";
+  const char *socket_pathname = "test-socket";
   int socket_fd = bind_ipc_sock(socket_pathname);
   ASSERT(socket_fd >= 0);
 
-  char* test_string = "hello world";
+  char *test_string = "hello world";
+  char *test_bytes = "another string";
   pid_t pid = fork();
   if (pid == 0) {
     close(socket_fd);
     socket_fd = connect_ipc_sock(socket_pathname);
     ASSERT(socket_fd >= 0);
-    send_ipc_sock(socket_fd, test_string);
+    write_string(socket_fd, test_string);
+    write_bytes(socket_fd, (uint8_t *) test_bytes, strlen(test_bytes));
     close(socket_fd);
     exit(0);
   } else {
     int client_fd = accept_client(socket_fd);
     ASSERT(client_fd >= 0);
-    char* message = recv_ipc_sock(client_fd);
+    char *message = read_string(client_fd);
     ASSERT(message != NULL);
     ASSERT_STR_EQ(test_string, message);
     free(message);
+    int64_t len;
+    uint8_t *bytes;
+    read_bytes(client_fd, &bytes, &len);
+    ASSERT(memcmp(test_bytes, bytes, len) == 0);
     close(client_fd);
     close(socket_fd);
     unlink(socket_pathname);
@@ -36,14 +43,14 @@ TEST ipc_socket_test(void) {
   PASS();
 }
 
-SUITE(socket_tests) {
+SUITE(io_tests) {
   RUN_TEST(ipc_socket_test);
 }
 
 GREATEST_MAIN_DEFS();
 
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
   GREATEST_MAIN_BEGIN();
-  RUN_SUITE(socket_tests);
+  RUN_SUITE(io_tests);
   GREATEST_MAIN_END();
 }

From c238ae4aa01025761a6c709c812f1e35caabefcd Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 18 Sep 2016 13:57:27 -0700
Subject: [PATCH 33/91] do not re-download and rebuild redis if it already
 exists (#10)

---
 thirdparty/build-redis.sh | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/thirdparty/build-redis.sh b/thirdparty/build-redis.sh
index 57c68c97b..230e9ae29 100644
--- a/thirdparty/build-redis.sh
+++ b/thirdparty/build-redis.sh
@@ -1,4 +1,6 @@
-wget http://download.redis.io/releases/redis-3.2.3.tar.gz
-tar xvfz redis-3.2.3.tar.gz
-cd redis-3.2.3
-make
+if [ ! -f redis-3.2.3/src/redis-server ]; then
+  wget http://download.redis.io/releases/redis-3.2.3.tar.gz
+  tar xvfz redis-3.2.3.tar.gz
+  cd redis-3.2.3
+  make
+fi

From 37f035dbd07e080b64934722d917e3a8bb4c46f6 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 18 Sep 2016 18:06:42 -0700
Subject: [PATCH 34/91] implementing reading and writing tasks (#11)

---
 Makefile          |  4 ++--
 common.c          | 18 ++++++++++++++++++
 common.h          |  4 ++++
 task.c            | 26 ++++++++++++++++++++++++--
 task.h            | 12 ++++++++++--
 test/task_tests.c | 40 ++++++++++++++++++++++++++++------------
 6 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/Makefile b/Makefile
index e1c894737..29a7ea1ba 100644
--- a/Makefile
+++ b/Makefile
@@ -12,8 +12,8 @@ $(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c st
 $(BUILD)/io_tests: test/io_tests.c thirdparty/greatest.h io.c
 	$(CC) -o $@ test/io_tests.c io.c $(CFLAGS) -I. -Ithirdparty
 
-$(BUILD)/task_tests: test/task_tests.c task.c io.c common.h
-	$(CC) -o $@ test/task_tests.c task.c io.c $(CFLAGS) -I. -Ithirdparty
+$(BUILD)/task_tests: test/task_tests.c task.h task.c io.h io.c common.h common.h common.c
+	$(CC) -o $@ test/task_tests.c task.c io.c common.c $(CFLAGS) -I. -Ithirdparty
 
 clean:
 	rm -r $(BUILD)/*
diff --git a/common.c b/common.c
index e227eb16e..9e0a86310 100644
--- a/common.c
+++ b/common.c
@@ -1,5 +1,23 @@
 #include "common.h"
 
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+unique_id globally_unique_id(void) {
+  /* Use /dev/urandom for "real" randomness. */
+  int fd;
+  if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
+    LOG_ERR("Could not generate random number");
+  }
+  unique_id result;
+  read(fd, &result.id[0], UNIQUE_ID_SIZE);
+  close(fd);
+  return result;
+}
+
 char *sha1_to_hex(const unsigned char *sha1, char *buffer) {
   static const char hex[] = "0123456789abcdef";
   char *buf = buffer;
diff --git a/common.h b/common.h
index d38556dcf..6dc360119 100644
--- a/common.h
+++ b/common.h
@@ -1,6 +1,7 @@
 #ifndef COMMON_H
 #define COMMON_H
 
+#include <string.h>
 #include <errno.h>
 
 #ifdef NDEBUG
@@ -29,6 +30,9 @@
 
 typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } unique_id;
 
+/* Generate a globally unique ID. */
+unique_id globally_unique_id(void);
+
 /* Convert a 20 byte sha1 hash to a hexdecimal string. This function assumes
  * that buffer points to an already allocated char array of size 2 *
  * UNIQUE_ID_SIZE + 1 */
diff --git a/task.c b/task.c
index 3f3c01661..5d5271d91 100644
--- a/task.c
+++ b/task.c
@@ -47,12 +47,16 @@ struct task_spec_impl {
   task_arg args_and_returns[0];
 };
 
+/* The size of a task specification is given by the following expression. */
+#define TASK_SPEC_SIZE(NUM_ARGS, NUM_RETURNS, ARGS_VALUE_SIZE)           \
+  (sizeof(task_spec) + ((NUM_ARGS) + (NUM_RETURNS)) * sizeof(task_arg) + \
+   (ARGS_VALUE_SIZE))
+
 task_spec *alloc_task_spec(function_id func_id,
                            int64_t num_args,
                            int64_t num_returns,
                            int64_t args_value_size) {
-  int64_t size = sizeof(task_spec) +
-                 (num_args + num_returns) * sizeof(task_arg) + args_value_size;
+  int64_t size = TASK_SPEC_SIZE(num_args, num_returns, args_value_size);
   task_spec *task = malloc(size);
   memset(task, 0, size);
   task->func_id = func_id;
@@ -63,6 +67,11 @@ task_spec *alloc_task_spec(function_id func_id,
   return task;
 }
 
+int64_t task_size(task_spec *spec) {
+  return TASK_SPEC_SIZE(spec->num_args, spec->num_returns,
+                        spec->args_value_size);
+}
+
 int64_t task_num_args(task_spec *spec) {
   return spec->num_args;
 }
@@ -131,3 +140,16 @@ void free_task_spec(task_spec *spec) {
   CHECK(spec->arg_index == spec->num_args); /* Task was fully constructed */
   free(spec);
 }
+
+void write_task(int fd, task_spec *spec) {
+  write_bytes(fd, (uint8_t *) spec, task_size(spec));
+}
+
+task_spec *read_task(int fd) {
+  uint8_t *bytes;
+  int64_t length;
+  read_bytes(fd, &bytes, &length);
+  task_spec *spec = (task_spec *) bytes;
+  CHECK(task_size(spec) == length);
+  return spec;
+}
diff --git a/task.h b/task.h
index fbf557fe0..ad85540ee 100644
--- a/task.h
+++ b/task.h
@@ -1,3 +1,6 @@
+#ifndef TASK_H
+#define TASK_H
+
 /* This API specifies the task data structure. It is in C so we can
  * easily construct tasks from other languages like Python. The datastructures
  * are also defined in such a way that memory is contiguous and all pointers
@@ -24,6 +27,9 @@ task_spec *alloc_task_spec(function_id func_id,
                            int64_t num_returns,
                            int64_t args_value_size);
 
+/* Size of the task in bytes. */
+int64_t task_size(task_spec *spec);
+
 /* Getting the number of arguments and returns. */
 int64_t task_num_args(task_spec *spec);
 int64_t task_num_returns(task_spec *spec);
@@ -46,8 +52,10 @@ unique_id *task_return(task_spec *spec, int64_t ret_index);
 void free_task_spec(task_spec *spec);
 
 /* Write the task specification to a file or socket. */
-int send_task(int fd, task_spec *spec);
+void write_task(int fd, task_spec *spec);
 
 /* Read the task specification from a file or socket. It is the user's
  * responsibility to free the task after it has been used. */
-task_spec *recv_task(int fd);
+task_spec *read_task(int fd);
+
+#endif
diff --git a/test/task_tests.c b/test/task_tests.c
index d8443e68c..68a6a6537 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -1,29 +1,29 @@
 #include "greatest.h"
 
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "common.h"
 #include "task.h"
 
 SUITE(task_tests);
 
 TEST task_test(void) {
-  function_id func_id = {
-      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
-  task_spec* task = alloc_task_spec(func_id, 4, 2, 10);
+  function_id func_id = globally_unique_id();
+  task_spec *task = alloc_task_spec(func_id, 4, 2, 10);
   ASSERT(task_num_args(task) == 4);
   ASSERT(task_num_returns(task) == 2);
 
-  unique_id arg1 = {
-      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}};
+  unique_id arg1 = globally_unique_id();
   ASSERT(task_args_add_ref(task, arg1) == 0);
   ASSERT(task_args_add_val(task, (uint8_t*) "hello", 5) == 1);
-  unique_id arg2 = {
-      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
+  unique_id arg2 = globally_unique_id();
   ASSERT(task_args_add_ref(task, arg2) == 2);
   ASSERT(task_args_add_val(task, (uint8_t*) "world", 5) == 3);
 
-  unique_id ret0 = {
-      {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}};
-  unique_id ret1 = {
-      {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}};
+  unique_id ret0 = globally_unique_id();
+  unique_id ret1 = globally_unique_id();
   memcpy(task_return(task, 0), &ret0, sizeof(ret0));
   memcpy(task_return(task, 1), &ret1, sizeof(ret1));
 
@@ -41,13 +41,29 @@ TEST task_test(void) {
   PASS();
 }
 
+TEST send_task(void) {
+  function_id func_id = globally_unique_id();
+  task_spec *task = alloc_task_spec(func_id, 4, 2, 10);
+  *task_return(task, 1) = globally_unique_id();
+  int fd[2];
+  socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
+  write_task(fd[0], task);
+  task_spec *result = read_task(fd[1]);
+  ASSERT(memcmp(task, result, task_size(task)) == 0);
+  ASSERT(memcmp(task, result, task_size(result)) == 0);
+  free(task);
+  free(result);
+  PASS();
+}
+
 SUITE(task_tests) {
   RUN_TEST(task_test);
+  RUN_TEST(send_task);
 }
 
 GREATEST_MAIN_DEFS();
 
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
   GREATEST_MAIN_BEGIN();
   RUN_SUITE(task_tests);
   GREATEST_MAIN_END();

From d11161bb01a9d191888c966366a7a3185cb238c4 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 18 Sep 2016 20:47:53 -0700
Subject: [PATCH 35/91] make static libraries (#13)

---
 Makefile | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 29a7ea1ba..1f3deebe2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,22 +1,28 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty
 BUILD = build
 
 CFLAGS += -Wmissing-prototypes
 CFLAGS += -Wstrict-prototypes
 CFLAGS += -Wmissing-declarations
 
-$(BUILD)/db_tests: hiredis test/db_tests.c thirdparty/greatest.h event_loop.c state/redis.c common.c
-	$(CC) -o $@ test/db_tests.c event_loop.c state/redis.c common.c thirdparty/hiredis/libhiredis.a $(CFLAGS) -I. -Ithirdparty
+all: $(BUILD)/libcommon.a
 
-$(BUILD)/io_tests: test/io_tests.c thirdparty/greatest.h io.c
-	$(CC) -o $@ test/io_tests.c io.c $(CFLAGS) -I. -Ithirdparty
+$(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o
+	ar rcs $@ $^
 
-$(BUILD)/task_tests: test/task_tests.c task.h task.c io.h io.c common.h common.h common.c
-	$(CC) -o $@ test/task_tests.c task.c io.c common.c $(CFLAGS) -I. -Ithirdparty
+$(BUILD)/db_tests: hiredis test/db_tests.c $(BUILD)/libcommon.a
+	$(CC) -o $@ test/db_tests.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
+
+$(BUILD)/io_tests: test/io_tests.c $(BUILD)/libcommon.a
+	$(CC) -o $@ $^ $(CFLAGS)
+
+$(BUILD)/task_tests: test/task_tests.c $(BUILD)/libcommon.a
+	$(CC) -o $@ $^ $(CFLAGS)
 
 clean:
-	rm -r $(BUILD)/*
+	rm -f *.o state/*.o test/*.o
+	rm -rf $(BUILD)/*
 
 redis:
 	cd thirdparty ; bash ./build-redis.sh

From 6c6f2d047309a16f75dd73906ada943a9edc67e2 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 20 Sep 2016 17:02:56 -0700
Subject: [PATCH 36/91] Implement object table API (#16)

---
 Makefile             |    4 -
 common.h             |    2 +
 event_loop.c         |   17 +-
 event_loop.h         |    1 +
 state/object_table.h |   16 +-
 state/redis.c        |   81 ++--
 state/redis.h        |   26 +-
 state/task_queue.h   |   33 ++
 state/task_table.h   |   13 +
 test/db_tests.c      |  100 +++-
 thirdparty/uthash.h  | 1074 ++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 1300 insertions(+), 67 deletions(-)
 create mode 100644 state/task_queue.h
 create mode 100644 state/task_table.h
 create mode 100644 thirdparty/uthash.h

diff --git a/Makefile b/Makefile
index 1f3deebe2..29b7befd9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,6 @@ CC = gcc
 CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty
 BUILD = build
 
-CFLAGS += -Wmissing-prototypes
-CFLAGS += -Wstrict-prototypes
-CFLAGS += -Wmissing-declarations
-
 all: $(BUILD)/libcommon.a
 
 $(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o
diff --git a/common.h b/common.h
index 6dc360119..96e6402f0 100644
--- a/common.h
+++ b/common.h
@@ -38,4 +38,6 @@ unique_id globally_unique_id(void);
  * UNIQUE_ID_SIZE + 1 */
 char *sha1_to_hex(const unsigned char *sha1, char *buffer);
 
+typedef unique_id object_id;
+
 #endif
diff --git a/event_loop.c b/event_loop.c
index ebc6ebc13..0fd79e6af 100644
--- a/event_loop.c
+++ b/event_loop.c
@@ -12,6 +12,13 @@ void event_loop_init(event_loop *loop) {
   utarray_new(loop->waiting, &poll_icd);
 }
 
+/* Free the space associated to the event loop.
+ * Does not free the event_loop datastructure itself. */
+void event_loop_free(event_loop *loop) {
+  utarray_free(loop->items);
+  utarray_free(loop->waiting);
+}
+
 /* Add a new file descriptor fd to the event loop.
  * This function sets a user defined type and id for the file descriptor
  * which can be queried using event_loop_type and event_loop_id. The parameter
@@ -83,9 +90,9 @@ void *event_loop_get_data(event_loop *loop, int64_t index) {
   return item->data;
 }
 
-/* Free the space associated to the event loop.
- * Does not free the event_loop datastructure itself. */
-void event_loop_free(event_loop *loop) {
-  utarray_free(loop->items);
-  utarray_free(loop->waiting);
+/* Return the type of connection. */
+int event_loop_type(event_loop *loop, int64_t index) {
+  event_loop_item *item =
+      (event_loop_item *) utarray_eltptr(loop->items, index);
+  return item->type;
 }
diff --git a/event_loop.h b/event_loop.h
index 0903bb9d4..a96ec4643 100644
--- a/event_loop.h
+++ b/event_loop.h
@@ -34,5 +34,6 @@ int64_t event_loop_size(event_loop *loop);
 struct pollfd *event_loop_get(event_loop *loop, int64_t index);
 void event_loop_set_data(event_loop *loop, int64_t index, void *data);
 void *event_loop_get_data(event_loop *loop, int64_t index);
+int event_loop_type(event_loop *loop, int64_t index);
 
 #endif
diff --git a/state/object_table.h b/state/object_table.h
index 6b4d62e4b..7c00ab2ba 100644
--- a/state/object_table.h
+++ b/state/object_table.h
@@ -1,15 +1,21 @@
 #include "common.h"
 #include "db.h"
 
-typedef void (*lookup_callback)(void *);
+/* The callback that is called when the result of a lookup
+ * in the object table comes back. The callback should free
+ * the manager_vector array, but NOT the strings it points to. */
+typedef void (*lookup_callback)(object_id object_id,
+                                int manager_count,
+                                const char *manager_vector[]);
 
 /* Register a new object with the directory. */
-void object_table_add(db_conn *db, unique_id object_id);
+/* TODO(pcm): Retry, print for each attempt. */
+void object_table_add(db_conn *db, object_id object_id);
 
-/* Remove object from the directory */
-void object_table_remove(db_conn *db, unique_id object_id);
+/* Remove object from the directory. */
+void object_table_remove(db_conn *db, object_id object_id, const char *manager);
 
 /* Look up entry from the directory */
 void object_table_lookup(db_conn *db,
-                         unique_id object_id,
+                         object_id object_id,
                          lookup_callback callback);
diff --git a/state/redis.c b/state/redis.c
index a8029a063..c781b81ef 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -5,6 +5,7 @@
 #include "common.h"
 #include "db.h"
 #include "object_table.h"
+#include "task_queue.h"
 #include "event_loop.h"
 #include "redis.h"
 
@@ -88,12 +89,13 @@ void db_connect(const char *address,
     }
     freeReplyObject(reply);
   }
-  redisFree(context);
 
   db->client_type = strdup(client_type);
   db->client_id = num_clients;
   db->reading = 0;
   db->writing = 0;
+  db->service_cache = NULL;
+  db->sync_context = context;
 
   /* Establish async connection */
   db->context = redisAsyncConnect(address, port);
@@ -102,6 +104,18 @@ void db_connect(const char *address,
   db->context->data = (void *) db;
 }
 
+void db_disconnect(db_conn *db) {
+  redisFree(db->sync_context);
+  redisAsyncFree(db->context);
+  service_cache_entry *e, *tmp;
+  HASH_ITER(hh, db->service_cache, e, tmp) {
+    free(e->addr);
+    HASH_DEL(db->service_cache, e);
+    free(e);
+  }
+  free(db->client_type);
+}
+
 void db_event(db_conn *db) {
   if (db->reading) {
     redisAsyncHandleRead(db->context);
@@ -137,51 +151,62 @@ void object_table_add(db_conn *db, unique_id object_id) {
   static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
   sha1_to_hex(&object_id.id[0], &hex_object_id[0]);
   redisAsyncCommand(db->context, NULL, NULL, "SADD obj:%s %d",
-                    &hex_object_id[0], 0);
+                    &hex_object_id[0], db->client_id);
   if (db->context->err) {
     LOG_REDIS_ERR(db->context, "could not add object_table entry");
   }
 }
 
-void object_table_lookup_callback(redisAsyncContext *c,
-                                  void *r,
-                                  void *privdata) {
+void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata) {
+  db_conn *db = c->data;
+  lookup_callback_data *cb_data = privdata;
   redisReply *reply = r;
   if (reply == NULL)
     return;
-  lookup_callback callback = privdata;
-  char *str = malloc(reply->len);
-  memcpy(str, reply->str, reply->len);
-  callback(str);
-}
-
-void object_table_fetch_addr_port(redisAsyncContext *c,
-                                  void *r,
-                                  void *privdata) {
-  redisReply *reply = r;
-  if (reply == NULL)
-    return;
-  long long manager_id = -1;
-  if (reply->type == REDIS_REPLY_STRING) {
-    manager_id = strtoll(reply->str, NULL, 10);
-  } else if (reply->type != REDIS_REPLY_INTEGER) {
-    manager_id = reply->integer;
+  int *result = malloc(reply->elements * sizeof(int));
+  int64_t manager_count = reply->elements;
+  if (reply->type == REDIS_REPLY_ARRAY) {
+    for (int j = 0; j < reply->elements; j++) {
+      CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
+      result[j] = atoi(reply->element[j]->str);
+      service_cache_entry *entry;
+      HASH_FIND_INT(db->service_cache, &result[j], entry);
+      if (!entry) {
+        redisReply *reply = redisCommand(db->sync_context, "HGET %s %lld",
+                                         db->client_type, result[j]);
+        CHECK(reply->type == REDIS_REPLY_STRING);
+        entry = malloc(sizeof(service_cache_entry));
+        entry->service_id = result[j];
+        entry->addr = strdup(reply->str);
+        HASH_ADD_INT(db->service_cache, service_id, entry);
+        freeReplyObject(reply);
+      }
+    }
   } else {
     LOG_ERR("expected integer or string, received type %d", reply->type);
     exit(-1);
   }
-  db_conn *db = c->data;
-  redisAsyncCommand(db->context, object_table_lookup_callback, privdata,
-                    "HGET %s %lld", db->client_type, manager_id);
+  const char **manager_vector = malloc(manager_count * sizeof(char *));
+  for (int j = 0; j < manager_count; ++j) {
+    service_cache_entry *entry;
+    HASH_FIND_INT(db->service_cache, &result[j], entry);
+    manager_vector[j] = entry->addr;
+  }
+  cb_data->callback(cb_data->object_id, manager_count, manager_vector);
+  free(privdata);
+  free(result);
 }
 
 void object_table_lookup(db_conn *db,
-                         unique_id object_id,
+                         object_id object_id,
                          lookup_callback callback) {
   static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
   sha1_to_hex(&object_id.id[0], &hex_object_id[0]);
-  redisAsyncCommand(db->context, object_table_fetch_addr_port, callback,
-                    "SRANDMEMBER obj:%s", &hex_object_id[0]);
+  lookup_callback_data *cb_data = malloc(sizeof(lookup_callback_data));
+  cb_data->callback = callback;
+  cb_data->object_id = object_id;
+  redisAsyncCommand(db->context, object_table_get_entry, cb_data,
+                    "SMEMBERS obj:%s", &hex_object_id[0]);
   if (db->context->err) {
     LOG_REDIS_ERR(db->context, "error in object_table lookup");
   }
diff --git a/state/redis.h b/state/redis.h
index 471044b06..ad8d5cbcf 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -3,6 +3,16 @@
 
 #include "hiredis/hiredis.h"
 #include "hiredis/async.h"
+#include "uthash.h"
+
+typedef struct {
+  /* Unique ID for this service. */
+  int service_id;
+  /* IP address and port of this service. */
+  const char *addr;
+  /* Handle for the uthash table. */
+  UT_hash_handle hh;
+} service_cache_entry;
 
 struct db_conn_impl {
   /* String that identifies this client type. */
@@ -15,11 +25,21 @@ struct db_conn_impl {
   int reading, writing;
   /* The event loop this global state store connection is part of. */
   event_loop *loop;
+  /* Cache for the IP addresses of services. */
+  service_cache_entry *service_cache;
+  /* Redis context for synchronous connections.
+   * Should only be used very rarely, since it is not asynchronous. */
+  redisContext *sync_context;
 };
 
-void object_table_fetch_addr_port(redisAsyncContext *c,
-                                  void *r,
-                                  void *privdata);
+typedef struct {
+  /* The callback that will be called. */
+  lookup_callback callback;
+  /* Object ID that is looked up. */
+  object_id object_id;
+} lookup_callback_data;
+
+void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata);
 
 void object_table_lookup_callback(redisAsyncContext *c,
                                   void *r,
diff --git a/state/task_queue.h b/state/task_queue.h
new file mode 100644
index 000000000..0226c501b
--- /dev/null
+++ b/state/task_queue.h
@@ -0,0 +1,33 @@
+#ifndef TASK_QUEUE_H
+#define TASK_QUEUE_H
+
+#include "db.h"
+#include "task.h"
+
+/* The task ID is a deterministic hash of the function ID that
+ * the task executes and the argument IDs or argument values */
+typedef unique_id task_id;
+
+/* The task instance ID is a generated, globally unique ID that
+ * identifies this particular execution of the task. */
+typedef unique_id task_iid;
+
+/* The node id is an identifier for the node the task is
+ * scheduled on */
+typedef unique_id node_id;
+
+/* Callback for subscribing to the task queue. The callback receives a
+ * pointer to the task instance ID and a pointer to the task specification. */
+typedef void (*task_queue_callback)(task_iid *task_iid, task_spec *task);
+
+/* Submit task to the global scheduler. */
+void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task);
+
+/* Submit task to a local scheduler based on the decision made by the global
+ * scheduler. */
+void task_queue_schedule_task(db_conn *db, task_iid task_iid, node_id node);
+
+/* Subscribe to task queue. */
+void task_queue_register_callback(db_conn *db, task_queue_callback callback);
+
+#endif
diff --git a/state/task_table.h b/state/task_table.h
new file mode 100644
index 000000000..64285da67
--- /dev/null
+++ b/state/task_table.h
@@ -0,0 +1,13 @@
+#ifndef TASK_TABLE_H
+#define TASK_TABLE_H
+
+#include "db.h"
+#include "task.h"
+
+/* Add task to the task table, handle errors here. */
+status task_table_add_task(db_conn *db, task_iid task_iid, task_spec *task);
+
+/* Get specific task from the task table. */
+status task_table_get_task(db_conn *db, task_iid task_iid, task_spec *task);
+
+#endif /* TASK_TABLE_H */
diff --git a/test/db_tests.c b/test/db_tests.c
index b3a0d582e..0788345f2 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -6,37 +6,61 @@
 #include "state/db.h"
 #include "state/object_table.h"
 #include "state/redis.h"
+#include "task.h"
 
 SUITE(db_tests);
 
 int lookup_successful = 0;
 const char *manager_addr = "127.0.0.1";
-int manager_port = 12345;
-char received_addr[16] = {0};
-char received_port[6] = {0};
+int manager_port1 = 12345;
+int manager_port2 = 12346;
+char received_addr1[16] = {0};
+char received_port1[6] = {0};
+char received_addr2[16] = {0};
+char received_port2[6] = {0};
 
-void test_callback(void *userdata);
-
-void test_callback(void *userdata) {
-  char *reply = userdata;
+/* This is for synchronizing to make sure both entries have been written. */
+void sync_test_callback(object_id object_id,
+                        int manager_count,
+                        const char *manager_vector[]) {
   lookup_successful = 1;
-  if (!reply ||
-      sscanf(reply, "%15[0-9.]:%5[0-9]", received_addr, received_port) != 2) {
-    assert(0);
+  free(manager_vector);
+}
+
+/* This performs the actual test. */
+void test_callback(object_id object_id,
+                   int manager_count,
+                   const char *manager_vector[]) {
+  CHECK(manager_count == 2);
+  lookup_successful = 1;
+  if (!manager_vector[0] ||
+      sscanf(manager_vector[0], "%15[0-9.]:%5[0-9]", received_addr1,
+             received_port1) != 2) {
+    CHECK(0);
   }
-  free(reply);
+  if (!manager_vector[1] ||
+      sscanf(manager_vector[1], "%15[0-9.]:%5[0-9]", received_addr2,
+             received_port2) != 2) {
+    CHECK(0);
+  }
+  free(manager_vector);
 }
 
 TEST object_table_lookup_test(void) {
   event_loop loop;
   event_loop_init(&loop);
-  db_conn conn;
-  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port,
-             &conn);
-  int64_t index = db_attach(&conn, &loop, 0);
-  unique_id id = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
-  object_table_add(&conn, id);
-  object_table_lookup(&conn, id, test_callback);
+  db_conn conn1;
+  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port1,
+             &conn1);
+  db_conn conn2;
+  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port2,
+             &conn2);
+  int64_t index1 = db_attach(&conn1, &loop, 0);
+  int64_t index2 = db_attach(&conn2, &loop, 1);
+  unique_id id = globally_unique_id();
+  object_table_add(&conn1, id);
+  object_table_add(&conn2, id);
+  object_table_lookup(&conn1, id, sync_test_callback);
   while (!lookup_successful) {
     int num_ready = event_loop_poll(&loop);
     if (num_ready < 0) {
@@ -46,18 +70,50 @@ TEST object_table_lookup_test(void) {
       struct pollfd *waiting = event_loop_get(&loop, i);
       if (waiting->revents == 0)
         continue;
-      if (i == index) {
-        db_event(&conn);
+      if (i == index1) {
+        db_event(&conn1);
+      }
+      if (i == index2) {
+        db_event(&conn2);
       }
     }
   }
-  ASSERT_STR_EQ(&received_addr[0], manager_addr);
-  ASSERT_EQ(atoi(received_port), manager_port);
+  lookup_successful = 0;
+  object_table_lookup(&conn1, id, test_callback);
+  while (!lookup_successful) {
+    int num_ready = event_loop_poll(&loop);
+    if (num_ready < 0) {
+      exit(-1);
+    }
+    for (int i = 0; i < event_loop_size(&loop); ++i) {
+      struct pollfd *waiting = event_loop_get(&loop, i);
+      if (waiting->revents == 0)
+        continue;
+      if (i == index1) {
+        db_event(&conn1);
+      }
+      if (i == index2) {
+        db_event(&conn2);
+      }
+    }
+  }
+  int port1 = atoi(received_port1);
+  int port2 = atoi(received_port2);
+  ASSERT_STR_EQ(&received_addr1[0], manager_addr);
+  ASSERT((port1 == manager_port1 && port2 == manager_port2) ||
+         (port2 == manager_port1 && port1 == manager_port2));
+
+  db_disconnect(&conn1);
+  db_disconnect(&conn2);
+
+  event_loop_free(&loop);
+
   PASS();
 }
 
 SUITE(db_tests) {
   RUN_TEST(object_table_lookup_test);
+  /* RUN_TEST(task_queue_test); */
 }
 
 GREATEST_MAIN_DEFS();
diff --git a/thirdparty/uthash.h b/thirdparty/uthash.h
new file mode 100644
index 000000000..45d1f9fc1
--- /dev/null
+++ b/thirdparty/uthash.h
@@ -0,0 +1,1074 @@
+/*
+Copyright (c) 2003-2016, Troy D. Hanson     http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#define UTHASH_VERSION 2.0.1
+
+#include <string.h>   /* memcmp,strlen */
+#include <stddef.h>   /* ptrdiff_t */
+#include <stdlib.h>   /* exit() */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#if defined(_MSC_VER)   /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  char **_da_dst = (char**)(&(dst));                                             \
+  *_da_dst = (char*)(src);                                                       \
+} while (0)
+#else
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  (dst) = DECLTYPE(dst)(src);                                                    \
+} while (0)
+#endif
+
+/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */
+#if defined(_WIN32)
+#if defined(_MSC_VER) && _MSC_VER >= 1600
+#include <stdint.h>
+#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+#elif defined(__GNUC__) && !defined(__VXWORKS__)
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn                        */
+#endif
+#ifndef uthash_strlen
+#define uthash_strlen(s) strlen(s)
+#endif
+#ifndef uthash_memcmp
+#define uthash_memcmp(a,b,n) memcmp(a,b,n)
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhp */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+/* calculate the hash handle from element address elp */
+#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho)))
+
+#define HASH_VALUE(keyptr,keylen,hashv)                                          \
+do {                                                                             \
+  HASH_FCN(keyptr, keylen, hashv);                                               \
+} while (0)
+
+#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out)                 \
+do {                                                                             \
+  (out) = NULL;                                                                  \
+  if (head) {                                                                    \
+    unsigned _hf_bkt;                                                            \
+    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt);                  \
+    if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) {                         \
+      HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
+do {                                                                             \
+  unsigned _hf_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen, _hf_hashv);                                         \
+  HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out);               \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL)
+#define HASH_BLOOM_MAKE(tbl)                                                     \
+do {                                                                             \
+  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
+  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
+  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
+  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
+  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl)                                                     \
+do {                                                                             \
+  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U)))
+#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U)))
+
+#define HASH_BLOOM_ADD(tbl,hashv)                                                \
+  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#define HASH_BLOOM_TEST(tbl,hashv)                                               \
+  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#define HASH_BLOOM_BYTELEN 0U
+#endif
+
+#define HASH_MAKE_TABLE(hh,head)                                                 \
+do {                                                                             \
+  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
+                  sizeof(UT_hash_table));                                        \
+  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
+  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
+  (head)->hh.tbl->tail = &((head)->hh);                                          \
+  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
+  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
+  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
+  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
+  memset((head)->hh.tbl->buckets, 0,                                             \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
+  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \
+do {                                                                             \
+  (replaced) = NULL;                                                             \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) {                                                                \
+     HASH_DELETE(hh, head, replaced);                                            \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \
+do {                                                                             \
+  (replaced) = NULL;                                                             \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) {                                                                \
+     HASH_DELETE(hh, head, replaced);                                            \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \
+} while (0)
+
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
+do {                                                                             \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \
+} while (0)
+
+#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn)    \
+do {                                                                             \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \
+} while (0)
+
+#define HASH_APPEND_LIST(hh, head, add)                                          \
+do {                                                                             \
+  (add)->hh.next = NULL;                                                         \
+  (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);           \
+  (head)->hh.tbl->tail->next = (add);                                            \
+  (head)->hh.tbl->tail = &((add)->hh);                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \
+do { /* add to the hash, keeping the app-order list sorted by cmpfcn */         \
+  unsigned _ha_bkt;                                                              \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (char*) (keyptr);                                              \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) {                         /* empty hash: add becomes head */      \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    (head) = (add);                                                              \
+    HASH_MAKE_TABLE(hh, head);                                                   \
+  } else {                                                                       \
+    void *_hs_iter = (head);             /* element ptr, not a handle */         \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    do {                                 /* stop at first element > add */       \
+      if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0)                             \
+        break;                                                                   \
+    } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next));         \
+    if (_hs_iter) {                      /* insert add before _hs_iter */        \
+      (add)->hh.next = _hs_iter;                                                 \
+      if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) {     \
+        HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add);              \
+      } else {                                                                   \
+        (head) = (add);                                                          \
+      }                                                                          \
+      HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add);                      \
+    } else {                             /* nothing larger: append add */        \
+      HASH_APPEND_LIST(hh, head, add);                                           \
+    }                                                                            \
+  }                                                                              \
+  (head)->hh.tbl->num_items++;                                                   \
+  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
+  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
+  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
+  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
+  HASH_FSCK(hh, head);                                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn)             \
+do { /* ordered add for callers that have not precomputed the hash value */     \
+  unsigned _hs_hashv;                   /* set by HASH_VALUE, used as hashval */ \
+  HASH_VALUE(keyptr, keylen_in, _hs_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) /* key is the named field of add */
+
+#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn)                 \
+  HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) /* key is the named field of add */
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add)        \
+do { /* add to the hash; add goes last in the app-order list */                 \
+  unsigned _ha_bkt;                                                              \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (char*) (keyptr);                                              \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) {                         /* empty hash: add becomes head */      \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    (head) = (add);                                                              \
+    HASH_MAKE_TABLE(hh, head);                                                   \
+  } else {                               /* append after current tail */         \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    HASH_APPEND_LIST(hh, head, add);                                             \
+  }                                                                              \
+  (head)->hh.tbl->num_items++;                                                   \
+  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
+  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
+  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
+  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
+  HASH_FSCK(hh, head);                                                           \
+} while (0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
+do { /* unordered add for callers that have not precomputed the hash value */   \
+  unsigned _ha_hashv;                   /* set by HASH_VALUE, used as hashval */ \
+  HASH_VALUE(keyptr, keylen_in, _ha_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add);      \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add)            \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) /* key is the named field of add */
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
+  HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) /* key is the named field of add */
+
+#define HASH_TO_BKT(hashv,num_bkts,bkt)                                          \
+do { /* bkt = hashv masked by num_bkts-1 (== hashv % num_bkts for powers of 2) */ \
+  bkt = ((hashv) & ((num_bkts) - 1U));                                           \
+} while (0)
+
+/* Delete "delptr" from the hash table, using "the usual" patch-up
+ * process for the app-order doubly-linked list.
+ * The use of _hd_hh_del below deserves special explanation.
+ * These references used to be expressed using (delptr), but that led to a
+ * bug if someone used the same symbol for the head and deletee, like
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) below
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to
+ * copy the deletee pointer, so that the later references go via that
+ * scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr)                                              \
+do {                                                                             \
+    struct UT_hash_handle *_hd_hh_del;   /* scratch copy, see above */           \
+    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
+        uthash_free((head)->hh.tbl->buckets,                                     \
+                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
+        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
+        head = NULL;                     /* sole item: hash is now empty */      \
+    } else {                                                                     \
+        unsigned _hd_bkt;                                                        \
+        _hd_hh_del = &((delptr)->hh);                                            \
+        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
+            (head)->hh.tbl->tail =       /* deleting tail: back up */            \
+                (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +               \
+                (head)->hh.tbl->hho);                                            \
+        }                                                                        \
+        if ((delptr)->hh.prev != NULL) { /* unlink from predecessor */           \
+            ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +                  \
+                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
+        } else {                         /* deleting head: advance it */         \
+            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
+        }                                                                        \
+        if (_hd_hh_del->next != NULL) {  /* fix successor's prev */              \
+            ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next +                     \
+                    (head)->hh.tbl->hho))->prev =                                \
+                    _hd_hh_del->prev;                                            \
+        }                                                                        \
+        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
+        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
+        (head)->hh.tbl->num_items--;                                             \
+    }                                                                            \
+    HASH_FSCK(hh,head);                                                          \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_REPLACE/HASH_DEL for a handle field named "hh" */
+#define HASH_FIND_STR(head,findstr,out)                                          \
+    HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add)                                          \
+    HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen((add)->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen((add)->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out)                                          \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add)                                          \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out)                                          \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add)                                          \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr)                                                    \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developers only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head)                                                       \
+do {                                                                             \
+    struct UT_hash_handle *_thh;                                                 \
+    if (head) {                                                                  \
+        unsigned _bkt_i;                                                         \
+        unsigned _count;                                                         \
+        char *_prev;                                                             \
+        _count = 0;                      /* pass 1: walk every bucket chain */   \
+        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
+            unsigned _bkt_count = 0;                                             \
+            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
+            _prev = NULL;                                                        \
+            while (_thh) {                                                       \
+               if (_prev != (char*)(_thh->hh_prev)) {                            \
+                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
+                    (void*)_thh->hh_prev, (void*)_prev );                        \
+               }                                                                 \
+               _bkt_count++;                                                     \
+               _prev = (char*)(_thh);                                            \
+               _thh = _thh->hh_next;                                             \
+            }                                                                    \
+            _count += _bkt_count;                                                \
+            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
+               HASH_OOPS("invalid bucket count %u, actual %u\n",                 \
+                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
+            }                                                                    \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid hh item count %u, actual %u\n",                   \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+        /* traverse hh in app order; check next/prev integrity, count */         \
+        _count = 0;                                                              \
+        _prev = NULL;                                                            \
+        _thh =  &(head)->hh;                                                     \
+        while (_thh) {                                                           \
+           _count++;                                                             \
+           if (_prev !=(char*)(_thh->prev)) {                                    \
+              HASH_OOPS("invalid prev %p, actual %p\n",                          \
+                    (void*)_thh->prev, (void*)_prev );                           \
+           }                                                                     \
+           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
+           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
+                                  (head)->hh.tbl->hho) : NULL );                 \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid app item count %u, actual %u\n",                  \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+    }                                                                            \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, each added key is written, prefixed by its
+ * length, to the descriptor HASH_EMIT_KEYS is defined as (for tuning the hash function).
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
+do {                                                                             \
+    unsigned _klen = (unsigned)(fieldlen); /* evaluate fieldlen exactly once */  \
+    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
+    write(HASH_EMIT_KEYS, (keyptr), (unsigned long)_klen);                       \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* default to Jenkins' hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6. Note ((x<<5)+x)=x*33. */
+#define HASH_BER(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _hb_keylen=(unsigned)(keylen);                                        \
+  const unsigned char *_hb_key=(const unsigned char*)(key);                      \
+  (hashv) = 0;                                                                   \
+  while (_hb_keylen-- != 0U) {           /* hashv = hashv*33 + next byte */      \
+      (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                         \
+  }                                                                              \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,hashv)                                               \
+do { /* shift-add-xor over each key byte; keylen is evaluated once */           \
+  unsigned _sx_i, _sx_len = (unsigned)(keylen);                                  \
+  const unsigned char *_hs_key=(const unsigned char*)(key);                      \
+  (hashv) = 0;                                                                   \
+  for(_sx_i=0; _sx_i < _sx_len; _sx_i++) {                                       \
+      (hashv) ^= ((hashv) << 5) + ((hashv) >> 2) + _hs_key[_sx_i];               \
+  }                                                                              \
+} while (0)
+/* FNV-1a variation: xor in each key byte, then multiply; keylen is evaluated once */
+#define HASH_FNV(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _fn_i, _fn_len = (unsigned)(keylen);                                  \
+  const unsigned char *_hf_key=(const unsigned char*)(key);                      \
+  (hashv) = 2166136261U;                                                         \
+  for(_fn_i=0; _fn_i < _fn_len; _fn_i++) {                                       \
+      (hashv) = (hashv) ^ _hf_key[_fn_i];                                        \
+      (hashv) = (hashv) * 16777619U;                                             \
+  }                                                                              \
+} while (0)
+
+#define HASH_OAT(key,keylen,hashv)                                               \
+do { /* per-byte add/shift/xor rounds, then a final mix; keylen evaluated once */ \
+  unsigned _ho_i, _ho_len = (unsigned)(keylen);                                  \
+  const unsigned char *_ho_key=(const unsigned char*)(key);                      \
+  (hashv) = 0;                                                                   \
+  for(_ho_i=0; _ho_i < _ho_len; _ho_i++) {                                       \
+      (hashv) += _ho_key[_ho_i];                                                 \
+      (hashv) += ((hashv) << 10);                                                \
+      (hashv) ^= ((hashv) >> 6);                                                 \
+  }                                                                              \
+  (hashv) += ((hashv) << 3);                                                     \
+  (hashv) ^= ((hashv) >> 11);                                                    \
+  (hashv) += ((hashv) << 15);                                                    \
+} while (0)
+
+#define HASH_JEN_MIX(a,b,c)                                                      \
+do { /* mixes a, b, c in place; each is used many times, so pass plain lvalues */ \
+  a -= b; a -= c; a ^= ( c >> 13 );                                              \
+  b -= c; b -= a; b ^= ( a << 8 );                                               \
+  c -= a; c -= b; c ^= ( b >> 13 );                                              \
+  a -= b; a -= c; a ^= ( c >> 12 );                                              \
+  b -= c; b -= a; b ^= ( a << 16 );                                              \
+  c -= a; c -= b; c ^= ( b >> 5 );                                               \
+  a -= b; a -= c; a ^= ( c >> 3 );                                               \
+  b -= c; b -= a; b ^= ( a << 10 );                                              \
+  c -= a; c -= b; c ^= ( b >> 15 );                                              \
+} while (0)
+
+#define HASH_JEN(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _hj_i,_hj_j,_hj_k;            /* two mix words, bytes left */         \
+  unsigned const char *_hj_key=(unsigned const char*)(key);                      \
+  hashv = 0xfeedbeefu;                                                           \
+  _hj_i = _hj_j = 0x9e3779b9u;                                                   \
+  _hj_k = (unsigned)(keylen);                                                    \
+  while (_hj_k >= 12U) {                 /* consume 12 key bytes per pass */     \
+    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
+        + ( (unsigned)_hj_key[2] << 16 )                                         \
+        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
+    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
+        + ( (unsigned)_hj_key[6] << 16 )                                         \
+        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
+        + ( (unsigned)_hj_key[10] << 16 )                                        \
+        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
+    /* scramble the three words after each 12-byte block */                      \
+     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
+                                                                                 \
+     _hj_key += 12;                                                              \
+     _hj_k -= 12U;                                                               \
+  }                                                                              \
+  hashv += (unsigned)(keylen);           /* fold in the total key length */      \
+  switch ( _hj_k ) {                     /* mix in the 0..11 leftover bytes */   \
+     case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */        \
+     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */        \
+     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */        \
+     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */        \
+     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */        \
+     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */        \
+     case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */        \
+     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */        \
+     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */        \
+     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */        \
+     case 1:  _hj_i += _hj_key[0];                                               \
+  }                                                                              \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
+} while (0)
+
+/* The Paul Hsieh hash function; get16bits(d) loads two key bytes as a 16-bit value */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif /* direct 16-bit load on the compilers/targets listed above */
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif /* otherwise assemble byte-wise, d[0] as the low byte */
+#define HASH_SFH(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
+  uint32_t _sfh_tmp, _sfh_len = (uint32_t)(keylen);                              \
+  /* low 2 bits = trailing bytes; the rest = number of 4-byte blocks */          \
+  unsigned _sfh_rem = _sfh_len & 3U;                                             \
+  _sfh_len >>= 2;                                                                \
+  hashv = 0xcafebabeu;                                                           \
+                                                                                 \
+  /* Main loop */                                                                \
+  for (;_sfh_len > 0U; _sfh_len--) {                                             \
+    hashv    += get16bits (_sfh_key);                                            \
+    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
+    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
+    _sfh_key += 2U*sizeof (uint16_t);                                            \
+    hashv    += hashv >> 11;                                                     \
+  }                                                                              \
+                                                                                 \
+  /* Handle end cases */                                                         \
+  switch (_sfh_rem) {                                                            \
+    case 3: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 16;                                                \
+            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18;              \
+            hashv += hashv >> 11;                                                \
+            break;                                                               \
+    case 2: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 11;                                                \
+            hashv += hashv >> 17;                                                \
+            break;                                                               \
+    case 1: hashv += *_sfh_key;                                                  \
+            hashv ^= hashv << 10;                                                \
+            hashv += hashv >> 1;                                                 \
+  }                                                                              \
+                                                                                 \
+    /* Force "avalanching" of final 127 bits */                                  \
+    hashv ^= hashv << 3;                                                         \
+    hashv += hashv >> 5;                                                         \
+    hashv ^= hashv << 4;                                                         \
+    hashv += hashv >> 17;                                                        \
+    hashv ^= hashv << 25;                                                        \
+    hashv += hashv >> 6;                                                         \
+} while (0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPUs' (x86, x86_64) tolerance for unaligned reads.
+ * On other types of CPU (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPUs where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
+ *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) ((p)[i])
+#else /* non intel */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)(p) & 3UL) == 0UL)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)(p) & 3UL) == 1UL)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)(p) & 3UL) == 2UL)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)(p) & 3UL) == 3UL)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) /* aligned word containing p */
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :                 \
+                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE((p)+(i)) : \
+                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO((p)+(i)) :  \
+                                                      MUR_ONE_THREE((p)+(i)))))
+#endif /* MUR_GETBLOCK(p,i): 32-bit block i relative to p, at any alignment */
+#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) /* rotate left by r; assumes 32-bit unsigned x and 0 < r < 32 */
+#define MUR_FMIX(_h) \
+do {                 /* final mix of _h, in place: xor-shift / multiply rounds */ \
+  _h ^= _h >> 16;    \
+  _h *= 0x85ebca6bu; \
+  _h ^= _h >> 13;    \
+  _h *= 0xc2b2ae35u; \
+  _h ^= _h >> 16;    \
+} while (0)
+
+#define HASH_MUR(key,keylen,hashv)                                     \
+do {                                                                   \
+  const uint8_t *_mur_data = (const uint8_t*)(key);                    \
+  const int _mur_nblocks = (int)(keylen) / 4;                          \
+  uint32_t _mur_h1 = 0xf88D5353u;                                      \
+  uint32_t _mur_c1 = 0xcc9e2d51u;                                      \
+  uint32_t _mur_c2 = 0x1b873593u;                                      \
+  uint32_t _mur_k1 = 0;                                                \
+  const uint8_t *_mur_tail;                                            \
+  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
+  int _mur_i;       /* negative index back from the end of the blocks */ \
+  for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) {                   \
+    _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                        \
+    _mur_k1 *= _mur_c1;                                                \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
+    _mur_k1 *= _mur_c2;                                                \
+    /* fold the scrambled block into the running hash */               \
+    _mur_h1 ^= _mur_k1;                                                \
+    _mur_h1 = MUR_ROTL32(_mur_h1,13);                                  \
+    _mur_h1 = (_mur_h1*5U) + 0xe6546b64u;                              \
+  }                                                                    \
+  _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4));          \
+  _mur_k1=0;                                                           \
+  switch((keylen) & 3U) {          /* 1..3 bytes left after the blocks */ \
+    case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
+    case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8;  /* FALLTHROUGH */ \
+    case 1: _mur_k1 ^= (uint32_t)_mur_tail[0];                         \
+    _mur_k1 *= _mur_c1;                                                \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
+    _mur_k1 *= _mur_c2;                                                \
+    _mur_h1 ^= _mur_k1;                                                \
+  }                                                                    \
+  _mur_h1 ^= (uint32_t)(keylen);                                       \
+  MUR_FMIX(_mur_h1);                                                   \
+  hashv = _mur_h1;                                                     \
+} while (0)
+#endif  /* HASH_USING_NO_STRICT_ALIASING */
+
+/* iterate over items in a known bucket to find desired item; out is NULL if absent */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out)               \
+do {                                                                             \
+  if ((head).hh_head != NULL) {          /* start at the bucket's first item */  \
+    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head));                     \
+  } else {                                                                       \
+    (out) = NULL;                                                                \
+  }                                                                              \
+  while ((out) != NULL) {                                                        \
+    if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) {       \
+      if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) {                \
+        break;                           /* hash, length and bytes all match */  \
+      }                                                                          \
+    }                                                                            \
+    if ((out)->hh.hh_next != NULL) {     /* advance along the bucket chain */    \
+      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next));                \
+    } else {                                                                     \
+      (out) = NULL;                                                              \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+/* push addhh onto the front of bucket `head`; expand the table when the chain gets too long */
+#define HASH_ADD_TO_BKT(head,addhh)                                              \
+do {                                                                             \
+ (head).count++;                                                                 \
+ (addhh)->hh_next = (head).hh_head;                                              \
+ (addhh)->hh_prev = NULL;                                                        \
+ if ((head).hh_head != NULL) { (head).hh_head->hh_prev = (addhh); }              \
+ (head).hh_head = (addhh);                                                       \
+ if (((head).count >= (((head).expand_mult+1U) * HASH_BKT_CAPACITY_THRESH))      \
+     && ((addhh)->tbl->noexpand != 1U)) {                                        \
+       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
+ }                                                                               \
+} while (0)
+
+/* unlink hh_del from bucket `head`; one statement (the hh argument is not used by the body) */
+#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
+do {                                                                             \
+    (head).count--;                                                              \
+    if ((head).hh_head == (hh_del)) {                                            \
+      (head).hh_head = (hh_del)->hh_next;                                        \
+    }                                                                            \
+    if ((hh_del)->hh_prev != NULL) {                                             \
+      (hh_del)->hh_prev->hh_next = (hh_del)->hh_next;                            \
+    }                                                                            \
+    if ((hh_del)->hh_next != NULL) { (hh_del)->hh_next->hh_prev = (hh_del)->hh_prev; } \
+} while (0)
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ * If two expansions in a row leave more than half the items in nonideal
+ * chain positions, tbl->noexpand is set and adds stop triggering expansion. */
+#define HASH_EXPAND_BUCKETS(tbl)                                                 \
+do {                                                                             \
+    unsigned _he_bkt;                                                            \
+    unsigned _he_bkt_i;                                                          \
+    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
+    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
+    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
+             2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));            \
+    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
+    memset(_he_new_buckets, 0,                                                   \
+            2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));             \
+    tbl->ideal_chain_maxlen =                                                    \
+       (tbl->num_items >> (tbl->log2_num_buckets+1U)) +                          \
+       (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);        \
+    tbl->nonideal_items = 0;                                                     \
+    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
+    {                                                                            \
+        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
+        while (_he_thh != NULL) {                                                \
+           _he_hh_nxt = _he_thh->hh_next;                                        \
+           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt);           \
+           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
+           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
+             tbl->nonideal_items++;                                              \
+             _he_newbkt->expand_mult = _he_newbkt->count /                       \
+                                        tbl->ideal_chain_maxlen;                 \
+           }                                                                     \
+           _he_thh->hh_prev = NULL;                                              \
+           _he_thh->hh_next = _he_newbkt->hh_head;                               \
+           if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev =     \
+                _he_thh; }                                                       \
+           _he_newbkt->hh_head = _he_thh;                                        \
+           _he_thh = _he_hh_nxt;                                                 \
+        }                                                                        \
+    }                                                                            \
+    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+    tbl->num_buckets *= 2U;                                                      \
+    tbl->log2_num_buckets++;                                                     \
+    tbl->buckets = _he_new_buckets;                                              \
+    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
+        (tbl->ineff_expands+1U) : 0U;                                            \
+    if (tbl->ineff_expands > 1U) {                                               \
+        tbl->noexpand=1;                                                         \
+        uthash_noexpand_fyi(tbl);                                                \
+    }                                                                            \
+    uthash_expand_fyi(tbl);                                                      \
+} while (0)
+
+
+/* In-place merge sort of the items' application order (the next/prev links);
+ * an adaptation of Simon Tatham's O(n log(n)) mergesort. HASH_SORT assumes the
+ * hash handle is named hh; HASH_SRT allows the handle name to be passed in. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn)                                                 \
+do {                                                                             \
+  unsigned _hs_i;                                                                \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
+  if (head != NULL) {                                                            \
+      _hs_insize = 1;                                                            \
+      _hs_looping = 1;                                                           \
+      _hs_list = &((head)->hh);                                                  \
+      while (_hs_looping != 0U) {                                                \
+          _hs_p = _hs_list;                                                      \
+          _hs_list = NULL;                                                       \
+          _hs_tail = NULL;                                                       \
+          _hs_nmerges = 0;                                                       \
+          while (_hs_p != NULL) {                                                \
+              _hs_nmerges++;                                                     \
+              _hs_q = _hs_p;                                                     \
+              _hs_psize = 0;                                                     \
+              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
+                  _hs_psize++;                                                   \
+                  _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?              \
+                          ((void*)((char*)(_hs_q->next) +                        \
+                          (head)->hh.tbl->hho)) : NULL);                         \
+                  if (! (_hs_q) ) { break; }                                     \
+              }                                                                  \
+              _hs_qsize = _hs_insize;                                            \
+              while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
+                  if (_hs_psize == 0U) {                                         \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) {           \
+                      _hs_e = _hs_p;                                             \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                                ((void*)((char*)(_hs_p->next) +                  \
+                                (head)->hh.tbl->hho)) : NULL);                   \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else if ((                                                   \
+                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+                             ) <= 0) {                                           \
+                      _hs_e = _hs_p;                                             \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                               ((void*)((char*)(_hs_p->next) +                   \
+                               (head)->hh.tbl->hho)) : NULL);                    \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else {                                                       \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  }                                                              \
+                  if ( _hs_tail != NULL ) {                                      \
+                      _hs_tail->next = ((_hs_e != NULL) ?                        \
+                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
+                  } else {                                                       \
+                      _hs_list = _hs_e;                                          \
+                  }                                                              \
+                  if (_hs_e != NULL) {                                           \
+                  _hs_e->prev = ((_hs_tail != NULL) ?                            \
+                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
+                  }                                                              \
+                  _hs_tail = _hs_e;                                              \
+              }                                                                  \
+              _hs_p = _hs_q;                                                     \
+          }                                                                      \
+          if (_hs_tail != NULL){                                                 \
+            _hs_tail->next = NULL;                                               \
+          }                                                                      \
+          if ( _hs_nmerges <= 1U ) {                                             \
+              _hs_looping=0;                                                     \
+              (head)->hh.tbl->tail = _hs_tail;                                   \
+              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
+          }                                                                      \
+          _hs_insize *= 2U;                                                      \
+      }                                                                          \
+      HASH_FSCK(hh,head);                                                        \
+ }                                                                               \
+} while (0)
+
+/* This macro selects items from one hash (src) into another hash (dst).
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle (hh_dst) that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
+do {                                                                             \
+  unsigned _src_bkt, _dst_bkt;                                                   \
+  void *_last_elt=NULL, *_elt;                                                   \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
+  if (src != NULL) {                                                             \
+    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
+      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
+          _src_hh != NULL;                                                       \
+          _src_hh = _src_hh->hh_next) {                                          \
+          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
+          if (cond(_elt)) {                                                      \
+            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
+            _dst_hh->key = _src_hh->key;                                         \
+            _dst_hh->keylen = _src_hh->keylen;                                   \
+            _dst_hh->hashv = _src_hh->hashv;                                     \
+            _dst_hh->prev = _last_elt;                                           \
+            _dst_hh->next = NULL;                                                \
+            if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; }             \
+            if (dst == NULL) {                                                   \
+              DECLTYPE_ASSIGN(dst,_elt);                                         \
+              HASH_MAKE_TABLE(hh_dst,dst);                                       \
+            } else {                                                             \
+              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
+            }                                                                    \
+            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
+            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
+            (dst)->hh_dst.tbl->num_items++;                                      \
+            _last_elt = _elt;                                                    \
+            _last_elt_hh = _dst_hh;                                              \
+          }                                                                      \
+      }                                                                          \
+    }                                                                            \
+  }                                                                              \
+  HASH_FSCK(hh_dst,dst);                                                         \
+} while (0)
+
+/* free the table bookkeeping (bucket array, bloom filter, table) and NULL the head; items are not freed */
+#define HASH_CLEAR(hh,head)                                                      \
+do {                                                                             \
+  if ((head) != NULL) {                                                          \
+    uthash_free((head)->hh.tbl->buckets, (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
+    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head)=NULL;                                                                 \
+  }                                                                              \
+} while (0)
+
+/* bytes of hash bookkeeping for this table: handles + buckets + table + bloom bytes; 0 for a NULL head */
+#define HASH_OVERHEAD(hh,head)                                                   \
+ (((head) != NULL) ? (                                                           \
+ (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
+          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
+           sizeof(UT_hash_table) + (HASH_BLOOM_BYTELEN))) : 0U)
+
+#ifdef NO_DECLTYPE /* assign tmp through a char** cast instead of a DECLTYPE cast */
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)(((head)!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#else /* tmp is loaded with el's successor before each pass of the loop body */
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((tmp)=DECLTYPE(el)(((head)!=NULL)?(head)->hh.next:NULL));    \
+  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#endif
+
+/* obtain a count of items in the hash (0U when head is NULL) */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) (((head) != NULL)?((head)->hh.tbl->num_items):0U)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;   /* first handle in this bucket's chain */
+   unsigned count;                   /* number of handles in the chain      */
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often.
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;                 /* array of num_buckets buckets */
+   unsigned num_buckets, log2_num_buckets;  /* bucket count (a power of two) and its base 2 log */
+   unsigned num_items;                      /* number of items in the hash */
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;        /* table this handle is linked into */
+   void *prev;                       /* prev element in app order      */
+   void *next;                       /* next element in app order      */
+   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
+   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
+   void *key;                        /* ptr to enclosing struct's key  */
+   unsigned keylen;                  /* enclosing struct's key len     */
+   unsigned hashv;                   /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */

From 313241e30321a5499fef51df461233c38cbe8261 Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Tue, 20 Sep 2016 22:40:35 -0700
Subject: [PATCH 37/91] Asynchronous Redis IPC (#14)

* Asynchronous Redis IPC

* make valgrind happy

* cleanup
---
 Makefile           |   7 ++-
 common.h           |   6 ++
 io.c               |   4 +-
 state/redis.c      |  35 ++++++++++--
 state/redis.h      |   4 ++
 test/db_tests.c    |   7 ++-
 test/redis_tests.c | 138 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 189 insertions(+), 12 deletions(-)
 create mode 100644 test/redis_tests.c

diff --git a/Makefile b/Makefile
index 29b7befd9..f1659c882 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,9 @@ $(BUILD)/io_tests: test/io_tests.c $(BUILD)/libcommon.a
 $(BUILD)/task_tests: test/task_tests.c $(BUILD)/libcommon.a
 	$(CC) -o $@ $^ $(CFLAGS)
 
+$(BUILD)/redis_tests: hiredis test/redis_tests.c $(BUILD)/libcommon.a
+	$(CC) -o $@ test/redis_tests.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
+
 clean:
 	rm -f *.o state/*.o test/*.o
 	rm -rf $(BUILD)/*
@@ -26,8 +29,8 @@ redis:
 hiredis:
 	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
 
-test: hiredis redis $(BUILD)/db_tests $(BUILD)/io_tests $(BUILD)/task_tests FORCE
+test: hiredis redis $(BUILD)/db_tests $(BUILD)/io_tests $(BUILD)/task_tests $(BUILD)/redis_tests FORCE
 	./thirdparty/redis-3.2.3/src/redis-server &
-	sleep 1s ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests
+	sleep 1s ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests ; ./build/redis_tests
 
 FORCE:
diff --git a/common.h b/common.h
index 96e6402f0..3f30b5661 100644
--- a/common.h
+++ b/common.h
@@ -28,6 +28,12 @@
 
 #define UNIQUE_ID_SIZE 20
 
+// Runs a greatest test, then issues FLUSHALL on `context` to clear Redis.
+// Wrapped in do/while(0) so it expands to one statement (safe under if/else).
+#define RUN_REDIS_TEST(context, test)                       \
+  do { RUN_TEST(test);                                      \
+       freeReplyObject(redisCommand(context, "FLUSHALL")); } while (0)
+
 typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } unique_id;
 
 /* Generate a globally unique ID. */
diff --git a/io.c b/io.c
index 1d16a78e0..99295512b 100644
--- a/io.c
+++ b/io.c
@@ -10,7 +10,7 @@
 
 #include "common.h"
 
-/* Binds to a Unix domain datagram socket at the given
+/* Binds to a Unix domain streaming socket at the given
  * pathname. Removes any existing file at the pathname. Returns
  * a file descriptor for the socket, or -1 if an error
  * occurred. */
@@ -44,7 +44,7 @@ int bind_ipc_sock(const char *socket_pathname) {
   return socket_fd;
 }
 
-/* Connects to a Unix domain datagram socket at the given
+/* Connects to a Unix domain streaming socket at the given
  * pathname. Returns a file descriptor for the socket, or -1 if
  * an error occurred. */
 int connect_ipc_sock(const char *socket_pathname) {
diff --git a/state/redis.c b/state/redis.c
index c781b81ef..ae3fb1a6f 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -8,12 +8,13 @@
 #include "task_queue.h"
 #include "event_loop.h"
 #include "redis.h"
+#include "io.h"
 
 static void poll_add_read(void *privdata) {
   db_conn *conn = (db_conn *) privdata;
   if (!conn->reading) {
     conn->reading = 1;
-    event_loop_get(conn->loop, 0)->events |= POLLIN;
+    event_loop_get(conn->loop, conn->db_index)->events |= POLLIN;
   }
 }
 
@@ -21,7 +22,7 @@ static void poll_del_read(void *privdata) {
   db_conn *conn = (db_conn *) privdata;
   if (conn->reading) {
     conn->reading = 0;
-    event_loop_get(conn->loop, 0)->events &= ~POLLIN;
+    event_loop_get(conn->loop, conn->db_index)->events &= ~POLLIN;
   }
 }
 
@@ -29,7 +30,7 @@ static void poll_add_write(void *privdata) {
   db_conn *conn = (db_conn *) privdata;
   if (!conn->writing) {
     conn->writing = 1;
-    event_loop_get(conn->loop, 0)->events |= POLLOUT;
+    event_loop_get(conn->loop, conn->db_index)->events |= POLLOUT;
   }
 }
 
@@ -37,7 +38,7 @@ static void poll_del_write(void *privdata) {
   db_conn *conn = (db_conn *) privdata;
   if (conn->writing) {
     conn->writing = 0;
-    event_loop_get(conn->loop, 0)->events &= ~POLLOUT;
+    event_loop_get(conn->loop, conn->db_index)->events &= ~POLLOUT;
   }
 }
 
@@ -143,8 +144,10 @@ int64_t db_attach(db_conn *db, event_loop *loop, int connection_type) {
 
   ac->ev.data = db;
 
-  return event_loop_attach(loop, connection_type, NULL, c->fd,
-                           POLLIN | POLLOUT);
+  int64_t index =
+      event_loop_attach(loop, connection_type, NULL, c->fd, POLLIN | POLLOUT);
+  db->db_index = index;
+  return index;
 }
 
 void object_table_add(db_conn *db, unique_id object_id) {
@@ -211,3 +214,23 @@ void object_table_lookup(db_conn *db,
     LOG_REDIS_ERR(db->context, "error in object_table lookup");
   }
 }
+
+void send_redis_command(int socket_fd, const char *format, ...) {
+  char *cmd;
+  va_list ap;
+  int len;
+  /* Format the printf-style arguments into a heap-allocated command string. */
+  va_start(ap, format);
+  len = redisvFormatCommand(&cmd, format, ap);
+  va_end(ap);
+  if (len == -1) {
+    LOG_ERR("Out of memory while formatting Redis command.");
+    return;
+  } else if (len == -2) {
+    LOG_ERR("Invalid Redis format string.");
+    return;
+  }
+  /* Write the formatted command to socket_fd, then release the buffer. */
+  write_string(socket_fd, cmd);
+  free(cmd);
+}
diff --git a/state/redis.h b/state/redis.h
index ad8d5cbcf..132724e67 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -25,6 +25,8 @@ struct db_conn_impl {
   int reading, writing;
   /* The event loop this global state store connection is part of. */
   event_loop *loop;
+  /* Index of the database connection in the event loop */
+  int64_t db_index;
   /* Cache for the IP addresses of services. */
   service_cache_entry *service_cache;
   /* Redis context for synchronous connections.
@@ -44,3 +46,5 @@ void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata);
 void object_table_lookup_callback(redisAsyncContext *c,
                                   void *r,
                                   void *privdata);
+
+void send_redis_command(int socket_fd, const char *format, ...);
diff --git a/test/db_tests.c b/test/db_tests.c
index 0788345f2..568eeaeb1 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -108,12 +108,15 @@ TEST object_table_lookup_test(void) {
 
   event_loop_free(&loop);
 
+  lookup_successful = 0;
   PASS();
 }
 
 SUITE(db_tests) {
-  RUN_TEST(object_table_lookup_test);
-  /* RUN_TEST(task_queue_test); */
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  freeReplyObject(redisCommand(context, "FLUSHALL")); /* start from an empty DB; free the reply */
+  RUN_REDIS_TEST(context, object_table_lookup_test);
+  redisFree(context);
 }
 
 GREATEST_MAIN_DEFS();
diff --git a/test/redis_tests.c b/test/redis_tests.c
new file mode 100644
index 000000000..9efd3bace
--- /dev/null
+++ b/test/redis_tests.c
@@ -0,0 +1,138 @@
+#include "greatest.h"
+
+#include <assert.h>
+#include <unistd.h>
+
+#include "event_loop.h"
+#include "state/db.h"
+#include "state/redis.h"
+#include "io.h"
+
+SUITE(redis_tests);
+
+int lookup_successful = 0;
+const char *test_set_format = "SET %s %s";
+const char *test_get_format = "GET %s";
+const char *test_key = "foo";
+const char *test_value = "bar";
+
+void async_redis_socket_test_callback(redisAsyncContext *ac,
+                                      void *r,
+                                      void *privdata) {
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  assert(context != NULL && !context->err);
+  redisReply *reply = redisCommand(context, test_get_format, test_key);
+  redisFree(context);
+  /* A missing key yields a nil reply whose str is NULL; don't strcmp that. */
+  assert(reply != NULL && reply->type == REDIS_REPLY_STRING);
+  int matches = strcmp(reply->str, test_value) == 0;
+  freeReplyObject(reply);
+  assert(matches);
+  lookup_successful = 1;
+}
+
+TEST redis_socket_test(void) {
+  const char *socket_pathname = "redis-test-socket";
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  ASSERT(context != NULL);
+  int socket_fd = bind_ipc_sock(socket_pathname);
+  ASSERT(socket_fd >= 0);
+
+  int client_fd = connect_ipc_sock(socket_pathname);
+  ASSERT(client_fd >= 0);
+
+  send_redis_command(client_fd, test_set_format, test_key, test_value);
+
+  int server_fd = accept_client(socket_fd);
+  char *cmd = read_string(server_fd);
+  close(client_fd);
+  close(server_fd);
+  close(socket_fd);
+  unlink(socket_pathname);
+
+  redisAppendFormattedCommand(context, cmd, strlen(cmd));
+  redisReply *tmp;
+  redisGetReply(context, &tmp);
+  freeReplyObject(tmp);
+  redisReply *reply = redisCommand(context, "GET %s", test_key);
+  ASSERT(reply != NULL);
+  ASSERT_STR_EQ(reply->str, test_value);
+  freeReplyObject(reply);
+
+  free(cmd);
+  redisFree(context);
+  PASS();
+}
+
+TEST async_redis_socket_test(void) {
+  int socket_fd, server_fd, client_fd;
+  event_loop loop;
+  event_loop_init(&loop);
+  /* Start IPC channel. */
+  const char *socket_pathname = "async-redis-test-socket";
+  socket_fd = bind_ipc_sock(socket_pathname);
+  ASSERT(socket_fd >= 0);
+  int64_t ipc_index = event_loop_attach(&loop, 1, NULL, socket_fd, POLLIN);
+
+  /* Start connection to Redis. */
+  db_conn conn;
+  db_connect("127.0.0.1", 6379, "", "", 0, &conn);
+  int64_t db_index = db_attach(&conn, &loop, 0);
+
+  /* Send a command to the Redis process. */
+  client_fd = connect_ipc_sock(socket_pathname);
+  ASSERT(client_fd >= 0);
+  send_redis_command(client_fd, test_set_format, test_key, test_value);
+
+  while (!lookup_successful) {
+    int num_ready = event_loop_poll(&loop);
+    if (num_ready < 0) {
+      exit(-1);
+    }
+    for (int i = 0; i < event_loop_size(&loop); ++i) {
+      struct pollfd *waiting = event_loop_get(&loop, i);
+      if (waiting->revents == 0)
+        continue;
+      if (i == db_index) {
+        db_event(&conn);
+      } else if (i == ipc_index) {
+        /* For some reason, this check is necessary for Travis
+         * to pass these tests. */
+        ASSERT(waiting->revents & POLLIN);
+        server_fd = accept_client(socket_fd);
+        ASSERT(server_fd >= 0);
+        event_loop_attach(&loop, 1, NULL, server_fd, POLLIN);
+      } else {
+        char *cmd = read_string(waiting->fd);
+        redisAsyncFormattedCommand(conn.context,
+                                   async_redis_socket_test_callback, NULL, cmd,
+                                   strlen(cmd));
+        free(cmd);
+      }
+    }
+  }
+  db_disconnect(&conn);
+  event_loop_free(&loop);
+  close(server_fd);
+  close(client_fd);
+  close(socket_fd);
+  unlink(socket_pathname);
+  lookup_successful = 0;
+  PASS();
+}
+
+SUITE(redis_tests) {
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  freeReplyObject(redisCommand(context, "FLUSHALL"));
+  RUN_REDIS_TEST(context, redis_socket_test);
+  RUN_REDIS_TEST(context, async_redis_socket_test);
+  redisFree(context);
+}
+
+GREATEST_MAIN_DEFS();
+
+int main(int argc, char **argv) {
+  GREATEST_MAIN_BEGIN();
+  RUN_SUITE(redis_tests);
+  GREATEST_MAIN_END();
+}

From 7a079547b009ae2737bf5cb3a0cf87f46299681a Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Thu, 22 Sep 2016 23:15:45 -0700
Subject: [PATCH 38/91] task queue tests and extensions (#18)

* task queue tests and extensions

* clean up test
---
 Makefile              |   7 +-
 common.c              |  60 +++++++
 common.h              |   9 +-
 event_loop.c          |   6 +-
 event_loop.h          |   2 +-
 state/redis.c         |  18 ++
 task.c                |  96 +++++++++-
 task.h                |  12 +-
 test/common_tests.c   |  27 +++
 test/db_tests.c       |  39 ++++-
 test/example_task.h   |  14 ++
 test/redis_tests.c    |   2 +-
 test/task_tests.c     |  20 +++
 thirdparty/utstring.h | 398 ++++++++++++++++++++++++++++++++++++++++++
 14 files changed, 695 insertions(+), 15 deletions(-)
 create mode 100644 test/common_tests.c
 create mode 100644 test/example_task.h
 create mode 100644 thirdparty/utstring.h

diff --git a/Makefile b/Makefile
index f1659c882..36793877f 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,9 @@ all: $(BUILD)/libcommon.a
 $(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o
 	ar rcs $@ $^
 
+$(BUILD)/common_tests: test/common_tests.c $(BUILD)/libcommon.a
+	$(CC) -o $@ test/common_tests.c $(BUILD)/libcommon.a $(CFLAGS)
+
 $(BUILD)/db_tests: hiredis test/db_tests.c $(BUILD)/libcommon.a
 	$(CC) -o $@ test/db_tests.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
 
@@ -29,8 +32,8 @@ redis:
 hiredis:
 	git submodule update --init --recursive -- "thirdparty/hiredis" ; cd thirdparty/hiredis ; make
 
-test: hiredis redis $(BUILD)/db_tests $(BUILD)/io_tests $(BUILD)/task_tests $(BUILD)/redis_tests FORCE
+test: hiredis redis $(BUILD)/common_tests $(BUILD)/db_tests $(BUILD)/io_tests $(BUILD)/task_tests $(BUILD)/redis_tests FORCE
 	./thirdparty/redis-3.2.3/src/redis-server &
-	sleep 1s ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests ; ./build/redis_tests
+	sleep 1s ; ./build/common_tests ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests ; ./build/redis_tests
 
 FORCE:
diff --git a/common.c b/common.c
index 9e0a86310..53e32fe13 100644
--- a/common.c
+++ b/common.c
@@ -31,3 +31,63 @@ char *sha1_to_hex(const unsigned char *sha1, char *buffer) {
 
   return buffer;
 }
+
+const signed char hexval_table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */
+    +0, +1, +2, +3, +4, +5, +6, +7, /* 30-37 */
+    +8, +9, -1, -1, -1, -1, -1, -1, /* 38-3f */
+    -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */
+    -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 68-6f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */
+    -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */
+};
+
+static inline int hexval(unsigned char c) {
+  return hexval_table[c]; /* 0-15 for a hex digit, -1 for anything else. */
+}
+
+/*
+ * Convert two consecutive hex digits into a byte value; negative on error.
+ * s[1] is read only if s[0] is a hex digit, so we never read past the NUL.
+ */
+static inline int hex2chr(const char *s) {
+  int val = hexval(s[0]);
+  return (val < 0) ? val : (val << 4) | hexval(s[1]);
+}
+
+int hex_to_sha1(const char *hex, unsigned char *sha1) {
+  /* Decode one byte (two hex digits) per iteration; -1 on a bad digit. */
+  for (int byte = 0; byte < UNIQUE_ID_SIZE; byte++) {
+    int decoded = hex2chr(hex + 2 * byte);
+    if (decoded < 0) {
+      return -1;
+    }
+    sha1[byte] = decoded;
+  }
+  return 0;
+}
diff --git a/common.h b/common.h
index 3f30b5661..7e4b73ebe 100644
--- a/common.h
+++ b/common.h
@@ -28,8 +28,8 @@
 
 #define UNIQUE_ID_SIZE 20
 
-// Cleanup method for running tests with the greatest library.
-// Runs the test, then clears the Redis database.
+/* Cleanup method for running tests with the greatest library.
+ * Runs the test, then clears the Redis database. */
 #define RUN_REDIS_TEST(context, test) \
   RUN_TEST(test);                     \
   freeReplyObject(redisCommand(context, "FLUSHALL"));
@@ -44,6 +44,11 @@ unique_id globally_unique_id(void);
  * UNIQUE_ID_SIZE + 1 */
 char *sha1_to_hex(const unsigned char *sha1, char *buffer);
 
+/* Convert a hexadecimal string of length 40 to a 20 byte sha1 hash. This
+ * function assumes that sha1 points to an already allocated char array of size
+ * UNIQUE_ID_SIZE. */
+int hex_to_sha1(const char *hex, unsigned char *sha1);
+
 typedef unique_id object_id;
 
 #endif
diff --git a/event_loop.c b/event_loop.c
index 0fd79e6af..d89710ba5 100644
--- a/event_loop.c
+++ b/event_loop.c
@@ -59,10 +59,10 @@ void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
 }
 
 /* Poll the file descriptors associated to this event loop.
- * See http://linux.die.net/man/2/poll */
-int event_loop_poll(event_loop *loop) {
+ * See http://linux.die.net/man/2/poll. The timeout is in milliseconds. */
+int event_loop_poll(event_loop *loop, int timeout) {
   return poll((struct pollfd *) utarray_front(loop->waiting),
-              utarray_len(loop->waiting), -1);
+              utarray_len(loop->waiting), timeout);
 }
 
 /* Get the total number of file descriptors participating in the event loop. */
diff --git a/event_loop.h b/event_loop.h
index a96ec4643..840abfd8c 100644
--- a/event_loop.h
+++ b/event_loop.h
@@ -29,7 +29,7 @@ int64_t event_loop_attach(event_loop *loop,
                           int fd,
                           int events);
 void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
-int event_loop_poll(event_loop *loop);
+int event_loop_poll(event_loop *loop, int timeout);
 int64_t event_loop_size(event_loop *loop);
 struct pollfd *event_loop_get(event_loop *loop, int64_t index);
 void event_loop_set_data(event_loop *loop, int64_t index, void *data);
diff --git a/state/redis.c b/state/redis.c
index ae3fb1a6f..c2e01df5e 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -2,6 +2,8 @@
 
 #include <assert.h>
 
+#include "utstring.h"
+
 #include "common.h"
 #include "db.h"
 #include "object_table.h"
@@ -215,6 +217,22 @@ void object_table_lookup(db_conn *db,
   }
 }
 
+void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task) {
+  /* For converting an id to hex, which has double the number
+   * of bytes compared to the id (+ 1 byte for '\0'). */
+  static char hex[2 * UNIQUE_ID_SIZE + 1];
+  UT_string *command;
+  utstring_new(command);
+  sha1_to_hex(&task_iid.id[0], &hex[0]);
+  utstring_printf(command, "HMSET queue:%s", &hex[0]);
+  print_task(task, command);
+  redisAsyncCommand(db->context, NULL, NULL, utstring_body(command));
+  if (db->context->err) {
+    LOG_REDIS_ERR(db->context, "error in task_queue submit_task");
+  }
+  utstring_free(command);
+}
+
 void send_redis_command(int socket_fd, const char *format, ...) {
   char *cmd;
   va_list ap;
diff --git a/task.c b/task.c
index 5d5271d91..fd4708d78 100644
--- a/task.c
+++ b/task.c
@@ -2,6 +2,8 @@
 #include <stdio.h>
 #include <string.h>
 
+#include "utarray.h"
+
 #include "task.h"
 #include "common.h"
 #include "io.h"
@@ -30,7 +32,8 @@ typedef struct {
 } task_arg;
 
 struct task_spec_impl {
-  function_id func_id;
+  /* Function ID of the task. */
+  function_id function_id;
   /* Total number of arguments. */
   int64_t num_args;
   /* Index of the last argument that has been constructed. */
@@ -52,14 +55,14 @@ struct task_spec_impl {
   (sizeof(task_spec) + ((NUM_ARGS) + (NUM_RETURNS)) * sizeof(task_arg) + \
    (ARGS_VALUE_SIZE))
 
-task_spec *alloc_task_spec(function_id func_id,
+task_spec *alloc_task_spec(function_id function_id,
                            int64_t num_args,
                            int64_t num_returns,
                            int64_t args_value_size) {
   int64_t size = TASK_SPEC_SIZE(num_args, num_returns, args_value_size);
   task_spec *task = malloc(size);
   memset(task, 0, size);
-  task->func_id = func_id;
+  task->function_id = function_id;
   task->num_args = num_args;
   task->arg_index = 0;
   task->num_returns = num_returns;
@@ -72,6 +75,10 @@ int64_t task_size(task_spec *spec) {
                         spec->args_value_size);
 }
 
+unique_id *task_function(task_spec *spec) {
+  return &spec->function_id;
+}
+
 int64_t task_num_args(task_spec *spec) {
   return spec->num_args;
 }
@@ -153,3 +160,86 @@ task_spec *read_task(int fd) {
   CHECK(task_size(spec) == length);
   return spec;
 }
+
+void print_task(task_spec *spec, UT_string *output) {
+  /* For converting an id to hex, which has double the number
+   * of bytes compared to the id (+ 1 byte for '\0'). */
+  static char hex[2 * UNIQUE_ID_SIZE + 1];
+  /* Print function id. */
+  sha1_to_hex(&task_function(spec)->id[0], &hex[0]);
+  utstring_printf(output, "fun %s ", &hex[0]);
+  /* Print arguments. */
+  for (int i = 0; i < task_num_args(spec); ++i) {
+    sha1_to_hex(&task_arg_id(spec, i)->id[0], &hex[0]);
+    utstring_printf(output, " id:%d %s", i, &hex[0]);
+  }
+  /* Print return ids. */
+  for (int i = 0; i < task_num_returns(spec); ++i) {
+    object_id *object_id = task_return(spec, i);
+    sha1_to_hex(&object_id->id[0], &hex[0]);
+    utstring_printf(output, " ret:%d %s", i, &hex[0]);
+  }
+}
+
+UT_icd unique_id_icd = {sizeof(unique_id), NULL, NULL, NULL};
+
+task_spec *parse_task(char *task_string, int64_t task_length) {
+  /* Rebuild a task from the "key hex-id" pairs written by print_task. One
+   * pass stores argument ids in "args" and return ids in "returns"; any
+   * malformed token fails a CHECK. Note that strtok modifies task_string. */
+  UT_array *args, *returns;
+  utarray_new(args, &unique_id_icd);
+  utarray_new(returns, &unique_id_icd);
+  function_id function_id;
+  char *cursor = strtok(task_string, " ");
+  int index = 0;
+  while (cursor != NULL) {
+    /* Array receiving this id: "args" or "returns"; NULL for unknown keys. */
+    UT_array *target = NULL;
+    if (strncmp("fun", cursor, 3) == 0) {
+      /* Parse function id; nothing needs to follow it (no args or returns). */
+      CHECK(cursor + 2 * UNIQUE_ID_SIZE + 1 <= task_string + task_length);
+      cursor = strtok(NULL, " ");
+      CHECK(cursor);
+      CHECK(hex_to_sha1(cursor, &function_id.id[0]) == 0);
+      cursor = strtok(NULL, " ");
+      continue;
+    } else if (strncmp("id:", cursor, 3) == 0) {
+      /* Parse pass by reference argument. */
+      CHECK(sscanf(cursor, "id:%d", &index) == 1);
+      target = args;
+    } else if (strncmp("val:", cursor, 4) == 0) {
+      /* Parse pass by value argument. */
+      sscanf(cursor, "val:%d", &index);
+      CHECK(0); /* Not implemented yet */
+    } else if (strncmp("ret:", cursor, 4) == 0) {
+      /* Parse return object reference. */
+      CHECK(sscanf(cursor, "ret:%d", &index) == 1);
+      target = returns;
+    }
+    /* Reject unknown keys (target unset) and negative indices up front. */
+    CHECK(target != NULL && index >= 0);
+    cursor = strtok(NULL, " ");
+    CHECK(cursor);
+    if (index >= utarray_len(target)) {
+      utarray_resize(target, index + 1);
+    }
+    object_id *id = (object_id *) utarray_eltptr(target, index);
+    CHECK(hex_to_sha1(cursor, &id->id[0]) == 0);
+    cursor = strtok(NULL, " ");
+  }
+  /* Now assemble the task specification. TODO(pcm): pass by value. */
+  task_spec *spec =
+      alloc_task_spec(function_id, utarray_len(args), utarray_len(returns), 0);
+  for (int i = 0; i < utarray_len(args); ++i) {
+    object_id *id = (object_id *) utarray_eltptr(args, i);
+    task_args_add_ref(spec, *id);
+  }
+  for (int i = 0; i < utarray_len(returns); ++i) {
+    object_id *id = (object_id *) utarray_eltptr(returns, i);
+    *task_return(spec, i) = *id;
+  }
+  utarray_free(args);
+  utarray_free(returns);
+  return spec;
+}
diff --git a/task.h b/task.h
index ad85540ee..96c97b80d 100644
--- a/task.h
+++ b/task.h
@@ -10,6 +10,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "common.h"
+#include "utstring.h"
 
 typedef unique_id function_id;
 typedef unique_id object_id;
@@ -22,7 +23,7 @@ enum arg_type { ARG_BY_REF, ARG_BY_VAL };
 /* Construct and modify task specifications. */
 
 /* Allocating and initializing a task. */
-task_spec *alloc_task_spec(function_id func_id,
+task_spec *alloc_task_spec(function_id function_id,
                            int64_t num_args,
                            int64_t num_returns,
                            int64_t args_value_size);
@@ -30,6 +31,9 @@ task_spec *alloc_task_spec(function_id func_id,
 /* Size of the task in bytes. */
 int64_t task_size(task_spec *spec);
 
+/* Return the function ID of the task. */
+unique_id *task_function(task_spec *spec);
+
 /* Getting the number of arguments and returns. */
 int64_t task_num_args(task_spec *spec);
 int64_t task_num_returns(task_spec *spec);
@@ -58,4 +62,10 @@ void write_task(int fd, task_spec *spec);
  * responsibility to free the task after it has been used. */
 task_spec *read_task(int fd);
 
+/* Print task as a human-readable string. */
+void print_task(task_spec *spec, UT_string *output);
+
+/* Parse task as printed by print_task. */
+task_spec *parse_task(char *task_string, int64_t task_length);
+
 #endif
diff --git a/test/common_tests.c b/test/common_tests.c
new file mode 100644
index 000000000..3673c335d
--- /dev/null
+++ b/test/common_tests.c
@@ -0,0 +1,27 @@
+#include "greatest.h"
+
+#include "common.h"
+
+SUITE(common_tests);
+
+TEST sha1_test(void) {
+  static char hex[2 * UNIQUE_ID_SIZE + 1];
+  static unsigned char id[UNIQUE_ID_SIZE];
+  unique_id uid = globally_unique_id();
+  sha1_to_hex(&uid.id[0], &hex[0]);
+  ASSERT_EQ(0, hex_to_sha1(&hex[0], &id[0])); /* Decoding must succeed. */
+  ASSERT(memcmp(&uid.id[0], &id[0], UNIQUE_ID_SIZE) == 0); /* Round trip. */
+  PASS();
+}
+
+SUITE(common_tests) {
+  RUN_TEST(sha1_test);
+}
+
+GREATEST_MAIN_DEFS();
+
+int main(int argc, char **argv) {
+  GREATEST_MAIN_BEGIN();
+  RUN_SUITE(common_tests);
+  GREATEST_MAIN_END();
+}
diff --git a/test/db_tests.c b/test/db_tests.c
index 568eeaeb1..5bdbf25df 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -3,6 +3,7 @@
 #include <assert.h>
 
 #include "event_loop.h"
+#include "test/example_task.h"
 #include "state/db.h"
 #include "state/object_table.h"
 #include "state/redis.h"
@@ -62,7 +63,7 @@ TEST object_table_lookup_test(void) {
   object_table_add(&conn2, id);
   object_table_lookup(&conn1, id, sync_test_callback);
   while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop);
+    int num_ready = event_loop_poll(&loop, -1);
     if (num_ready < 0) {
       exit(-1);
     }
@@ -81,7 +82,7 @@ TEST object_table_lookup_test(void) {
   lookup_successful = 0;
   object_table_lookup(&conn1, id, test_callback);
   while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop);
+    int num_ready = event_loop_poll(&loop, -1);
     if (num_ready < 0) {
       exit(-1);
     }
@@ -112,10 +113,44 @@ TEST object_table_lookup_test(void) {
   PASS();
 }
 
+TEST task_queue_test(void) {
+  event_loop loop;
+  event_loop_init(&loop);
+  db_conn conn;
+  db_connect("127.0.0.1", 6379, "local_scheduler", "", -1, &conn);
+  int64_t index = db_attach(&conn, &loop, 0);
+
+  task_spec *task = example_task();
+  task_queue_submit_task(&conn, globally_unique_id(), task);
+  while (1) {
+    int num_ready = event_loop_poll(&loop, 100);
+    if (num_ready < 0) {
+      exit(-1);
+    }
+    if (num_ready == 0) {
+      break;
+    }
+    for (int i = 0; i < event_loop_size(&loop); ++i) {
+      struct pollfd *waiting = event_loop_get(&loop, i);
+      if (waiting->revents == 0)
+        continue;
+      if (i == index) {
+        db_event(&conn);
+      }
+    }
+  }
+
+  free_task_spec(task);
+  db_disconnect(&conn);
+  event_loop_free(&loop);
+  PASS();
+}
+
 SUITE(db_tests) {
   redisContext *context = redisConnect("127.0.0.1", 6379);
   redisCommand(context, "FLUSHALL");
   RUN_REDIS_TEST(context, object_table_lookup_test);
+  RUN_TEST(task_queue_test);
   redisFree(context);
 }
 
diff --git a/test/example_task.h b/test/example_task.h
new file mode 100644
index 000000000..0dddc4dc1
--- /dev/null
+++ b/test/example_task.h
@@ -0,0 +1,14 @@
+#ifndef EXAMPLE_TASK_H
+#define EXAMPLE_TASK_H
+
+#include "task.h"
+
+static inline task_spec *example_task(void) {
+  /* Two task_args_add_ref arguments, one return. static inline: in a header. */
+  task_spec *task = alloc_task_spec(globally_unique_id(), 2, 1, 0);
+  task_args_add_ref(task, globally_unique_id());
+  task_args_add_ref(task, globally_unique_id());
+  return task;
+}
+
+#endif
diff --git a/test/redis_tests.c b/test/redis_tests.c
index 9efd3bace..2f0f09110 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -85,7 +85,7 @@ TEST async_redis_socket_test(void) {
   send_redis_command(client_fd, test_set_format, test_key, test_value);
 
   while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop);
+    int num_ready = event_loop_poll(&loop, -1);
     if (num_ready < 0) {
       exit(-1);
     }
diff --git a/test/task_tests.c b/test/task_tests.c
index 68a6a6537..4293eec04 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -5,6 +5,7 @@
 #include <sys/socket.h>
 
 #include "common.h"
+#include "test/example_task.h"
 #include "task.h"
 
 SUITE(task_tests);
@@ -56,9 +57,28 @@ TEST send_task(void) {
   PASS();
 }
 
+TEST print_and_parse_task(void) {
+  task_spec *task = example_task();
+
+  UT_string *output;
+  utstring_new(output);
+  print_task(task, output);
+  task_spec *result = parse_task(utstring_body(output), utstring_len(output));
+  utstring_free(output);
+
+  ASSERT_EQ(task_size(task), task_size(result));
+  ASSERT(memcmp(task, result, task_size(task)) == 0);
+
+  free_task_spec(task);
+  free_task_spec(result);
+
+  PASS();
+}
+
 SUITE(task_tests) {
   RUN_TEST(task_test);
   RUN_TEST(send_task);
+  RUN_TEST(print_and_parse_task);
 }
 
 GREATEST_MAIN_DEFS();
diff --git a/thirdparty/utstring.h b/thirdparty/utstring.h
new file mode 100644
index 000000000..debe5f3df
--- /dev/null
+++ b/thirdparty/utstring.h
@@ -0,0 +1,398 @@
+/*
+Copyright (c) 2008-2016, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* a dynamic string implementation using macros
+ */
+#ifndef UTSTRING_H
+#define UTSTRING_H
+
+#define UTSTRING_VERSION 2.0.1
+
+#ifdef __GNUC__
+#define _UNUSED_ __attribute__ ((__unused__))
+#else
+#define _UNUSED_
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#ifndef oom
+#define oom() exit(-1)
+#endif
+
+typedef struct {
+    char *d;
+    size_t n; /* allocd size */
+    size_t i; /* index of first unused byte */
+} UT_string;
+
+#define utstring_reserve(s,amt)                            \
+do {                                                       \
+  if (((s)->n - (s)->i) < (size_t)(amt)) {                 \
+    char *utstring_tmp = (char*)realloc(                   \
+      (s)->d, (s)->n + (amt));                             \
+    if (utstring_tmp == NULL) oom();                       \
+    (s)->d = utstring_tmp;                                 \
+    (s)->n += (amt);                                       \
+  }                                                        \
+} while(0)
+
+#define utstring_init(s)                                   \
+do {                                                       \
+  (s)->n = 0; (s)->i = 0; (s)->d = NULL;                   \
+  utstring_reserve(s,100);                                 \
+  (s)->d[0] = '\0';                                        \
+} while(0)
+
+#define utstring_done(s)                                   \
+do {                                                       \
+  if ((s)->d != NULL) free((s)->d);                        \
+  (s)->n = 0;                                              \
+} while(0)
+
+#define utstring_free(s)                                   \
+do {                                                       \
+  utstring_done(s);                                        \
+  free(s);                                                 \
+} while(0)
+
+#define utstring_new(s)                                    \
+do {                                                       \
+   s = (UT_string*)calloc(sizeof(UT_string),1);            \
+   if (!s) oom();                                          \
+   utstring_init(s);                                       \
+} while(0)
+
+#define utstring_renew(s)                                  \
+do {                                                       \
+   if (s) {                                                \
+     utstring_clear(s);                                    \
+   } else {                                                \
+     utstring_new(s);                                      \
+   }                                                       \
+} while(0)
+
+#define utstring_clear(s)                                  \
+do {                                                       \
+  (s)->i = 0;                                              \
+  (s)->d[0] = '\0';                                        \
+} while(0)
+
+#define utstring_bincpy(s,b,l)                             \
+do {                                                       \
+  utstring_reserve((s),(l)+1);                             \
+  if (l) memcpy(&(s)->d[(s)->i], b, l);                    \
+  (s)->i += (l);                                           \
+  (s)->d[(s)->i]='\0';                                     \
+} while(0)
+
+#define utstring_concat(dst,src)                                 \
+do {                                                             \
+  utstring_reserve((dst),((src)->i)+1);                          \
+  if ((src)->i) memcpy(&(dst)->d[(dst)->i], (src)->d, (src)->i); \
+  (dst)->i += (src)->i;                                          \
+  (dst)->d[(dst)->i]='\0';                                       \
+} while(0)
+
+#define utstring_len(s) ((unsigned)((s)->i))
+
+#define utstring_body(s) ((s)->d)
+
+_UNUSED_ static void utstring_printf_va(UT_string *s, const char *fmt, va_list ap) {
+   int n;
+   va_list cp;
+   for (;;) {
+#ifdef _WIN32
+      cp = ap;
+#else
+      va_copy(cp, ap);
+#endif
+      n = vsnprintf (&s->d[s->i], s->n-s->i, fmt, cp);
+      va_end(cp);
+
+      if ((n > -1) && ((size_t) n < (s->n-s->i))) {
+        s->i += n;
+        return;
+      }
+
+      /* Else try again with more space. */
+      if (n > -1) utstring_reserve(s,n+1); /* exact */
+      else utstring_reserve(s,(s->n)*2);   /* 2x */
+   }
+}
+#ifdef __GNUC__
+/* support printf format checking (2=the format string, 3=start of varargs) */
+static void utstring_printf(UT_string *s, const char *fmt, ...)
+  __attribute__ (( format( printf, 2, 3) ));
+#endif
+_UNUSED_ static void utstring_printf(UT_string *s, const char *fmt, ...) {
+   va_list ap;
+   va_start(ap,fmt);
+   utstring_printf_va(s,fmt,ap);
+   va_end(ap);
+}
+
+/*******************************************************************************
+ * begin substring search functions                                            *
+ ******************************************************************************/
+/* Build KMP table from left to right. */
+_UNUSED_ static void _utstring_BuildTable(
+    const char *P_Needle,
+    size_t P_NeedleLen,
+    long *P_KMP_Table)
+{
+    long i, j;
+
+    i = 0;
+    j = i - 1;
+    P_KMP_Table[i] = j;
+    while (i < (long) P_NeedleLen)
+    {
+        while ( (j > -1) && (P_Needle[i] != P_Needle[j]) )
+        {
+           j = P_KMP_Table[j];
+        }
+        i++;
+        j++;
+        if (i < (long) P_NeedleLen)
+        {
+            if (P_Needle[i] == P_Needle[j])
+            {
+                P_KMP_Table[i] = P_KMP_Table[j];
+            }
+            else
+            {
+                P_KMP_Table[i] = j;
+            }
+        }
+        else
+        {
+            P_KMP_Table[i] = j;
+        }
+    }
+
+    return;
+}
+
+
+/* Build KMP table from right to left. */
+_UNUSED_ static void _utstring_BuildTableR(
+    const char *P_Needle,
+    size_t P_NeedleLen,
+    long *P_KMP_Table)
+{
+    long i, j;
+
+    i = P_NeedleLen - 1;
+    j = i + 1;
+    P_KMP_Table[i + 1] = j;
+    while (i >= 0)
+    {
+        while ( (j < (long) P_NeedleLen) && (P_Needle[i] != P_Needle[j]) )
+        {
+           j = P_KMP_Table[j + 1];
+        }
+        i--;
+        j--;
+        if (i >= 0)
+        {
+            if (P_Needle[i] == P_Needle[j])
+            {
+                P_KMP_Table[i + 1] = P_KMP_Table[j + 1];
+            }
+            else
+            {
+                P_KMP_Table[i + 1] = j;
+            }
+        }
+        else
+        {
+            P_KMP_Table[i + 1] = j;
+        }
+    }
+
+    return;
+}
+
+
+/* Search data from left to right. ( Multiple search mode. ) */
+_UNUSED_ static long _utstring_find(
+    const char *P_Haystack,
+    size_t P_HaystackLen,
+    const char *P_Needle,
+    size_t P_NeedleLen,
+    long *P_KMP_Table)
+{
+    long i, j;
+    long V_FindPosition = -1;
+
+    /* Search from left to right. */
+    i = j = 0;
+    while ( (j < (int)P_HaystackLen) && (((P_HaystackLen - j) + i) >= P_NeedleLen) )
+    {
+        while ( (i > -1) && (P_Needle[i] != P_Haystack[j]) )
+        {
+            i = P_KMP_Table[i];
+        }
+        i++;
+        j++;
+        if (i >= (int)P_NeedleLen)
+        {
+            /* Found. */
+            V_FindPosition = j - i;
+            break;
+        }
+    }
+
+    return V_FindPosition;
+}
+
+
+/* Search data from right to left. ( Multiple search mode. ) */
+_UNUSED_ static long _utstring_findR(
+    const char *P_Haystack,
+    size_t P_HaystackLen,
+    const char *P_Needle,
+    size_t P_NeedleLen,
+    long *P_KMP_Table)
+{
+    long i, j;
+    long V_FindPosition = -1;
+
+    /* Search from right to left. */
+    j = (P_HaystackLen - 1);
+    i = (P_NeedleLen - 1);
+    while ( (j >= 0) && (j >= i) )
+    {
+        while ( (i < (int)P_NeedleLen) && (P_Needle[i] != P_Haystack[j]) )
+        {
+            i = P_KMP_Table[i + 1];
+        }
+        i--;
+        j--;
+        if (i < 0)
+        {
+            /* Found. */
+            V_FindPosition = j + 1;
+            break;
+        }
+    }
+
+    return V_FindPosition;
+}
+
+
+/* Search data from left to right. ( One time search mode. ) */
+_UNUSED_ static long utstring_find(
+    UT_string *s,
+    long P_StartPosition,   /* Start from 0. -1 means last position. */
+    const char *P_Needle,
+    size_t P_NeedleLen)
+{
+    long V_StartPosition;
+    long V_HaystackLen;
+    long *V_KMP_Table;
+    long V_FindPosition = -1;
+
+    if (P_StartPosition < 0)
+    {
+        V_StartPosition = s->i + P_StartPosition;
+    }
+    else
+    {
+        V_StartPosition = P_StartPosition;
+    }
+    V_HaystackLen = s->i - V_StartPosition;
+    if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
+    {
+        V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
+        if (V_KMP_Table != NULL)
+        {
+            _utstring_BuildTable(P_Needle, P_NeedleLen, V_KMP_Table);
+
+            V_FindPosition = _utstring_find(s->d + V_StartPosition,
+                                            V_HaystackLen,
+                                            P_Needle,
+                                            P_NeedleLen,
+                                            V_KMP_Table);
+            if (V_FindPosition >= 0)
+            {
+                V_FindPosition += V_StartPosition;
+            }
+
+            free(V_KMP_Table);
+        }
+    }
+
+    return V_FindPosition;
+}
+
+
+/* Search data from right to left. ( One time search mode. ) */
+_UNUSED_ static long utstring_findR(
+    UT_string *s,
+    long P_StartPosition,   /* Start from 0. -1 means last position. */
+    const char *P_Needle,
+    size_t P_NeedleLen)
+{
+    long V_StartPosition;
+    long V_HaystackLen;
+    long *V_KMP_Table;
+    long V_FindPosition = -1;
+
+    if (P_StartPosition < 0)
+    {
+        V_StartPosition = s->i + P_StartPosition;
+    }
+    else
+    {
+        V_StartPosition = P_StartPosition;
+    }
+    V_HaystackLen = V_StartPosition + 1;
+    if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
+    {
+        V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
+        if (V_KMP_Table != NULL)
+        {
+            _utstring_BuildTableR(P_Needle, P_NeedleLen, V_KMP_Table);
+
+            V_FindPosition = _utstring_findR(s->d,
+                                             V_HaystackLen,
+                                             P_Needle,
+                                             P_NeedleLen,
+                                             V_KMP_Table);
+
+            free(V_KMP_Table);
+        }
+    }
+
+    return V_FindPosition;
+}
+/*******************************************************************************
+ * end substring search functions                                              *
+ ******************************************************************************/
+
+#endif /* UTSTRING_H */

From d41566a499d75a31d1345b774e9a9a1cb433e13f Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 23 Sep 2016 15:07:50 -0700
Subject: [PATCH 39/91] Implement delete and contains for plasma client and
 store. (#28)

* Implement delete and contains for plasma client and store.

* Fix style, free object, add tests.
---
 lib/python/plasma.py | 28 ++++++++++++++++++++++
 src/plasma.h         | 16 +++++++++----
 src/plasma_client.c  | 28 ++++++++++++++++++----
 src/plasma_client.h  | 12 ++++++++++
 src/plasma_manager.c |  2 +-
 src/plasma_store.c   | 42 +++++++++++++++++++++++++++++++++
 test/test.py         | 55 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 173 insertions(+), 10 deletions(-)

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 2bbcd6d49..37fa8f72c 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -43,7 +43,9 @@ class PlasmaClient(object):
     self.client.plasma_store_connect.restype = ctypes.c_void_p
     self.client.plasma_create.restype = None
     self.client.plasma_get.restype = None
+    self.client.plasma_contains.restype = None
     self.client.plasma_seal.restype = None
+    self.client.plasma_delete.restype = None
 
     self.buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
     self.buffer_from_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
@@ -111,6 +113,22 @@ class PlasmaClient(object):
     buf = self.client.plasma_get(self.store_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(metadata, metadata_size)
 
+  def contains(self, object_id):
+    """Check if the object is present and has been sealed in the PlasmaStore.
+
+    Args:
+      object_id (str): A string used to identify an object.
+    """
+    has_object = ctypes.c_int()
+    self.client.plasma_contains(self.store_conn, make_plasma_id(object_id), ctypes.byref(has_object))
+    has_object = has_object.value
+    if has_object == 1:
+      return True
+    elif has_object == 0:
+      return False
+    else:
+      raise Exception("This code should be unreachable.")
+
   def seal(self, object_id):
     """Seal the buffer in the PlasmaStore for a particular object ID.
 
@@ -122,6 +140,16 @@ class PlasmaClient(object):
     """
     self.client.plasma_seal(self.store_conn, make_plasma_id(object_id))
 
+  def delete(self, object_id):
+    """Delete the buffer in the PlasmaStore for a particular object ID.
+
+    Once a buffer has been deleted, the buffer is no longer accessible.
+
+    Args:
+      object_id (str): A string used to identify an object.
+    """
+    self.client.plasma_delete(self.store_conn, make_plasma_id(object_id))
+
   def transfer(self, addr, port, object_id):
     """Transfer local object with id object_id to another plasma instance
 
diff --git a/src/plasma.h b/src/plasma.h
index ddf89ad34..4e8e15464 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -51,11 +51,15 @@ enum plasma_request_type {
   PLASMA_CREATE,
   /* Get an object. */
   PLASMA_GET,
-  /* seal an object */
+  /* Check if an object is present. */
+  PLASMA_CONTAINS,
+  /* Seal an object. */
   PLASMA_SEAL,
-  /* request transfer to another store */
+  /* Delete an object. */
+  PLASMA_DELETE,
+  /* Request transfer to another store. */
   PLASMA_TRANSFER,
-  /* Header for sending data */
+  /* Header for sending data. */
   PLASMA_DATA,
 };
 
@@ -81,6 +85,8 @@ typedef struct {
   int64_t data_size;
   /* The size of the metadata. */
   int64_t metadata_size;
+  /* 1 if the object is present and 0 otherwise. Used for plasma_contains. */
+  int has_object;
   /* Numerical value of the fd of the memory mapped file in the store. */
   int store_fd_val;
 } plasma_reply;
@@ -112,6 +118,8 @@ typedef struct {
   client_mmap_table_entry *mmap_table;
 } plasma_store_conn;
 
-void plasma_send(int conn, plasma_request *req);
+void plasma_send_request(int conn, plasma_request *req);
+
+void plasma_send_reply(int conn, plasma_reply *reply);
 
 #endif
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 3d94e503b..e486e1aa8 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -16,7 +16,7 @@
 #include "plasma_client.h"
 #include "fling.h"
 
-void plasma_send(int fd, plasma_request *req) {
+void plasma_send_request(int fd, plasma_request *req) {
   int req_count = sizeof(plasma_request);
   if (write(fd, req, req_count) != req_count) {
     LOG_ERR("write error, fd = %d", fd);
@@ -66,7 +66,7 @@ void plasma_create(plasma_store_conn *conn,
                         .object_id = object_id,
                         .data_size = data_size,
                         .metadata_size = metadata_size};
-  plasma_send(conn->conn, &req);
+  plasma_send_request(conn->conn, &req);
   plasma_reply reply;
   int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
   assert(reply.data_size == data_size);
@@ -92,7 +92,7 @@ void plasma_get(plasma_store_conn *conn,
                 int64_t *metadata_size,
                 uint8_t **metadata) {
   plasma_request req = {.type = PLASMA_GET, .object_id = object_id};
-  plasma_send(conn->conn, &req);
+  plasma_send_request(conn->conn, &req);
   plasma_reply reply;
   int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
   *data = lookup_or_mmap(conn, fd, reply.store_fd_val, reply.map_size) +
@@ -105,9 +105,27 @@ void plasma_get(plasma_store_conn *conn,
   }
 }
 
+/* This method is used to query whether the plasma store contains an object. */
+void plasma_contains(plasma_store_conn *conn,
+                     plasma_id object_id,
+                     int *has_object) {
+  plasma_request req = {.type = PLASMA_CONTAINS, .object_id = object_id};
+  plasma_send_request(conn->conn, &req);
+  plasma_reply reply;
+  int r = read(conn->conn, &reply, sizeof(plasma_reply));
+  PLASMA_CHECK(r != -1, "read error");
+  PLASMA_CHECK(r != 0, "connection disconnected");
+  *has_object = reply.has_object;
+}
+
 void plasma_seal(plasma_store_conn *conn, plasma_id object_id) {
   plasma_request req = {.type = PLASMA_SEAL, .object_id = object_id};
-  plasma_send(conn->conn, &req);
+  plasma_send_request(conn->conn, &req);
+}
+
+void plasma_delete(plasma_store_conn *conn, plasma_id object_id) {
+  plasma_request req = {.type = PLASMA_DELETE, .object_id = object_id};
+  plasma_send_request(conn->conn, &req);
 }
 
 plasma_store_conn *plasma_store_connect(const char *socket_name) {
@@ -187,5 +205,5 @@ void plasma_transfer(int manager,
     /* skip the '.' */
     end += 1;
   }
-  plasma_send(manager, &req);
+  plasma_send_request(manager, &req);
 }
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 4c7cc008e..87a33269f 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -22,6 +22,18 @@ void plasma_get(plasma_store_conn *conn,
                 int64_t *metadata_size,
                 uint8_t **metadata);
 
+/* Check if the object store contains a particular object and the object has
+ * been sealed. The result will be stored in has_object. TODO(rkn): We may want
+ * to indicate whether the object is currently being written. */
+void plasma_contains(plasma_store_conn *conn,
+                     plasma_id object_id,
+                     int *has_object);
+
 void plasma_seal(plasma_store_conn *conn, plasma_id object_id);
 
+/* Delete an object from the object store. This currently assumes that the
+ * object is present and has been sealed. TODO(rkn): We may want to allow the
+ * deletion of objects that are not present or haven't been sealed. */
+void plasma_delete(plasma_store_conn *conn, plasma_id object_id);
+
 #endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 7cad02504..a4952295d 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -74,7 +74,7 @@ void initiate_transfer(plasma_manager_state *s, plasma_request *req) {
                                 .object_id = req->object_id,
                                 .data_size = buf.data_size,
                                 .metadata_size = buf.metadata_size};
-  plasma_send(fd, &manager_req);
+  plasma_send_request(fd, &manager_req);
 }
 
 /* Start reading data from another object manager.
diff --git a/src/plasma_store.c b/src/plasma_store.c
index a7aef2c00..85c9ad0b9 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -30,12 +30,21 @@
 #define MAX_NUM_CLIENTS 100
 
 void* dlmalloc(size_t);
+void dlfree(void*);
 
 typedef struct {
   /* Event loop for the plasma store. */
   event_loop* loop;
 } plasma_store_state;
 
+void plasma_send_reply(int fd, plasma_reply* reply) {
+  int reply_count = sizeof(plasma_reply);
+  if (write(fd, reply, reply_count) != reply_count) {
+    LOG_ERR("write error, fd = %d", fd);
+    exit(-1);
+  }
+}
+
 void init_state(plasma_store_state* s) {
   s->loop = malloc(sizeof(event_loop));
   event_loop_init(s->loop);
@@ -54,6 +63,8 @@ typedef struct {
   ptrdiff_t offset;
   /* Handle for the uthash table. */
   UT_hash_handle handle;
+  /* Pointer to the object data. Needed to free the object. */
+  uint8_t* pointer;
 } object_table_entry;
 
 /* Objects that are still being written by their owner process. */
@@ -96,6 +107,7 @@ void create_object(int conn, plasma_request* req) {
   memcpy(&entry->object_id, &req->object_id, 20);
   entry->info.data_size = req->data_size;
   entry->info.metadata_size = req->metadata_size;
+  entry->pointer = pointer;
   /* TODO(pcm): set the other fields */
   entry->fd = fd;
   entry->map_size = map_size;
@@ -145,6 +157,16 @@ void get_object(int conn, plasma_request* req) {
   }
 }
 
+/* Check if an object is present. */
+void check_if_object_present(int conn, plasma_request* req) {
+  object_table_entry* entry;
+  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
+  plasma_reply reply;
+  memset(&reply, 0, sizeof(plasma_reply));
+  reply.has_object = entry ? 1 : 0;
+  plasma_send_reply(conn, &reply);
+}
+
 /* Seal an object that has been created in the hash table. */
 void seal_object(int conn, plasma_request* req) {
   LOG_INFO("sealing object");  // TODO(pcm): add object_id here
@@ -176,6 +198,20 @@ void seal_object(int conn, plasma_request* req) {
   free(notify_entry);
 }
 
+/* Delete a sealed object: unlink its table entry and free entry and buffer. */
+void delete_object(int conn, plasma_request* req) {
+  LOG_INFO("deleting object");  // TODO(rkn): add object_id here
+  object_table_entry* entry;
+  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
+  /* TODO(rkn): This should probably not fail, but should instead throw an
+   * error. Maybe we should also support deleting objects that have been created
+   * but not sealed. */
+  PLASMA_CHECK(entry != NULL, "To delete an object it must have been sealed.");
+  HASH_DELETE(handle, sealed_objects, entry);
+  dlfree(entry->pointer);
+  free(entry); /* NOTE(review): assumes create_object malloc'd the entry. */
+}
+
 void process_event(int conn, plasma_request* req) {
   switch (req->type) {
   case PLASMA_CREATE:
@@ -184,9 +220,15 @@ void process_event(int conn, plasma_request* req) {
   case PLASMA_GET:
     get_object(conn, req);
     break;
+  case PLASMA_CONTAINS:
+    check_if_object_present(conn, req);
+    break;
   case PLASMA_SEAL:
     seal_object(conn, req);
     break;
+  case PLASMA_DELETE:
+    delete_object(conn, req);
+    break;
   default:
     LOG_ERR("invalid request %d", req->type);
     exit(-1);
diff --git a/test/test.py b/test/test.py
index aae9f2b9e..fcf0be437 100644
--- a/test/test.py
+++ b/test/test.py
@@ -90,6 +90,61 @@ class TestPlasmaClient(unittest.TestCase):
       for i in range(len(metadata)):
         self.assertEqual(metadata[i], metadata_buffer[i])
 
+  def test_contains(self):
+    fake_object_ids = [random_object_id() for _ in range(100)]
+    real_object_ids = [random_object_id() for _ in range(100)]
+    for object_id in real_object_ids:
+      self.assertFalse(self.plasma_client.contains(object_id))
+      memory_buffer = self.plasma_client.create(object_id, 100)
+      self.plasma_client.seal(object_id)
+      self.assertTrue(self.plasma_client.contains(object_id))
+    for object_id in fake_object_ids:
+      self.assertFalse(self.plasma_client.contains(object_id))
+    for object_id in real_object_ids:
+      self.assertTrue(self.plasma_client.contains(object_id))
+
+  def test_individual_delete(self):
+    length = 100
+    # Create an object id string.
+    object_id = random_object_id()
+    # Create a random metadata string.
+    metadata = generate_metadata(100)
+    # Create a new buffer and write to it.
+    memory_buffer = self.plasma_client.create(object_id, length, metadata)
+    for i in range(length):
+      memory_buffer[i] = chr(i % 256)
+    # Seal the object.
+    self.plasma_client.seal(object_id)
+    # Check that the object is present.
+    self.assertTrue(self.plasma_client.contains(object_id))
+    # Delete the object.
+    self.plasma_client.delete(object_id)
+    # Make sure the object is no longer present.
+    self.assertFalse(self.plasma_client.contains(object_id))
+
+  def test_delete(self):
+    # Create some objects.
+    object_ids = [random_object_id() for _ in range(100)]
+    for object_id in object_ids:
+      length = 100
+      # Create a random metadata string.
+      metadata = generate_metadata(100)
+      # Create a new buffer and write to it.
+      memory_buffer = self.plasma_client.create(object_id, length, metadata)
+      for i in range(length):
+        memory_buffer[i] = chr(i % 256)
+      # Seal the object.
+      self.plasma_client.seal(object_id)
+      # Check that the object is present.
+      self.assertTrue(self.plasma_client.contains(object_id))
+
+    # Delete the objects and make sure they are no longer present.
+    for object_id in object_ids:
+      # Delete the object.
+      self.plasma_client.delete(object_id)
+      # Make sure the object is no longer present.
+      self.assertFalse(self.plasma_client.contains(object_id))
+
   def test_illegal_functionality(self):
     # Create an object id string.
     object_id = random_object_id()

From e1b8711a017d5823375f7c973150446abcc92d19 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Fri, 23 Sep 2016 17:10:15 -0700
Subject: [PATCH 40/91] Redis logging (#17)

* Redis logging

* Rearrange logging interfaces

* Fix test case

* Changes to logging interface and test case for logging

* Fixes

* Fix memory leaks

* Add interface method to destroy logger

* is_local -> is_direct

* Merge fix
---
 Makefile           |  4 +--
 io.c               | 15 ++++++++
 io.h               |  1 +
 logging.c          | 78 ++++++++++++++++++++++++++++++++++++++++
 logging.h          | 39 ++++++++++++++++++++
 state/redis.c      | 20 -----------
 state/redis.h      |  5 ++-
 test/redis_tests.c | 89 ++++++++++++++++++++++++++++++++++++++++------
 8 files changed, 217 insertions(+), 34 deletions(-)
 create mode 100644 logging.c
 create mode 100644 logging.h

diff --git a/Makefile b/Makefile
index 36793877f..aa5ee292e 100644
--- a/Makefile
+++ b/Makefile
@@ -19,8 +19,8 @@ $(BUILD)/io_tests: test/io_tests.c $(BUILD)/libcommon.a
 $(BUILD)/task_tests: test/task_tests.c $(BUILD)/libcommon.a
 	$(CC) -o $@ $^ $(CFLAGS)
 
-$(BUILD)/redis_tests: hiredis test/redis_tests.c $(BUILD)/libcommon.a
-	$(CC) -o $@ test/redis_tests.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
+$(BUILD)/redis_tests: hiredis test/redis_tests.c $(BUILD)/libcommon.a logging.h
+	$(CC) -o $@ test/redis_tests.c logging.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
 
 clean:
 	rm -f *.o state/*.o test/*.o
diff --git a/io.c b/io.c
index 99295512b..d4a89a4a4 100644
--- a/io.c
+++ b/io.c
@@ -7,6 +7,8 @@
 #include <string.h>
 #include <stdio.h>
 #include <inttypes.h>
+#include <stdarg.h>
+#include <utstring.h>
 
 #include "common.h"
 
@@ -139,3 +141,16 @@ char *read_string(int fd) {
   read_bytes(fd, &bytes, &length);
   return (char *) bytes;
 }
+
+void write_formatted_string(int socket_fd, const char *format, ...) {
+  UT_string *cmd;
+  va_list ap;
+
+  utstring_new(cmd);
+  va_start(ap, format);
+  utstring_printf_va(cmd, format, ap);
+  va_end(ap);
+
+  write_string(socket_fd, utstring_body(cmd));
+  utstring_free(cmd);
+}
diff --git a/io.h b/io.h
index c6dd3bb30..e6f227c98 100644
--- a/io.h
+++ b/io.h
@@ -16,6 +16,7 @@ void write_bytes(int fd, uint8_t *bytes, int64_t length);
 void read_bytes(int fd, uint8_t **bytes, int64_t *length);
 
 void write_string(int fd, char *message);
+void write_formatted_string(int fd, const char *format, ...);
 char *read_string(int fd);
 
 #endif
diff --git a/logging.c b/logging.c
new file mode 100644
index 000000000..38abc7bf9
--- /dev/null
+++ b/logging.c
@@ -0,0 +1,78 @@
+#include "logging.h"
+
+#include <hiredis/hiredis.h>
+#include <utstring.h>
+
+#include "state/redis.h"
+#include "io.h"
+
+static const char *log_levels[5] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"};
+static const char *log_fmt =
+    "HMSET log:%s:%s:%s log_level %s event_type %s message %s timestamp %s";
+
+struct ray_logger_impl {
+  /* String that identifies this client type. Borrowed from the caller. */
+  const char *client_type;
+  /* Suppress all log messages below this level. */
+  int log_level;
+  /* Whether or not we have a direct connection to Redis. */
+  int is_direct;
+  /* Either a db_conn or a socket to a process with a db_conn,
+   * depending on the is_direct flag. */
+  void *conn;
+};
+
+ray_logger *init_ray_logger(const char *client_type,
+                            int log_level,
+                            int is_direct,
+                            void *conn) {
+  ray_logger *logger = malloc(sizeof(ray_logger));
+  logger->client_type = client_type;
+  logger->log_level = log_level;
+  logger->is_direct = is_direct;
+  logger->conn = conn;
+  return logger;
+}
+
+void free_ray_logger(ray_logger *logger) {
+  free(logger);
+}
+
+void ray_log(ray_logger *logger,
+             int log_level,
+             const char *event_type,
+             const char *message) {
+  if (log_level < logger->log_level) {
+    return;
+  }
+  if (log_level < RAY_DEBUG || log_level > RAY_FATAL) {
+    return;
+  }
+  struct timeval tv;
+  UT_string *timestamp;
+  utstring_new(timestamp);
+  gettimeofday(&tv, NULL);
+  /* Zero-pad the microseconds so that e.g. 5 us reads ".000005", not ".5". */
+  utstring_printf(timestamp, "%ld.%06ld", (long) tv.tv_sec, (long) tv.tv_usec);
+  UT_string *origin_id;
+  utstring_new(origin_id);
+  if (logger->is_direct) {
+    db_conn *db = (db_conn *) logger->conn;
+    utstring_printf(origin_id, "%ld:%s", db->client_id, "");
+    redisAsyncCommand(db->context, NULL, NULL, log_fmt,
+                      utstring_body(timestamp), logger->client_type,
+                      utstring_body(origin_id), log_levels[log_level],
+                      event_type, message, utstring_body(timestamp));
+  } else {
+    /* If we don't own a Redis connection, we leave our client
+     * ID to be filled in by someone else. */
+    utstring_printf(origin_id, "%s:%s", "%ld", "%ld");
+    int *socket_fd = (int *) logger->conn;
+    write_formatted_string(*socket_fd, log_fmt, utstring_body(timestamp),
+                           logger->client_type, utstring_body(origin_id),
+                           log_levels[log_level], event_type, message,
+                           utstring_body(timestamp));
+  }
+  utstring_free(origin_id);
+  utstring_free(timestamp);
+}
diff --git a/logging.h b/logging.h
new file mode 100644
index 000000000..4ef7c8fcb
--- /dev/null
+++ b/logging.h
@@ -0,0 +1,39 @@
+#ifndef LOGGING_H
+#define LOGGING_H
+
+#define RAY_VERBOSE -1
+#define RAY_DEBUG 0
+#define RAY_INFO 1
+#define RAY_WARNING 2
+#define RAY_ERROR 3
+#define RAY_FATAL 4
+
+/* Entity types. */
+#define RAY_FUNCTION "FUNCTION"
+#define RAY_OBJECT "OBJECT"
+#define RAY_TASK "TASK"
+
+typedef struct ray_logger_impl ray_logger;
+
+/* Initialize a Ray logger for the given client type and logging level. If the
+ * is_direct flag is set, the logger will treat the given connection as a
+ * direct connection to the log. Otherwise, it will treat it as a socket to
+ * another process with a connection to the log.
+ * NOTE: User is responsible for freeing the returned logger. */
+ray_logger *init_ray_logger(const char *client_type,
+                            int log_level,
+                            int is_direct,
+                            void *conn);
+
+/* Free the logger. This does not free the connection to the log. */
+void free_ray_logger(ray_logger *logger);
+
+/* Log an event at the given log level with the given event_type.
+ * NOTE: message cannot contain spaces! JSON format is recommended.
+ * TODO: Support spaces in messages. */
+void ray_log(ray_logger *logger,
+             int log_level,
+             const char *event_type,
+             const char *message);
+
+#endif
diff --git a/state/redis.c b/state/redis.c
index c2e01df5e..ac56206e8 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -232,23 +232,3 @@ void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task) {
   }
   utstring_free(command);
 }
-
-void send_redis_command(int socket_fd, const char *format, ...) {
-  char *cmd;
-  va_list ap;
-  int len;
-
-  va_start(ap, format);
-  len = redisvFormatCommand(&cmd, format, ap);
-  va_end(ap);
-  if (len == -1) {
-    LOG_ERR("Out of memory while formatting Redis command.");
-    return;
-  } else if (len == -2) {
-    LOG_ERR("Invalid Redis format string.");
-    return;
-  }
-
-  write_string(socket_fd, cmd);
-  free(cmd);
-}
diff --git a/state/redis.h b/state/redis.h
index 132724e67..09d66bea6 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -1,3 +1,6 @@
+#ifndef REDIS_H
+#define REDIS_H
+
 #include "db.h"
 #include "object_table.h"
 
@@ -47,4 +50,4 @@ void object_table_lookup_callback(redisAsyncContext *c,
                                   void *r,
                                   void *privdata);
 
-void send_redis_command(int socket_fd, const char *format, ...);
+#endif
diff --git a/test/redis_tests.c b/test/redis_tests.c
index 2f0f09110..d8df538e8 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -7,6 +7,7 @@
 #include "state/db.h"
 #include "state/redis.h"
 #include "io.h"
+#include "logging.h"
 
 SUITE(redis_tests);
 
@@ -31,6 +32,16 @@ void async_redis_socket_test_callback(redisAsyncContext *ac,
   lookup_successful = 1;
 }
 
+void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) {
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  redisReply *reply = redisCommand(context, "KEYS %s", "log:*");
+  redisFree(context);
+  assert(reply != NULL);
+  assert(reply->elements > 0);
+  freeReplyObject(reply);
+  lookup_successful = 1;
+}
+
 TEST redis_socket_test(void) {
   const char *socket_pathname = "redis-test-socket";
   redisContext *context = redisConnect("127.0.0.1", 6379);
@@ -40,8 +51,7 @@ TEST redis_socket_test(void) {
 
   int client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
-
-  send_redis_command(client_fd, test_set_format, test_key, test_value);
+  write_formatted_string(client_fd, test_set_format, test_key, test_value);
 
   int server_fd = accept_client(socket_fd);
   char *cmd = read_string(server_fd);
@@ -50,11 +60,10 @@ TEST redis_socket_test(void) {
   close(socket_fd);
   unlink(socket_pathname);
 
-  redisAppendFormattedCommand(context, cmd, strlen(cmd));
-  redisReply *tmp;
-  redisGetReply(context, &tmp);
-  freeReplyObject(tmp);
-  redisReply *reply = redisCommand(context, "GET %s", test_key);
+  redisReply *reply;
+  reply = redisCommand(context, cmd, 0, 0);
+  freeReplyObject(reply);
+  reply = redisCommand(context, "GET %s", test_key);
   ASSERT(reply != NULL);
   ASSERT_STR_EQ(reply->str, test_value);
   freeReplyObject(reply);
@@ -82,7 +91,7 @@ TEST async_redis_socket_test(void) {
   /* Send a command to the Redis process. */
   client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
-  send_redis_command(client_fd, test_set_format, test_key, test_value);
+  write_formatted_string(client_fd, test_set_format, test_key, test_value);
 
   while (!lookup_successful) {
     int num_ready = event_loop_poll(&loop, -1);
@@ -104,9 +113,8 @@ TEST async_redis_socket_test(void) {
         event_loop_attach(&loop, 1, NULL, server_fd, POLLIN);
       } else {
         char *cmd = read_string(waiting->fd);
-        redisAsyncFormattedCommand(conn.context,
-                                   async_redis_socket_test_callback, NULL, cmd,
-                                   strlen(cmd));
+        redisAsyncCommand(conn.context, async_redis_socket_test_callback, NULL,
+                          cmd, conn.client_id, 0);
         free(cmd);
       }
     }
@@ -121,11 +129,70 @@ TEST async_redis_socket_test(void) {
   PASS();
 }
 
+TEST logging_test(void) {
+  int socket_fd, server_fd, client_fd;
+  event_loop loop;
+  event_loop_init(&loop);
+  /* Start IPC channel. */
+  const char *socket_pathname = "logging-test-socket";
+  socket_fd = bind_ipc_sock(socket_pathname);
+  ASSERT(socket_fd >= 0);
+  int64_t ipc_index = event_loop_attach(&loop, 1, NULL, socket_fd, POLLIN);
+
+  /* Start connection to Redis. */
+  db_conn conn;
+  db_connect("127.0.0.1", 6379, "", "", 0, &conn);
+  int64_t db_index = db_attach(&conn, &loop, 0);
+
+  /* Send a command to the Redis process. */
+  client_fd = connect_ipc_sock(socket_pathname);
+  ASSERT(client_fd >= 0);
+  ray_logger *logger = init_ray_logger("worker", RAY_INFO, 0, &client_fd);
+  ray_log(logger, RAY_INFO, "TEST", "Message");
+
+  while (!lookup_successful) {
+    int num_ready = event_loop_poll(&loop, -1);
+    if (num_ready < 0) {
+      exit(-1);
+    }
+    for (int i = 0; i < event_loop_size(&loop); ++i) {
+      struct pollfd *waiting = event_loop_get(&loop, i);
+      if (waiting->revents == 0)
+        continue;
+      if (i == db_index) {
+        db_event(&conn);
+      } else if (i == ipc_index) {
+        /* For some reason, this check is necessary for Travis
+         * to pass these tests. */
+        ASSERT(waiting->revents & POLLIN);
+        server_fd = accept_client(socket_fd);
+        ASSERT(server_fd >= 0);
+        event_loop_attach(&loop, 1, NULL, server_fd, POLLIN);
+      } else {
+        char *cmd = read_string(waiting->fd);
+        redisAsyncCommand(conn.context, logging_test_callback, NULL, cmd,
+                          conn.client_id, 0);
+        free(cmd);
+      }
+    }
+  }
+  free_ray_logger(logger);
+  db_disconnect(&conn);
+  event_loop_free(&loop);
+  close(server_fd);
+  close(client_fd);
+  close(socket_fd);
+  unlink(socket_pathname);
+  lookup_successful = 0;
+  PASS();
+}
+
 SUITE(redis_tests) {
   redisContext *context = redisConnect("127.0.0.1", 6379);
   freeReplyObject(redisCommand(context, "FLUSHALL"));
   RUN_REDIS_TEST(context, redis_socket_test);
   RUN_REDIS_TEST(context, async_redis_socket_test);
+  RUN_REDIS_TEST(context, logging_test);
   redisFree(context);
 }
 

From 79079926096df869efb9e4d4fe782d0d46fea1f1 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Fri, 23 Sep 2016 22:53:58 -0700
Subject: [PATCH 41/91] [WIP] Event loop refactoring (#19)

* task queue tests and extensions

* event loop refactor

* fix formatting
---
 Makefile                  |   6 +-
 common.h                  |   2 +
 event_loop.c              | 120 ++++------
 event_loop.h              |  93 +++++---
 state/db.h                |   9 +-
 state/redis.c             |  67 +-----
 state/redis.h             |   2 +-
 test/db_tests.c           |  99 ++------
 test/io_tests.c           |   1 +
 test/redis_tests.c        | 202 +++++++++--------
 thirdparty/ae/ae.c        | 465 ++++++++++++++++++++++++++++++++++++++
 thirdparty/ae/ae.h        | 123 ++++++++++
 thirdparty/ae/ae_epoll.c  | 135 +++++++++++
 thirdparty/ae/ae_evport.c | 320 ++++++++++++++++++++++++++
 thirdparty/ae/ae_kqueue.c | 138 +++++++++++
 thirdparty/ae/ae_select.c | 106 +++++++++
 thirdparty/ae/config.h    |  54 +++++
 thirdparty/ae/zmalloc.h   |  16 ++
 18 files changed, 1610 insertions(+), 348 deletions(-)
 create mode 100644 thirdparty/ae/ae.c
 create mode 100644 thirdparty/ae/ae.h
 create mode 100644 thirdparty/ae/ae_epoll.c
 create mode 100644 thirdparty/ae/ae_evport.c
 create mode 100644 thirdparty/ae/ae_kqueue.c
 create mode 100644 thirdparty/ae/ae_select.c
 create mode 100644 thirdparty/ae/config.h
 create mode 100644 thirdparty/ae/zmalloc.h

diff --git a/Makefile b/Makefile
index aa5ee292e..f71273119 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,10 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae
 BUILD = build
 
 all: $(BUILD)/libcommon.a
 
-$(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o
+$(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o thirdparty/ae/ae.o
 	ar rcs $@ $^
 
 $(BUILD)/common_tests: test/common_tests.c $(BUILD)/libcommon.a
@@ -23,7 +23,7 @@ $(BUILD)/redis_tests: hiredis test/redis_tests.c $(BUILD)/libcommon.a logging.h
 	$(CC) -o $@ test/redis_tests.c logging.c $(BUILD)/libcommon.a thirdparty/hiredis/libhiredis.a $(CFLAGS)
 
 clean:
-	rm -f *.o state/*.o test/*.o
+	rm -f *.o state/*.o test/*.o thirdparty/ae/*.o
 	rm -rf $(BUILD)/*
 
 redis:
diff --git a/common.h b/common.h
index 7e4b73ebe..5444739bc 100644
--- a/common.h
+++ b/common.h
@@ -1,6 +1,8 @@
 #ifndef COMMON_H
 #define COMMON_H
 
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 
diff --git a/event_loop.c b/event_loop.c
index d89710ba5..6928705e7 100644
--- a/event_loop.c
+++ b/event_loop.c
@@ -1,98 +1,62 @@
 #include "event_loop.h"
 
-#include <assert.h>
+#include "common.h"
+#include <errno.h>
 
-UT_icd item_icd = {sizeof(event_loop_item), NULL, NULL, NULL};
-UT_icd poll_icd = {sizeof(struct pollfd), NULL, NULL, NULL};
+#define INITIAL_EVENT_LOOP_SIZE 1024
 
-/* Initializes the event loop.
- * This function needs to be called before any other event loop function. */
-void event_loop_init(event_loop *loop) {
-  utarray_new(loop->items, &item_icd);
-  utarray_new(loop->waiting, &poll_icd);
+event_loop *event_loop_create() {
+  return aeCreateEventLoop(INITIAL_EVENT_LOOP_SIZE);
 }
 
-/* Free the space associated to the event loop.
- * Does not free the event_loop datastructure itself. */
-void event_loop_free(event_loop *loop) {
-  utarray_free(loop->items);
-  utarray_free(loop->waiting);
-}
-
-/* Add a new file descriptor fd to the event loop.
- * This function sets a user defined type and id for the file descriptor
- * which can be queried using event_loop_type and event_loop_id. The parameter
- * events is the same as in http://linux.die.net/man/2/poll.
- * Returns the index of the item in the event loop. */
-int64_t event_loop_attach(event_loop *loop,
-                          int type,
-                          void *data,
-                          int fd,
-                          int events) {
-  assert(utarray_len(loop->items) == utarray_len(loop->waiting));
-  int64_t index = utarray_len(loop->items);
-  event_loop_item item = {.type = type, .data = data};
-  utarray_push_back(loop->items, &item);
-  struct pollfd waiting = {.fd = fd, .events = events};
-  utarray_push_back(loop->waiting, &waiting);
-  return index;
-}
-
-/* Detach a file descriptor from the event loop.
- * This invalidates all other indices into the event loop items, but leaves
- * the ids of the event loop items valid. */
-void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
-  struct pollfd *waiting_item =
-      (struct pollfd *) utarray_eltptr(loop->waiting, index);
-  struct pollfd *waiting_back = (struct pollfd *) utarray_back(loop->waiting);
-  if (shall_close) {
-    close(waiting_item->fd);
+void event_loop_destroy(event_loop *loop) {
+  /* Clean up timer events. This is to make valgrind happy. */
+  aeTimeEvent *te = loop->timeEventHead;
+  while (te) {
+    aeTimeEvent *next = te->next;
+    free(te);
+    te = next;
   }
-  *waiting_item = *waiting_back;
-  utarray_pop_back(loop->waiting);
-
-  event_loop_item *items_item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  event_loop_item *items_back = (event_loop_item *) utarray_back(loop->items);
-  *items_item = *items_back;
-  utarray_pop_back(loop->items);
+  aeDeleteEventLoop(loop);
 }
 
-/* Poll the file descriptors associated to this event loop.
- * See http://linux.die.net/man/2/poll. The timeout is in milliseconds. */
-int event_loop_poll(event_loop *loop, int timeout) {
-  return poll((struct pollfd *) utarray_front(loop->waiting),
-              utarray_len(loop->waiting), timeout);
+void event_loop_add_file(event_loop *loop,
+                         int fd,
+                         int events,
+                         event_loop_file_handler handler,
+                         void *context) {
+  /* Try to add the file descriptor. */
+  int err = aeCreateFileEvent(loop, fd, events, handler, context);
+  /* On ERANGE, grow the loop by 3/2, or to fd + 1 if that is larger. */
+  if (err == AE_ERR && errno == ERANGE) {
+    int grown = 3 * aeGetSetSize(loop) / 2;
+    err = aeResizeSetSize(loop, grown > fd ? grown : fd + 1);
+    CHECK(err == AE_OK);
+    err = aeCreateFileEvent(loop, fd, events, handler, context);
+  }
+  CHECK(err == AE_OK); /* Whichever path was taken, the add must succeed. */
+}
 
-/* Get the total number of file descriptors participating in the event loop. */
-int64_t event_loop_size(event_loop *loop) {
-  return utarray_len(loop->waiting);
+void event_loop_remove_file(event_loop *loop, int fd) {
+  aeDeleteFileEvent(loop, fd, EVENT_LOOP_READ | EVENT_LOOP_WRITE);
 }
 
-/* Get the pollfd structure associated to a file descriptor participating in the
- * event loop. */
-struct pollfd *event_loop_get(event_loop *loop, int64_t index) {
-  return (struct pollfd *) utarray_eltptr(loop->waiting, index);
+int64_t event_loop_add_timer(event_loop *loop,
+                             int64_t milliseconds,
+                             event_loop_timer_handler handler,
+                             void *context) {
+  return aeCreateTimeEvent(loop, milliseconds, handler, context, NULL);
 }
 
-/* Set the data connection information for participant in the event loop. */
-void event_loop_set_data(event_loop *loop, int64_t index, void *data) {
-  event_loop_item *item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  item->data = data;
+void event_loop_remove_timer(event_loop *loop, int64_t id) {
+  int err = aeDeleteTimeEvent(loop, id);
+  CHECK(err == AE_OK); /* timer id found? */
 }
 
-/* Get the data connection information for participant in the event loop. */
-void *event_loop_get_data(event_loop *loop, int64_t index) {
-  event_loop_item *item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  return item->data;
+void event_loop_run(event_loop *loop) {
+  aeMain(loop);
 }
 
-/* Return the type of connection. */
-int event_loop_type(event_loop *loop, int64_t index) {
-  event_loop_item *item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  return item->type;
+void event_loop_stop(event_loop *loop) {
+  aeStop(loop);
 }
diff --git a/event_loop.h b/event_loop.h
index 840abfd8c..bb6afdb93 100644
--- a/event_loop.h
+++ b/event_loop.h
@@ -1,39 +1,74 @@
 #ifndef EVENT_LOOP_H
 #define EVENT_LOOP_H
 
-#include <poll.h>
 #include <stdint.h>
+#include "ae/ae.h"
 
-#include "utarray.h"
+typedef aeEventLoop event_loop;
 
-typedef struct {
-  /* The type of connection (e.g. redis, client, manager, data transfer). */
-  int type;
-  /* Data associated with the connection (managed by the user) */
-  void *data;
-} event_loop_item;
+/* File descriptor is readable. */
+#define EVENT_LOOP_READ AE_READABLE
 
-typedef struct {
-  /* Array of event_loop_items that hold information for connections. */
-  UT_array *items;
-  /* Array of file descriptors that are waiting, corresponding to items. */
-  UT_array *waiting;
-} event_loop;
+/* File descriptor is writable. */
+#define EVENT_LOOP_WRITE AE_WRITABLE
 
-/* Event loop functions. */
-void event_loop_init(event_loop *loop);
-void event_loop_free(event_loop *loop);
-int64_t event_loop_attach(event_loop *loop,
-                          int type,
-                          void *data,
-                          int fd,
-                          int events);
-void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
-int event_loop_poll(event_loop *loop, int timeout);
-int64_t event_loop_size(event_loop *loop);
-struct pollfd *event_loop_get(event_loop *loop, int64_t index);
-void event_loop_set_data(event_loop *loop, int64_t index, void *data);
-void *event_loop_get_data(event_loop *loop, int64_t index);
-int event_loop_type(event_loop *loop, int64_t index);
+/* Signature of the handler that will be called when there is a new event
+ * on the file descriptor that this handler has been registered for. The
+ * context is the one that was passed into add_file by the user. The
+ * events parameter indicates which event is available on the file;
+ * it can be EVENT_LOOP_READ or EVENT_LOOP_WRITE. */
+typedef void (*event_loop_file_handler)(event_loop *loop,
+                                        int fd,
+                                        void *context,
+                                        int events);
+
+/* This handler will be called when a timer times out, with the id of the
+ * timer and the context given at registration. It returns the number of
+ * milliseconds until the timer fires again, or AE_NOMORE to remove it. */
+typedef int64_t (*event_loop_timer_handler)(event_loop *loop,
+                                            int64_t id,
+                                            void *context);
+
+/* Create and return a new event loop. */
+event_loop *event_loop_create();
+
+/* Deallocate space associated with the event loop that was created
+ * with the "create" function. */
+void event_loop_destroy(event_loop *loop);
+
+/* Register a handler that will be called any time a new event happens on
+ * a file descriptor. Can specify a context that will be passed as an
+ * argument to the handler. Currently there can only be one handler per file.
+ * The events parameter specifies which events we listen to: EVENT_LOOP_READ
+ * or EVENT_LOOP_WRITE. */
+void event_loop_add_file(event_loop *loop,
+                         int fd,
+                         int events,
+                         event_loop_file_handler handler,
+                         void *context);
+
+/* Remove a registered file event handler from the event loop. */
+void event_loop_remove_file(event_loop *loop, int fd);
+
+/* Register a handler that will be called after a time slice of
+ * "milliseconds" milliseconds. Can specify a context that will be passed
+ * as an argument to the handler. Return the id of the time event. */
+int64_t event_loop_add_timer(event_loop *loop,
+                             int64_t milliseconds,
+                             event_loop_timer_handler handler,
+                             void *context);
+
+/* Reset the timer timeout to a given number of milliseconds.
+ * NOTE: This is not implemented yet. */
+void event_loop_reset_timer(event_loop *loop, int64_t id, int64_t milliseconds);
+
+/* Remove a registered time event handler from the event loop. */
+void event_loop_remove_timer(event_loop *loop, int64_t id);
+
+/* Run the event loop. */
+void event_loop_run(event_loop *loop);
+
+/* Stop the event loop. */
+void event_loop_stop(event_loop *loop);
 
 #endif
diff --git a/state/db.h b/state/db.h
index b586f9acf..3fcf658fd 100644
--- a/state/db.h
+++ b/state/db.h
@@ -15,13 +15,8 @@ void db_connect(const char *db_address,
                 int client_port,
                 db_conn *db);
 
-/* Attach global system store onnection to event loop. Returns the index of the
- * connection in the loop. */
-int64_t db_attach(db_conn *db, event_loop *loop, int connection_type);
-
-/* This function will be called by the user if there is a new event in the
- * event loop associated with the global system store connection. */
-void db_event(db_conn *db);
+/* Attach global system store connection to event loop. */
+void db_attach(db_conn *db, event_loop *loop);
 
 /* Disconnect from the global system store. */
 void db_disconnect(db_conn *db);
diff --git a/state/redis.c b/state/redis.c
index ac56206e8..9c4b81f03 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -2,6 +2,8 @@
 
 #include <assert.h>
 
+#include <stdlib.h>
+#include "hiredis/adapters/ae.h"
 #include "utstring.h"
 
 #include "common.h"
@@ -12,38 +14,6 @@
 #include "redis.h"
 #include "io.h"
 
-static void poll_add_read(void *privdata) {
-  db_conn *conn = (db_conn *) privdata;
-  if (!conn->reading) {
-    conn->reading = 1;
-    event_loop_get(conn->loop, conn->db_index)->events |= POLLIN;
-  }
-}
-
-static void poll_del_read(void *privdata) {
-  db_conn *conn = (db_conn *) privdata;
-  if (conn->reading) {
-    conn->reading = 0;
-    event_loop_get(conn->loop, conn->db_index)->events &= ~POLLIN;
-  }
-}
-
-static void poll_add_write(void *privdata) {
-  db_conn *conn = (db_conn *) privdata;
-  if (!conn->writing) {
-    conn->writing = 1;
-    event_loop_get(conn->loop, conn->db_index)->events |= POLLOUT;
-  }
-}
-
-static void poll_del_write(void *privdata) {
-  db_conn *conn = (db_conn *) privdata;
-  if (conn->writing) {
-    conn->writing = 0;
-    event_loop_get(conn->loop, conn->db_index)->events &= ~POLLOUT;
-  }
-}
-
 #define LOG_REDIS_ERR(context, M, ...)                                        \
   fprintf(stderr, "[ERROR] (%s:%d: message: %s) " M "\n", __FILE__, __LINE__, \
           context->errstr, ##__VA_ARGS__)
@@ -119,37 +89,8 @@ void db_disconnect(db_conn *db) {
   free(db->client_type);
 }
 
-void db_event(db_conn *db) {
-  if (db->reading) {
-    redisAsyncHandleRead(db->context);
-  }
-  if (db->writing) {
-    redisAsyncHandleWrite(db->context);
-  }
-}
-
-int64_t db_attach(db_conn *db, event_loop *loop, int connection_type) {
-  db->loop = loop;
-
-  redisAsyncContext *ac = db->context;
-  redisContext *c = &(ac->c);
-
-  if (ac->ev.data != NULL) {
-    return REDIS_ERR;
-  }
-
-  ac->ev.addRead = poll_add_read;
-  ac->ev.delRead = poll_del_read;
-  ac->ev.addWrite = poll_add_write;
-  ac->ev.delWrite = poll_del_write;
-  // TODO(pcm): Implement cleanup function
-
-  ac->ev.data = db;
-
-  int64_t index =
-      event_loop_attach(loop, connection_type, NULL, c->fd, POLLIN | POLLOUT);
-  db->db_index = index;
-  return index;
+void db_attach(db_conn *db, event_loop *loop) {
+  redisAeAttach(loop, db->context);
 }
 
 void object_table_add(db_conn *db, unique_id object_id) {
diff --git a/state/redis.h b/state/redis.h
index 09d66bea6..c579e7065 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -12,7 +12,7 @@ typedef struct {
   /* Unique ID for this service. */
   int service_id;
   /* IP address and port of this service. */
-  const char *addr;
+  char *addr;
   /* Handle for the uthash table. */
   UT_hash_handle hh;
 } service_cache_entry;
diff --git a/test/db_tests.c b/test/db_tests.c
index 5bdbf25df..d9dfcb563 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -6,12 +6,12 @@
 #include "test/example_task.h"
 #include "state/db.h"
 #include "state/object_table.h"
+#include "state/task_queue.h"
 #include "state/redis.h"
 #include "task.h"
 
 SUITE(db_tests);
 
-int lookup_successful = 0;
 const char *manager_addr = "127.0.0.1";
 int manager_port1 = 12345;
 int manager_port2 = 12346;
@@ -20,20 +20,11 @@ char received_port1[6] = {0};
 char received_addr2[16] = {0};
 char received_port2[6] = {0};
 
-/* This is for synchronizing to make sure both entries have been written. */
-void sync_test_callback(object_id object_id,
-                        int manager_count,
-                        const char *manager_vector[]) {
-  lookup_successful = 1;
-  free(manager_vector);
-}
-
-/* This performs the actual test. */
+/* Test if entries have been written to the database. */
 void test_callback(object_id object_id,
                    int manager_count,
                    const char *manager_vector[]) {
   CHECK(manager_count == 2);
-  lookup_successful = 1;
   if (!manager_vector[0] ||
       sscanf(manager_vector[0], "%15[0-9.]:%5[0-9]", received_addr1,
              received_port1) != 2) {
@@ -47,57 +38,29 @@ void test_callback(object_id object_id,
   free(manager_vector);
 }
 
+int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
+  event_loop_stop(loop);
+  return -1;
+}
+
 TEST object_table_lookup_test(void) {
-  event_loop loop;
-  event_loop_init(&loop);
+  event_loop *loop = event_loop_create();
   db_conn conn1;
   db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port1,
              &conn1);
   db_conn conn2;
   db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port2,
              &conn2);
-  int64_t index1 = db_attach(&conn1, &loop, 0);
-  int64_t index2 = db_attach(&conn2, &loop, 1);
+  db_attach(&conn1, loop);
+  db_attach(&conn2, loop);
   unique_id id = globally_unique_id();
   object_table_add(&conn1, id);
   object_table_add(&conn2, id);
-  object_table_lookup(&conn1, id, sync_test_callback);
-  while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop, -1);
-    if (num_ready < 0) {
-      exit(-1);
-    }
-    for (int i = 0; i < event_loop_size(&loop); ++i) {
-      struct pollfd *waiting = event_loop_get(&loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (i == index1) {
-        db_event(&conn1);
-      }
-      if (i == index2) {
-        db_event(&conn2);
-      }
-    }
-  }
-  lookup_successful = 0;
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
   object_table_lookup(&conn1, id, test_callback);
-  while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop, -1);
-    if (num_ready < 0) {
-      exit(-1);
-    }
-    for (int i = 0; i < event_loop_size(&loop); ++i) {
-      struct pollfd *waiting = event_loop_get(&loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (i == index1) {
-        db_event(&conn1);
-      }
-      if (i == index2) {
-        db_event(&conn2);
-      }
-    }
-  }
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
   int port1 = atoi(received_port1);
   int port2 = atoi(received_port2);
   ASSERT_STR_EQ(&received_addr1[0], manager_addr);
@@ -107,50 +70,32 @@ TEST object_table_lookup_test(void) {
   db_disconnect(&conn1);
   db_disconnect(&conn2);
 
-  event_loop_free(&loop);
-
-  lookup_successful = 0;
+  event_loop_destroy(loop);
   PASS();
 }
 
 TEST task_queue_test(void) {
-  event_loop loop;
-  event_loop_init(&loop);
+  event_loop *loop = event_loop_create();
   db_conn conn;
   db_connect("127.0.0.1", 6379, "local_scheduler", "", -1, &conn);
-  int64_t index = db_attach(&conn, &loop, 0);
+  db_attach(&conn, loop);
 
   task_spec *task = example_task();
   task_queue_submit_task(&conn, globally_unique_id(), task);
-  while (1) {
-    int num_ready = event_loop_poll(&loop, 100);
-    if (num_ready < 0) {
-      exit(-1);
-    }
-    if (num_ready == 0) {
-      break;
-    }
-    for (int i = 0; i < event_loop_size(&loop); ++i) {
-      struct pollfd *waiting = event_loop_get(&loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (i == index) {
-        db_event(&conn);
-      }
-    }
-  }
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
 
   free_task_spec(task);
   db_disconnect(&conn);
-  event_loop_free(&loop);
+  event_loop_destroy(loop);
   PASS();
 }
 
 SUITE(db_tests) {
   redisContext *context = redisConnect("127.0.0.1", 6379);
-  redisCommand(context, "FLUSHALL");
+  freeReplyObject(redisCommand(context, "FLUSHALL"));
   RUN_REDIS_TEST(context, object_table_lookup_test);
-  RUN_TEST(task_queue_test);
+  RUN_REDIS_TEST(context, task_queue_test);
   redisFree(context);
 }
 
diff --git a/test/io_tests.c b/test/io_tests.c
index 9216aa56e..1e8d20832 100644
--- a/test/io_tests.c
+++ b/test/io_tests.c
@@ -35,6 +35,7 @@ TEST ipc_socket_test(void) {
     uint8_t *bytes;
     read_bytes(client_fd, &bytes, &len);
     ASSERT(memcmp(test_bytes, bytes, len) == 0);
+    free(bytes);
     close(client_fd);
     close(socket_fd);
     unlink(socket_pathname);
diff --git a/test/redis_tests.c b/test/redis_tests.c
index d8df538e8..d527d0476 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -3,6 +3,8 @@
 #include <assert.h>
 #include <unistd.h>
 
+#include "utarray.h"
+
 #include "event_loop.h"
 #include "state/db.h"
 #include "state/redis.h"
@@ -11,35 +13,27 @@
 
 SUITE(redis_tests);
 
-int lookup_successful = 0;
 const char *test_set_format = "SET %s %s";
 const char *test_get_format = "GET %s";
 const char *test_key = "foo";
 const char *test_value = "bar";
+UT_array *connections = NULL;
+
+int async_redis_socket_test_callback_called = 0;
 
 void async_redis_socket_test_callback(redisAsyncContext *ac,
                                       void *r,
                                       void *privdata) {
+  async_redis_socket_test_callback_called = 1;
   redisContext *context = redisConnect("127.0.0.1", 6379);
   redisReply *reply = redisCommand(context, test_get_format, test_key);
   redisFree(context);
-  assert(reply != NULL);
+  CHECK(reply != NULL);
   if (strcmp(reply->str, test_value)) {
     freeReplyObject(reply);
-    assert(0);
+    CHECK(0);
   }
   freeReplyObject(reply);
-  lookup_successful = 1;
-}
-
-void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) {
-  redisContext *context = redisConnect("127.0.0.1", 6379);
-  redisReply *reply = redisCommand(context, "KEYS %s", "log:*");
-  redisFree(context);
-  assert(reply != NULL);
-  assert(reply->elements > 0);
-  freeReplyObject(reply);
-  lookup_successful = 1;
 }
 
 TEST redis_socket_test(void) {
@@ -73,117 +67,145 @@ TEST redis_socket_test(void) {
   PASS();
 }
 
+void redis_read_callback(event_loop *loop, int fd, void *context, int events) {
+  db_conn *conn = context;
+  char *cmd = read_string(fd);
+  redisAsyncCommand(conn->context, async_redis_socket_test_callback, NULL, cmd,
+                    conn->client_id, 0);
+  free(cmd);
+}
+
+void redis_accept_callback(event_loop *loop,
+                           int socket_fd,
+                           void *context,
+                           int events) {
+  int accept_fd = accept_client(socket_fd);
+  CHECK(accept_fd >= 0);
+  utarray_push_back(connections, &accept_fd);
+  event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, redis_read_callback,
+                      context);
+}
+
+int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
+  event_loop_stop(loop);
+  return -1;
+}
+
 TEST async_redis_socket_test(void) {
-  int socket_fd, server_fd, client_fd;
-  event_loop loop;
-  event_loop_init(&loop);
+  utarray_new(connections, &ut_int_icd);
+  event_loop *loop = event_loop_create();
+
   /* Start IPC channel. */
   const char *socket_pathname = "async-redis-test-socket";
-  socket_fd = bind_ipc_sock(socket_pathname);
+  int socket_fd = bind_ipc_sock(socket_pathname);
   ASSERT(socket_fd >= 0);
-  int64_t ipc_index = event_loop_attach(&loop, 1, NULL, socket_fd, POLLIN);
+  utarray_push_back(connections, &socket_fd);
 
   /* Start connection to Redis. */
   db_conn conn;
   db_connect("127.0.0.1", 6379, "", "", 0, &conn);
-  int64_t db_index = db_attach(&conn, &loop, 0);
+  db_attach(&conn, loop);
 
   /* Send a command to the Redis process. */
-  client_fd = connect_ipc_sock(socket_pathname);
+  int client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
+  utarray_push_back(connections, &client_fd);
   write_formatted_string(client_fd, test_set_format, test_key, test_value);
 
-  while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop, -1);
-    if (num_ready < 0) {
-      exit(-1);
-    }
-    for (int i = 0; i < event_loop_size(&loop); ++i) {
-      struct pollfd *waiting = event_loop_get(&loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (i == db_index) {
-        db_event(&conn);
-      } else if (i == ipc_index) {
-        /* For some reason, this check is necessary for Travis
-         * to pass these tests. */
-        ASSERT(waiting->revents & POLLIN);
-        server_fd = accept_client(socket_fd);
-        ASSERT(server_fd >= 0);
-        event_loop_attach(&loop, 1, NULL, server_fd, POLLIN);
-      } else {
-        char *cmd = read_string(waiting->fd);
-        redisAsyncCommand(conn.context, async_redis_socket_test_callback, NULL,
-                          cmd, conn.client_id, 0);
-        free(cmd);
-      }
-    }
-  }
+  event_loop_add_file(loop, client_fd, EVENT_LOOP_READ, redis_read_callback,
+                      &conn);
+  event_loop_add_file(loop, socket_fd, EVENT_LOOP_READ, redis_accept_callback,
+                      &conn);
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
+
+  CHECK(async_redis_socket_test_callback_called);
+
   db_disconnect(&conn);
-  event_loop_free(&loop);
-  close(server_fd);
-  close(client_fd);
-  close(socket_fd);
+  event_loop_destroy(loop);
+  for (int *p = (int *) utarray_front(connections); p != NULL;
+       p = (int *) utarray_next(connections, p)) {
+    close(*p);
+  }
   unlink(socket_pathname);
-  lookup_successful = 0;
+  utarray_free(connections);
   PASS();
 }
 
+int logging_test_callback_called = 0;
+
+void logging_test_callback(redisAsyncContext *ac, void *r, void *privdata) {
+  logging_test_callback_called = 1;
+  redisContext *context = redisConnect("127.0.0.1", 6379);
+  redisReply *reply = redisCommand(context, "KEYS %s", "log:*");
+  redisFree(context);
+  CHECK(reply != NULL);
+  CHECK(reply->elements > 0);
+  freeReplyObject(reply);
+}
+
+void logging_read_callback(event_loop *loop,
+                           int fd,
+                           void *context,
+                           int events) {
+  db_conn *conn = context;
+  char *cmd = read_string(fd);
+  redisAsyncCommand(conn->context, logging_test_callback, NULL, cmd,
+                    conn->client_id, 0);
+  free(cmd);
+}
+
+void logging_accept_callback(event_loop *loop,
+                             int socket_fd,
+                             void *context,
+                             int events) {
+  int accept_fd = accept_client(socket_fd);
+  CHECK(accept_fd >= 0);
+  utarray_push_back(connections, &accept_fd);
+  event_loop_add_file(loop, accept_fd, EVENT_LOOP_READ, logging_read_callback,
+                      context);
+}
+
 TEST logging_test(void) {
-  int socket_fd, server_fd, client_fd;
-  event_loop loop;
-  event_loop_init(&loop);
+  utarray_new(connections, &ut_int_icd);
+  event_loop *loop = event_loop_create();
+
   /* Start IPC channel. */
   const char *socket_pathname = "logging-test-socket";
-  socket_fd = bind_ipc_sock(socket_pathname);
+  int socket_fd = bind_ipc_sock(socket_pathname);
   ASSERT(socket_fd >= 0);
-  int64_t ipc_index = event_loop_attach(&loop, 1, NULL, socket_fd, POLLIN);
+  utarray_push_back(connections, &socket_fd);
 
   /* Start connection to Redis. */
   db_conn conn;
   db_connect("127.0.0.1", 6379, "", "", 0, &conn);
-  int64_t db_index = db_attach(&conn, &loop, 0);
+  db_attach(&conn, loop);
 
   /* Send a command to the Redis process. */
-  client_fd = connect_ipc_sock(socket_pathname);
+  int client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
+  utarray_push_back(connections, &client_fd);
   ray_logger *logger = init_ray_logger("worker", RAY_INFO, 0, &client_fd);
   ray_log(logger, RAY_INFO, "TEST", "Message");
 
-  while (!lookup_successful) {
-    int num_ready = event_loop_poll(&loop, -1);
-    if (num_ready < 0) {
-      exit(-1);
-    }
-    for (int i = 0; i < event_loop_size(&loop); ++i) {
-      struct pollfd *waiting = event_loop_get(&loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (i == db_index) {
-        db_event(&conn);
-      } else if (i == ipc_index) {
-        /* For some reason, this check is necessary for Travis
-         * to pass these tests. */
-        ASSERT(waiting->revents & POLLIN);
-        server_fd = accept_client(socket_fd);
-        ASSERT(server_fd >= 0);
-        event_loop_attach(&loop, 1, NULL, server_fd, POLLIN);
-      } else {
-        char *cmd = read_string(waiting->fd);
-        redisAsyncCommand(conn.context, logging_test_callback, NULL, cmd,
-                          conn.client_id, 0);
-        free(cmd);
-      }
-    }
-  }
+  event_loop_add_file(loop, socket_fd, EVENT_LOOP_READ, logging_accept_callback,
+                      &conn);
+  event_loop_add_file(loop, client_fd, EVENT_LOOP_READ, logging_read_callback,
+                      &conn);
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
+
+  CHECK(logging_test_callback_called);
+
   free_ray_logger(logger);
   db_disconnect(&conn);
-  event_loop_free(&loop);
-  close(server_fd);
-  close(client_fd);
-  close(socket_fd);
+  event_loop_destroy(loop);
+  for (int *p = (int *) utarray_front(connections); p != NULL;
+       p = (int *) utarray_next(connections, p)) {
+    close(*p);
+  }
   unlink(socket_pathname);
-  lookup_successful = 0;
+  utarray_free(connections);
   PASS();
 }
 
diff --git a/thirdparty/ae/ae.c b/thirdparty/ae/ae.c
new file mode 100644
index 000000000..e66808a81
--- /dev/null
+++ b/thirdparty/ae/ae.c
@@ -0,0 +1,465 @@
+/* A simple event-driven programming library. Originally I wrote this code
+ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
+ * it in form of a library for easy reuse.
+ *
+ * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <poll.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+
+#include "ae.h"
+#include "zmalloc.h"
+#include "config.h"
+
+/* Include the best multiplexing layer supported by this system.
+ * The following should be ordered by performances, descending. */
+#ifdef HAVE_EVPORT
+#include "ae_evport.c"
+#else
+    #ifdef HAVE_EPOLL
+    #include "ae_epoll.c"
+    #else
+        #ifdef HAVE_KQUEUE
+        #include "ae_kqueue.c"
+        #else
+        #include "ae_select.c"
+        #endif
+    #endif
+#endif
+
+aeEventLoop *aeCreateEventLoop(int setsize) {
+    aeEventLoop *eventLoop;
+    int i;
+
+    if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
+    eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
+    eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
+    if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
+    eventLoop->setsize = setsize;
+    eventLoop->lastTime = time(NULL);
+    eventLoop->timeEventHead = NULL;
+    eventLoop->timeEventNextId = 0;
+    eventLoop->stop = 0;
+    eventLoop->maxfd = -1;
+    eventLoop->beforesleep = NULL;
+    if (aeApiCreate(eventLoop) == -1) goto err;
+    /* Events with mask == AE_NONE are not set. So let's initialize the
+     * vector with it. */
+    for (i = 0; i < setsize; i++)
+        eventLoop->events[i].mask = AE_NONE;
+    return eventLoop;
+
+err:
+    if (eventLoop) {
+        zfree(eventLoop->events);
+        zfree(eventLoop->fired);
+        zfree(eventLoop);
+    }
+    return NULL;
+}
+
+/* Return the current set size. */
+int aeGetSetSize(aeEventLoop *eventLoop) {
+    return eventLoop->setsize;
+}
+
+/* Resize the maximum set size of the event loop.
+ * If the requested set size is smaller than the current set size, but
+ * there is already a file descriptor in use that is >= the requested
+ * set size, AE_ERR is returned and the operation is not
+ * performed at all.
+ *
+ * Otherwise AE_OK is returned and the operation is successful. */
+int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
+    int i;
+
+    if (setsize == eventLoop->setsize) return AE_OK;
+    if (eventLoop->maxfd >= setsize) return AE_ERR;
+    if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;
+
+    eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
+    eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
+    eventLoop->setsize = setsize;
+
+    /* Make sure that if we created new slots, they are initialized with
+     * an AE_NONE mask. */
+    for (i = eventLoop->maxfd+1; i < setsize; i++)
+        eventLoop->events[i].mask = AE_NONE;
+    return AE_OK;
+}
+
+void aeDeleteEventLoop(aeEventLoop *eventLoop) {
+    aeApiFree(eventLoop);
+    zfree(eventLoop->events);
+    zfree(eventLoop->fired);
+    zfree(eventLoop);
+}
+
+void aeStop(aeEventLoop *eventLoop) {
+    eventLoop->stop = 1;
+}
+
+int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
+        aeFileProc *proc, void *clientData)
+{
+    if (fd >= eventLoop->setsize) {
+        errno = ERANGE;
+        return AE_ERR;
+    }
+    aeFileEvent *fe = &eventLoop->events[fd];
+
+    if (aeApiAddEvent(eventLoop, fd, mask) == -1)
+        return AE_ERR;
+    fe->mask |= mask;
+    if (mask & AE_READABLE) fe->rfileProc = proc;
+    if (mask & AE_WRITABLE) fe->wfileProc = proc;
+    fe->clientData = clientData;
+    if (fd > eventLoop->maxfd)
+        eventLoop->maxfd = fd;
+    return AE_OK;
+}
+
+void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
+{
+    if (fd >= eventLoop->setsize) return;
+    aeFileEvent *fe = &eventLoop->events[fd];
+    if (fe->mask == AE_NONE) return;
+
+    aeApiDelEvent(eventLoop, fd, mask);
+    fe->mask = fe->mask & (~mask);
+    if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
+        /* Update the max fd */
+        int j;
+
+        for (j = eventLoop->maxfd-1; j >= 0; j--)
+            if (eventLoop->events[j].mask != AE_NONE) break;
+        eventLoop->maxfd = j;
+    }
+}
+
+int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
+    if (fd >= eventLoop->setsize) return 0;
+    aeFileEvent *fe = &eventLoop->events[fd];
+
+    return fe->mask;
+}
+
+static void aeGetTime(long *seconds, long *milliseconds)
+{
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    *seconds = tv.tv_sec;
+    *milliseconds = tv.tv_usec/1000;
+}
+
+static void aeAddMillisecondsToNow(long long milliseconds, long *sec, long *ms) {
+    long cur_sec, cur_ms, when_sec, when_ms;
+
+    aeGetTime(&cur_sec, &cur_ms);
+    when_sec = cur_sec + milliseconds/1000;
+    when_ms = cur_ms + milliseconds%1000;
+    if (when_ms >= 1000) {
+        when_sec ++;
+        when_ms -= 1000;
+    }
+    *sec = when_sec;
+    *ms = when_ms;
+}
+
+long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
+        aeTimeProc *proc, void *clientData,
+        aeEventFinalizerProc *finalizerProc)
+{
+    long long id = eventLoop->timeEventNextId++;
+    aeTimeEvent *te;
+
+    te = zmalloc(sizeof(*te));
+    if (te == NULL) return AE_ERR;
+    te->id = id;
+    aeAddMillisecondsToNow(milliseconds,&te->when_sec,&te->when_ms);
+    te->timeProc = proc;
+    te->finalizerProc = finalizerProc;
+    te->clientData = clientData;
+    te->next = eventLoop->timeEventHead;
+    eventLoop->timeEventHead = te;
+    return id;
+}
+
+int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
+{
+    aeTimeEvent *te = eventLoop->timeEventHead;
+    while(te) {
+        if (te->id == id) {
+            te->id = AE_DELETED_EVENT_ID;
+            return AE_OK;
+        }
+        te = te->next;
+    }
+    return AE_ERR; /* NO event with the specified ID found */
+}
+
+/* Search the first timer to fire.
+ * This operation is useful to know how long the select can be
+ * put to sleep without delaying any event.
+ * If there are no timers NULL is returned.
+ *
+ * Note that's O(N) since time events are unsorted.
+ * Possible optimizations (not needed by Redis so far, but...):
+ * 1) Insert the event in order, so that the nearest is just the head.
+ *    Much better but still insertion or deletion of timers is O(N).
+ * 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)).
+ */
+static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
+{
+    aeTimeEvent *te = eventLoop->timeEventHead;
+    aeTimeEvent *nearest = NULL;
+
+    while(te) {
+        if (!nearest || te->when_sec < nearest->when_sec ||
+                (te->when_sec == nearest->when_sec &&
+                 te->when_ms < nearest->when_ms))
+            nearest = te;
+        te = te->next;
+    }
+    return nearest;
+}
+
+/* Process time events: run every timer that is due; returns how many ran. */
+static int processTimeEvents(aeEventLoop *eventLoop) {
+    int processed = 0;
+    aeTimeEvent *te, *prev;
+    long long maxId;
+    time_t now = time(NULL);
+
+    /* If the system clock is moved to the future, and then set back to the
+     * right value, time events may be delayed in a random way. Often this
+     * means that scheduled operations will not be performed soon enough.
+     *
+     * Here we try to detect system clock skews, and force all the time
+     * events to be processed ASAP when this happens: the idea is that
+     * processing events earlier is less dangerous than delaying them
+     * indefinitely, and practice suggests it is. */
+    if (now < eventLoop->lastTime) {
+        te = eventLoop->timeEventHead;
+        while(te) {
+            te->when_sec = 0; /* so that the due check below succeeds */
+            te = te->next;
+        }
+    }
+    eventLoop->lastTime = now;
+
+    prev = NULL;
+    te = eventLoop->timeEventHead;
+    maxId = eventLoop->timeEventNextId-1; /* larger ids are skipped below */
+    while(te) {
+        long now_sec, now_ms;
+        long long id;
+
+        /* Remove events scheduled for deletion. */
+        if (te->id == AE_DELETED_EVENT_ID) {
+            aeTimeEvent *next = te->next;
+            if (prev == NULL)
+                eventLoop->timeEventHead = te->next;
+            else
+                prev->next = te->next;
+            if (te->finalizerProc)
+                te->finalizerProc(eventLoop, te->clientData);
+            zfree(te);
+            te = next; /* prev is left alone: it still precedes the new te */
+            continue;
+        }
+
+        /* Make sure we don't process time events created by time events in
+         * this iteration. Note that this check is currently useless: we always
+         * add new timers on the head, however if we change the implementation
+         * detail, this check may be useful again: we keep it here for future
+         * defense. */
+        if (te->id > maxId) {
+            te = te->next;
+            continue;
+        }
+        aeGetTime(&now_sec, &now_ms);
+        if (now_sec > te->when_sec ||
+            (now_sec == te->when_sec && now_ms >= te->when_ms))
+        {
+            int retval;
+
+            id = te->id;
+            retval = te->timeProc(eventLoop, id, te->clientData);
+            processed++;
+            if (retval != AE_NOMORE) { /* re-arm; retval is presumably a delay in ms */
+                aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
+            } else {
+                te->id = AE_DELETED_EVENT_ID; /* unlinked and freed by a later call */
+            }
+        }
+        prev = te;
+        te = te->next;
+    }
+    return processed;
+}
+
+/* Process every pending file event, then every time event that is due
+ * (file events are polled and dispatched first, timers run last).
+ * Without special flags the function sleeps until some file event
+ * fires, or until the next time event occurs (if any).
+ *
+ * If flags is 0, the function does nothing and returns.
+ * if flags has AE_ALL_EVENTS set, all kinds of events are processed.
+ * if flags has AE_FILE_EVENTS set, file events are processed.
+ * if flags has AE_TIME_EVENTS set, time events are processed.
+ * if flags has AE_DONT_WAIT set the function returns ASAP, as soon as all
+ * the events that can be processed without waiting are processed.
+ *
+ * The function returns the number of events processed. */
+int aeProcessEvents(aeEventLoop *eventLoop, int flags)
+{
+    int processed = 0, numevents;
+
+    /* Nothing to do? return ASAP */
+    if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
+
+    /* Note that we want to call aeApiPoll() even if there are no
+     * file events to process as long as we want to process time
+     * events, in order to sleep until the next time event is ready
+     * to fire. */
+    if (eventLoop->maxfd != -1 ||
+        ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
+        int j;
+        aeTimeEvent *shortest = NULL;
+        struct timeval tv, *tvp;
+
+        if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
+            shortest = aeSearchNearestTimer(eventLoop);
+        if (shortest) {
+            long now_sec, now_ms;
+
+            aeGetTime(&now_sec, &now_ms);
+            tvp = &tv;
+
+            /* How many milliseconds do we need to wait for the next
+             * time event to fire? */
+            long long ms =
+                (shortest->when_sec - now_sec)*1000 +
+                shortest->when_ms - now_ms;
+
+            if (ms > 0) {
+                tvp->tv_sec = ms/1000;
+                tvp->tv_usec = (ms % 1000)*1000;
+            } else { /* the timer is already due: poll without blocking */
+                tvp->tv_sec = 0;
+                tvp->tv_usec = 0;
+            }
+        } else {
+            /* If we have to check for events but need to return
+             * ASAP because of AE_DONT_WAIT we need to set the timeout
+             * to zero */
+            if (flags & AE_DONT_WAIT) {
+                tv.tv_sec = tv.tv_usec = 0;
+                tvp = &tv;
+            } else {
+                /* Otherwise we can block */
+                tvp = NULL; /* wait forever */
+            }
+        }
+
+        numevents = aeApiPoll(eventLoop, tvp);
+        for (j = 0; j < numevents; j++) {
+            aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
+            int mask = eventLoop->fired[j].mask;
+            int fd = eventLoop->fired[j].fd;
+            int rfired = 0; /* set once the read handler ran for this fd */
+
+	    /* Note the fe->mask & mask & ... code: an already processed event
+             * may have removed an element that fired but that we have not
+             * processed yet, so we check if the event is still valid. */
+            if (fe->mask & mask & AE_READABLE) {
+                rfired = 1;
+                fe->rfileProc(eventLoop,fd,fe->clientData,mask);
+            }
+            if (fe->mask & mask & AE_WRITABLE) {
+                if (!rfired || fe->wfileProc != fe->rfileProc) /* never run one handler twice */
+                    fe->wfileProc(eventLoop,fd,fe->clientData,mask);
+            }
+            processed++;
+        }
+    }
+    /* Check time events */
+    if (flags & AE_TIME_EVENTS)
+        processed += processTimeEvents(eventLoop);
+
+    return processed; /* return the number of processed file/time events */
+}
+
+/* Wait up to 'milliseconds' for 'fd' to become readable/writable as asked by
+ * 'mask'. Returns the AE_* mask that fired, else poll()'s result (0 or -1). */
+int aeWait(int fd, int mask, long long milliseconds) {
+    struct pollfd pfd;
+    int retmask = 0, retval;
+
+    memset(&pfd, 0, sizeof(pfd));
+    pfd.fd = fd;
+    if (mask & AE_READABLE) pfd.events |= POLLIN;
+    if (mask & AE_WRITABLE) pfd.events |= POLLOUT;
+    /* NOTE(review): 'milliseconds' is narrowed to poll()'s int timeout. */
+    if ((retval = poll(&pfd, 1, milliseconds))== 1) {
+        if (pfd.revents & POLLIN) retmask |= AE_READABLE;
+        if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
+	if (pfd.revents & POLLERR) retmask |= AE_WRITABLE; /* errors and hang-ups */
+        if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE; /* count as writable */
+        return retmask;
+    } else { /* timeout or failure: hand back poll()'s own value */
+        return retval;
+    }
+}
+
+void aeMain(aeEventLoop *eventLoop) {
+    eventLoop->stop = 0; /* start with the stop flag cleared */
+    while (!eventLoop->stop) { /* runs until something sets eventLoop->stop */
+        if (eventLoop->beforesleep != NULL)
+            eventLoop->beforesleep(eventLoop); /* optional hook, once per iteration */
+        aeProcessEvents(eventLoop, AE_ALL_EVENTS); /* file events, then timers */
+    }
+}
+
+char *aeGetApiName(void) { /* public wrapper around the backend's aeApiName() */
+    return aeApiName();
+}
+
+void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
+    eventLoop->beforesleep = beforesleep; /* aeMain runs it each iteration; NULL disables it */
+}
diff --git a/thirdparty/ae/ae.h b/thirdparty/ae/ae.h
new file mode 100644
index 000000000..827c4c9e4
--- /dev/null
+++ b/thirdparty/ae/ae.h
@@ -0,0 +1,123 @@
+/* A simple event-driven programming library. Originally I wrote this code
+ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
+ * it in form of a library for easy reuse.
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __AE_H__
+#define __AE_H__
+
+#include <time.h>
+
+#define AE_OK 0
+#define AE_ERR -1
+/* Event mask bits, combined with bitwise OR (AE_NONE means no interest). */
+#define AE_NONE 0
+#define AE_READABLE 1
+#define AE_WRITABLE 2
+/* Flag bits for the 'flags' argument of aeProcessEvents(). */
+#define AE_FILE_EVENTS 1
+#define AE_TIME_EVENTS 2
+#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS)
+#define AE_DONT_WAIT 4
+/* AE_NOMORE: aeTimeProc return value that retires the timer. */
+#define AE_NOMORE -1
+#define AE_DELETED_EVENT_ID -1
+
+/* Macros: AE_NOTUSED evaluates V and discards the value. */
+#define AE_NOTUSED(V) ((void) V)
+
+struct aeEventLoop; /* forward declaration used by the callback types below */
+
+/* Types and data structures (an aeTimeProc returns AE_NOMORE to stop its timer) */
+typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
+typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
+typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
+typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
+
+/* File event structure: one slot of aeEventLoop.events, which is indexed by fd */
+typedef struct aeFileEvent {
+    int mask; /* bitwise OR of AE_(READABLE|WRITABLE); AE_NONE if not monitored */
+    aeFileProc *rfileProc; /* called when the fd fires with AE_READABLE */
+    aeFileProc *wfileProc; /* called when the fd fires with AE_WRITABLE */
+    void *clientData; /* opaque pointer handed back to both callbacks */
+} aeFileEvent;
+
+/* Time event structure: a node of the list rooted at aeEventLoop.timeEventHead */
+typedef struct aeTimeEvent {
+    long long id; /* time event identifier; AE_DELETED_EVENT_ID once retired */
+    long when_sec; /* due time, seconds part */
+    long when_ms; /* due time, milliseconds part */
+    aeTimeProc *timeProc; /* called when the timer is due */
+    aeEventFinalizerProc *finalizerProc; /* optional; called just before the node is freed */
+    void *clientData; /* opaque pointer passed to both callbacks */
+    struct aeTimeEvent *next; /* next timer; NULL ends the list */
+} aeTimeEvent;
+
+/* A fired event, filled in by the backend's aeApiPoll() */
+typedef struct aeFiredEvent {
+    int fd; /* descriptor the backend reported */
+    int mask; /* AE_READABLE and/or AE_WRITABLE as reported by the backend */
+} aeFiredEvent;
+
+/* State of an event based program */
+typedef struct aeEventLoop {
+    int maxfd;   /* highest file descriptor currently registered (-1 if none) */
+    int setsize; /* max number of file descriptors tracked */
+    long long timeEventNextId; /* presumably the id given to the next new timer */
+    time_t lastTime;     /* Used to detect system clock skew */
+    aeFileEvent *events; /* Registered events, indexed by fd */
+    aeFiredEvent *fired; /* Fired events, filled in by aeApiPoll() */
+    aeTimeEvent *timeEventHead; /* head of the unsorted timer list; NULL if empty */
+    int stop; /* nonzero makes aeMain() leave its loop */
+    void *apidata; /* This is used for polling API specific data */
+    aeBeforeSleepProc *beforesleep; /* optional hook aeMain() runs every iteration */
+} aeEventLoop;
+
+/* Prototypes of the public event loop API */
+aeEventLoop *aeCreateEventLoop(int setsize);
+void aeDeleteEventLoop(aeEventLoop *eventLoop);
+void aeStop(aeEventLoop *eventLoop);
+int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
+        aeFileProc *proc, void *clientData);
+void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
+int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
+long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
+        aeTimeProc *proc, void *clientData,
+        aeEventFinalizerProc *finalizerProc);
+int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id);
+int aeProcessEvents(aeEventLoop *eventLoop, int flags); /* returns # of events processed */
+int aeWait(int fd, int mask, long long milliseconds); /* blocking poll() on a single fd */
+void aeMain(aeEventLoop *eventLoop); /* loops until eventLoop->stop is set */
+char *aeGetApiName(void); /* name reported by the polling backend */
+void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
+int aeGetSetSize(aeEventLoop *eventLoop);
+int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
+
+#endif
diff --git a/thirdparty/ae/ae_epoll.c b/thirdparty/ae/ae_epoll.c
new file mode 100644
index 000000000..410aac70d
--- /dev/null
+++ b/thirdparty/ae/ae_epoll.c
@@ -0,0 +1,135 @@
+/* Linux epoll(2) based ae.c module
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/epoll.h>
+
+typedef struct aeApiState {
+    int epfd; /* epoll instance returned by epoll_create() */
+    struct epoll_event *events; /* epoll_wait() output buffer, setsize entries */
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) { /* 0 on success, -1 on failure */
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+
+    if (!state) return -1;
+    state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
+    if (!state->events) {
+        zfree(state);
+        return -1;
+    }
+    state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
+    if (state->epfd == -1) {
+        zfree(state->events); /* undo both allocations before failing */
+        zfree(state);
+        return -1;
+    }
+    eventLoop->apidata = state; /* stored only after every step succeeded */
+    return 0;
+}
+
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) { /* always returns 0 */
+    aeApiState *state = eventLoop->apidata;
+    /* NOTE(review): unlike zmalloc() in aeApiCreate, this result is not checked. */
+    state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize);
+    return 0;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) {
+    aeApiState *state = eventLoop->apidata;
+    /* Release everything aeApiCreate acquired: the epoll fd and both buffers. */
+    close(state->epfd);
+    zfree(state->events);
+    zfree(state);
+}
+
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    struct epoll_event ee = {0}; /* avoid valgrind warning */
+    /* If the fd was already monitored for some event, we need a MOD
+     * operation. Otherwise we need an ADD operation. */
+    int op = eventLoop->events[fd].mask == AE_NONE ?
+            EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+    /* Register the union of the old and new interests; returns 0 or -1. */
+    ee.events = 0;
+    mask |= eventLoop->events[fd].mask; /* Merge old events */
+    if (mask & AE_READABLE) ee.events |= EPOLLIN;
+    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
+    ee.data.fd = fd; /* read back by aeApiPoll to identify the descriptor */
+    if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
+    return 0;
+}
+
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
+    aeApiState *state = eventLoop->apidata;
+    struct epoll_event ee = {0}; /* avoid valgrind warning */
+    int mask = eventLoop->events[fd].mask & (~delmask); /* interests that remain */
+    /* epoll_ctl() failures are ignored here: the function returns void. */
+    ee.events = 0;
+    if (mask & AE_READABLE) ee.events |= EPOLLIN;
+    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
+    ee.data.fd = fd;
+    if (mask != AE_NONE) {
+        epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
+    } else {
+        /* Note, Kernel < 2.6.9 requires a non null event pointer even for
+         * EPOLL_CTL_DEL. */
+        epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee);
+    }
+}
+
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    int retval, numevents = 0; /* numevents stays 0 on timeout or error */
+    /* The timeval is converted to milliseconds; a NULL tvp becomes -1. */
+    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
+            tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
+    if (retval > 0) {
+        int j;
+
+        numevents = retval;
+        for (j = 0; j < numevents; j++) {
+            int mask = 0;
+            struct epoll_event *e = state->events+j;
+
+            if (e->events & EPOLLIN) mask |= AE_READABLE;
+            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
+            if (e->events & EPOLLERR) mask |= AE_WRITABLE; /* errors and hang-ups are */
+            if (e->events & EPOLLHUP) mask |= AE_WRITABLE; /* reported as writable */
+            eventLoop->fired[j].fd = e->data.fd;
+            eventLoop->fired[j].mask = mask;
+        }
+    }
+    return numevents;
+}
+
+static char *aeApiName(void) { /* identifies this backend */
+    return "epoll";
+}
diff --git a/thirdparty/ae/ae_evport.c b/thirdparty/ae/ae_evport.c
new file mode 100644
index 000000000..5c317becb
--- /dev/null
+++ b/thirdparty/ae/ae_evport.c
@@ -0,0 +1,320 @@
+/* ae.c module for illumos event ports.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <assert.h>
+#include <errno.h>
+#include <port.h>
+#include <poll.h>
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+
+static int evport_debug = 0; /* nonzero enables the tracing fprintf(stderr, ...) calls */
+
+/*
+ * This file implements the ae API using event ports, present on Solaris-based
+ * systems since Solaris 10.  Using the event port interface, we associate file
+ * descriptors with the port.  Each association also includes the set of poll(2)
+ * events that the consumer is interested in (e.g., POLLIN and POLLOUT).
+ *
+ * There's one tricky piece to this implementation: when we return events via
+ * aeApiPoll, the corresponding file descriptors become dissociated from the
+ * port.  This is necessary because poll events are level-triggered, so if the
+ * fd didn't become dissociated, it would immediately fire another event since
+ * the underlying state hasn't changed yet.  We must re-associate the file
+ * descriptor, but only after we know that our caller has actually read from it.
+ * The ae API does not tell us exactly when that happens, but we do know that
+ * it must happen by the time aeApiPoll is called again.  Our solution is to
+ * keep track of the last fds returned by aeApiPoll and re-associate them next
+ * time aeApiPoll is invoked.
+ *
+ * To summarize, in this module, each fd association is EITHER (a) represented
+ * only via the in-kernel association OR (b) represented by pending_fds and
+ * pending_masks.  (b) is only true for the last fds we returned from aeApiPoll,
+ * and only until we enter aeApiPoll again (at which point we restore the
+ * in-kernel association).
+ */
+#define MAX_EVENT_BATCHSZ 512 /* most events fetched by one port_getn() call */
+
+typedef struct aeApiState {
+    int     portfd;                             /* event port */
+    int     npending;                           /* # of pending fds */
+    int     pending_fds[MAX_EVENT_BATCHSZ];     /* pending fds; -1 = deleted */
+    int     pending_masks[MAX_EVENT_BATCHSZ];   /* pending fds' masks */
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) { /* 0 on success, -1 on failure */
+    int i;
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+    if (!state) return -1;
+
+    state->portfd = port_create();
+    if (state->portfd == -1) {
+        zfree(state); /* nothing else was acquired yet */
+        return -1;
+    }
+
+    state->npending = 0;
+    /* Start with every pending slot marked empty. */
+    for (i = 0; i < MAX_EVENT_BATCHSZ; i++) {
+        state->pending_fds[i] = -1;
+        state->pending_masks[i] = AE_NONE;
+    }
+
+    eventLoop->apidata = state;
+    return 0;
+}
+
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+    /* Nothing to resize here: the pending arrays have a fixed size. */
+    return 0;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) {
+    aeApiState *state = eventLoop->apidata;
+    /* Release what aeApiCreate acquired: the event port, then the state. */
+    close(state->portfd);
+    zfree(state);
+}
+
+static int aeApiLookupPending(aeApiState *state, int fd) {
+    int i;
+    /* Linear search of the first npending slots for fd. */
+    for (i = 0; i < state->npending; i++) {
+        if (state->pending_fds[i] == fd)
+            return (i); /* index into pending_fds / pending_masks */
+    }
+
+    return (-1); /* fd is not pending */
+}
+
+/* Helper function to invoke port_associate for the given fd and mask.
+ * 'where' names the caller in diagnostics; mask is also stored as the
+ * association's user pointer. Returns port_associate()'s result (0 or -1). */
+static int aeApiAssociate(const char *where, int portfd, int fd, int mask) {
+    int events = 0;
+    int rv, err;
+
+    if (mask & AE_READABLE)
+        events |= POLLIN;
+    if (mask & AE_WRITABLE)
+        events |= POLLOUT;
+
+    if (evport_debug) /* this line is completed by the second debug print below */
+        fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events);
+
+    rv = port_associate(portfd, PORT_SOURCE_FD, fd, events,
+        (void *)(uintptr_t)mask); /* read back by aeApiPoll via portev_user */
+    err = errno; /* saved before the fprintf calls below can change errno */
+
+    if (evport_debug)
+        fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err));
+
+    if (rv == -1) {
+        fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err));
+
+        if (err == EAGAIN) /* newline-terminated so later output starts a new line */
+            fprintf(stderr, "aeApiAssociate: event port limit exceeded.\n");
+    }
+
+    return rv;
+}
+
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    int fullmask, pfd; /* pfd: index of fd in the pending arrays, or -1 */
+
+    if (evport_debug)
+        fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask);
+
+    /*
+     * Since port_associate's "events" argument replaces any existing events, we
+     * must be sure to include whatever events are already associated when
+     * we call port_associate() again.
+     */
+    fullmask = mask | eventLoop->events[fd].mask;
+    pfd = aeApiLookupPending(state, fd);
+
+    if (pfd != -1) {
+        /*
+         * This fd was recently returned from aeApiPoll.  It should be safe to
+         * assume that the consumer has processed that poll event, but we play
+         * it safer by simply updating pending_mask.  The fd will be
+         * re-associated as usual when aeApiPoll is called again.
+         */
+        if (evport_debug)
+            fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd);
+        state->pending_masks[pfd] |= fullmask;
+        return 0; /* no port_associate call in this case */
+    }
+
+    return (aeApiAssociate("aeApiAddEvent", state->portfd, fd, fullmask));
+}
+
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    int fullmask, pfd; /* pfd: index of fd in the pending arrays, or -1 */
+
+    if (evport_debug)
+        fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask);
+
+    pfd = aeApiLookupPending(state, fd);
+
+    if (pfd != -1) {
+        if (evport_debug)
+            fprintf(stderr, "deleting event from pending fd %d\n", fd);
+
+        /*
+         * This fd was just returned from aeApiPoll, so it's not currently
+         * associated with the port.  All we need to do is update
+         * pending_mask appropriately.
+         */
+        state->pending_masks[pfd] &= ~mask;
+
+        if (state->pending_masks[pfd] == AE_NONE)
+            state->pending_fds[pfd] = -1; /* aeApiPoll skips slots holding -1 */
+
+        return;
+    }
+
+    /*
+     * The fd is currently associated with the port.  Like with the add case
+     * above, we must look at the full mask for the file descriptor before
+     * updating that association.  We don't have a good way of knowing what the
+     * events are without looking into the eventLoop state directly.  We rely on
+     * the fact that our caller has already updated the mask in the eventLoop.
+     */
+
+    fullmask = eventLoop->events[fd].mask;
+    if (fullmask == AE_NONE) {
+        /*
+         * We're removing *all* events, so use port_dissociate to remove the
+         * association completely.  Failure here indicates a bug.
+         */
+        if (evport_debug)
+            fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd);
+
+        if (port_dissociate(state->portfd, PORT_SOURCE_FD, fd) != 0) {
+            perror("aeApiDelEvent: port_dissociate");
+            abort(); /* will not return */
+        }
+    } else if (aeApiAssociate("aeApiDelEvent", state->portfd, fd,
+        fullmask) != 0) {
+        /*
+         * ENOMEM is a potentially transient condition, but the kernel won't
+         * generally return it unless things are really bad.  EAGAIN indicates
+         * we've reached a resource limit, for which it doesn't make sense to
+         * retry (counter-intuitively).  All other errors indicate a bug.  In any
+         * of these cases, the best we can do is to abort.
+         */
+        abort(); /* will not return */
+    }
+}
+
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    struct timespec timeout, *tsp;
+    int mask, i;
+    uint_t nevents;
+    port_event_t event[MAX_EVENT_BATCHSZ];
+
+    /*
+     * If we've returned fd events before, we must re-associate them with the
+     * port now, before calling port_get().  See the block comment at the top of
+     * this file for an explanation of why.
+     */
+    for (i = 0; i < state->npending; i++) {
+        if (state->pending_fds[i] == -1)
+            /* This fd has since been deleted. */
+            continue;
+
+        if (aeApiAssociate("aeApiPoll", state->portfd,
+            state->pending_fds[i], state->pending_masks[i]) != 0) {
+            /* See aeApiDelEvent for why this case is fatal. */
+            abort();
+        }
+
+        state->pending_masks[i] = AE_NONE; /* slot is free again */
+        state->pending_fds[i] = -1;
+    }
+
+    state->npending = 0;
+    /* Convert the optional timeval to a timespec; NULL is passed through. */
+    if (tvp != NULL) {
+        timeout.tv_sec = tvp->tv_sec;
+        timeout.tv_nsec = tvp->tv_usec * 1000;
+        tsp = &timeout;
+    } else {
+        tsp = NULL;
+    }
+
+    /*
+     * port_getn can return with errno == ETIME having returned some events (!).
+     * So if we get ETIME, we check nevents, too.
+     */
+    nevents = 1;
+    if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents,
+        tsp) == -1 && (errno != ETIME || nevents == 0)) {
+        if (errno == ETIME || errno == EINTR)
+            return 0; /* timed out or interrupted with nothing to report */
+
+        /* Any other error indicates a bug. */
+        perror("aeApiPoll: port_get");
+        abort();
+    }
+
+    state->npending = nevents; /* every returned fd becomes pending, see below */
+
+    for (i = 0; i < nevents; i++) {
+            mask = 0;
+            if (event[i].portev_events & POLLIN)
+                mask |= AE_READABLE;
+            if (event[i].portev_events & POLLOUT)
+                mask |= AE_WRITABLE;
+
+            eventLoop->fired[i].fd = event[i].portev_object;
+            eventLoop->fired[i].mask = mask;
+
+            if (evport_debug)
+                fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n",
+                    (int)event[i].portev_object, mask);
+            /* Remember fd and its registered mask for the next aeApiPoll call. */
+            state->pending_fds[i] = event[i].portev_object;
+            state->pending_masks[i] = (uintptr_t)event[i].portev_user;
+    }
+
+    return nevents;
+}
+
+static char *aeApiName(void) { /* identifies this backend */
+    return "evport";
+}
diff --git a/thirdparty/ae/ae_kqueue.c b/thirdparty/ae/ae_kqueue.c
new file mode 100644
index 000000000..6796f4ceb
--- /dev/null
+++ b/thirdparty/ae/ae_kqueue.c
@@ -0,0 +1,138 @@
+/* Kqueue(2)-based ae.c module
+ *
+ * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/time.h>
+
+typedef struct aeApiState {
+    int kqfd; /* kqueue descriptor returned by kqueue() */
+    struct kevent *events; /* kevent() output buffer, setsize entries */
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) { /* 0 on success, -1 on failure */
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+
+    if (!state) return -1;
+    state->events = zmalloc(sizeof(struct kevent)*eventLoop->setsize);
+    if (!state->events) {
+        zfree(state);
+        return -1;
+    }
+    state->kqfd = kqueue();
+    if (state->kqfd == -1) {
+        zfree(state->events); /* undo both allocations before failing */
+        zfree(state);
+        return -1;
+    }
+    eventLoop->apidata = state; /* stored only after every step succeeded */
+    return 0;
+}
+
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) { /* always returns 0 */
+    aeApiState *state = eventLoop->apidata;
+    /* NOTE(review): unlike zmalloc() in aeApiCreate, this result is not checked. */
+    state->events = zrealloc(state->events, sizeof(struct kevent)*setsize);
+    return 0;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) {
+    aeApiState *state = eventLoop->apidata;
+    /* Release everything aeApiCreate acquired: the kqueue fd and both buffers. */
+    close(state->kqfd);
+    zfree(state->events);
+    zfree(state);
+}
+
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    struct kevent ke;
+    /* One kevent() call per requested direction; returns 0, or -1 on failure. */
+    if (mask & AE_READABLE) {
+        EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
+    }
+    if (mask & AE_WRITABLE) {
+        EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
+    }
+    return 0;
+}
+
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    struct kevent ke;
+    /* Remove the filter for each direction in mask; kevent() errors are ignored. */
+    if (mask & AE_READABLE) {
+        EV_SET(&ke, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
+    }
+    if (mask & AE_WRITABLE) {
+        EV_SET(&ke, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
+    }
+}
+
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    int retval, numevents = 0; /* numevents stays 0 on timeout or error */
+
+    if (tvp != NULL) {
+        struct timespec timeout; /* kevent() takes a timespec, not a timeval */
+        timeout.tv_sec = tvp->tv_sec;
+        timeout.tv_nsec = tvp->tv_usec * 1000;
+        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
+                        &timeout);
+    } else { /* no timeout given: pass NULL through to kevent() */
+        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
+                        NULL);
+    }
+
+    if (retval > 0) {
+        int j;
+
+        numevents = retval;
+        for(j = 0; j < numevents; j++) {
+            int mask = 0;
+            struct kevent *e = state->events+j;
+            /* Each entry is matched against one filter, so it sets one bit. */
+            if (e->filter == EVFILT_READ) mask |= AE_READABLE;
+            if (e->filter == EVFILT_WRITE) mask |= AE_WRITABLE;
+            eventLoop->fired[j].fd = e->ident;
+            eventLoop->fired[j].mask = mask;
+        }
+    }
+    return numevents;
+}
+
+static char *aeApiName(void) { /* identifies this backend */
+    return "kqueue";
+}
diff --git a/thirdparty/ae/ae_select.c b/thirdparty/ae/ae_select.c
new file mode 100644
index 000000000..c039a8ea3
--- /dev/null
+++ b/thirdparty/ae/ae_select.c
@@ -0,0 +1,106 @@
+/* Select()-based ae.c module.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/select.h>
+#include <string.h>
+
+typedef struct aeApiState {
+    fd_set rfds, wfds; /* registered interest: fds watched for read / write */
+    /* We need to have a copy of the fd sets as it's not safe to reuse
+     * FD sets after select(). */
+    fd_set _rfds, _wfds; /* scratch copies that aeApiPoll hands to select() */
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) {
+    /* Allocate the select() state with empty read/write sets; 0 on success, -1 if allocation fails. */
+    aeApiState *st = zmalloc(sizeof(*st));
+    if (st == NULL) return -1;
+    FD_ZERO(&st->wfds);
+    FD_ZERO(&st->rfds);
+    eventLoop->apidata = st;
+    return 0;
+}
+
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+    /* Nothing is reallocated here: an fd_set has a fixed capacity, so a
+     * setsize of FD_SETSIZE or more is rejected with -1; anything else is 0. */
+    return (setsize < FD_SETSIZE) ? 0 : -1;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) { /* tears down the select() backend state */
+    zfree(eventLoop->apidata); /* the aeApiState that aeApiCreate stored here */
+}
+
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    if (fd < 0 || fd >= FD_SETSIZE) return -1; /* FD_SET is undefined outside [0, FD_SETSIZE) */
+    if (mask & AE_READABLE) FD_SET(fd,&state->rfds); /* watch fd for readability */
+    if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds); /* watch fd for writability */
+    return 0; /* 0 = registered; -1 = fd cannot be represented in an fd_set */
+}
+
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    /* Clear fd from whichever interest sets (read, write) mask selects. */
+    aeApiState *st = eventLoop->apidata;
+    if ((mask & AE_WRITABLE) != 0) FD_CLR(fd, &st->wfds);
+    if ((mask & AE_READABLE) != 0) FD_CLR(fd, &st->rfds);
+}
+
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    /* Runs select() over the registered fds and records the ready ones in
+     * eventLoop->fired. tvp is handed to select() as its timeout. Returns the
+     * number of fired entries written, or 0 when select() returns <= 0. */
+    aeApiState *state = eventLoop->apidata;
+    int retval, j, numevents = 0;
+    /* select() gets scratch copies so the registered sets are never modified. */
+    memcpy(&state->_rfds,&state->rfds,sizeof(fd_set));
+    memcpy(&state->_wfds,&state->wfds,sizeof(fd_set));
+    retval = select(eventLoop->maxfd+1,
+                &state->_rfds,&state->_wfds,NULL,tvp);
+    if (retval <= 0) return 0;
+    for (j = 0; j <= eventLoop->maxfd; j++) {
+        int mask = 0;
+        aeFileEvent *fe = &eventLoop->events[j];
+        if (fe->mask == AE_NONE) continue;
+        if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds)) mask |= AE_READABLE;
+        if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds)) mask |= AE_WRITABLE;
+        /* Registered but not flagged by select(): not an event, so skip it. */
+        if (mask == 0) continue;
+        eventLoop->fired[numevents].fd = j;
+        eventLoop->fired[numevents].mask = mask;
+        numevents++;
+    }
+    return numevents;
+}
+
+static char *aeApiName(void) { /* reports which polling backend this file implements */
+    return "select"; /* string literal: callers must not modify or free it */
+}
diff --git a/thirdparty/ae/config.h b/thirdparty/ae/config.h
new file mode 100644
index 000000000..4f8e1ea1b
--- /dev/null
+++ b/thirdparty/ae/config.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CONFIG_H
+#define __CONFIG_H
+
+#ifdef __APPLE__
+#include <AvailabilityMacros.h> /* source of the MAC_OS_X_VERSION_* macros tested below */
+#endif
+
+/* Detect the platform's polling API: each HAVE_* macro is defined to 1 only when its test passes. */
+#ifdef __linux__
+#define HAVE_EPOLL 1 /* Linux */
+#endif
+
+#if (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__)
+#define HAVE_KQUEUE 1 /* macOS (when MAC_OS_X_VERSION_10_6 is defined) and the BSDs */
+#endif
+
+#ifdef __sun
+#include <sys/feature_tests.h>
+#ifdef _DTRACE_VERSION
+#define HAVE_EVPORT 1 /* __sun platforms on which _DTRACE_VERSION is defined */
+#endif
+#endif
+
+
+#endif /* __CONFIG_H */
diff --git a/thirdparty/ae/zmalloc.h b/thirdparty/ae/zmalloc.h
new file mode 100644
index 000000000..54c8a69cb
--- /dev/null
+++ b/thirdparty/ae/zmalloc.h
@@ -0,0 +1,16 @@
+#ifndef _ZMALLOC_H
+#define _ZMALLOC_H
+
+#ifndef zmalloc /* each allocator name may be pre-defined by the includer to override it */
+#define zmalloc malloc /* NOTE(review): no <stdlib.h> include here; presumably the includer provides it */
+#endif
+
+#ifndef zfree
+#define zfree free /* default: plain free */
+#endif
+
+#ifndef zrealloc
+#define zrealloc realloc /* default: plain realloc */
+#endif
+
+#endif /* _ZMALLOC_H */

From 1e08629013b6ef4c9fa737c89fae8f134b01d982 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 25 Sep 2016 16:51:24 -0700
Subject: [PATCH 42/91] fix submit_task for task queue (#21)

---
 state/redis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/state/redis.c b/state/redis.c
index 9c4b81f03..d10b06b75 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -165,7 +165,7 @@ void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task) {
   UT_string *command;
   utstring_new(command);
   sha1_to_hex(&task_iid.id[0], &hex[0]);
-  utstring_printf(command, "HMSET queue:%s", &hex[0]);
+  utstring_printf(command, "HMSET queue:%s ", &hex[0]);
   print_task(task, command);
   redisAsyncCommand(db->context, NULL, NULL, utstring_body(command));
   if (db->context->err) {

From db8c0acc711ffa643585bcec0ee659bcf2887675 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 25 Sep 2016 21:52:06 -0700
Subject: [PATCH 43/91] Refactor state database (#22)

* make db_connect return the connection

* rename db_conn -> db_handle

* more renaming

* clang-format

* free the db_handle
---
 logging.c            |  4 ++--
 state/db.h           | 23 +++++++++++------------
 state/object_table.h |  8 +++++---
 state/redis.c        | 26 ++++++++++++++------------
 state/redis.h        |  2 +-
 state/task_queue.h   |  6 +++---
 state/task_table.h   |  4 ++--
 test/db_tests.c      | 33 +++++++++++++++------------------
 test/redis_tests.c   | 30 ++++++++++++++----------------
 9 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/logging.c b/logging.c
index 38abc7bf9..bf37f1ee5 100644
--- a/logging.c
+++ b/logging.c
@@ -17,7 +17,7 @@ struct ray_logger_impl {
   int log_level;
   /* Whether or not we have a direct connection to Redis. */
   int is_direct;
-  /* Either a db_conn or a socket to a process with a db_conn,
+  /* Either a db_handle or a socket to a process with a db_handle,
    * depending on the is_direct flag. */
   void *conn;
 };
@@ -57,7 +57,7 @@ void ray_log(ray_logger *logger,
   UT_string *origin_id;
   utstring_new(origin_id);
   if (logger->is_direct) {
-    db_conn *db = (db_conn *) logger->conn;
+    db_handle *db = (db_handle *) logger->conn;
     utstring_printf(origin_id, "%ld:%s", db->client_id, "");
     redisAsyncCommand(db->context, NULL, NULL, log_fmt,
                       utstring_body(timestamp), logger->client_type,
diff --git a/state/db.h b/state/db.h
index 3fcf658fd..e6ca089a0 100644
--- a/state/db.h
+++ b/state/db.h
@@ -3,22 +3,21 @@
 
 #include "event_loop.h"
 
-typedef struct db_conn_impl db_conn;
+typedef struct db_handle_impl db_handle;
 
-/* Connect to the global system store at address and port. The last
- * parameter is an output parameter and we assume the memory is
- * allocated by the caller. */
-void db_connect(const char *db_address,
-                int db_port,
-                const char *client_type,
-                const char *client_addr,
-                int client_port,
-                db_conn *db);
+/* Connect to the global system store at address and port. Returns
+ * a handle to the database, which must be freed with db_disconnect
+ * after use. */
+db_handle *db_connect(const char *db_address,
+                      int db_port,
+                      const char *client_type,
+                      const char *client_addr,
+                      int client_port);
 
 /* Attach global system store connection to event loop. */
-void db_attach(db_conn *db, event_loop *loop);
+void db_attach(db_handle *db, event_loop *loop);
 
 /* Disconnect from the global system store. */
-void db_disconnect(db_conn *db);
+void db_disconnect(db_handle *db);
 
 #endif
diff --git a/state/object_table.h b/state/object_table.h
index 7c00ab2ba..e2eb89433 100644
--- a/state/object_table.h
+++ b/state/object_table.h
@@ -10,12 +10,14 @@ typedef void (*lookup_callback)(object_id object_id,
 
 /* Register a new object with the directory. */
 /* TODO(pcm): Retry, print for each attempt. */
-void object_table_add(db_conn *db, object_id object_id);
+void object_table_add(db_handle *db, object_id object_id);
 
 /* Remove object from the directory. */
-void object_table_remove(db_conn *db, object_id object_id, const char *manager);
+void object_table_remove(db_handle *db,
+                         object_id object_id,
+                         const char *manager);
 
 /* Look up entry from the directory */
-void object_table_lookup(db_conn *db,
+void object_table_lookup(db_handle *db,
                          object_id object_id,
                          lookup_callback callback);
diff --git a/state/redis.c b/state/redis.c
index d10b06b75..aa2011d9a 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -31,12 +31,12 @@
     }                                                      \
   } while (0);
 
-void db_connect(const char *address,
-                int port,
-                const char *client_type,
-                const char *client_addr,
-                int client_port,
-                db_conn *db) {
+db_handle *db_connect(const char *address,
+                      int port,
+                      const char *client_type,
+                      const char *client_addr,
+                      int client_port) {
+  db_handle *db = malloc(sizeof(db_handle));
   /* Sync connection for initial handshake */
   redisReply *reply;
   long long num_clients;
@@ -75,9 +75,10 @@ void db_connect(const char *address,
   CHECK_REDIS_CONNECT(redisAsyncContext, db->context,
                       "could not connect to redis %s:%d", address, port);
   db->context->data = (void *) db;
+  return db;
 }
 
-void db_disconnect(db_conn *db) {
+void db_disconnect(db_handle *db) {
   redisFree(db->sync_context);
   redisAsyncFree(db->context);
   service_cache_entry *e, *tmp;
@@ -87,13 +88,14 @@ void db_disconnect(db_conn *db) {
     free(e);
   }
   free(db->client_type);
+  free(db);
 }
 
-void db_attach(db_conn *db, event_loop *loop) {
+void db_attach(db_handle *db, event_loop *loop) {
   redisAeAttach(loop, db->context);
 }
 
-void object_table_add(db_conn *db, unique_id object_id) {
+void object_table_add(db_handle *db, unique_id object_id) {
   static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
   sha1_to_hex(&object_id.id[0], &hex_object_id[0]);
   redisAsyncCommand(db->context, NULL, NULL, "SADD obj:%s %d",
@@ -104,7 +106,7 @@ void object_table_add(db_conn *db, unique_id object_id) {
 }
 
 void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata) {
-  db_conn *db = c->data;
+  db_handle *db = c->data;
   lookup_callback_data *cb_data = privdata;
   redisReply *reply = r;
   if (reply == NULL)
@@ -143,7 +145,7 @@ void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata) {
   free(result);
 }
 
-void object_table_lookup(db_conn *db,
+void object_table_lookup(db_handle *db,
                          object_id object_id,
                          lookup_callback callback) {
   static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
@@ -158,7 +160,7 @@ void object_table_lookup(db_conn *db,
   }
 }
 
-void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task) {
+void task_queue_submit_task(db_handle *db, task_iid task_iid, task_spec *task) {
   /* For converting an id to hex, which has double the number
    * of bytes compared to the id (+ 1 byte for '\0'). */
   static char hex[2 * UNIQUE_ID_SIZE + 1];
diff --git a/state/redis.h b/state/redis.h
index c579e7065..23ebb2ba6 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -17,7 +17,7 @@ typedef struct {
   UT_hash_handle hh;
 } service_cache_entry;
 
-struct db_conn_impl {
+struct db_handle_impl {
   /* String that identifies this client type. */
   char *client_type;
   /* Unique ID for this client within the type. */
diff --git a/state/task_queue.h b/state/task_queue.h
index 0226c501b..92968707e 100644
--- a/state/task_queue.h
+++ b/state/task_queue.h
@@ -21,13 +21,13 @@ typedef unique_id node_id;
 typedef void (*task_queue_callback)(task_iid *task_iid, task_spec *task);
 
 /* Submit task to the global scheduler. */
-void task_queue_submit_task(db_conn *db, task_iid task_iid, task_spec *task);
+void task_queue_submit_task(db_handle *db, task_iid task_iid, task_spec *task);
 
 /* Submit task to a local scheduler based on the decision made by the global
  * scheduler. */
-void task_queue_schedule_task(db_conn *db, task_iid task_iid, node_id node);
+void task_queue_schedule_task(db_handle *db, task_iid task_iid, node_id node);
 
 /* Subscribe to task queue. */
-void task_queue_register_callback(db_conn *db, task_queue_callback callback);
+void task_queue_register_callback(db_handle *db, task_queue_callback callback);
 
 #endif
diff --git a/state/task_table.h b/state/task_table.h
index 64285da67..71e879c2c 100644
--- a/state/task_table.h
+++ b/state/task_table.h
@@ -5,9 +5,9 @@
 #include "task.h"
 
 /* Add task to the task table, handle errors here. */
-status task_table_add_task(db_conn *db, task_iid task_iid, task_spec *task);
+status task_table_add_task(db_handle *db, task_iid task_iid, task_spec *task);
 
 /* Get specific task from the task table. */
-status task_table_get_task(db_conn *db, task_iid task_iid, task_spec *task);
+status task_table_get_task(db_handle *db, task_iid task_iid, task_spec *task);
 
 #endif /* TASK_TABLE_H */
diff --git a/test/db_tests.c b/test/db_tests.c
index d9dfcb563..99fad7e18 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -45,20 +45,18 @@ int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
 
 TEST object_table_lookup_test(void) {
   event_loop *loop = event_loop_create();
-  db_conn conn1;
-  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port1,
-             &conn1);
-  db_conn conn2;
-  db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr, manager_port2,
-             &conn2);
-  db_attach(&conn1, loop);
-  db_attach(&conn2, loop);
+  db_handle *db1 = db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr,
+                              manager_port1);
+  db_handle *db2 = db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr,
+                              manager_port2);
+  db_attach(db1, loop);
+  db_attach(db2, loop);
   unique_id id = globally_unique_id();
-  object_table_add(&conn1, id);
-  object_table_add(&conn2, id);
+  object_table_add(db1, id);
+  object_table_add(db2, id);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
-  object_table_lookup(&conn1, id, test_callback);
+  object_table_lookup(db1, id, test_callback);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
   int port1 = atoi(received_port1);
@@ -67,8 +65,8 @@ TEST object_table_lookup_test(void) {
   ASSERT((port1 == manager_port1 && port2 == manager_port2) ||
          (port2 == manager_port1 && port1 == manager_port2));
 
-  db_disconnect(&conn1);
-  db_disconnect(&conn2);
+  db_disconnect(db1);
+  db_disconnect(db2);
 
   event_loop_destroy(loop);
   PASS();
@@ -76,17 +74,16 @@ TEST object_table_lookup_test(void) {
 
 TEST task_queue_test(void) {
   event_loop *loop = event_loop_create();
-  db_conn conn;
-  db_connect("127.0.0.1", 6379, "local_scheduler", "", -1, &conn);
-  db_attach(&conn, loop);
+  db_handle *db = db_connect("127.0.0.1", 6379, "local_scheduler", "", -1);
+  db_attach(db, loop);
 
   task_spec *task = example_task();
-  task_queue_submit_task(&conn, globally_unique_id(), task);
+  task_queue_submit_task(db, globally_unique_id(), task);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
 
   free_task_spec(task);
-  db_disconnect(&conn);
+  db_disconnect(db);
   event_loop_destroy(loop);
   PASS();
 }
diff --git a/test/redis_tests.c b/test/redis_tests.c
index d527d0476..b1cde98b6 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -68,10 +68,10 @@ TEST redis_socket_test(void) {
 }
 
 void redis_read_callback(event_loop *loop, int fd, void *context, int events) {
-  db_conn *conn = context;
+  db_handle *db = context;
   char *cmd = read_string(fd);
-  redisAsyncCommand(conn->context, async_redis_socket_test_callback, NULL, cmd,
-                    conn->client_id, 0);
+  redisAsyncCommand(db->context, async_redis_socket_test_callback, NULL, cmd,
+                    db->client_id, 0);
   free(cmd);
 }
 
@@ -102,9 +102,8 @@ TEST async_redis_socket_test(void) {
   utarray_push_back(connections, &socket_fd);
 
   /* Start connection to Redis. */
-  db_conn conn;
-  db_connect("127.0.0.1", 6379, "", "", 0, &conn);
-  db_attach(&conn, loop);
+  db_handle *db = db_connect("127.0.0.1", 6379, "", "", 0);
+  db_attach(db, loop);
 
   /* Send a command to the Redis process. */
   int client_fd = connect_ipc_sock(socket_pathname);
@@ -113,15 +112,15 @@ TEST async_redis_socket_test(void) {
   write_formatted_string(client_fd, test_set_format, test_key, test_value);
 
   event_loop_add_file(loop, client_fd, EVENT_LOOP_READ, redis_read_callback,
-                      &conn);
+                      db);
   event_loop_add_file(loop, socket_fd, EVENT_LOOP_READ, redis_accept_callback,
-                      &conn);
+                      db);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
 
   CHECK(async_redis_socket_test_callback_called);
 
-  db_disconnect(&conn);
+  db_disconnect(db);
   event_loop_destroy(loop);
   for (int *p = (int *) utarray_front(connections); p != NULL;
        p = (int *) utarray_next(connections, p)) {
@@ -148,7 +147,7 @@ void logging_read_callback(event_loop *loop,
                            int fd,
                            void *context,
                            int events) {
-  db_conn *conn = context;
+  db_handle *conn = context;
   char *cmd = read_string(fd);
   redisAsyncCommand(conn->context, logging_test_callback, NULL, cmd,
                     conn->client_id, 0);
@@ -177,9 +176,8 @@ TEST logging_test(void) {
   utarray_push_back(connections, &socket_fd);
 
   /* Start connection to Redis. */
-  db_conn conn;
-  db_connect("127.0.0.1", 6379, "", "", 0, &conn);
-  db_attach(&conn, loop);
+  db_handle *conn = db_connect("127.0.0.1", 6379, "", "", 0);
+  db_attach(conn, loop);
 
   /* Send a command to the Redis process. */
   int client_fd = connect_ipc_sock(socket_pathname);
@@ -189,16 +187,16 @@ TEST logging_test(void) {
   ray_log(logger, RAY_INFO, "TEST", "Message");
 
   event_loop_add_file(loop, socket_fd, EVENT_LOOP_READ, logging_accept_callback,
-                      &conn);
+                      conn);
   event_loop_add_file(loop, client_fd, EVENT_LOOP_READ, logging_read_callback,
-                      &conn);
+                      conn);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
 
   CHECK(logging_test_callback_called);
 
   free_ray_logger(logger);
-  db_disconnect(&conn);
+  db_disconnect(conn);
   event_loop_destroy(loop);
   for (int *p = (int *) utarray_front(connections); p != NULL;
        p = (int *) utarray_next(connections, p)) {

From 675ea40171b6ccec135b64e8d2b9166e04d20003 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Sun, 25 Sep 2016 22:49:55 -0700
Subject: [PATCH 44/91] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b95056c2f..92e08a503 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,2 @@
-# Halo
+# Photon
 A local scheduler and node manager for Ray.

From 631de921703a750ad8223089bb61a099a04cf248 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Mon, 26 Sep 2016 00:12:11 -0700
Subject: [PATCH 45/91] Build redis before libcommon. (#20)

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f71273119..ad955de98 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
 CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae
 BUILD = build
 
-all: $(BUILD)/libcommon.a
+all: hiredis $(BUILD)/libcommon.a
 
 $(BUILD)/libcommon.a: event_loop.o common.o task.o io.o state/redis.o thirdparty/ae/ae.o
 	ar rcs $@ $^

From 184242a85c7ed85766e20dfcd117304ba22c8c5a Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 27 Sep 2016 00:13:28 -0700
Subject: [PATCH 46/91] Add basic travis file and lint check.

---
 .travis.yml                              |  36 ++
 .travis/check-git-clang-format-output.sh |  18 +
 .travis/git-clang-format                 | 476 +++++++++++++++++++++++
 3 files changed, 530 insertions(+)
 create mode 100644 .travis.yml
 create mode 100644 .travis/check-git-clang-format-output.sh
 create mode 100644 .travis/git-clang-format

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..e77f446da
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,36 @@
+sudo: required
+
+language: generic
+
+matrix:
+  include:
+    - os: linux
+      dist: trusty
+      python: "2.7"
+    - os: linux
+      dist: trusty
+      python: "3.5"
+    - os: osx
+      osx_image: xcode7
+      python: "2.7"
+    - os: osx
+      osx_image: xcode7
+      python: "3.5"
+    - os: linux
+      dist: trusty
+      python: "2.7"
+      env: LINT=1
+      before_install:
+        # In case we ever want to use a different version of clang-format:
+        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq clang-format-3.8
+      install: []
+      script:
+        - .travis/check-git-clang-format-output.sh
+
+install:
+  - make
+
+script:
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
new file mode 100644
index 000000000..d71f78357
--- /dev/null
+++ b/.travis/check-git-clang-format-output.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
+  # Not in a pull request, so compare against parent commit
+  base_commit="HEAD^"
+  echo "Running clang-format against parent commit $(git rev-parse "$base_commit")"
+else
+  base_commit="$TRAVIS_BRANCH"  # pull-request build: diff against $TRAVIS_BRANCH
+  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse "$base_commit")"
+fi
+output="$(.travis/git-clang-format --binary clang-format-3.8 --commit "$base_commit" --diff --exclude '^thirdparty/')"
+if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
+  echo "clang-format passed."  # the two messages above are the tool's "nothing to change" outputs
+  exit 0
+else
+  echo "clang-format failed:"  # any other output is the formatting diff; print it and fail the build
+  echo "$output"
+  exit 1
+fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
new file mode 100644
index 000000000..b0e458303
--- /dev/null
+++ b/.travis/git-clang-format
@@ -0,0 +1,476 @@
+#!/usr/bin/env python
+#
+#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+clang-format git integration
+============================
+
+This file provides a clang-format integration for git. Put it somewhere in your
+path and ensure that it is executable. Then, "git clang-format" will invoke
+clang-format on the changes in current files or a specific commit.
+
+For further details, run:
+git clang-format -h
+
+Requires Python 2.7
+"""
+
+import argparse
+import collections
+import contextlib
+import errno
+import os
+import re
+import subprocess
+import sys
+
+usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
+
+desc = '''
+Run clang-format on all lines that differ between the working directory
+and <commit>, which defaults to HEAD.  Changes are only applied to the working
+directory.
+The following git-config settings set the default of the corresponding option:
+  clangFormat.binary
+  clangFormat.commit
+  clangFormat.extension
+  clangFormat.style
+'''
+
+# Name of the temporary index file in which to save the output of clang-format.
+# This file is created within the .git directory.
+temp_index_basename = 'clang-format-index'
+
+
+Range = collections.namedtuple('Range', 'start, count')
+
+
+def main():
+  config = load_git_config()  # git config as a dict; supplies the option defaults below
+
+  # In order to keep '--' yet allow options after positionals, we need to
+  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
+  # nargs=argparse.REMAINDER disallows options after positionals.)
+  argv = sys.argv[1:]
+  try:
+    idx = argv.index('--')
+  except ValueError:
+    dash_dash = []
+  else:
+    dash_dash = argv[idx:]  # keeps the '--' itself as element 0
+    argv = argv[:idx]
+
+  default_extensions = ','.join([
+      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
+      'c', 'h',  # C
+      'm',  # ObjC
+      'mm',  # ObjC++
+      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
+      # Other languages that clang-format supports
+      'proto', 'protodevel',  # Protocol Buffers
+      'js',  # JavaScript
+      'ts',  # TypeScript
+      ])
+
+  p = argparse.ArgumentParser(
+    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=desc)
+  p.add_argument('--binary',
+                 default=config.get('clangformat.binary', 'clang-format'),
+                 help='path to clang-format'),
+  p.add_argument('--commit',
+                 default=config.get('clangformat.commit', 'HEAD'),
+                 help='default commit to use if none is specified'),
+  p.add_argument('--diff', action='store_true',
+                 help='print a diff instead of applying the changes')
+  p.add_argument('--extensions',
+                 default=config.get('clangformat.extensions',
+                                    default_extensions),
+                 help=('comma-separated list of file extensions to format, '
+                       'excluding the period and case-insensitive')),
+  p.add_argument('--exclude', help='Exclude files matching this regex.')
+  p.add_argument('-f', '--force', action='store_true',
+                 help='allow changes to unstaged files')
+  p.add_argument('-p', '--patch', action='store_true',
+                 help='select hunks interactively')
+  p.add_argument('-q', '--quiet', action='count', default=0,
+                 help='print less information')
+  p.add_argument('--style',
+                 default=config.get('clangformat.style', None),
+                 help='passed to clang-format'),
+  p.add_argument('-v', '--verbose', action='count', default=0,
+                 help='print extra information')
+  # We gather all the remaining positional arguments into 'args' since we need
+  # to use some heuristics to determine whether or not <commit> was present.
+  # However, to print pretty messages, we make use of metavar and help.
+  p.add_argument('args', nargs='*', metavar='<commit>',
+                 help='revision from which to compute the diff')
+  p.add_argument('ignored', nargs='*', metavar='<file>...',
+                 help='if specified, only consider differences in these files')
+  opts = p.parse_args(argv)
+
+  opts.verbose -= opts.quiet  # fold -q and -v into one net verbosity level
+  del opts.quiet
+
+  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
+  changed_lines = compute_diff_and_extract_lines(commit, files)  # keyed by filename (see loops below)
+  if opts.verbose >= 1:
+    ignored_files = set(changed_lines)
+  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
+  if opts.exclude:
+    for filename in changed_lines.keys():  # Python 2: keys() is a list copy, so the del below is safe
+      if re.match(opts.exclude, filename):  # re.match only matches at the start of filename
+        del changed_lines[filename]
+  if opts.verbose >= 1:
+    ignored_files.difference_update(changed_lines)
+    if ignored_files:
+      print 'Ignoring changes in the following files:'
+      for filename in ignored_files:
+        print '   ', filename
+    if changed_lines:
+      print 'Running clang-format on the following files:'
+      for filename in changed_lines:
+        print '   ', filename
+  if not changed_lines:
+    print 'no modified files to format'
+    return
+  # The computed diff outputs absolute paths, so we must cd before accessing
+  # those files.
+  cd_to_toplevel()
+  old_tree = create_tree_from_workdir(changed_lines)
+  new_tree = run_clang_format_and_save_to_tree(changed_lines,
+                                               binary=opts.binary,
+                                               style=opts.style)
+  if opts.verbose >= 1:
+    print 'old tree:', old_tree
+    print 'new tree:', new_tree
+  if old_tree == new_tree:  # equal trees: clang-format changed nothing
+    if opts.verbose >= 0:
+      print 'clang-format did not modify any files'
+  elif opts.diff:  # --diff: only print the changes, do not apply them
+    print_diff(old_tree, new_tree)
+  else:
+    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
+                                  patch_mode=opts.patch)
+    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
+      print 'changed files:'
+      for filename in changed_files:
+        print '   ', filename
+
+
+def load_git_config(non_string_options=None):
+  """Return the git configuration as a dictionary.
+  All options are assumed to be strings unless listed in `non_string_options`,
+  which is a dictionary mapping option name (in lower case) to either "--bool"
+  or "--int"."""
+  if non_string_options is None:
+    non_string_options = {}
+  out = {}
+  for entry in run('git', 'config', '--list', '--null').split('\0'):  # entries are NUL-separated
+    if entry:
+      name, value = entry.split('\n', 1)  # name and value are split at the first newline
+      if name in non_string_options:
+        value = run('git', 'config', non_string_options[name], name)  # re-read with the type flag
+      out[name] = value
+  return out
+
+
+def interpret_args(args, dash_dash, default_commit):
+  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
+  It is assumed that "--" and everything that follows has been removed from
+  args and placed in `dash_dash`.
+  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
+  left (if present) is taken as commit.  Otherwise, the first argument is
+  checked if it is a commit or a file.  If commit is not given,
+  `default_commit` is used."""
+  if dash_dash:
+    if len(args) == 0:
+      commit = default_commit
+    elif len(args) > 1:
+      die('at most one commit allowed; %d given' % len(args))
+    else:
+      commit = args[0]
+    object_type = get_object_type(commit)
+    if object_type not in ('commit', 'tag'):
+      if object_type is None:
+        die("'%s' is not a commit" % commit)
+      else:
+        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
+    files = dash_dash[1:]
+  elif args:
+    if disambiguate_revision(args[0]):
+      commit = args[0]
+      files = args[1:]
+    else:
+      commit = default_commit
+      files = args
+  else:
+    commit = default_commit
+    files = []
+  return commit, files
+
+
+def disambiguate_revision(value):
+  """Returns True if `value` is a revision, False if it is a file, or dies."""
+  # If `value` is ambiguous (neither a commit nor a file), the following
+  # command will die with an appropriate error message.
+  run('git', 'rev-parse', value, verbose=False)
+  object_type = get_object_type(value)
+  if object_type is None:
+    return False
+  if object_type in ('commit', 'tag'):
+    return True
+  die('`%s` is a %s, but a commit or filename was expected' %
+      (value, object_type))
+
+
+def get_object_type(value):
+  """Returns a string description of an object's type, or None if it is not
+  a valid git object."""
+  cmd = ['git', 'cat-file', '-t', value]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  stdout, stderr = p.communicate()
+  if p.returncode != 0:
+    return None
+  return stdout.strip()
+
+
+def compute_diff_and_extract_lines(commit, files):
+  """Calls compute_diff() followed by extract_lines()."""
+  diff_process = compute_diff(commit, files)
+  changed_lines = extract_lines(diff_process.stdout)
+  diff_process.stdout.close()
+  diff_process.wait()
+  if diff_process.returncode != 0:
+    # Assume error was already printed to stderr.
+    sys.exit(2)
+  return changed_lines
+
+
+def compute_diff(commit, files):
+  """Return a subprocess object producing the diff from `commit`.
+  The return value's `stdout` file object will produce a patch with the
+  differences between the working directory and `commit`, filtered on `files`
+  (if non-empty).  Zero context lines are used in the patch."""
+  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
+  cmd.extend(files)
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  p.stdin.close()
+  return p
+
+
+def extract_lines(patch_file):
+  """Extract the changed lines in `patch_file`.
+  The return value is a dictionary mapping filename to a list of (start_line,
+  line_count) pairs.
+  The input must have been produced with ``-U0``, meaning unidiff format with
+  zero lines of context.  The return value is a dict mapping filename to a
+  list of line `Range`s."""
+  matches = {}
+  for line in patch_file:
+    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
+    if match:
+      filename = match.group(1).rstrip('\r\n')
+    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
+    if match:
+      start_line = int(match.group(1))
+      line_count = 1
+      if match.group(3):
+        line_count = int(match.group(3))
+      if line_count > 0:
+        matches.setdefault(filename, []).append(Range(start_line, line_count))
+  return matches
+
+
+def filter_by_extension(dictionary, allowed_extensions):
+  """Delete every key in `dictionary` that doesn't have an allowed extension.
+  `allowed_extensions` must be a collection of lowercase file extensions,
+  excluding the period."""
+  allowed_extensions = frozenset(allowed_extensions)
+  for filename in dictionary.keys():
+    base_ext = filename.rsplit('.', 1)
+    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
+      del dictionary[filename]
+
+
+def cd_to_toplevel():
+  """Change to the top level of the git repository."""
+  toplevel = run('git', 'rev-parse', '--show-toplevel')
+  os.chdir(toplevel)
+
+
+def create_tree_from_workdir(filenames):
+  """Create a new git tree with the given files from the working directory.
+  Returns the object ID (SHA-1) of the created tree."""
+  return create_tree(filenames, '--stdin')
+
+
+def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
+                                      style=None):
+  """Run clang-format on each file and save the result to a git tree.
+  Returns the object ID (SHA-1) of the created tree."""
+  def index_info_generator():
+    for filename, line_ranges in changed_lines.iteritems():
+      mode = oct(os.stat(filename).st_mode)
+      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
+                                     style=style)
+      yield '%s %s\t%s' % (mode, blob_id, filename)
+  return create_tree(index_info_generator(), '--index-info')
+
+
+def create_tree(input_lines, mode):
+  """Create a tree object from the given input.
+  If mode is '--stdin', it must be a list of filenames.  If mode is
+  '--index-info' it must be a list of values suitable for "git update-index
+  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
+  is invalid."""
+  assert mode in ('--stdin', '--index-info')
+  cmd = ['git', 'update-index', '--add', '-z', mode]
+  with temporary_index_file():
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+    for line in input_lines:
+      p.stdin.write('%s\0' % line)
+    p.stdin.close()
+    if p.wait() != 0:
+      die('`%s` failed' % ' '.join(cmd))
+    tree_id = run('git', 'write-tree')
+    return tree_id
+
+
+def clang_format_to_blob(filename, line_ranges, binary='clang-format',
+                         style=None):
+  """Run clang-format on the given file and save the result to a git blob.
+  Returns the object ID (SHA-1) of the created blob."""
+  clang_format_cmd = [binary, filename]
+  if style:
+    clang_format_cmd.extend(['-style='+style])
+  clang_format_cmd.extend([
+      '-lines=%s:%s' % (start_line, start_line+line_count-1)
+      for start_line, line_count in line_ranges])
+  try:
+    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE)
+  except OSError as e:
+    if e.errno == errno.ENOENT:
+      die('cannot find executable "%s"' % binary)
+    else:
+      raise
+  clang_format.stdin.close()
+  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
+  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
+                                 stdout=subprocess.PIPE)
+  clang_format.stdout.close()
+  stdout = hash_object.communicate()[0]
+  if hash_object.returncode != 0:
+    die('`%s` failed' % ' '.join(hash_object_cmd))
+  if clang_format.wait() != 0:
+    die('`%s` failed' % ' '.join(clang_format_cmd))
+  return stdout.rstrip('\r\n')
+
+
+@contextlib.contextmanager
+def temporary_index_file(tree=None):
+  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
+  the file afterward."""
+  index_path = create_temporary_index(tree)
+  old_index_path = os.environ.get('GIT_INDEX_FILE')
+  os.environ['GIT_INDEX_FILE'] = index_path
+  try:
+    yield
+  finally:
+    if old_index_path is None:
+      del os.environ['GIT_INDEX_FILE']
+    else:
+      os.environ['GIT_INDEX_FILE'] = old_index_path
+    os.remove(index_path)
+
+
+def create_temporary_index(tree=None):
+  """Create a temporary index file and return the created file's path.
+  If `tree` is not None, use that as the tree to read in.  Otherwise, an
+  empty index is created."""
+  gitdir = run('git', 'rev-parse', '--git-dir')
+  path = os.path.join(gitdir, temp_index_basename)
+  if tree is None:
+    tree = '--empty'
+  run('git', 'read-tree', '--index-output='+path, tree)
+  return path
+
+
+def print_diff(old_tree, new_tree):
+  """Print the diff between the two trees to stdout."""
+  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
+  # is expected to be viewed by the user, and only the former does nice things
+  # like color and pagination.
+  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
+
+
+def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
+  """Apply the changes in `new_tree` to the working directory.
+  Bails if there are local changes in those files and not `force`.  If
+  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
+  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
+                      new_tree).rstrip('\0').split('\0')
+  if not force:
+    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
+    if unstaged_files:
+      print >>sys.stderr, ('The following files would be modified but '
+                           'have unstaged changes:')
+      print >>sys.stderr, unstaged_files
+      print >>sys.stderr, 'Please commit, stage, or stash them first.'
+      sys.exit(2)
+  if patch_mode:
+    # In patch mode, we could just as well create an index from the new tree
+    # and checkout from that, but then the user will be presented with a
+    # message saying "Discard ... from worktree".  Instead, we use the old
+    # tree as the index and checkout from new_tree, which gives the slightly
+    # better message, "Apply ... to index and worktree".  This is not quite
+    # right, since it won't be applied to the user's index, but oh well.
+    with temporary_index_file(old_tree):
+      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
+    index_tree = old_tree
+  else:
+    with temporary_index_file(new_tree):
+      run('git', 'checkout-index', '-a', '-f')
+  return changed_files
+
+
+def run(*args, **kwargs):
+  stdin = kwargs.pop('stdin', '')
+  verbose = kwargs.pop('verbose', True)
+  strip = kwargs.pop('strip', True)
+  for name in kwargs:
+    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
+  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                       stdin=subprocess.PIPE)
+  stdout, stderr = p.communicate(input=stdin)
+  if p.returncode == 0:
+    if stderr:
+      if verbose:
+        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
+      print >>sys.stderr, stderr.rstrip()
+    if strip:
+      stdout = stdout.rstrip('\r\n')
+    return stdout
+  if verbose:
+    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
+  if stderr:
+    print >>sys.stderr, stderr.rstrip()
+  sys.exit(2)
+
+
+def die(message):
+  print >>sys.stderr, 'error:', message
+  sys.exit(2)
+
+
+if __name__ == '__main__':
+  main()

From 57214b3a11b43f1e62c91111fa083b7dc761440e Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 27 Sep 2016 00:15:54 -0700
Subject: [PATCH 47/91] Fixes.

---
 .travis.yml                              | 1 -
 .travis/check-git-clang-format-output.sh | 0
 2 files changed, 1 deletion(-)
 mode change 100644 => 100755 .travis/check-git-clang-format-output.sh

diff --git a/.travis.yml b/.travis.yml
index e77f446da..fd43573ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,6 +31,5 @@ matrix:
         - .travis/check-git-clang-format-output.sh
 
 install:
-  - make
 
 script:
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
old mode 100644
new mode 100755

From 2776d6a37cab0be22d4d15f410fab5b03b2151c6 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 27 Sep 2016 00:19:21 -0700
Subject: [PATCH 48/91] Fix permissions.

---
 .travis/git-clang-format | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 .travis/git-clang-format

diff --git a/.travis/git-clang-format b/.travis/git-clang-format
old mode 100644
new mode 100755

From 084220b0e70de6bed466e97e08f4b6909133aafb Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 27 Sep 2016 18:51:35 -0700
Subject: [PATCH 49/91] Allow reading/writing generic message types, not just
 tasks. (#24)

* Allow reading/writing generic message types, not just tasks.

* Allow messages of length 0 to be read/written, and handle closed sockets.

* Address comments.

* Simplify accept_client.

* Allow ports to be reused in bind_ipc_sock.
---
 io.c               | 100 +++++++++++++++++++++++++++------------------
 io.h               |  19 ++++++---
 logging.c          |   8 ++--
 task.c             |  13 ------
 test/io_tests.c    |  11 +++--
 test/redis_tests.c |  10 ++---
 test/task_tests.c  |  10 ++++-
 7 files changed, 99 insertions(+), 72 deletions(-)

diff --git a/io.c b/io.c
index d4a89a4a4..16073d32a 100644
--- a/io.c
+++ b/io.c
@@ -18,13 +18,19 @@
  * occurred. */
 int bind_ipc_sock(const char *socket_pathname) {
   struct sockaddr_un socket_address;
-  int socket_fd;
-
-  socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+  int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (socket_fd < 0) {
     LOG_ERR("socket() failed for pathname %s.", socket_pathname);
     return -1;
   }
+  /* Tell the system to allow the port to be reused. */
+  int on = 1;
+  if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on,
+                 sizeof(on)) < 0) {
+    LOG_ERR("setsockopt failed");
+    close(socket_fd);
+    exit(-1);
+  }
 
   unlink(socket_pathname);
   memset(&socket_address, 0, sizeof(struct sockaddr_un));
@@ -80,11 +86,7 @@ int connect_ipc_sock(const char *socket_pathname) {
 /* Accept a new client connection on the given socket
  * descriptor. Returns a descriptor for the new socket. */
 int accept_client(int socket_fd) {
-  struct sockaddr_un client_addr;
-  int client_fd, client_len;
-  client_len = sizeof(client_addr);
-  client_fd = accept(socket_fd, (struct sockaddr *) &client_addr,
-                     (socklen_t *) &client_len);
+  int client_fd = accept(socket_fd, NULL, NULL);
   if (client_fd < 0) {
     LOG_ERR("Error reading from socket.");
     return -1;
@@ -92,57 +94,77 @@ int accept_client(int socket_fd) {
   return client_fd;
 }
 
-/* Write a sequence of bytes on a file descriptor. */
-void write_bytes(int fd, uint8_t *bytes, int64_t length) {
-  ssize_t nbytes = write(fd, (char *) &length, sizeof(length));
-  if (nbytes == -1) {
-    LOG_ERR("Error sending to socket.\n");
-    return;
-  }
+/**
+ * Write a sequence of bytes on a file descriptor. The bytes should then be read
+ * by read_message.
+ *
+ * @param fd The file descriptor to write to.
+ * @param type The type of the message to send.
+ * @param length The size in bytes of the bytes parameter.
+ * @param bytes The address of the message to send.
+ * @return Void.
+ */
+void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes) {
+  ssize_t nbytes = write(fd, (char *) &type, sizeof(type));
+  CHECK(nbytes == sizeof(int64_t));
+  nbytes = write(fd, (char *) &length, sizeof(length));
+  CHECK(nbytes == sizeof(int64_t));
   nbytes = write(fd, (char *) bytes, length * sizeof(char));
-  if (nbytes == -1) {
-    LOG_ERR("Error sending to socket.\n");
-    return;
-  }
+  CHECK(nbytes >= 0);
 }
 
-/* Read a sequence of bytes written by write_bytes from a file descriptor.
- * Allocates and returns a pointer to the bytes.
- * NOTE: Caller must free the memory! */
-void read_bytes(int fd, uint8_t **bytes, int64_t *length) {
-  ssize_t nbytes = read(fd, length, sizeof(int64_t));
-  if (nbytes < 0) {
-    LOG_ERR("Error reading length of message from socket.");
+/**
+ * Read a sequence of bytes written by write_message from a file descriptor.
+ * This allocates space for the message.
+ *
+ * @note The caller must free the memory.
+ *
+ * @param fd The file descriptor to read from.
+ * @param type The type of the message that is read will be written at this
+          address.
+ * @param length The size in bytes of the message that is read will be written
+          at this address. This size does not include the bytes used to encode
+          the type and length.
+ * @param bytes The address at which to write the pointer to the bytes that are
+          read and allocated by this function.
+ * @return Void.
+ */
+void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
+  ssize_t nbytes = read(fd, type, sizeof(int64_t));
+  CHECK(nbytes >= 0);
+  /* Handle the case in which the socket is closed. */
+  if (nbytes == 0) {
+    *type = DISCONNECT_CLIENT;
+    *length = 0;
     *bytes = NULL;
     return;
   }
-
+  nbytes = read(fd, length, sizeof(int64_t));
+  CHECK(nbytes == sizeof(int64_t));
   *bytes = malloc(*length * sizeof(uint8_t));
   nbytes = read(fd, *bytes, *length);
-  if (nbytes < 0) {
-    LOG_ERR("Error reading message from socket.");
-    free(*bytes);
-    *bytes = NULL;
-  }
+  CHECK(nbytes >= 0);
 }
 
 /* Write a null-terminated string to a file descriptor. */
-void write_string(int fd, char *message) {
+void write_log_message(int fd, char *message) {
   /* Account for the \0 at the end of the string. */
-  write_bytes(fd, (uint8_t *) message, strlen(message) + 1);
+  write_message(fd, LOG_MESSAGE, strlen(message) + 1, (uint8_t *) message);
 }
 
 /* Reads a null-terminated string from the file descriptor that has been
- * written by write_string. Allocates and returns a pointer to the string.
+ * written by write_log_message. Allocates and returns a pointer to the string.
  * NOTE: Caller must free the memory! */
-char *read_string(int fd) {
+char *read_log_message(int fd) {
   uint8_t *bytes;
+  int64_t type;
   int64_t length;
-  read_bytes(fd, &bytes, &length);
+  read_message(fd, &type, &length, &bytes);
+  CHECK(type == LOG_MESSAGE);
   return (char *) bytes;
 }
 
-void write_formatted_string(int socket_fd, const char *format, ...) {
+void write_formatted_log_message(int socket_fd, const char *format, ...) {
   UT_string *cmd;
   va_list ap;
 
@@ -151,6 +173,6 @@ void write_formatted_string(int socket_fd, const char *format, ...) {
   utstring_printf_va(cmd, format, ap);
   va_end(ap);
 
-  write_string(socket_fd, utstring_body(cmd));
+  write_log_message(socket_fd, utstring_body(cmd));
   utstring_free(cmd);
 }
diff --git a/io.h b/io.h
index e6f227c98..2299806f7 100644
--- a/io.h
+++ b/io.h
@@ -3,6 +3,15 @@
 
 #include <stdint.h>
 
+enum common_message_type {
+  /** Disconnect a client. */
+  DISCONNECT_CLIENT,
+  /** Log a message from a client. */
+  LOG_MESSAGE,
+  /** Submit a task to the local scheduler. */
+  SUBMIT_TASK,
+};
+
 /* Helper functions for socket communication. */
 
 int bind_ipc_sock(const char *socket_pathname);
@@ -12,11 +21,11 @@ int accept_client(int socket_fd);
 
 /* Reading and writing data */
 
-void write_bytes(int fd, uint8_t *bytes, int64_t length);
-void read_bytes(int fd, uint8_t **bytes, int64_t *length);
+void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes);
+void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes);
 
-void write_string(int fd, char *message);
-void write_formatted_string(int fd, const char *format, ...);
-char *read_string(int fd);
+void write_log_message(int fd, char *message);
+void write_formatted_log_message(int fd, const char *format, ...);
+char *read_log_message(int fd);
 
 #endif
diff --git a/logging.c b/logging.c
index bf37f1ee5..1a8e96820 100644
--- a/logging.c
+++ b/logging.c
@@ -68,10 +68,10 @@ void ray_log(ray_logger *logger,
      * ID to be filled in by someone else. */
     utstring_printf(origin_id, "%s:%s", "%ld", "%ld");
     int *socket_fd = (int *) logger->conn;
-    write_formatted_string(*socket_fd, log_fmt, utstring_body(timestamp),
-                           logger->client_type, utstring_body(origin_id),
-                           log_levels[log_level], event_type, message,
-                           utstring_body(timestamp));
+    write_formatted_log_message(*socket_fd, log_fmt, utstring_body(timestamp),
+                                logger->client_type, utstring_body(origin_id),
+                                log_levels[log_level], event_type, message,
+                                utstring_body(timestamp));
   }
   utstring_free(origin_id);
   utstring_free(timestamp);
diff --git a/task.c b/task.c
index fd4708d78..30fde8d31 100644
--- a/task.c
+++ b/task.c
@@ -148,19 +148,6 @@ void free_task_spec(task_spec *spec) {
   free(spec);
 }
 
-void write_task(int fd, task_spec *spec) {
-  write_bytes(fd, (uint8_t *) spec, task_size(spec));
-}
-
-task_spec *read_task(int fd) {
-  uint8_t *bytes;
-  int64_t length;
-  read_bytes(fd, &bytes, &length);
-  task_spec *spec = (task_spec *) bytes;
-  CHECK(task_size(spec) == length);
-  return spec;
-}
-
 void print_task(task_spec *spec, UT_string *output) {
   /* For converting an id to hex, which has double the number
    * of bytes compared to the id (+ 1 byte for '\0'). */
diff --git a/test/io_tests.c b/test/io_tests.c
index 1e8d20832..b73207326 100644
--- a/test/io_tests.c
+++ b/test/io_tests.c
@@ -20,20 +20,23 @@ TEST ipc_socket_test(void) {
     close(socket_fd);
     socket_fd = connect_ipc_sock(socket_pathname);
     ASSERT(socket_fd >= 0);
-    write_string(socket_fd, test_string);
-    write_bytes(socket_fd, (uint8_t *) test_bytes, strlen(test_bytes));
+    write_log_message(socket_fd, test_string);
+    write_message(socket_fd, LOG_MESSAGE, strlen(test_bytes),
+                  (uint8_t *) test_bytes);
     close(socket_fd);
     exit(0);
   } else {
     int client_fd = accept_client(socket_fd);
     ASSERT(client_fd >= 0);
-    char *message = read_string(client_fd);
+    char *message = read_log_message(client_fd);
     ASSERT(message != NULL);
     ASSERT_STR_EQ(test_string, message);
     free(message);
+    int64_t type;
     int64_t len;
     uint8_t *bytes;
-    read_bytes(client_fd, &bytes, &len);
+    read_message(client_fd, &type, &len, &bytes);
+    ASSERT(type == LOG_MESSAGE);
     ASSERT(memcmp(test_bytes, bytes, len) == 0);
     free(bytes);
     close(client_fd);
diff --git a/test/redis_tests.c b/test/redis_tests.c
index b1cde98b6..2277174c0 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -45,10 +45,10 @@ TEST redis_socket_test(void) {
 
   int client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
-  write_formatted_string(client_fd, test_set_format, test_key, test_value);
+  write_formatted_log_message(client_fd, test_set_format, test_key, test_value);
 
   int server_fd = accept_client(socket_fd);
-  char *cmd = read_string(server_fd);
+  char *cmd = read_log_message(server_fd);
   close(client_fd);
   close(server_fd);
   close(socket_fd);
@@ -69,7 +69,7 @@ TEST redis_socket_test(void) {
 
 void redis_read_callback(event_loop *loop, int fd, void *context, int events) {
   db_handle *db = context;
-  char *cmd = read_string(fd);
+  char *cmd = read_log_message(fd);
   redisAsyncCommand(db->context, async_redis_socket_test_callback, NULL, cmd,
                     db->client_id, 0);
   free(cmd);
@@ -109,7 +109,7 @@ TEST async_redis_socket_test(void) {
   int client_fd = connect_ipc_sock(socket_pathname);
   ASSERT(client_fd >= 0);
   utarray_push_back(connections, &client_fd);
-  write_formatted_string(client_fd, test_set_format, test_key, test_value);
+  write_formatted_log_message(client_fd, test_set_format, test_key, test_value);
 
   event_loop_add_file(loop, client_fd, EVENT_LOOP_READ, redis_read_callback,
                       db);
@@ -148,7 +148,7 @@ void logging_read_callback(event_loop *loop,
                            void *context,
                            int events) {
   db_handle *conn = context;
-  char *cmd = read_string(fd);
+  char *cmd = read_log_message(fd);
   redisAsyncCommand(conn->context, logging_test_callback, NULL, cmd,
                     conn->client_id, 0);
   free(cmd);
diff --git a/test/task_tests.c b/test/task_tests.c
index 4293eec04..fcb714737 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -7,6 +7,7 @@
 #include "common.h"
 #include "test/example_task.h"
 #include "task.h"
+#include "io.h"
 
 SUITE(task_tests);
 
@@ -48,8 +49,13 @@ TEST send_task(void) {
   *task_return(task, 1) = globally_unique_id();
   int fd[2];
   socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
-  write_task(fd[0], task);
-  task_spec *result = read_task(fd[1]);
+  write_message(fd[0], SUBMIT_TASK, task_size(task), task);
+  int64_t type;
+  int64_t length;
+  uint8_t *message;
+  read_message(fd[1], &type, &length, &message);
+  task_spec *result = (task_spec *) message;
+  ASSERT(type == SUBMIT_TASK);
   ASSERT(memcmp(task, result, task_size(task)) == 0);
   ASSERT(memcmp(task, result, task_size(result)) == 0);
   free(task);

From ff8018db753a4a7643b9f611ad1785c68b3b2e13 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 27 Sep 2016 19:11:09 -0700
Subject: [PATCH 50/91] First pass at local scheduler (#2)

* draft of local scheduler

* API

* update APIs

* fix

* update

* Rename halo -> photon.

* Add build directory.

* Update common submodule.

* More renaming.

* Fix python ctypes.

* Compile in travis.

* Process generic messages and not just tasks.

* Move free outside of switch.

* Formatting and address comments.

* Remove event loop from local scheduler state.

* Use accept_client from common.

* Use bind_ipc_sock from common.

* Fix tests.

* Update common submodule.

* Fix formatting.
---
 .gitmodules          |   3 ++
 .travis.yml          |   6 +++
 Makefile             |  21 +++++++++
 build/.gitkeep       |   0
 common               |   1 +
 lib/python/photon.py |  38 +++++++++++++++
 photon.c             | 109 +++++++++++++++++++++++++++++++++++++++++++
 photon.h             |  14 ++++++
 photon_client.c      |  27 +++++++++++
 photon_client.h      |  27 +++++++++++
 photon_scheduler.h   |  15 ++++++
 setup-env.sh         |   5 ++
 test/test.py         |  37 +++++++++++++++
 13 files changed, 303 insertions(+)
 create mode 100644 .gitmodules
 create mode 100644 Makefile
 create mode 100644 build/.gitkeep
 create mode 160000 common
 create mode 100644 lib/python/photon.py
 create mode 100644 photon.c
 create mode 100644 photon.h
 create mode 100644 photon_client.c
 create mode 100644 photon_client.h
 create mode 100644 photon_scheduler.h
 create mode 100755 setup-env.sh
 create mode 100644 test/test.py

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..3f2c8add4
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "common"]
+	path = common
+	url = https://github.com/ray-project/common
diff --git a/.travis.yml b/.travis.yml
index fd43573ca..45ef9a286 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,5 +31,11 @@ matrix:
         - .travis/check-git-clang-format-output.sh
 
 install:
+  - make
 
 script:
+  - cd common
+  - make test
+  - cd ..
+  - source setup-env.sh
+  - python test/test.py
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..7c494a74e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+CC = gcc
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Icommon/thirdparty
+BUILD = build
+
+all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.so
+
+$(BUILD)/photon_client.so: photon_client.h photon_client.c common
+	$(CC) $(CFLAGS) photon_client.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/photon_client.so
+
+$(BUILD)/photon_scheduler: photon.h photon.c common
+	$(CC) $(CFLAGS) -o $@ photon.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty -Icommon/
+
+common: FORCE
+	git submodule update --init --recursive
+	cd common; make
+
+clean:
+	cd common; make clean
+	rm -r $(BUILD)/*
+
+FORCE:
diff --git a/build/.gitkeep b/build/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/common b/common
new file mode 160000
index 000000000..084220b0e
--- /dev/null
+++ b/common
@@ -0,0 +1 @@
+Subproject commit 084220b0e70de6bed466e97e08f4b6909133aafb
diff --git a/lib/python/photon.py b/lib/python/photon.py
new file mode 100644
index 000000000..b0fc263a5
--- /dev/null
+++ b/lib/python/photon.py
@@ -0,0 +1,38 @@
+import ctypes
+import os
+
+photon_client_library_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/photon_client.so")
+photon_client_library = ctypes.cdll.LoadLibrary(photon_client_library_path)
+photon_client_library.alloc_task_spec.restype = ctypes.c_void_p
+photon_client_library.photon_connect.restype = ctypes.c_void_p
+photon_client_library.photon_submit.restype = None
+
+ID = ctypes.c_ubyte * 20
+
+class UniqueID(ctypes.Structure):
+  _fields_ = [("unique_id", ID)]
+
+def make_id(string):
+  if len(string) != 20:
+    raise Exception("PlasmaIDs must be 20 characters long")
+  unique_id = map(ord, string)
+  return UniqueID(unique_id=ID(*unique_id))
+
+class Task(object):
+  def __init__(self, function_id, args):
+    function_id = make_id(function_id)
+    self.task_spec = ctypes.c_void_p(photon_client_library.alloc_task_spec(function_id, len(args), 1, 0))
+    for arg in args:
+      photon_client_library.task_args_add_ref(self.task_spec, arg)
+
+  def __del__(self):
+    photon_client_library.free_task_spec(self.task_spec)
+
+class PhotonClient(object):
+
+  def __init__(self, socket_name):
+    self.photon_conn = ctypes.c_void_p(photon_client_library.photon_connect(socket_name))
+
+  def submit(self, function_id, args):
+    task = Task(function_id, args)
+    photon_client_library.photon_submit(self.photon_conn, task.task_spec)
diff --git a/photon.c b/photon.c
new file mode 100644
index 000000000..8c84c2b95
--- /dev/null
+++ b/photon.c
@@ -0,0 +1,109 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "event_loop.h"
+#include "io.h"
+#include "photon.h"
+#include "state/db.h"
+#include "state/task_queue.h"
+#include "task.h"
+#include "utarray.h"
+
+typedef struct {
+  db_handle *db;
+  UT_array *task_queue;
+} local_scheduler_state;
+
+event_loop *init_local_scheduler() { return event_loop_create(); };
+
+void process_message(event_loop *loop, int client_sock, void *context,
+                     int events) {
+  local_scheduler_state *s = context;
+
+  uint8_t *message;
+  int64_t type;
+  int64_t length;
+  read_message(client_sock, &type, &length, &message);
+
+  switch (type) {
+  case SUBMIT_TASK: {
+    task_spec *task = (task_spec *)message;
+    CHECK(task_size(task) == length);
+    unique_id id = globally_unique_id();
+    task_queue_submit_task(s->db, id, task);
+  } break;
+  case TASK_DONE: {
+  } break;
+  case DISCONNECT_CLIENT: {
+    LOG_INFO("Disconnecting client on fd %d", client_sock);
+    event_loop_remove_file(loop, client_sock);
+  } break;
+  case LOG_MESSAGE: {
+  } break;
+  default:
+    /* This code should be unreachable. */
+    CHECK(0);
+  }
+  free(message);
+}
+
+void new_client_connection(event_loop *loop, int listener_sock, void *context,
+                           int events) {
+  local_scheduler_state *s = context;
+  int new_socket = accept_client(listener_sock);
+  event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, s);
+  LOG_INFO("new connection with fd %d", new_socket);
+}
+
+void start_server(const char *socket_name, const char *redis_addr,
+                  int redis_port) {
+  int fd = bind_ipc_sock(socket_name);
+  local_scheduler_state state;
+  event_loop *loop = init_local_scheduler();
+
+  state.db = db_connect(redis_addr, redis_port, "photon", "", -1);
+  db_attach(state.db, loop);
+
+  /* Run event loop. */
+  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, &state);
+  event_loop_run(loop);
+}
+
+int main(int argc, char *argv[]) {
+  /* Path of the listening socket of the local scheduler. */
+  char *scheduler_socket_name = NULL;
+  /* IP address and port of redis. */
+  char *redis_addr_port = NULL;
+  int c;
+  while ((c = getopt(argc, argv, "s:r:")) != -1) {
+    switch (c) {
+    case 's':
+      scheduler_socket_name = optarg;
+      break;
+    case 'r':
+      redis_addr_port = optarg;
+      break;
+    default:
+      LOG_ERR("unknown option %c", c);
+      exit(-1);
+    }
+  }
+  if (!scheduler_socket_name) {
+    LOG_ERR("please specify socket for incoming connections with -s switch");
+    exit(-1);
+  }
+  char redis_addr[16] = {0};
+  char redis_port[6] = {0};
+  if (!redis_addr_port ||
+      sscanf(redis_addr_port, "%15[0-9.]:%5[0-9]", redis_addr, redis_port) !=
+          2) {
+    LOG_ERR("need to specify redis address like 127.0.0.1:6379 with -r switch");
+    exit(-1);
+  }
+  start_server(scheduler_socket_name, &redis_addr[0], atoi(redis_port));
+}
diff --git a/photon.h b/photon.h
new file mode 100644
index 000000000..6a213c4a5
--- /dev/null
+++ b/photon.h
@@ -0,0 +1,14 @@
+#ifndef PHOTON_H
+#define PHOTON_H
+
+enum photon_message_type {
+  /** Notify the local scheduler that a task has finished. */
+  TASK_DONE = 64,
+};
+
+struct photon_conn_impl {
+  /* File descriptor of the Unix domain socket that connects to photon. */
+  int conn;
+};
+
+#endif
diff --git a/photon_client.c b/photon_client.c
new file mode 100644
index 000000000..a33b25631
--- /dev/null
+++ b/photon_client.c
@@ -0,0 +1,27 @@
+#include "photon_client.h"
+
+#include "common/io.h"
+#include "common/task.h"
+#include <stdlib.h>
+
+photon_conn *photon_connect(const char *photon_socket) {
+  photon_conn *connection = malloc(sizeof *connection);
+  connection->conn = connect_ipc_sock(photon_socket);
+  return connection;
+}
+
+void photon_submit(photon_conn *conn, task_spec *task) {
+  write_message(conn->conn, SUBMIT_TASK, task_size(task), (uint8_t *)task);
+}
+
+void photon_task_done(photon_conn *conn) {
+  write_message(conn->conn, TASK_DONE, 0, NULL);
+}
+
+void photon_disconnect(photon_conn *conn) {
+  write_message(conn->conn, DISCONNECT_CLIENT, 0, NULL);
+}
+
+void photon_log_message(photon_conn *conn) {
+  write_message(conn->conn, LOG_MESSAGE, 0, NULL);
+}
diff --git a/photon_client.h b/photon_client.h
new file mode 100644
index 000000000..3163d8b7d
--- /dev/null
+++ b/photon_client.h
@@ -0,0 +1,27 @@
+#ifndef PHOTON_CLIENT_H
+#define PHOTON_CLIENT_H
+
+#include "common/task.h"
+#include "photon.h"
+
+typedef struct photon_conn_impl photon_conn;
+
+/* Connect to the local scheduler. */
+photon_conn *photon_connect(const char *photon_socket);
+
+/* Submit a task to the local scheduler. */
+void photon_submit(photon_conn *conn, task_spec *task);
+
+/* Get next task for this client. */
+task_spec *photon_get_task(photon_conn *conn);
+
+/* Tell the local scheduler that the client has finished executing a task. */
+void photon_task_done(photon_conn *conn);
+
+/* Disconnect from the local scheduler. */
+void photon_disconnect(photon_conn *conn);
+
+/* Send a log message to the local scheduler. */
+void photon_log_message(photon_conn *conn);
+
+#endif
diff --git a/photon_scheduler.h b/photon_scheduler.h
new file mode 100644
index 000000000..cce91155d
--- /dev/null
+++ b/photon_scheduler.h
@@ -0,0 +1,15 @@
+#ifndef PHOTON_SCHEDULER
+#define PHOTON_SCHEDULER
+
+/* Establish a connection to a new client. */
+void new_client_connection(local_scheduler_state *s, int listener_sock);
+
+/* Schedule a task on a given worker. */
+void schedule_on_worker(local_scheduler_state *s, task_spec *task,
+                        int client_id);
+
+/* Handle new incoming task that was scheduled by the global scheduler on
+ * this local scheduler. */
+void schedule_task(local_scheduler_state *s, task_spec *task);
+
+#endif
diff --git a/setup-env.sh b/setup-env.sh
new file mode 100755
index 000000000..7c4350150
--- /dev/null
+++ b/setup-env.sh
@@ -0,0 +1,5 @@
+echo "Adding Photon to PYTHONPATH" 1>&2
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+
+export PYTHONPATH="$ROOT_DIR/lib/python/:$PYTHONPATH"
diff --git a/test/test.py b/test/test.py
new file mode 100644
index 000000000..2307f2276
--- /dev/null
+++ b/test/test.py
@@ -0,0 +1,37 @@
+from __future__ import print_function
+
+import os
+import subprocess
+import sys
+import unittest
+import random
+import time
+
+import photon
+
+class TestPhotonClient(unittest.TestCase):
+
+  def setUp(self):
+    # Start Redis.
+    redis_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../common/thirdparty/redis-3.2.3/src/redis-server")
+    self.p1 = subprocess.Popen([redis_executable, "--loglevel", "warning"])
+    time.sleep(0.1)
+    scheduler_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/photon_scheduler")
+    scheduler_name = "/tmp/scheduler{}".format(random.randint(0, 10000))
+    self.p2 = subprocess.Popen([scheduler_executable, "-s", scheduler_name, "-r", "127.0.0.1:6379"])
+    time.sleep(0.1)
+    # Connect to the scheduler.
+    self.photon_client = photon.PhotonClient(scheduler_name)
+
+  def tearDown(self):
+    # Kill the Redis server.
+    self.p1.kill()
+    # Kill the local scheduler.
+    self.p2.kill()
+
+  def test_create(self):
+    l = [photon.make_id(20 * "a"), photon.make_id(20 * "b"), photon.make_id(20 * "c")]
+    self.photon_client.submit(20 * "a", l)
+
+if __name__ == "__main__":
+  unittest.main(verbosity=2)

From 227eab3b5a5537b870466effbba5e2f89368f563 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 28 Sep 2016 18:59:00 -0700
Subject: [PATCH 51/91] Use doxygen for generating documentation (#29)

* Auto-generated doxygen config file.

* Customize doxygen config file for Plasma.

* Format plasma_client.h and plasma.h for doxygen.
---
 doc/plasma-doxy-config | 2473 ++++++++++++++++++++++++++++++++++++++++
 src/plasma.h           |   71 +-
 src/plasma_client.h    |   85 +-
 3 files changed, 2596 insertions(+), 33 deletions(-)
 create mode 100644 doc/plasma-doxy-config

diff --git a/doc/plasma-doxy-config b/doc/plasma-doxy-config
new file mode 100644
index 000000000..9c291f838
--- /dev/null
+++ b/doc/plasma-doxy-config
@@ -0,0 +1,2473 @@
+# Doxyfile 1.8.13
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "Plasma"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       =
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES    = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 2
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES                =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
+# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
+# Fortran. In the latter case the parser tries to guess whether the code is fixed
+# or free formatted code, this is the default for Fortran type files), VHDL. For
+# instance to make doxygen treat .inc files as Fortran files (default is PHP),
+# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 0.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 0
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen to replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO, these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command "command input-file", where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER).
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = ../src
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested: *.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
+# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf.
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.idl \
+                         *.ddl \
+                         *.odl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.cs \
+                         *.d \
+                         *.php \
+                         *.php4 \
+                         *.php5 \
+                         *.phtml \
+                         *.inc \
+                         *.m \
+                         *.markdown \
+                         *.md \
+                         *.mm \
+                         *.dox \
+                         *.py \
+                         *.pyw \
+                         *.f90 \
+                         *.f95 \
+                         *.f03 \
+                         *.f08 \
+                         *.f \
+                         *.for \
+                         *.tcl \
+                         *.vhd \
+                         *.vhdl \
+                         *.ucf \
+                         *.qsf
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = ../src/utarray.h ../src/uthash.h
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output.
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g. the
+# setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
+# Qt Help Project output. For more information please see Qt Help Project /
+# Virtual Folders (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated; together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output.
+# When enabled you may also need to install MathJax separately and configure the
+# path to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps each id to
+# a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify:
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR             =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) in the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sf.net) file that captures the
+# structure of the code including all documentation. Note that this feature is
+# still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO.
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH      =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lie
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES
diff --git a/src/plasma.h b/src/plasma.h
index 4e8e15464..a36114c18 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -43,51 +43,58 @@ typedef struct {
   int64_t construct_duration;
 } plasma_object_info;
 
-/* Represents an object id hash, can hold a full SHA1 hash */
+/** Represents an object ID hash, can hold a full SHA1 hash. */
 typedef struct { unsigned char id[20]; } plasma_id;
 
 enum plasma_request_type {
-  /* Create a new object. */
+  /** Create a new object. */
   PLASMA_CREATE,
-  /* Get an object. */
+  /** Get an object. */
   PLASMA_GET,
-  /* Check if an object is present. */
+  /** Check if an object is present. */
   PLASMA_CONTAINS,
-  /* Seal an object. */
+  /** Seal an object. */
   PLASMA_SEAL,
-  /* Delete an object. */
+  /** Delete an object. */
   PLASMA_DELETE,
-  /* Request transfer to another store. */
+  /** Request transfer to another store. */
   PLASMA_TRANSFER,
-  /* Header for sending data. */
+  /** Header for sending data. */
   PLASMA_DATA,
 };
 
 typedef struct {
+  /** The type of the request. */
   int type;
+  /** The ID of the object that the request is about. */
   plasma_id object_id;
-  /* The size of the data. */
+  /** The size of the object's data. */
   int64_t data_size;
-  /* The size of the metadata. */
+  /** The size of the object's metadata. */
   int64_t metadata_size;
+  /** In a transfer request, this is the IP address of the Plasma Manager to
+   *  transfer the object to. */
   uint8_t addr[4];
+  /** In a transfer request, this is the port of the Plasma Manager to transfer
+   *  the object to. */
   int port;
 } plasma_request;
 
 typedef struct {
-  /* The offset in the memory mapped file of the data. */
+  /** The offset in bytes in the memory mapped file of the data. */
   ptrdiff_t data_offset;
-  /* The offset in the memory mapped file of the metadata. */
+  /** The offset in bytes in the memory mapped file of the metadata. */
   ptrdiff_t metadata_offset;
-  /* The size of the memory mapped file. */
+  /** The size in bytes of the memory mapped file. */
   int64_t map_size;
-  /* The size of the data. */
+  /** The size in bytes of the data. */
   int64_t data_size;
-  /* The size of the metadata. */
+  /** The size in bytes of the metadata. */
   int64_t metadata_size;
-  /* 1 if the object is present and 0 otherwise. Used for plasma_contains. */
+  /** This is used only to respond to requests of type PLASMA_CONTAINS. It is 1
+   *  if the object is present and 0 otherwise. Used for plasma_contains. */
   int has_object;
-  /* Numerical value of the fd of the memory mapped file in the store. */
+  /** The file descriptor of the memory mapped file in the store. */
   int store_fd_val;
 } plasma_reply;
 
@@ -101,25 +108,41 @@ typedef struct {
 } plasma_buffer;
 
 typedef struct {
-  /* Key that uniquely identifies the  memory mapped file. In practice, we
-   * take the numerical value of the file descriptor in the object store. */
+  /** Key that uniquely identifies the memory mapped file. In practice, we
+   *  take the numerical value of the file descriptor in the object store. */
   int key;
-  /* The result of mmap for this file descriptor. */
+  /** The result of mmap for this file descriptor. */
   uint8_t *pointer;
-  /* Handle for the uthash table. */
+  /** Handle for the uthash table. */
   UT_hash_handle hh;
 } client_mmap_table_entry;
 
-/* A client connection with a plasma store */
+/** Information about a connection between a Plasma Client and Plasma Store.
+ *  This is used to avoid mapping the same files into memory multiple times. */
 typedef struct {
-  /* File descriptor of the Unix domain socket that connects to the store. */
+  /** File descriptor of the Unix domain socket that connects to the store. */
   int conn;
-  /* Table of dlmalloc buffer files that have been memory mapped so far. */
+  /** Table of dlmalloc buffer files that have been memory mapped so far. */
   client_mmap_table_entry *mmap_table;
 } plasma_store_conn;
 
+/**
+ * This is used by the Plasma Client to send a request to the Plasma Store or
+ * the Plasma Manager.
+ *
+ * @param conn The file descriptor to use to send the request.
+ * @param req The address of the request to send.
+ * @return Void.
+ */
 void plasma_send_request(int conn, plasma_request *req);
 
+/**
+ * This is used by the Plasma Store to send a reply to the Plasma Client.
+ *
+ * @param conn The file descriptor to use to send the reply.
+ * @param req The address of the reply to send.
+ * @return Void.
+ */
 void plasma_send_reply(int conn, plasma_reply *req);
 
 #endif
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 87a33269f..148b74f93 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -1,13 +1,40 @@
 #ifndef PLASMA_CLIENT_H
 #define PLASMA_CLIENT_H
 
-/* Connect to the local plasma store UNIX domain socket with path socket_name
- * and return the resulting connection. */
+/**
+ * Connect to the local plasma store UNIX domain socket with path socket_name
+ * and return the resulting connection.
+ *
+ * @param socket_name The name of the socket to use to connect to the Plasma
+ *        Store.
+ * @return The object containing the connection state.
+ */
 plasma_store_conn *plasma_store_connect(const char *socket_name);
 
-/* Connect to a possibly remote plasma manager */
+/**
+ * Connect to a possibly remote Plasma Manager.
+ *
+ * @param addr The IP address of the Plasma Manager to connect to.
+ * @param port The port of the Plasma Manager to connect to.
+ * @return The file descriptor to use to send messages to the Plasma Manager.
+ */
 int plasma_manager_connect(const char *addr, int port);
 
+/**
+ * Create an object in the Plasma Store. Any metadata for this object must be
+ * passed in when the object is created.
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID to use for the newly created object.
+ * @param size The size in bytes of the space to be allocated for this object's
+ *        data (this does not include space used for metadata).
+ * @param metadata The object's metadata. If there is no metadata, this pointer
+ *        should be NULL.
+ * @param metadata_size The size in bytes of the metadata. If there is no
+ *        metadata, this should be 0.
+ * @param data The address of the newly created object will be written here.
+ * @return Void.
+ */
 void plasma_create(plasma_store_conn *conn,
                    plasma_id object_id,
                    int64_t size,
@@ -15,6 +42,21 @@ void plasma_create(plasma_store_conn *conn,
                    int64_t metadata_size,
                    uint8_t **data);
 
+/**
+ * Get an object from the Plasma Store. This function will block until the
+ * object has been created and sealed in the Plasma Store.
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID of the object to get.
+ * @param size The size in bytes of the retrieved object will be written at this
+ *        address.
+ * @param data The address of the object will be written at this address.
+ * @param metadata_size The size in bytes of the object's metadata will be
+ *        written at this address.
+ * @param metadata The address of the object's metadata will be written at this
+ *        address.
+ * @return Void.
+ */
 void plasma_get(plasma_store_conn *conn,
                 plasma_id object_id,
                 int64_t *size,
@@ -22,18 +64,43 @@ void plasma_get(plasma_store_conn *conn,
                 int64_t *metadata_size,
                 uint8_t **metadata);
 
-/* Check if the object store contains a particular object and the object has
- * been sealed. The result will be stored in has_object. TODO(rkn): We may want
- * to indicate whether the object is currently being written. */
+/**
+ * Check if the object store contains a particular object and the object has
+ * been sealed. The result will be stored in has_object.
+ *
+ * @todo We may want to indicate if the object has been created but not sealed.
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID of the object whose presence we are checking.
+ * @param has_object The function will write 1 at this address if the object is
+ *        present and 0 if it is not present.
+ * @return Void.
+ */
 void plasma_contains(plasma_store_conn *conn,
                      plasma_id object_id,
                      int *has_object);
 
+/**
+ * Seal an object in the object store. The object will be immutable after this
+ * call.
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID of the object to seal.
+ * @return Void.
+ */
 void plasma_seal(plasma_store_conn *conn, plasma_id object_id);
 
-/* Delete an object from the object store. This currently assumes that the
- * object is present and has been sealed. TODO(rkn): We may want to allow the
- * deletion of objects that are not present or haven't been sealed. */
+/**
+ * Delete an object from the object store. This currently assumes that the
+ * object is present and has been sealed.
+ *
+ * @todo We may want to allow the deletion of objects that are not present or
+ *       haven't been sealed.
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID of the object to delete.
+ * @return Void.
+ */
 void plasma_delete(plasma_store_conn *conn, plasma_id object_id);
 
 #endif

From e21e9f68df4d4ff28a9fbf52b2abb801afe0ea2c Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Thu, 29 Sep 2016 21:12:06 -0700
Subject: [PATCH 52/91] API for task log and scheduled task (#25)

* API revision

* update

* make status a bitmap

* update api

* tests working

* new task log APIs

* update APIs

* write binary structures to redis

* update tests

* fix clang-format

* Fix formatting.
---
 common.c            |  63 ++------------------------
 common.h            |   7 +--
 state/redis.c       |  99 ++++++++++++++++++++++++++++++++---------
 state/redis.h       |  17 ++++++-
 state/task_log.h    |  41 +++++++++++++++++
 state/task_queue.h  |  33 --------------
 state/task_table.h  |  11 ++++-
 task.c              | 106 +++++++++++++++++++-------------------------
 task.h              |  68 ++++++++++++++++++++++++++--
 test/common_tests.c |   3 --
 test/db_tests.c     |  55 ++++++++++++++++++++---
 test/task_tests.c   |  19 --------
 12 files changed, 307 insertions(+), 215 deletions(-)
 create mode 100644 state/task_log.h
 delete mode 100644 state/task_queue.h

diff --git a/common.c b/common.c
index 53e32fe13..d9fe1e951 100644
--- a/common.c
+++ b/common.c
@@ -6,6 +6,9 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 
+const unique_id NIL_ID = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+                           255, 255, 255, 255, 255, 255, 255, 255, 255, 255}};
+
 unique_id globally_unique_id(void) {
   /* Use /dev/urandom for "real" randomness. */
   int fd;
@@ -31,63 +34,3 @@ char *sha1_to_hex(const unsigned char *sha1, char *buffer) {
 
   return buffer;
 }
-
-const signed char hexval_table[256] = {
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */
-    +0, +1, +2, +3, +4, +5, +6, +7, /* 30-37 */
-    +8, +9, -1, -1, -1, -1, -1, -1, /* 38-3f */
-    -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */
-    -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 68-67 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */
-    -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */
-};
-
-static inline unsigned int hexval(unsigned char c) {
-  return hexval_table[c];
-}
-
-/*
- * Convert two consecutive hexadecimal digits into a char.  Return a
- * negative value on error.  Don't run over the end of short strings.
- */
-static inline int hex2chr(const char *s) {
-  int val = hexval(s[0]);
-  return (val < 0) ? val : (val << 4) | hexval(s[1]);
-}
-
-int hex_to_sha1(const char *hex, unsigned char *sha1) {
-  int i;
-  for (i = 0; i < UNIQUE_ID_SIZE; i++) {
-    int val = hex2chr(hex);
-    if (val < 0)
-      return -1;
-    *sha1++ = val;
-    hex += 2;
-  }
-  return 0;
-}
diff --git a/common.h b/common.h
index 5444739bc..047644cc8 100644
--- a/common.h
+++ b/common.h
@@ -38,6 +38,8 @@
 
 typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } unique_id;
 
+extern const unique_id NIL_ID;
+
 /* Generate a globally unique ID. */
 unique_id globally_unique_id(void);
 
@@ -46,11 +48,6 @@ unique_id globally_unique_id(void);
  * UNIQUE_ID_SIZE + 1 */
 char *sha1_to_hex(const unsigned char *sha1, char *buffer);
 
-/* Convert a hexdecimal string of length 40 to a 20 byte sha1 hash. This
- * function assumes that sha1 points to an already allocated char array of size
- * UNIQUE_ID_SIZE. */
-int hex_to_sha1(const char *hex, unsigned char *sha1);
-
 typedef unique_id object_id;
 
 #endif
diff --git a/state/redis.c b/state/redis.c
index aa2011d9a..db008cb5d 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -9,7 +9,7 @@
 #include "common.h"
 #include "db.h"
 #include "object_table.h"
-#include "task_queue.h"
+#include "task_log.h"
 #include "event_loop.h"
 #include "redis.h"
 #include "io.h"
@@ -65,22 +65,28 @@ db_handle *db_connect(const char *address,
 
   db->client_type = strdup(client_type);
   db->client_id = num_clients;
-  db->reading = 0;
-  db->writing = 0;
   db->service_cache = NULL;
   db->sync_context = context;
+  utarray_new(db->callback_freelist, &ut_ptr_icd);
 
   /* Establish async connection */
   db->context = redisAsyncConnect(address, port);
   CHECK_REDIS_CONNECT(redisAsyncContext, db->context,
                       "could not connect to redis %s:%d", address, port);
   db->context->data = (void *) db;
+  /* Establish async connection for subscription */
+  db->sub_context = redisAsyncConnect(address, port);
+  CHECK_REDIS_CONNECT(redisAsyncContext, db->sub_context,
+                      "could not connect to redis %s:%d", address, port);
+  db->sub_context->data = (void *) db;
+
   return db;
 }
 
 void db_disconnect(db_handle *db) {
   redisFree(db->sync_context);
   redisAsyncFree(db->context);
+  redisAsyncFree(db->sub_context);
   service_cache_entry *e, *tmp;
   HASH_ITER(hh, db->service_cache, e, tmp) {
     free(e->addr);
@@ -88,18 +94,22 @@ void db_disconnect(db_handle *db) {
     free(e);
   }
   free(db->client_type);
+  void **p = NULL;
+  while ((p = (void **) utarray_next(db->callback_freelist, p))) {
+    free(*p);
+  }
+  utarray_free(db->callback_freelist);
   free(db);
 }
 
 void db_attach(db_handle *db, event_loop *loop) {
   redisAeAttach(loop, db->context);
+  redisAeAttach(loop, db->sub_context);
 }
 
 void object_table_add(db_handle *db, unique_id object_id) {
-  static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
-  sha1_to_hex(&object_id.id[0], &hex_object_id[0]);
-  redisAsyncCommand(db->context, NULL, NULL, "SADD obj:%s %d",
-                    &hex_object_id[0], db->client_id);
+  redisAsyncCommand(db->context, NULL, NULL, "SADD obj:%b %d", &object_id.id[0],
+                    UNIQUE_ID_SIZE, db->client_id);
   if (db->context->err) {
     LOG_REDIS_ERR(db->context, "could not add object_table entry");
   }
@@ -148,30 +158,75 @@ void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata) {
 void object_table_lookup(db_handle *db,
                          object_id object_id,
                          lookup_callback callback) {
-  static char hex_object_id[2 * UNIQUE_ID_SIZE + 1];
-  sha1_to_hex(&object_id.id[0], &hex_object_id[0]);
   lookup_callback_data *cb_data = malloc(sizeof(lookup_callback_data));
   cb_data->callback = callback;
   cb_data->object_id = object_id;
   redisAsyncCommand(db->context, object_table_get_entry, cb_data,
-                    "SMEMBERS obj:%s", &hex_object_id[0]);
+                    "SMEMBERS obj:%b", &object_id.id[0], UNIQUE_ID_SIZE);
   if (db->context->err) {
     LOG_REDIS_ERR(db->context, "error in object_table lookup");
   }
 }
 
-void task_queue_submit_task(db_handle *db, task_iid task_iid, task_spec *task) {
-  /* For converting an id to hex, which has double the number
-   * of bytes compared to the id (+ 1 byte for '\0'). */
-  static char hex[2 * UNIQUE_ID_SIZE + 1];
-  UT_string *command;
-  utstring_new(command);
-  sha1_to_hex(&task_iid.id[0], &hex[0]);
-  utstring_printf(command, "HMSET queue:%s ", &hex[0]);
-  print_task(task, command);
-  redisAsyncCommand(db->context, NULL, NULL, utstring_body(command));
+void task_log_add_task(db_handle *db, task_instance *task_instance) {
+  task_iid task_iid = *task_instance_id(task_instance);
+  redisAsyncCommand(db->context, NULL, NULL, "HMSET tasklog:%b 0 %b",
+                    (char *) &task_iid.id[0], UNIQUE_ID_SIZE,
+                    (char *) task_instance, task_instance_size(task_instance));
   if (db->context->err) {
-    LOG_REDIS_ERR(db->context, "error in task_queue submit_task");
+    LOG_REDIS_ERR(db->context, "error setting task in task_log_add_task");
+  }
+  node_id node = *task_instance_node(task_instance);
+  int32_t state = *task_instance_state(task_instance);
+  redisAsyncCommand(db->context, NULL, NULL, "PUBLISH task_log:%b:%d %b",
+                    (char *) &node.id[0], UNIQUE_ID_SIZE, state,
+                    (char *) task_instance, task_instance_size(task_instance));
+  if (db->context->err) {
+    LOG_REDIS_ERR(db->context, "error publishing task in task_log_add_task");
+  }
+}
+
+void task_log_redis_callback(redisAsyncContext *c,
+                             void *reply,
+                             void *privdata) {
+  redisReply *r = reply;
+  if (reply == NULL)
+    return;
+  CHECK(r->type == REDIS_REPLY_ARRAY);
+  /* "message" is [type, channel, payload]; "pmessage" is [type, pattern,
+   * channel, payload]. Either way the payload is the last entry. */
+  CHECK(r->elements > 2);
+  redisReply *payload = r->element[r->elements - 1];
+  /* A (p)subscribe acknowledgement ends in an integer, so it has no string. */
+  if (payload->str == NULL) {
+    return;
+  }
+  CHECK(privdata);
+  task_log_callback_data *callback_data = privdata;
+  task_instance *instance = malloc(payload->len);
+  memcpy(instance, payload->str, payload->len);
+  callback_data->callback(instance, callback_data->userdata);
+  task_instance_free(instance);
+}
+void task_log_register_callback(db_handle *db,
+                                task_log_callback callback,
+                                node_id node,
+                                int32_t state,
+                                void *userdata) {
+  task_log_callback_data *callback_data =
+      malloc(sizeof(task_log_callback_data));
+  utarray_push_back(db->callback_freelist, &callback_data);
+  callback_data->callback = callback;
+  callback_data->userdata = userdata;
+  if (memcmp(&node.id[0], &NIL_ID.id[0], UNIQUE_ID_SIZE) == 0) {
+    redisAsyncCommand(db->sub_context, task_log_redis_callback, callback_data,
+                      "PSUBSCRIBE task_log:*:%d", state);
+  } else {
+    redisAsyncCommand(db->sub_context, task_log_redis_callback, callback_data,
+                      "SUBSCRIBE task_log:%b:%d", (char *) &node.id[0],
+                      UNIQUE_ID_SIZE, state);
+  }
+  if (db->sub_context->err) {
+    LOG_REDIS_ERR(db->sub_context, "error in task_log_register_callback");
   }
-  utstring_free(command);
 }
diff --git a/state/redis.h b/state/redis.h
index 23ebb2ba6..51479f8f0 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -3,10 +3,12 @@
 
 #include "db.h"
 #include "object_table.h"
+#include "task_log.h"
 
 #include "hiredis/hiredis.h"
 #include "hiredis/async.h"
 #include "uthash.h"
+#include "utarray.h"
 
 typedef struct {
   /* Unique ID for this service. */
@@ -17,6 +19,13 @@ typedef struct {
   UT_hash_handle hh;
 } service_cache_entry;
 
+typedef struct {
+  /* The callback that will be called. */
+  task_log_callback callback;
+  /* Userdata associated with the callback. */
+  void *userdata;
+} task_log_callback_data;
+
 struct db_handle_impl {
   /* String that identifies this client type. */
   char *client_type;
@@ -24,8 +33,10 @@ struct db_handle_impl {
   int64_t client_id;
   /* Redis context for this global state store connection. */
   redisAsyncContext *context;
-  /* Which events are we processing (read, write)? */
-  int reading, writing;
+  /* Redis context for "subscribe" communication.
+   * Yes, we need a separate one for that, see
+   * https://github.com/redis/hiredis/issues/55 */
+  redisAsyncContext *sub_context;
   /* The event loop this global state store connection is part of. */
   event_loop *loop;
   /* Index of the database connection in the event loop */
@@ -35,6 +46,8 @@ struct db_handle_impl {
   /* Redis context for synchronous connections.
    * Should only be used very rarely, it is not asynchronous. */
   redisContext *sync_context;
+  /* Data structure for callbacks that needs to be freed. */
+  UT_array *callback_freelist;
 };
 
 typedef struct {
diff --git a/state/task_log.h b/state/task_log.h
new file mode 100644
index 000000000..acf5dbcd0
--- /dev/null
+++ b/state/task_log.h
@@ -0,0 +1,41 @@
+#ifndef TASK_LOG_H
+#define TASK_LOG_H
+
+#include "db.h"
+#include "task.h"
+
+/* The task log is a message bus that is used for all communication between
+ * local and global schedulers (and also persisted to the state database).
+ * Here are examples of events that are recorded by the task log:
+ *
+ * 1) local scheduler writes it when it submits a task to the global scheduler;
+ * 2) global scheduler reads it to get the task submitted by local schedulers;
+ * 3) global scheduler writes it when assigning the task to a local scheduler;
+ * 4) local scheduler reads it to get its tasks assigned by global scheduler;
+ * 5) local scheduler writes it when a task finishes execution;
+ * 6) global scheduler reads it to get the tasks that have finished; */
+
+/* Callback for subscribing to the task log. */
+typedef void (*task_log_callback)(task_instance *task_instance, void *userdata);
+
+/* Initially add a task instance to the task log. */
+void task_log_add_task(db_handle *db, task_instance *task_instance);
+
+/* Update task instance in the task log. */
+void task_log_update_task(db_handle *db,
+                          task_iid task_iid,
+                          int32_t state,
+                          node_id node);
+
+/* Register callback for a certain event. The node specifies the node whose
+ * events we want to listen to. If you want to listen to all events for this
+ * node, use state_filter =
+ *     TASK_WAITING | TASK_SCHEDULED | TASK_RUNNING | TASK_DONE.
+ * If you want to register to updates from all nodes, set node = NIL_ID. */
+void task_log_register_callback(db_handle *db,
+                                task_log_callback callback,
+                                node_id node,
+                                int32_t state_filter,
+                                void *userdata);
+
+#endif /* TASK_LOG_H */
diff --git a/state/task_queue.h b/state/task_queue.h
deleted file mode 100644
index 92968707e..000000000
--- a/state/task_queue.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef TASK_QUEUE_H
-#define TASK_QUEUE_H
-
-#include "db.h"
-#include "task.h"
-
-/* The task ID is a deterministic hash of the function ID that
- * the task executes and the argument IDs or argument values */
-typedef unique_id task_id;
-
-/* The task instance ID is a globally unique ID generated which
- * identifies this particular execution of the task */
-typedef unique_id task_iid;
-
-/* The node id is an identifier for the node the task is
- * scheduled on */
-typedef unique_id node_id;
-
-/* Callback for subscribing to the task queue. The only argument this
- * callback gets is the task_id of the. */
-typedef void (*task_queue_callback)(task_iid *task_iid, task_spec *task);
-
-/* Submit task to the global scheduler. */
-void task_queue_submit_task(db_handle *db, task_iid task_iid, task_spec *task);
-
-/* Submit task to a local scheduler based on the decision made by the global
- * scheduler. */
-void task_queue_schedule_task(db_handle *db, task_iid task_iid, node_id node);
-
-/* Subscribe to task queue. */
-void task_queue_register_callback(db_handle *db, task_queue_callback callback);
-
-#endif
diff --git a/state/task_table.h b/state/task_table.h
index 71e879c2c..3a1852522 100644
--- a/state/task_table.h
+++ b/state/task_table.h
@@ -5,9 +5,16 @@
 #include "task.h"
 
 /* Add task to the task table, handle errors here. */
-status task_table_add_task(db_handle *db, task_iid task_iid, task_spec *task);
+status task_table_add_task(db_handle *db, task_spec *task);
+
+/* Callback for getting an entry from the task table. Task spec will be freed
+ * by the system after the callback */
+typedef void (*task_table_callback)(task_spec *task, void *context);
 
 /* Get specific task from the task table. */
-status task_table_get_task(db_handle *db, task_iid task_iid, task_spec *task);
+status task_table_get_task(db_handle *db,
+                           task_id task_id,
+                           task_table_callback callback,
+                           void *context);
 
 #endif /* TASK_TABLE_H */
diff --git a/task.c b/task.c
index 30fde8d31..0086eb337 100644
--- a/task.c
+++ b/task.c
@@ -8,6 +8,8 @@
 #include "common.h"
 #include "io.h"
 
+/* TASK SPECIFICATIONS */
+
 /* Tasks are stored in a consecutive chunk of memory, the first
  * sizeof(task_spec) bytes are arranged according to the struct
  * task_spec. Then there is an array of task_args of length
@@ -168,65 +170,49 @@ void print_task(task_spec *spec, UT_string *output) {
   }
 }
 
-UT_icd unique_id_icd = {sizeof(unique_id), NULL, NULL, NULL};
+/* TASK INSTANCES */
 
-task_spec *parse_task(char *task_string, int64_t task_length) {
-  /* We make one pass through task_string to store all the argument ids
-   * in "args" and all the return ids in "returns". */
-  UT_array *args;
-  utarray_new(args, &unique_id_icd);
-  UT_array *returns;
-  utarray_new(returns, &unique_id_icd);
-  function_id function_id;
-  char *cursor = strtok(task_string, " ");
-  int index = 0;
-  while (cursor != NULL) {
-    /* This will be equal to "args" or "returns" depending on whether we
-     * are processing an argument id or a return id. */
-    UT_array *target = NULL;
-    if (strncmp("fun", cursor, 3) == 0) {
-      /* Parse function id. */
-      CHECK(cursor + 2 * UNIQUE_ID_SIZE + 1 <= task_string + task_length);
-      cursor = strtok(NULL, " ");
-      hex_to_sha1(cursor, &function_id.id[0]);
-      cursor = strtok(NULL, " ");
-      CHECK(cursor);
-      continue;
-    } else if (strncmp("id:", cursor, 3) == 0) {
-      /* Parse pass by reference argument. */
-      sscanf(cursor, "id:%d", &index);
-      target = args;
-    } else if (strncmp("val:", cursor, 4) == 0) {
-      /* Parse pass by value argument. */
-      sscanf(cursor, "val:%d", &index);
-      CHECK(0); /* Not implemented yet */
-    } else if (strncmp("ret:", cursor, 4) == 0) {
-      /* Parse return object reference. */
-      sscanf(cursor, "ret:%d", &index);
-      target = returns;
-    }
-    cursor = strtok(NULL, " ");
-    CHECK(cursor);
-    if (index >= utarray_len(target)) {
-      utarray_resize(target, index + 1);
-    }
-    object_id *id = (object_id *) utarray_eltptr(target, index);
-    hex_to_sha1(cursor, &id->id[0]);
-    cursor = strtok(NULL, " ");
-  }
-  /* TODO(pcm): Implement pass by value. */
-  /* Now assemble the task specification. */
-  task_spec *spec =
-      alloc_task_spec(function_id, utarray_len(args), utarray_len(returns), 0);
-  for (int i = 0; i < utarray_len(args); ++i) {
-    object_id *id = (object_id *) utarray_eltptr(args, i);
-    task_args_add_ref(spec, *id);
-  }
-  for (int i = 0; i < utarray_len(returns); ++i) {
-    object_id *id = (object_id *) utarray_eltptr(returns, i);
-    *task_return(spec, i) = *id;
-  }
-  utarray_free(args);
-  utarray_free(returns);
-  return spec;
+struct task_instance_impl {
+  task_iid iid;
+  int32_t state;
+  node_id node;
+  task_spec spec;
+};
+
+task_instance *make_task_instance(task_iid task_iid,
+                                  task_spec *spec,
+                                  int32_t state,
+                                  node_id node) {
+  int64_t size = sizeof(task_instance) - sizeof(task_spec) + task_size(spec);
+  task_instance *result = malloc(size);
+  memset(result, 0, size);
+  result->iid = task_iid;
+  result->state = state;
+  result->node = node;
+  memcpy(&result->spec, spec, task_size(spec));
+  return result;
+}
+
+int64_t task_instance_size(task_instance *instance) {
+  return sizeof(task_instance) - sizeof(task_spec) + task_size(&instance->spec);
+}
+
+task_iid *task_instance_id(task_instance *instance) {
+  return &instance->iid;
+}
+
+int32_t *task_instance_state(task_instance *instance) {
+  return &instance->state;
+}
+
+node_id *task_instance_node(task_instance *instance) {
+  return &instance->node;
+}
+
+task_spec *task_instance_task_spec(task_instance *instance) {
+  return &instance->spec;
+}
+
+void task_instance_free(task_instance *instance) {
+  free(instance);
 }
diff --git a/task.h b/task.h
index 96c97b80d..9267edb65 100644
--- a/task.h
+++ b/task.h
@@ -1,7 +1,7 @@
 #ifndef TASK_H
 #define TASK_H
 
-/* This API specifies the task data structure. It is in C so we can
+/* This API specifies the task data structures. It is in C so we can
  * easily construct tasks from other languages like Python. The datastructures
  * are also defined in such a way that memory is contiguous and all pointers
  * are relative, so that we can memcpy the datastructure and ship it over the
@@ -15,6 +15,24 @@
 typedef unique_id function_id;
 typedef unique_id object_id;
 
+/* The task ID is a deterministic hash of the function ID that
+ * the task executes and the argument IDs or argument values */
+typedef unique_id task_id;
+
+/* The task instance ID is a globally unique ID generated which
+ * identifies this particular execution of the task */
+typedef unique_id task_iid;
+
+/* The node id is an identifier for the node the task is
+ * scheduled on */
+typedef unique_id node_id;
+
+/*
+ * TASK SPECIFICATIONS: Contain all the information necessary
+ * to execute the task (function id, arguments, return object ids).
+ *
+ */
+
 typedef struct task_spec_impl task_spec;
 
 /* If argument is passed by value or reference. */
@@ -65,7 +83,51 @@ task_spec *read_task(int fd);
 /* Print task as a humanly readable string. */
 void print_task(task_spec *spec, UT_string *output);
 
-/* Parse task as printed by print_task. */
-task_spec *parse_task(char *task_string, int64_t task_length);
+/*
+ * SCHEDULED TASK: Contains information about a scheduled task:
+ * the task iid, the task specification and the task status
+ * (WAITING, SCHEDULED, RUNNING, DONE) and which node the
+ * task is scheduled on.
+ *
+ */
+
+/* The scheduling_state can be used as a flag when we are listening
+ * for an event, for example TASK_WAITING | TASK_SCHEDULED. */
+enum scheduling_state {
+  TASK_WAITING = 1,
+  TASK_SCHEDULED = 2,
+  TASK_RUNNING = 4,
+  TASK_DONE = 8
+};
+
+/* A task instance is one execution of a task specification.
+ * It has a unique instance id, a state of execution (see scheduling_state)
+ * and a node it is scheduled on or running on. */
+typedef struct task_instance_impl task_instance;
+
+/* Allocate and initialize a new task instance. Must be freed with
+ * task_instance_free after use. */
+task_instance *make_task_instance(task_iid task_iid,
+                                  task_spec *task,
+                                  int32_t state,
+                                  node_id node);
+
+/* Size of task instance structure in bytes. */
+int64_t task_instance_size(task_instance *instance);
+
+/* Instance ID of the task instance. */
+task_iid *task_instance_id(task_instance *instance);
+
+/* The scheduling state of the task instance. */
+int32_t *task_instance_state(task_instance *instance);
+
+/* Node this task instance has been assigned to or is running on. */
+node_id *task_instance_node(task_instance *instance);
+
+/* Task specification of this task instance. */
+task_spec *task_instance_task_spec(task_instance *instance);
+
+/* Free this task instance datastructure. */
+void task_instance_free(task_instance *instance);
 
 #endif
diff --git a/test/common_tests.c b/test/common_tests.c
index 3673c335d..47b643039 100644
--- a/test/common_tests.c
+++ b/test/common_tests.c
@@ -6,11 +6,8 @@ SUITE(common_tests);
 
 TEST sha1_test(void) {
   static char hex[2 * UNIQUE_ID_SIZE + 1];
-  static unsigned char id[UNIQUE_ID_SIZE];
   unique_id uid = globally_unique_id();
   sha1_to_hex(&uid.id[0], &hex[0]);
-  hex_to_sha1(&hex[0], &id[0]);
-  ASSERT(memcmp(&uid.id[0], &id[0], 20) == 0);
   PASS();
 }
 
diff --git a/test/db_tests.c b/test/db_tests.c
index 99fad7e18..96f16b528 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -6,7 +6,7 @@
 #include "test/example_task.h"
 #include "state/db.h"
 #include "state/object_table.h"
-#include "state/task_queue.h"
+#include "state/task_log.h"
 #include "state/redis.h"
 #include "task.h"
 
@@ -72,27 +72,70 @@ TEST object_table_lookup_test(void) {
   PASS();
 }
 
-TEST task_queue_test(void) {
+void task_log_test_callback(task_instance *instance, void *userdata) {
+  task_instance *other = userdata;
+  CHECK(*task_instance_state(instance) == TASK_SCHEDULED);
+  CHECK(task_instance_size(instance) == task_instance_size(other));
+  CHECK(memcmp(instance, other, task_instance_size(instance)) == 0);
+}
+
+TEST task_log_test(void) {
   event_loop *loop = event_loop_create();
   db_handle *db = db_connect("127.0.0.1", 6379, "local_scheduler", "", -1);
   db_attach(db, loop);
-
+  node_id node = globally_unique_id();
   task_spec *task = example_task();
-  task_queue_submit_task(db, globally_unique_id(), task);
+  task_instance *instance =
+      make_task_instance(globally_unique_id(), task, TASK_SCHEDULED, node);
+  task_log_register_callback(db, task_log_test_callback, node, TASK_SCHEDULED,
+                             instance);
+  task_log_add_task(db, instance);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
-
+  task_instance_free(instance);
   free_task_spec(task);
   db_disconnect(db);
   event_loop_destroy(loop);
   PASS();
 }
 
+int num_test_callback_called = 0;
+
+void task_log_all_test_callback(task_instance *instance, void *userdata) {
+  num_test_callback_called += 1;
+}
+
+TEST task_log_all_test(void) {
+  event_loop *loop = event_loop_create();
+  db_handle *db = db_connect("127.0.0.1", 6379, "local_scheduler", "", -1);
+  db_attach(db, loop);
+  task_spec *task = example_task();
+  /* Schedule two tasks on different nodes. */
+  task_instance *instance1 = make_task_instance(
+      globally_unique_id(), task, TASK_SCHEDULED, globally_unique_id());
+  task_instance *instance2 = make_task_instance(
+      globally_unique_id(), task, TASK_SCHEDULED, globally_unique_id());
+  task_log_register_callback(db, task_log_all_test_callback, NIL_ID,
+                             TASK_SCHEDULED, NULL);
+  task_log_add_task(db, instance1);
+  task_log_add_task(db, instance2);
+  event_loop_add_timer(loop, 100, timeout_handler, NULL);
+  event_loop_run(loop);
+  task_instance_free(instance2);
+  task_instance_free(instance1);
+  free_task_spec(task);
+  db_disconnect(db);
+  event_loop_destroy(loop);
+  ASSERT(num_test_callback_called == 2);
+  PASS();
+}
+
 SUITE(db_tests) {
   redisContext *context = redisConnect("127.0.0.1", 6379);
   freeReplyObject(redisCommand(context, "FLUSHALL"));
   RUN_REDIS_TEST(context, object_table_lookup_test);
-  RUN_REDIS_TEST(context, task_queue_test);
+  RUN_REDIS_TEST(context, task_log_test);
+  RUN_REDIS_TEST(context, task_log_all_test);
   redisFree(context);
 }
 
diff --git a/test/task_tests.c b/test/task_tests.c
index fcb714737..f72a0e2c2 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -63,28 +63,9 @@ TEST send_task(void) {
   PASS();
 }
 
-TEST print_and_parse_task(void) {
-  task_spec *task = example_task();
-
-  UT_string *output;
-  utstring_new(output);
-  print_task(task, output);
-  task_spec *result = parse_task(utstring_body(output), utstring_len(output));
-  utstring_free(output);
-
-  ASSERT_EQ(task_size(task), task_size(result));
-  ASSERT(memcmp(task, result, task_size(task)) == 0);
-
-  free_task_spec(task);
-  free_task_spec(result);
-
-  PASS();
-}
-
 SUITE(task_tests) {
   RUN_TEST(task_test);
   RUN_TEST(send_task);
-  RUN_TEST(print_and_parse_task);
 }
 
 GREATEST_MAIN_DEFS();

From 9c223a1e4823047fa0143a125baef4e6a10976f5 Mon Sep 17 00:00:00 2001
From: Richard Shin <shin.richard@gmail.com>
Date: Thu, 29 Sep 2016 23:46:04 -0700
Subject: [PATCH 53/91] Increase dlmalloc's granularity to 1 << 25 (#30)

* Increase dlmalloc's granularity to 1 << 10

* Prevent trimming in dlmalloc
---
 src/malloc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/malloc.c b/src/malloc.c
index f18535437..fbf8c8d92 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -18,6 +18,7 @@ int fake_munmap(void *, size_t);
 #define DIRECT_MUNMAP(a, s) fake_munmap(a, s)
 #define USE_DL_PREFIX
 #define HAVE_MORECORE 0
+#define DEFAULT_GRANULARITY ((size_t) 1U << 25)
 
 #include "thirdparty/dlmalloc.c"
 
@@ -27,6 +28,7 @@ int fake_munmap(void *, size_t);
 #undef DIRECT_MUNMAP
 #undef USE_DL_PREFIX
 #undef HAVE_MORECORE
+#undef DEFAULT_GRANULARITY
 
 struct mmap_record {
   int fd;
@@ -98,7 +100,11 @@ int fake_munmap(void *addr, size_t size) {
   struct mmap_record *record;
 
   HASH_FIND(hh_pointer, records_by_pointer, &addr, sizeof(addr), record);
-  assert(record != NULL);
+  if (record == NULL || record->size != size) {
+    /* Reject requests to munmap that don't directly match previous
+     * calls to mmap, to prevent dlmalloc from trimming. */
+    return -1;
+  }
   close(record->fd);
 
   HASH_DELETE(hh_fd, records_by_fd, record);

From eb71c2e84ac62ceb5cd1896ac43e6f2d2c0a051e Mon Sep 17 00:00:00 2001
From: Ujval Misra <misraujval@gmail.com>
Date: Sat, 1 Oct 2016 16:03:33 -0700
Subject: [PATCH 54/91] Increase allocation granularity dynamically with each
 MMAP call (#32)

* Increase allocation granularity dynamically with each MMAP call

* Fewer MMAP calls required when workload contains several objects.

* Delay hitting the per-process file descriptor constraint.

* Change type of GRANULARITY_MULTIPLIER

* Make granularity update more concise.
---
 src/malloc.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/malloc.c b/src/malloc.c
index fbf8c8d92..0cbc8f082 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -12,13 +12,15 @@
 void *fake_mmap(size_t);
 int fake_munmap(void *, size_t);
 
+size_t dlmalloc_granularity = ((size_t) 128U * 1024U);
+
 #define MMAP(s) fake_mmap(s)
 #define MUNMAP(a, s) fake_munmap(a, s)
 #define DIRECT_MMAP(s) fake_mmap(s)
 #define DIRECT_MUNMAP(a, s) fake_munmap(a, s)
 #define USE_DL_PREFIX
 #define HAVE_MORECORE 0
-#define DEFAULT_GRANULARITY ((size_t) 1U << 25)
+#define DEFAULT_GRANULARITY (dlmalloc_granularity)
 
 #include "thirdparty/dlmalloc.c"
 
@@ -42,6 +44,8 @@ struct mmap_record {
 struct mmap_record *records_by_fd = NULL;
 struct mmap_record *records_by_pointer = NULL;
 
+const int GRANULARITY_MULTIPLIER = 2;
+
 /* Create a buffer. This is creating a temporary file and then
  * immediately unlinking it so we do not leave traces in the system. */
 int create_buffer(int64_t size) {
@@ -79,6 +83,10 @@ void *fake_mmap(size_t size) {
     return pointer;
   }
 
+  /* Update dlmalloc's allocation granularity for future calls */
+  dlmalloc_granularity *= GRANULARITY_MULTIPLIER;
+  dlmallopt(M_GRANULARITY, dlmalloc_granularity);
+
   struct mmap_record *record = malloc(sizeof(struct mmap_record));
   record->fd = fd;
   record->pointer = pointer;

From f4037ad19f38dc68b186c9338d3f67c9058c556c Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Mon, 3 Oct 2016 17:55:57 -0700
Subject: [PATCH 55/91] Plasma fixes (#29)

* Allow CHECK to take in a message

* Turn off debug statements by default

* format arguments to CHECK message

* UT list

* Socket methods that block until full message is read/written

* Address Robert and Philipp's comments

* Formatting
---
 common.h            |  10 +-
 io.c                | 102 ++++-
 test/io_tests.c     |  46 +++
 thirdparty/utlist.h | 895 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1032 insertions(+), 21 deletions(-)
 create mode 100644 thirdparty/utlist.h

diff --git a/common.h b/common.h
index 047644cc8..f09e91cd5 100644
--- a/common.h
+++ b/common.h
@@ -6,7 +6,7 @@
 #include <string.h>
 #include <errno.h>
 
-#ifdef NDEBUG
+#ifndef RAY_COMMON_DEBUG
 #define LOG_DEBUG(M, ...)
 #else
 #define LOG_DEBUG(M, ...) \
@@ -28,6 +28,14 @@
     }                                      \
   } while (0);
 
+#define CHECKM(COND, M, ...)                                   \
+  do {                                                         \
+    if (!(COND)) {                                             \
+      LOG_ERR("Check failure: %s \n" M, #COND, ##__VA_ARGS__); \
+      exit(-1);                                                \
+    }                                                          \
+  } while (0);
+
 #define UNIQUE_ID_SIZE 20
 
 /* Cleanup method for running tests with the greatest library.
diff --git a/io.c b/io.c
index 16073d32a..32f3ed4b9 100644
--- a/io.c
+++ b/io.c
@@ -94,6 +94,28 @@ int accept_client(int socket_fd) {
   return client_fd;
 }
 
+/**
+ * Reliably write a sequence of bytes into a file descriptor. This will block
+ * until one of the following happens: (1) there is an error (2) end of file,
+ * or (3) all length bytes have been written.
+ *
+ * @param fd The file descriptor to write to.
+ * @param cursor The cursor pointing to the beginning of the bytes to send.
+ * @param length The size of the bytes sequence to write.
+ * @return Void.
+ */
+void write_bytes(int fd, uint8_t *cursor, size_t length) {
+  ssize_t nbytes = 0;
+  while (length > 0) {
+    /* While we haven't written the whole message, write to the file
+     * descriptor, advance the cursor, and decrease the amount left to write. */
+    nbytes = write(fd, cursor, length);
+    CHECK(nbytes > 0);
+    cursor += nbytes;
+    length -= nbytes;
+  }
+}
+
 /**
  * Write a sequence of bytes on a file descriptor. The bytes should then be read
  * by read_message.
@@ -105,17 +127,49 @@ int accept_client(int socket_fd) {
  * @return Void.
  */
 void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes) {
-  ssize_t nbytes = write(fd, (char *) &type, sizeof(type));
-  CHECK(nbytes == sizeof(int64_t));
-  nbytes = write(fd, (char *) &length, sizeof(length));
-  CHECK(nbytes == sizeof(int64_t));
-  nbytes = write(fd, (char *) bytes, length * sizeof(char));
-  CHECK(nbytes >= 0);
+  write_bytes(fd, (uint8_t *) &type, sizeof(type));
+  write_bytes(fd, (uint8_t *) &length, sizeof(length));
+  write_bytes(fd, bytes, length * sizeof(char));
 }
 
 /**
- * Read a sequence of bytes written by write_bytes from a file descriptor. This
- * allocates space for the message.
+ * Reliably read a sequence of bytes from a file descriptor into a buffer. This
+ * will block until one of the following happens: (1) there is an error (2) end
+ * of file, or (3) all length bytes have been read.
+ *
+ * @note The buffer pointed to by cursor must already have length number of
+ * bytes allocated before calling this method.
+ *
+ * @param fd The file descriptor to read from.
+ * @param cursor The cursor pointing to the beginning of the buffer.
+ * @param length The size of the byte sequence to read.
+ * @return Void.
+ */
+int read_bytes(int fd, uint8_t *cursor, size_t length) {
+  ssize_t nbytes = 0;
+  while (length > 0) {
+    /* While we haven't read the whole message, read from the file descriptor,
+     * advance the cursor, and decrease the amount left to read. */
+    nbytes = read(fd, cursor, length);
+    if (nbytes < 0) {
+      if (errno == EAGAIN || errno == EWOULDBLOCK) {
+        continue;
+      }
+      /* Any other error is fatal: nbytes is negative, so this check fails. */
+      CHECK(nbytes >= 0);
+    }
+    if (nbytes == 0) {
+      return -1; /* End of file before all length bytes arrived. */
+    }
+    cursor += nbytes;
+    length -= nbytes;
+  }
+  return 0;
+}
+
+/**
+ * Read a sequence of bytes written by write_message from a file descriptor.
+ * This allocates space for the message.
  *
  * @note The caller must free the memory.
  *
@@ -130,20 +184,28 @@ void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes) {
  * @return Void.
  */
 void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
-  ssize_t nbytes = read(fd, type, sizeof(int64_t));
-  CHECK(nbytes >= 0);
-  /* Handle the case in which the socket is closed. */
-  if (nbytes == 0) {
-    *type = DISCONNECT_CLIENT;
-    *length = 0;
-    *bytes = NULL;
-    return;
+  int closed = read_bytes(fd, (uint8_t *) type, sizeof(int64_t));
+  if (closed) {
+    goto disconnected;
+  }
+  closed = read_bytes(fd, (uint8_t *) length, sizeof(int64_t));
+  if (closed) {
+    goto disconnected;
   }
-  nbytes = read(fd, length, sizeof(int64_t));
-  CHECK(nbytes == sizeof(int64_t));
   *bytes = malloc(*length * sizeof(uint8_t));
-  nbytes = read(fd, *bytes, *length);
-  CHECK(nbytes >= 0);
+  closed = read_bytes(fd, *bytes, *length);
+  if (closed) {
+    free(*bytes);
+    goto disconnected;
+  }
+  return;
+
+disconnected:
+  /* Handle the case in which the socket is closed. */
+  *type = DISCONNECT_CLIENT;
+  *length = 0;
+  *bytes = NULL;
+  return;
 }
 
 /* Write a null-terminated string to a file descriptor. */
diff --git a/test/io_tests.c b/test/io_tests.c
index b73207326..56ebf0607 100644
--- a/test/io_tests.c
+++ b/test/io_tests.c
@@ -5,6 +5,7 @@
 #include <inttypes.h>
 
 #include "io.h"
+#include "utstring.h"
 
 SUITE(io_tests);
 
@@ -47,8 +48,53 @@ TEST ipc_socket_test(void) {
   PASS();
 }
 
+TEST long_ipc_socket_test(void) {
+  const char *socket_pathname = "long-test-socket";
+  int socket_fd = bind_ipc_sock(socket_pathname);
+  ASSERT(socket_fd >= 0);
+
+  UT_string *test_string;
+  utstring_new(test_string);
+  for (int i = 0; i < 10000; i++) {
+    utstring_printf(test_string, "hello world ");
+  }
+  char *test_bytes = "another string";
+  pid_t pid = fork();
+  if (pid == 0) {
+    close(socket_fd);
+    socket_fd = connect_ipc_sock(socket_pathname);
+    ASSERT(socket_fd >= 0);
+    write_log_message(socket_fd, utstring_body(test_string));
+    write_message(socket_fd, LOG_MESSAGE, strlen(test_bytes),
+                  (uint8_t *) test_bytes);
+    close(socket_fd);
+    exit(0);
+  } else {
+    int client_fd = accept_client(socket_fd);
+    ASSERT(client_fd >= 0);
+    char *message = read_log_message(client_fd);
+    ASSERT(message != NULL);
+    ASSERT_STR_EQ(utstring_body(test_string), message);
+    free(message);
+    int64_t type;
+    int64_t len;
+    uint8_t *bytes;
+    read_message(client_fd, &type, &len, &bytes);
+    ASSERT(type == LOG_MESSAGE);
+    ASSERT(memcmp(test_bytes, bytes, len) == 0);
+    free(bytes);
+    close(client_fd);
+    close(socket_fd);
+    unlink(socket_pathname);
+  }
+
+  utstring_free(test_string);
+  PASS();
+}
+
 SUITE(io_tests) {
   RUN_TEST(ipc_socket_test);
+  RUN_TEST(long_ipc_socket_test);
 }
 
 GREATEST_MAIN_DEFS();
diff --git a/thirdparty/utlist.h b/thirdparty/utlist.h
new file mode 100644
index 000000000..9b5534ffb
--- /dev/null
+++ b/thirdparty/utlist.h
@@ -0,0 +1,895 @@
+/*
+Copyright (c) 2007-2016, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTLIST_H
+#define UTLIST_H
+
+#define UTLIST_VERSION 2.0.1
+
+#include <assert.h>
+
+/*
+ * This file contains macros to manipulate singly and doubly-linked lists.
+ *
+ * 1. LL_ macros:  singly-linked lists.
+ * 2. DL_ macros:  doubly-linked lists.
+ * 3. CDL_ macros: circular doubly-linked lists.
+ *
+ * To use singly-linked lists, your structure must have a "next" pointer.
+ * To use doubly-linked lists, your structure must have "prev" and "next" pointers.
+ * Either way, the pointer to the head of the list must be initialized to NULL.
+ *
+ * ----------------.EXAMPLE -------------------------
+ * struct item {
+ *      int id;
+ *      struct item *prev, *next;
+ * };
+ *
+ * struct item *list = NULL;
+ *
+ * int main() {
+ *      struct item *item;
+ *      ... allocate and populate item ...
+ *      DL_APPEND(list, item);
+ * }
+ * --------------------------------------------------
+ *
+ * For doubly-linked lists, the append and delete macros are O(1)
+ * For singly-linked lists, append and delete are O(n) but prepend is O(1)
+ * The sort macro is O(n log(n)) for all types of single/double/circular lists.
+ */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ code), this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#ifdef _MSC_VER            /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define LDECLTYPE(x) decltype(x)
+#else                     /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#endif
+#elif defined(__ICCARM__)
+#define NO_DECLTYPE
+#else                      /* GNU, Sun and other compilers */
+#define LDECLTYPE(x) __typeof(x)
+#endif
+
+/* for VS2008 we use some workarounds to get around the lack of decltype,
+ * namely, we always reassign our tmp variable to the list head if we need
+ * to dereference its prev/next pointers, and save/restore the real head.*/
+#ifdef NO_DECLTYPE
+#define IF_NO_DECLTYPE(x) x
+#define LDECLTYPE(x) char*
+#define _SV(elt,list) _tmp = (char*)(list); {char **_alias = (char**)&(list); *_alias = (elt); }
+#define _NEXT(elt,list,next) ((char*)((list)->next))
+#define _NEXTASGN(elt,list,to,next) { char **_alias = (char**)&((list)->next); *_alias=(char*)(to); }
+/* #define _PREV(elt,list,prev) ((char*)((list)->prev)) */
+#define _PREVASGN(elt,list,to,prev) { char **_alias = (char**)&((list)->prev); *_alias=(char*)(to); }
+#define _RS(list) { char **_alias = (char**)&(list); *_alias=_tmp; }
+#define _CASTASGN(a,b) { char **_alias = (char**)&(a); *_alias=(char*)(b); }
+#else
+#define IF_NO_DECLTYPE(x)
+#define _SV(elt,list)
+#define _NEXT(elt,list,next) ((elt)->next)
+#define _NEXTASGN(elt,list,to,next) ((elt)->next)=(to)
+/* #define _PREV(elt,list,prev) ((elt)->prev) */
+#define _PREVASGN(elt,list,to,prev) ((elt)->prev)=(to)
+#define _RS(list)
+#define _CASTASGN(a,b) (a)=(b)
+#endif
+
+/******************************************************************************
+ * The sort macro is an adaptation of Simon Tatham's O(n log(n)) mergesort    *
+ * Unwieldy variable names used here to avoid shadowing passed-in variables.  *
+ *****************************************************************************/
+#define LL_SORT(list, cmp)                                                                     \
+    LL_SORT2(list, cmp, next)
+
+#define LL_SORT2(list, cmp, next)                                                              \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  IF_NO_DECLTYPE(LDECLTYPE(list) _tmp;)                                                        \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {                                                                      \
+      _CASTASGN(_ls_p,list);                                                                   \
+      (list) = NULL;                                                                           \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {                                         \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list); _ls_q = _NEXT(_ls_q,list,next); _RS(list);                          \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) {                                    \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          } else if (_ls_qsize == 0 || !_ls_q) {                                               \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      if (_ls_tail) {                                                                          \
+        _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,NULL,next); _RS(list);                     \
+      }                                                                                        \
+      if (_ls_nmerges <= 1) {                                                                  \
+        _ls_looping=0;                                                                         \
+      }                                                                                        \
+      _ls_insize *= 2;                                                                         \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+
+#define DL_SORT(list, cmp)                                                                     \
+    DL_SORT2(list, cmp, prev, next)
+
+#define DL_SORT2(list, cmp, prev, next)                                                        \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  IF_NO_DECLTYPE(LDECLTYPE(list) _tmp;)                                                        \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {                                                                      \
+      _CASTASGN(_ls_p,list);                                                                   \
+      (list) = NULL;                                                                           \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {                                         \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list); _ls_q = _NEXT(_ls_q,list,next); _RS(list);                          \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while ((_ls_psize > 0) || ((_ls_qsize > 0) && _ls_q)) {                                \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          } else if ((_ls_qsize == 0) || (!_ls_q)) {                                           \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _SV(_ls_e,list); _PREVASGN(_ls_e,list,_ls_tail,prev); _RS(list);                     \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      _CASTASGN((list)->prev, _ls_tail);                                                       \
+      _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,NULL,next); _RS(list);                       \
+      if (_ls_nmerges <= 1) {                                                                  \
+        _ls_looping=0;                                                                         \
+      }                                                                                        \
+      _ls_insize *= 2;                                                                         \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+#define CDL_SORT(list, cmp)                                                                    \
+    CDL_SORT2(list, cmp, prev, next)
+
+#define CDL_SORT2(list, cmp, prev, next)                                                       \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  LDECLTYPE(list) _ls_oldhead;                                                                 \
+  LDECLTYPE(list) _tmp;                                                                        \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {                                                                      \
+      _CASTASGN(_ls_p,list);                                                                   \
+      _CASTASGN(_ls_oldhead,list);                                                             \
+      (list) = NULL;                                                                           \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {                                         \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list);                                                                     \
+          if (_NEXT(_ls_q,list,next) == _ls_oldhead) {                                         \
+            _ls_q = NULL;                                                                      \
+          } else {                                                                             \
+            _ls_q = _NEXT(_ls_q,list,next);                                                    \
+          }                                                                                    \
+          _RS(list);                                                                           \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) {                                    \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+            if (_ls_q == _ls_oldhead) { _ls_q = NULL; }                                        \
+          } else if (_ls_qsize == 0 || !_ls_q) {                                               \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+            if (_ls_p == _ls_oldhead) { _ls_p = NULL; }                                        \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+            if (_ls_p == _ls_oldhead) { _ls_p = NULL; }                                        \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+            if (_ls_q == _ls_oldhead) { _ls_q = NULL; }                                        \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _SV(_ls_e,list); _PREVASGN(_ls_e,list,_ls_tail,prev); _RS(list);                     \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      _CASTASGN((list)->prev,_ls_tail);                                                        \
+      _CASTASGN(_tmp,list);                                                                    \
+      _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_tmp,next); _RS(list);                       \
+      if (_ls_nmerges <= 1) {                                                                  \
+        _ls_looping=0;                                                                         \
+      }                                                                                        \
+      _ls_insize *= 2;                                                                         \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+/******************************************************************************
+ * singly linked list macros (non-circular)                                   *
+ *****************************************************************************/
+#define LL_PREPEND(head,add)                                                                   \
+    LL_PREPEND2(head,add,next)
+
+#define LL_PREPEND2(head,add,next)                                                             \
+do {                                                                                           \
+  (add)->next = (head);                                                                        \
+  (head) = (add);                                                                              \
+} while (0)
+
+#define LL_CONCAT(head1,head2)                                                                 \
+    LL_CONCAT2(head1,head2,next)
+
+#define LL_CONCAT2(head1,head2,next)                                                           \
+do {                                                                                           \
+  LDECLTYPE(head1) _tmp;                                                                       \
+  if (head1) {                                                                                 \
+    _tmp = (head1);                                                                            \
+    while (_tmp->next) { _tmp = _tmp->next; }                                                  \
+    _tmp->next=(head2);                                                                        \
+  } else {                                                                                     \
+    (head1)=(head2);                                                                           \
+  }                                                                                            \
+} while (0)
+
+#define LL_APPEND(head,add)                                                                    \
+    LL_APPEND2(head,add,next)
+
+#define LL_APPEND2(head,add,next)                                                              \
+do {                                                                                           \
+  LDECLTYPE(head) _tmp;                                                                        \
+  (add)->next=NULL;                                                                            \
+  if (head) {                                                                                  \
+    _tmp = (head);                                                                             \
+    while (_tmp->next) { _tmp = _tmp->next; }                                                  \
+    _tmp->next=(add);                                                                          \
+  } else {                                                                                     \
+    (head)=(add);                                                                              \
+  }                                                                                            \
+} while (0)
+
+#define LL_DELETE(head,del)                                                                    \
+    LL_DELETE2(head,del,next)
+
+#define LL_DELETE2(head,del,next)                                                              \
+do {                                                                                           \
+  LDECLTYPE(head) _tmp;                                                                        \
+  if ((head) == (del)) {                                                                       \
+    (head)=(head)->next;                                                                       \
+  } else {                                                                                     \
+    _tmp = (head);                                                                             \
+    while (_tmp->next && (_tmp->next != (del))) {                                              \
+      _tmp = _tmp->next;                                                                       \
+    }                                                                                          \
+    if (_tmp->next) {                                                                          \
+      _tmp->next = (del)->next;                                                                \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+#define LL_COUNT(head,el,counter)                                                              \
+    LL_COUNT2(head,el,counter,next)                                                            \
+
+#define LL_COUNT2(head,el,counter,next)                                                        \
+do {                                                                                           \
+  (counter) = 0;                                                                               \
+  LL_FOREACH2(head,el,next) { ++(counter); }                                                   \
+} while (0)
+
+#define LL_FOREACH(head,el)                                                                    \
+    LL_FOREACH2(head,el,next)
+
+#define LL_FOREACH2(head,el,next)                                                              \
+    for ((el) = (head); el; (el) = (el)->next)
+
+#define LL_FOREACH_SAFE(head,el,tmp) /* walk that saves el->next in tmp before each body */    \
+    LL_FOREACH_SAFE2(head,el,tmp,next)
+
+#define LL_FOREACH_SAFE2(head,el,tmp,next) /* ',1' ignores the value of the tmp assignment */  \
+  for ((el) = (head); (el) && ((tmp) = (el)->next, 1); (el) = (tmp))
+
+#define LL_SEARCH_SCALAR(head,out,field,val) /* find first node with field == val */           \
+    LL_SEARCH_SCALAR2(head,out,field,val,next)
+
+#define LL_SEARCH_SCALAR2(head,out,field,val,next) /* out = match, or NULL if none */          \
+do {                                                                                           \
+    LL_FOREACH2(head,out,next) {                                                               \
+      if ((out)->field == (val)) break; /* stop on the first hit */                            \
+    }                                                                                          \
+} while (0)
+
+#define LL_SEARCH(head,out,elt,cmp) /* find first node for which cmp(node,elt) == 0 */         \
+    LL_SEARCH2(head,out,elt,cmp,next)
+
+#define LL_SEARCH2(head,out,elt,cmp,next) /* out = match, or NULL if none */                   \
+do {                                                                                           \
+    LL_FOREACH2(head,out,next) {                                                               \
+      if ((cmp(out,elt))==0) break; /* stop on the first hit */                                \
+    }                                                                                          \
+} while (0)
+
+#define LL_REPLACE_ELEM2(head, el, add, next) /* put add in el's place in the list */          \
+do {                                                                                           \
+ LDECLTYPE(head) _tmp;                                                                         \
+ assert((head) != NULL);                                                                       \
+ assert((el) != NULL);                                                                         \
+ assert((add) != NULL);                                                                        \
+ (add)->next = (el)->next; /* done even if el is later not found */                            \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);                                                                              \
+ } else {                                                                                      \
+  _tmp = (head);                                                                               \
+  while (_tmp->next && (_tmp->next != (el))) { /* find el's predecessor */                     \
+   _tmp = _tmp->next;                                                                          \
+  }                                                                                            \
+  if (_tmp->next) { /* NULL here means el is not in the list */                                \
+    _tmp->next = (add);                                                                        \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define LL_REPLACE_ELEM(head, el, add) /* same, using the 'next' field */                      \
+    LL_REPLACE_ELEM2(head, el, add, next)
+
+#define LL_PREPEND_ELEM2(head, el, add, next) /* insert add just before el */                  \
+do {                                                                                           \
+ if (el) {                                                                                     \
+  LDECLTYPE(head) _tmp;                                                                        \
+  assert((head) != NULL);                                                                      \
+  assert((add) != NULL);                                                                       \
+  (add)->next = (el);                                                                          \
+  if ((head) == (el)) {                                                                        \
+   (head) = (add);                                                                             \
+  } else {                                                                                     \
+   _tmp = (head);                                                                              \
+   while (_tmp->next && (_tmp->next != (el))) { /* find el's predecessor */                    \
+    _tmp = _tmp->next;                                                                         \
+   }                                                                                           \
+   if (_tmp->next) { /* NULL here means el is not in the list */                               \
+     _tmp->next = (add);                                                                       \
+   }                                                                                           \
+  }                                                                                            \
+ } else { /* el == NULL: hand off to LL_APPEND2 */                                             \
+  LL_APPEND2(head, add, next);                                                                 \
+ }                                                                                             \
+} while (0)
+
+#define LL_PREPEND_ELEM(head, el, add) /* same, using the 'next' field */                      \
+    LL_PREPEND_ELEM2(head, el, add, next)
+
+#define LL_APPEND_ELEM2(head, el, add, next) /* insert add just after el */                    \
+do {                                                                                           \
+ if (el) {                                                                                     \
+  assert((head) != NULL);                                                                      \
+  assert((add) != NULL);                                                                       \
+  (add)->next = (el)->next;                                                                    \
+  (el)->next = (add);                                                                          \
+ } else { /* el == NULL: hand off to LL_PREPEND2 */                                            \
+  LL_PREPEND2(head, add, next);                                                                \
+ }                                                                                             \
+} while (0)
+
+#define LL_APPEND_ELEM(head, el, add) /* same, using the 'next' field */                       \
+    LL_APPEND_ELEM2(head, el, add, next)
+
+#ifdef NO_DECLTYPE
+/* VS2008 / NO_DECLTYPE replacements for a few macros (written without LDECLTYPE temporaries) */
+
+#undef LL_CONCAT2
+#define LL_CONCAT2(head1,head2,next) /* link list head2 after the last node of head1 */        \
+do {                                                                                           \
+  char *_tmp;                                                                                  \
+  if (head1) {                                                                                 \
+    _tmp = (char*)(head1); /* remember the real head */                                        \
+    while ((head1)->next) { (head1) = (head1)->next; } /* head1 itself walks to the tail */    \
+    (head1)->next = (head2);                                                                   \
+    _RS(head1); /* presumably restores head1 from _tmp; _RS is defined elsewhere */            \
+  } else {                                                                                     \
+    (head1)=(head2);                                                                           \
+  }                                                                                            \
+} while (0)
+
+#undef LL_APPEND2
+#define LL_APPEND2(head,add,next) /* add becomes the last node of the list */                  \
+do {                                                                                           \
+  if (head) {                                                                                  \
+    (add)->next = (head);   /* use add->next as a temp variable */                             \
+    while ((add)->next->next) { (add)->next = (add)->next->next; }                             \
+    (add)->next->next=(add);                                                                   \
+  } else {                                                                                     \
+    (head)=(add);                                                                              \
+  }                                                                                            \
+  (add)->next=NULL; /* terminate the list at add */                                            \
+} while (0)
+
+#undef LL_DELETE2
+#define LL_DELETE2(head,del,next) /* unlink del from the list */                               \
+do {                                                                                           \
+  if ((head) == (del)) {                                                                       \
+    (head)=(head)->next;                                                                       \
+  } else {                                                                                     \
+    char *_tmp = (char*)(head); /* remember the real head */                                   \
+    while ((head)->next && ((head)->next != (del))) { /* head itself is the cursor */          \
+      (head) = (head)->next;                                                                   \
+    }                                                                                          \
+    if ((head)->next) {                                                                        \
+      (head)->next = ((del)->next);                                                            \
+    }                                                                                          \
+    _RS(head); /* presumably restores head from _tmp; _RS is defined elsewhere */              \
+  }                                                                                            \
+} while (0)
+
+#undef LL_REPLACE_ELEM2
+#define LL_REPLACE_ELEM2(head, el, add, next) /* put add in el's place in the list */          \
+do {                                                                                           \
+  assert((head) != NULL);                                                                      \
+  assert((el) != NULL);                                                                        \
+  assert((add) != NULL);                                                                       \
+  if ((head) == (el)) {                                                                        \
+    (head) = (add);                                                                            \
+  } else {                                                                                     \
+    (add)->next = (head); /* add->next doubles as the search cursor */                         \
+    while ((add)->next->next && ((add)->next->next != (el))) {                                 \
+      (add)->next = (add)->next->next;                                                         \
+    }                                                                                          \
+    if ((add)->next->next) { /* el found: relink its predecessor to add */                     \
+      (add)->next->next = (add);                                                               \
+    }                                                                                          \
+  }                                                                                            \
+  (add)->next = (el)->next; /* final link; runs whether or not el was found */                 \
+} while (0)
+
+#undef LL_PREPEND_ELEM2
+#define LL_PREPEND_ELEM2(head, el, add, next) /* insert add just before el */                  \
+do {                                                                                           \
+  if (el) {                                                                                    \
+    assert((head) != NULL);                                                                    \
+    assert((add) != NULL);                                                                     \
+    if ((head) == (el)) {                                                                      \
+      (head) = (add);                                                                          \
+    } else {                                                                                   \
+      (add)->next = (head); /* add->next doubles as the search cursor */                       \
+      while ((add)->next->next && ((add)->next->next != (el))) {                               \
+        (add)->next = (add)->next->next;                                                       \
+      }                                                                                        \
+      if ((add)->next->next) { /* el found: relink its predecessor to add */                   \
+        (add)->next->next = (add);                                                             \
+      }                                                                                        \
+    }                                                                                          \
+    (add)->next = (el);                                                                        \
+  } else { /* el == NULL: hand off to LL_APPEND2 */                                            \
+    LL_APPEND2(head, add, next);                                                               \
+  }                                                                                            \
+} while (0)
+
+#endif /* NO_DECLTYPE */
+
+/******************************************************************************
+ * doubly linked list macros (non-circular)                                   *
+ *****************************************************************************/
+#define DL_PREPEND(head,add) /* make add the new first node ('prev'/'next' fields) */          \
+    DL_PREPEND2(head,add,prev,next)
+
+#define DL_PREPEND2(head,add,prev,next) /* make add the new first node */                      \
+do {                                                                                           \
+ (add)->next = (head);                                                                         \
+ if (head) {                                                                                   \
+   (add)->prev = (head)->prev; /* new head takes over the old head's prev link */              \
+   (head)->prev = (add);                                                                       \
+ } else {                                                                                      \
+   (add)->prev = (add); /* sole node: prev refers to itself */                                 \
+ }                                                                                             \
+ (head) = (add);                                                                               \
+} while (0)
+
+#define DL_APPEND(head,add) /* make add the new last node ('prev'/'next' fields) */            \
+    DL_APPEND2(head,add,prev,next)
+
+#define DL_APPEND2(head,add,prev,next) /* make add the new last node */                        \
+do {                                                                                           \
+  if (head) {                                                                                  \
+      (add)->prev = (head)->prev; /* head->prev is read as the current tail */                 \
+      (head)->prev->next = (add);                                                              \
+      (head)->prev = (add);                                                                    \
+      (add)->next = NULL;                                                                      \
+  } else {                                                                                     \
+      (head)=(add);                                                                            \
+      (head)->prev = (head); /* sole node: prev refers to itself */                            \
+      (head)->next = NULL;                                                                     \
+  }                                                                                            \
+} while (0)
+
+#define DL_CONCAT(head1,head2) /* join list head2 onto the end of list head1 */                \
+    DL_CONCAT2(head1,head2,prev,next)
+
+#define DL_CONCAT2(head1,head2,prev,next) /* nothing happens when head2 is NULL */             \
+do {                                                                                           \
+  LDECLTYPE(head1) _tmp;                                                                       \
+  if (head2) {                                                                                 \
+    if (head1) {                                                                               \
+        _CASTASGN(_tmp, (head2)->prev); /* presumably _tmp = head2->prev (helper not shown) */ \
+        (head2)->prev = (head1)->prev;                                                         \
+        (head1)->prev->next = (head2);                                                         \
+        _CASTASGN((head1)->prev, _tmp);                                                        \
+    } else {                                                                                   \
+        (head1)=(head2);                                                                       \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+#define DL_DELETE(head,del) /* unlink del from the list ('prev'/'next' fields) */              \
+    DL_DELETE2(head,del,prev,next)
+
+#define DL_DELETE2(head,del,prev,next) /* unlink del; del's own links are left as-is */        \
+do {                                                                                           \
+  assert((del)->prev != NULL);                                                                 \
+  if ((del)->prev == (del)) { /* del is the only node */                                       \
+      (head)=NULL;                                                                             \
+  } else if ((del)==(head)) {                                                                  \
+      (del)->next->prev = (del)->prev;                                                         \
+      (head) = (del)->next;                                                                    \
+  } else {                                                                                     \
+      (del)->prev->next = (del)->next;                                                         \
+      if ((del)->next) {                                                                       \
+          (del)->next->prev = (del)->prev;                                                     \
+      } else { /* del was the last node */                                                     \
+          (head)->prev = (del)->prev;                                                          \
+      }                                                                                        \
+  }                                                                                            \
+} while (0)
+
+#define DL_COUNT(head,el,counter) /* count nodes linked through 'next' */                      \
+    DL_COUNT2(head,el,counter,next)
+
+#define DL_COUNT2(head,el,counter,next) /* counter = node count; el is the loop cursor */      \
+do {                                                                                           \
+  (counter) = 0;                                                                               \
+  DL_FOREACH2(head,el,next) { ++(counter); } /* el is NULL once the walk completes */          \
+} while (0)
+
+#define DL_FOREACH(head,el) /* iterate el over the list using the 'next' field */              \
+    DL_FOREACH2(head,el,next)
+
+#define DL_FOREACH2(head,el,next) /* loop header; reads el->next after each body pass */       \
+    for ((el) = (head); (el); (el) = (el)->next)
+
+/* this version is safe for deleting the current element (el) during iteration */
+#define DL_FOREACH_SAFE(head,el,tmp) /* walk that saves el->next in tmp before each body */    \
+    DL_FOREACH_SAFE2(head,el,tmp,next)
+
+#define DL_FOREACH_SAFE2(head,el,tmp,next) /* ',1' ignores the value of the tmp assignment */  \
+  for ((el) = (head); (el) && ((tmp) = (el)->next, 1); (el) = (tmp))
+
+/* these are identical to their singly-linked list counterparts (only 'next' is followed) */
+#define DL_SEARCH_SCALAR LL_SEARCH_SCALAR
+#define DL_SEARCH LL_SEARCH
+#define DL_SEARCH_SCALAR2 LL_SEARCH_SCALAR2
+#define DL_SEARCH2 LL_SEARCH2
+
+#define DL_REPLACE_ELEM2(head, el, add, prev, next) /* put add in el's place in the list */    \
+do {                                                                                           \
+ assert((head) != NULL);                                                                       \
+ assert((el) != NULL);                                                                         \
+ assert((add) != NULL);                                                                        \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);                                                                              \
+  (add)->next = (el)->next;                                                                    \
+  if ((el)->next == NULL) { /* el was the only node */                                         \
+   (add)->prev = (add);                                                                        \
+  } else {                                                                                     \
+   (add)->prev = (el)->prev;                                                                   \
+   (add)->next->prev = (add);                                                                  \
+  }                                                                                            \
+ } else {                                                                                      \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el)->prev;                                                                    \
+  (add)->prev->next = (add);                                                                   \
+  if ((el)->next == NULL) { /* el was the last node */                                         \
+   (head)->prev = (add);                                                                       \
+  } else {                                                                                     \
+   (add)->next->prev = (add);                                                                  \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define DL_REPLACE_ELEM(head, el, add) /* same, using the 'prev'/'next' fields */              \
+    DL_REPLACE_ELEM2(head, el, add, prev, next)
+
+#define DL_PREPEND_ELEM2(head, el, add, prev, next) /* insert add just before el */            \
+do {                                                                                           \
+ if (el) {                                                                                     \
+  assert((head) != NULL);                                                                      \
+  assert((add) != NULL);                                                                       \
+  (add)->next = (el);                                                                          \
+  (add)->prev = (el)->prev;                                                                    \
+  (el)->prev = (add);                                                                          \
+  if ((head) == (el)) { /* add->prev already holds the old head's prev */                      \
+   (head) = (add);                                                                             \
+  } else {                                                                                     \
+   (add)->prev->next = (add);                                                                  \
+  }                                                                                            \
+ } else { /* el == NULL: hand off to DL_APPEND2 */                                             \
+  DL_APPEND2(head, add, prev, next);                                                           \
+ }                                                                                             \
+} while (0)
+
+#define DL_PREPEND_ELEM(head, el, add) /* same, using the 'prev'/'next' fields */              \
+    DL_PREPEND_ELEM2(head, el, add, prev, next)
+
+#define DL_APPEND_ELEM2(head, el, add, prev, next) /* insert add just after el */              \
+do {                                                                                           \
+ if (el) {                                                                                     \
+  assert((head) != NULL);                                                                      \
+  assert((add) != NULL);                                                                       \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el);                                                                          \
+  (el)->next = (add);                                                                          \
+  if ((add)->next) {                                                                           \
+   (add)->next->prev = (add);                                                                  \
+  } else { /* el was the last node, so add is now the tail */                                  \
+   (head)->prev = (add);                                                                       \
+  }                                                                                            \
+ } else { /* el == NULL: hand off to DL_PREPEND2 */                                            \
+  DL_PREPEND2(head, add, prev, next);                                                          \
+ }                                                                                             \
+} while (0)
+
+#define DL_APPEND_ELEM(head, el, add) /* same, using the 'prev'/'next' fields */               \
+   DL_APPEND_ELEM2(head, el, add, prev, next)
+
+/******************************************************************************
+ * circular doubly linked list macros                                         *
+ *****************************************************************************/
+#define CDL_APPEND(head,add) /* link add in as head->prev, i.e. at the ring's end */           \
+    CDL_APPEND2(head,add,prev,next)
+
+#define CDL_APPEND2(head,add,prev,next) /* a non-NULL head stays the head */                   \
+do {                                                                                           \
+ if (head) {                                                                                   \
+   (add)->prev = (head)->prev;                                                                 \
+   (add)->next = (head);                                                                       \
+   (head)->prev = (add);                                                                       \
+   (add)->prev->next = (add);                                                                  \
+ } else { /* empty list: add links to itself both ways */                                      \
+   (add)->prev = (add);                                                                        \
+   (add)->next = (add);                                                                        \
+   (head) = (add);                                                                             \
+ }                                                                                             \
+} while (0)
+
+#define CDL_PREPEND(head,add) /* link add in before head and make it the new head */           \
+    CDL_PREPEND2(head,add,prev,next)
+
+#define CDL_PREPEND2(head,add,prev,next) /* same splice as CDL_APPEND2, then head = add */     \
+do {                                                                                           \
+ if (head) {                                                                                   \
+   (add)->prev = (head)->prev;                                                                 \
+   (add)->next = (head);                                                                       \
+   (head)->prev = (add);                                                                       \
+   (add)->prev->next = (add);                                                                  \
+ } else { /* empty list: add links to itself both ways */                                      \
+   (add)->prev = (add);                                                                        \
+   (add)->next = (add);                                                                        \
+ }                                                                                             \
+ (head) = (add);                                                                               \
+} while (0)
+
+#define CDL_DELETE(head,del) /* unlink del from the ring ('prev'/'next' fields) */             \
+    CDL_DELETE2(head,del,prev,next)
+
+#define CDL_DELETE2(head,del,prev,next) /* unlink del; del's own links are left as-is */       \
+do {                                                                                           \
+  if (((head)==(del)) && ((head)->next == (head))) { /* del is the only node */                \
+      (head) = NULL;                                                                           \
+  } else {                                                                                     \
+     (del)->next->prev = (del)->prev;                                                          \
+     (del)->prev->next = (del)->next;                                                          \
+     if ((del) == (head)) (head)=(del)->next; /* head moves on to del's successor */           \
+  }                                                                                            \
+} while (0)
+
+#define CDL_COUNT(head,el,counter) /* count nodes linked through 'next' */                     \
+    CDL_COUNT2(head,el,counter,next)
+
+#define CDL_COUNT2(head, el, counter,next) /* counter = node count; el is the loop cursor */   \
+do {                                                                                           \
+  (counter) = 0;                                                                               \
+  CDL_FOREACH2(head,el,next) { ++(counter); } /* el is NULL once the walk completes */         \
+} while (0)
+
+#define CDL_FOREACH(head,el)                                                                   \
+    CDL_FOREACH2(head,el,next)
+
+#define CDL_FOREACH2(head,el,next)                                                             \
+    for ((el)=(head);el;(el)=(((el)->next==(head)) ? NULL : (el)->next))
+
+#define CDL_FOREACH_SAFE(head,el,tmp1,tmp2)                                                    \
+    CDL_FOREACH_SAFE2(head,el,tmp1,tmp2,prev,next)
+
+#define CDL_FOREACH_SAFE2(head,el,tmp1,tmp2,prev,next)                                         \
+  for ((el) = (head), (tmp1) = (head) ? (head)->prev : NULL;                                   \
+       (el) && ((tmp2) = (el)->next, 1);                                                       \
+       (el) = ((el) == (tmp1) ? NULL : (tmp2)))
+
+#define CDL_SEARCH_SCALAR(head,out,field,val)                                                  \
+    CDL_SEARCH_SCALAR2(head,out,field,val,next)
+
+#define CDL_SEARCH_SCALAR2(head,out,field,val,next)                                            \
+do {                                                                                           \
+    CDL_FOREACH2(head,out,next) {                                                              \
+      if ((out)->field == (val)) break;                                                        \
+    }                                                                                          \
+} while (0)
+
+#define CDL_SEARCH(head,out,elt,cmp)                                                           \
+    CDL_SEARCH2(head,out,elt,cmp,next)
+
+#define CDL_SEARCH2(head,out,elt,cmp,next)                                                     \
+do {                                                                                           \
+    CDL_FOREACH2(head,out,next) {                                                              \
+      if ((cmp(out,elt))==0) break;                                                            \
+    }                                                                                          \
+} while (0)
+
+#define CDL_REPLACE_ELEM2(head, el, add, prev, next)                                           \
+do {                                                                                           \
+ assert((head) != NULL);                                                                       \
+ assert((el) != NULL);                                                                         \
+ assert((add) != NULL);                                                                        \
+ if ((el)->next == (el)) {                                                                     \
+  (add)->next = (add);                                                                         \
+  (add)->prev = (add);                                                                         \
+  (head) = (add);                                                                              \
+ } else {                                                                                      \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el)->prev;                                                                    \
+  (add)->next->prev = (add);                                                                   \
+  (add)->prev->next = (add);                                                                   \
+  if ((head) == (el)) {                                                                        \
+   (head) = (add);                                                                             \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define CDL_REPLACE_ELEM(head, el, add)                                                        \
+    CDL_REPLACE_ELEM2(head, el, add, prev, next)
+
+#define CDL_PREPEND_ELEM2(head, el, add, prev, next)                                           \
+do {                                                                                           \
+  if (el) {                                                                                    \
+    assert((head) != NULL);                                                                    \
+    assert((add) != NULL);                                                                     \
+    (add)->next = (el);                                                                        \
+    (add)->prev = (el)->prev;                                                                  \
+    (el)->prev = (add);                                                                        \
+    (add)->prev->next = (add);                                                                 \
+    if ((head) == (el)) {                                                                      \
+      (head) = (add);                                                                          \
+    }                                                                                          \
+  } else {                                                                                     \
+    CDL_APPEND2(head, add, prev, next);                                                        \
+  }                                                                                            \
+} while (0)
+
+#define CDL_PREPEND_ELEM(head, el, add)                                                        \
+    CDL_PREPEND_ELEM2(head, el, add, prev, next)
+
+#define CDL_APPEND_ELEM2(head, el, add, prev, next)                                            \
+do {                                                                                           \
+ if (el) {                                                                                     \
+  assert((head) != NULL);                                                                      \
+  assert((add) != NULL);                                                                       \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el);                                                                          \
+  (el)->next = (add);                                                                          \
+  (add)->next->prev = (add);                                                                   \
+ } else {                                                                                      \
+  CDL_PREPEND2(head, add, prev, next);                                                         \
+ }                                                                                             \
+} while (0)
+
+#define CDL_APPEND_ELEM(head, el, add)                                                         \
+    CDL_APPEND_ELEM2(head, el, add, prev, next)
+
+#endif /* UTLIST_H */

From eabfa9ab6f8bfd1690f0b1f573477492da8031b7 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Mon, 3 Oct 2016 18:29:18 -0700
Subject: [PATCH 56/91] Stephanie's plasma refactor (#31)

* Add Ray common as a submodule

* Convert to Ray common event loop

* Hide plasma manager state

* Interface changes

* Minor fixes: change LOG_INFO calls to LOG_DEBUG, comments, lint

* Turn off DEBUG by default and make Travis happy

* Allow processes time to clean up during Python tests

* Debugging travis...

* Plasma managers have long-lived connections per manager, not per object

* fix valgrind invalid read and cleanup

* make valgrind happy

* update store API

* put in place manager API

* fixed race condition while sending commands to plasma manager and store -- path sent by Phillip

* clang-format

* Revert "fixed race condition while sending commands to plasma manager and store -- path sent by Phillip"

This reverts commit 79e0f6e6d84f2a309b53155955b65c26c75af071.

* Use reliable socket read/writes from Ray common

* Merge data_connection and plasma_manager_connection structs

* small updates

* restore tests
---
 .gitmodules          |    3 +
 Makefile             |   29 +-
 common               |    1 +
 src/event_loop.c     |   97 ----
 src/event_loop.h     |   43 --
 src/example.c        |   13 +-
 src/malloc.c         |    1 +
 src/plasma.h         |  124 ++---
 src/plasma_client.c  |  123 +++--
 src/plasma_client.h  |   31 +-
 src/plasma_manager.c |  438 ++++++++++-------
 src/plasma_manager.h |  110 ++++-
 src/plasma_store.c   |  309 ++++++------
 src/plasma_store.h   |   49 ++
 src/utarray.h        |  238 ----------
 src/uthash.h         | 1074 ------------------------------------------
 16 files changed, 708 insertions(+), 1975 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 common
 delete mode 100644 src/event_loop.c
 delete mode 100644 src/event_loop.h
 create mode 100644 src/plasma_store.h
 delete mode 100644 src/utarray.h
 delete mode 100644 src/uthash.h

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..9d57a168a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "common"]
+	path = common
+	url = https://github.com/ray-project/common.git
diff --git a/Makefile b/Makefile
index 0b8566021..a91249059 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,31 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I.
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I. -Icommon -Icommon/thirdparty
 BUILD = build
 
 all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
 
+debug: FORCE
+debug: CFLAGS += -DDEBUG=1
+debug: all
+
 clean:
+	$(MAKE) -C common clean
 	rm -r $(BUILD)/*
 
-$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/event_loop.h src/event_loop.c src/fling.h src/fling.c src/malloc.c src/malloc.h thirdparty/dlmalloc.c
-	$(CC) $(CFLAGS) src/plasma_store.c src/event_loop.c src/fling.c src/malloc.c -o $(BUILD)/plasma_store
+$(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c src/malloc.c src/malloc.h thirdparty/dlmalloc.c common
+	$(CC) $(CFLAGS) src/plasma_store.c src/fling.c src/malloc.c common/build/libcommon.a -o $(BUILD)/plasma_store
 
-$(BUILD)/plasma_manager: src/plasma_manager.c src/event_loop.h src/event_loop.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) src/plasma_manager.c src/event_loop.c src/plasma_client.c src/fling.c -o $(BUILD)/plasma_manager
+$(BUILD)/plasma_manager: src/plasma_manager.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c common
+	$(CC) $(CFLAGS) src/plasma_manager.c src/plasma_client.c src/fling.c common/build/libcommon.a -o $(BUILD)/plasma_manager
 
-$(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) src/plasma_client.c src/fling.c -fPIC -shared -o $(BUILD)/plasma_client.so
+$(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c common
+	$(CC) $(CFLAGS) src/plasma_client.c src/fling.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/plasma_client.so
 
-$(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c
-	$(CC) $(CFLAGS) src/plasma_client.c src/example.c src/fling.c -o $(BUILD)/example
+$(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c common
+	$(CC) $(CFLAGS) src/plasma_client.c src/example.c src/fling.c common/build/libcommon.a -o $(BUILD)/example
+
+common: FORCE
+		git submodule update --init --recursive
+		$(MAKE) -C common
+
+FORCE:
diff --git a/common b/common
new file mode 160000
index 000000000..f4037ad19
--- /dev/null
+++ b/common
@@ -0,0 +1 @@
+Subproject commit f4037ad19f38dc68b186c9338d3f67c9058c556c
diff --git a/src/event_loop.c b/src/event_loop.c
deleted file mode 100644
index de61c0e05..000000000
--- a/src/event_loop.c
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "event_loop.h"
-
-#include <assert.h>
-#include <unistd.h>
-
-UT_icd item_icd = {sizeof(event_loop_item), NULL, NULL, NULL};
-UT_icd poll_icd = {sizeof(struct pollfd), NULL, NULL, NULL};
-
-/* Initializes the event loop.
- * This function needs to be called before any other event loop function. */
-void event_loop_init(event_loop *loop) {
-  utarray_new(loop->items, &item_icd);
-  utarray_new(loop->waiting, &poll_icd);
-}
-
-/* Add a new file descriptor fd to the event loop.
- * This function sets a user defined type and id for the file descriptor
- * which can be queried using event_loop_type and event_loop_id. The parameter
- * events is the same as in http://linux.die.net/man/2/poll.
- * Returns the index of the item in the event loop. */
-int64_t event_loop_attach(event_loop *loop,
-                          int type,
-                          data_connection *connection,
-                          int fd,
-                          int events) {
-  assert(utarray_len(loop->items) == utarray_len(loop->waiting));
-  int64_t index = utarray_len(loop->items);
-  event_loop_item item = {.type = type};
-  if (connection) {
-    item.connection = *connection;
-  }
-  utarray_push_back(loop->items, &item);
-  struct pollfd waiting = {.fd = fd, .events = events};
-  utarray_push_back(loop->waiting, &waiting);
-  return index;
-}
-
-/* Detach a file descriptor from the event loop.
- * This invalidates all other indices into the event loop items, but leaves
- * the ids of the event loop items valid. */
-void event_loop_detach(event_loop *loop, int64_t index, int shall_close) {
-  struct pollfd *waiting_item =
-      (struct pollfd *) utarray_eltptr(loop->waiting, index);
-  struct pollfd *waiting_back = (struct pollfd *) utarray_back(loop->waiting);
-  if (shall_close) {
-    close(waiting_item->fd);
-  }
-  *waiting_item = *waiting_back;
-  utarray_pop_back(loop->waiting);
-
-  event_loop_item *items_item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  event_loop_item *items_back = (event_loop_item *) utarray_back(loop->items);
-  *items_item = *items_back;
-  utarray_pop_back(loop->items);
-}
-
-/* Poll the file descriptors associated to this event loop.
- * See http://linux.die.net/man/2/poll */
-int event_loop_poll(event_loop *loop) {
-  return poll((struct pollfd *) utarray_front(loop->waiting),
-              utarray_len(loop->waiting), -1);
-}
-
-/* Get the total number of file descriptors participating in the event loop. */
-int64_t event_loop_size(event_loop *loop) {
-  return utarray_len(loop->waiting);
-}
-
-/* Get the pollfd structure associated to a file descriptor participating in the
- * event loop. */
-struct pollfd *event_loop_get(event_loop *loop, int64_t index) {
-  return (struct pollfd *) utarray_eltptr(loop->waiting, index);
-}
-
-/* Set the data connection information for participant in the event loop. */
-void event_loop_set_connection(event_loop *loop,
-                               int64_t index,
-                               const data_connection *conn) {
-  event_loop_item *item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  item->connection = *conn;
-}
-
-/* Get the data connection information for participant in the event loop. */
-data_connection *event_loop_get_connection(event_loop *loop, int64_t index) {
-  event_loop_item *item =
-      (event_loop_item *) utarray_eltptr(loop->items, index);
-  return &item->connection;
-}
-
-/* Free the space associated to the event loop.
- * Does not free the event_loop datastructure itself. */
-void event_loop_free(event_loop *loop) {
-  utarray_free(loop->items);
-  utarray_free(loop->waiting);
-}
diff --git a/src/event_loop.h b/src/event_loop.h
deleted file mode 100644
index 5dee29682..000000000
--- a/src/event_loop.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef EVENT_LOOP_H
-#define EVENT_LOOP_H
-
-#include <poll.h>
-#include <string.h>
-
-#include "utarray.h"
-#include "plasma.h"
-#include "plasma_manager.h"
-
-typedef struct {
-  /* The type of connection (e.g. redis, client, manager, data transfer). */
-  int type;
-  /* If type is data transfer, this contains information about the status
-   * of the transfer. */
-  data_connection connection;
-} event_loop_item;
-
-typedef struct {
-  /* Array of event_loop_items that hold information for connections. */
-  UT_array *items;
-  /* Array of file descriptors that are waiting, corresponding to items. */
-  UT_array *waiting;
-} event_loop;
-
-/* Event loop functions. */
-void event_loop_init(event_loop *loop);
-void event_loop_free(event_loop *loop);
-int64_t event_loop_attach(event_loop *loop,
-                          int type,
-                          data_connection *connection,
-                          int fd,
-                          int events);
-void event_loop_detach(event_loop *loop, int64_t index, int shall_close);
-int event_loop_poll(event_loop *loop);
-int64_t event_loop_size(event_loop *loop);
-struct pollfd *event_loop_get(event_loop *loop, int64_t index);
-void event_loop_set_connection(event_loop *loop,
-                               int64_t index,
-                               const data_connection *conn);
-data_connection *event_loop_get_connection(event_loop *loop, int64_t index);
-
-#endif
diff --git a/src/example.c b/src/example.c
index 7d0a9ac14..81763dac7 100644
--- a/src/example.c
+++ b/src/example.c
@@ -13,13 +13,14 @@
 #include <assert.h>
 
 #include "plasma.h"
+#include "plasma_client.h"
 
 int main(int argc, char *argv[]) {
-  int conn = -1;
+  plasma_store_conn *conn = NULL;
   int64_t size;
   uint8_t *data;
   int c;
-  plasma_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  object_id id = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255}};
   while ((c = getopt(argc, argv, "s:cfg")) != -1) {
     switch (c) {
@@ -27,11 +28,11 @@ int main(int argc, char *argv[]) {
       conn = plasma_store_connect(optarg);
       break;
     case 'c':
-      assert(conn != -1);
+      assert(conn != NULL);
       plasma_create(conn, id, 100, NULL, 0, &data);
       break;
     case 'f':
-      assert(conn != -1);
+      assert(conn != NULL);
       plasma_seal(conn, id);
       break;
     case 'g':
@@ -41,6 +42,6 @@ int main(int argc, char *argv[]) {
       abort();
     }
   }
-  assert(conn != -1);
-  close(conn);
+  assert(conn != NULL);
+  plasma_store_disconnect(conn);
 }
diff --git a/src/malloc.c b/src/malloc.c
index 0cbc8f082..230fee38d 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -6,6 +6,7 @@
 #include <sys/mman.h>
 #include <unistd.h>
 
+#include "common.h"
 #include "plasma.h"
 #include "uthash.h"
 
diff --git a/src/plasma.h b/src/plasma.h
index a36114c18..6a39deb6c 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -7,34 +7,7 @@
 #include <stddef.h>
 #include <string.h>
 
-#include "uthash.h"
-
-#ifdef NDEBUG
-#define LOG_DEBUG(M, ...)
-#else
-#define LOG_DEBUG(M, ...) \
-  fprintf(stderr, "[DEBUG] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
-#endif
-
-#ifdef PLASMA_LOGGIN_ON
-#define LOG_INFO(M, ...) \
-  fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
-#else
-#define LOG_INFO(M, ...)
-#endif
-
-#define LOG_ERR(M, ...)                                                     \
-  fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
-          errno == 0 ? "None" : strerror(errno), ##__VA_ARGS__)
-
-#define PLASMA_CHECK(CONDITION, M, ...)                                \
-  do {                                                                 \
-    if (!(CONDITION)) {                                                \
-      fprintf(stderr, "[FATAL] (%s:%d " #CONDITION ") \n" M, __FILE__, \
-              __LINE__);                                               \
-      exit(-1);                                                        \
-    }                                                                  \
-  } while (0)
+#include "common.h"
 
 typedef struct {
   int64_t data_size;
@@ -43,12 +16,34 @@ typedef struct {
   int64_t construct_duration;
 } plasma_object_info;
 
-/** Represents an object ID hash, can hold a full SHA1 hash. */
-typedef struct { unsigned char id[20]; } plasma_id;
+/* Handle to access memory mapped file and map it into client address space */
+typedef struct {
+  /** The file descriptor of the memory mapped file in the store. It is used
+   * as a unique identifier of the file in the client to look up the
+   * corresponding file descriptor on the client's side. */
+  int store_fd;
+  /** The size in bytes of the memory mapped file. */
+  int64_t mmap_size;
+} object_handle;
 
-enum plasma_request_type {
+typedef struct {
+  /** Handle for memory mapped file the object is stored in. */
+  object_handle handle;
+  /** The offset in bytes in the memory mapped file of the data. */
+  ptrdiff_t data_offset;
+  /** The offset in bytes in the memory mapped file of the metadata. */
+  ptrdiff_t metadata_offset;
+  /** The size in bytes of the data. */
+  int64_t data_size;
+  /** The size in bytes of the metadata. */
+  int64_t metadata_size;
+} plasma_object;
+
+enum object_status { OBJECT_NOT_FOUND = 0, OBJECT_FOUND = 1 };
+
+enum plasma_message_type {
   /** Create a new object. */
-  PLASMA_CREATE,
+  PLASMA_CREATE = 128,
   /** Get an object. */
   PLASMA_GET,
   /** Check if an object is present. */
@@ -64,10 +59,8 @@ enum plasma_request_type {
 };
 
 typedef struct {
-  /** The type of the request. */
-  int type;
   /** The ID of the object that the request is about. */
-  plasma_id object_id;
+  object_id object_id;
   /** The size of the object's data. */
   int64_t data_size;
   /** The size of the object's metadata. */
@@ -81,68 +74,11 @@ typedef struct {
 } plasma_request;
 
 typedef struct {
-  /** The offset in bytes in the memory mapped file of the data. */
-  ptrdiff_t data_offset;
-  /** The offset in bytes in the memory mapped file of the metadata. */
-  ptrdiff_t metadata_offset;
-  /** The size in bytes of the memory mapped file. */
-  int64_t map_size;
-  /** The size in bytesof the data. */
-  int64_t data_size;
-  /** The size in bytes of the metadata. */
-  int64_t metadata_size;
+  /** The object that is returned with this reply. */
+  plasma_object object;
   /** This is used only to respond to requests of type PLASMA_CONTAINS. It is 1
    *  if the object is present and 0 otherwise. Used for plasma_contains. */
   int has_object;
-  /** The file descriptor of the memory mapped file in the store. */
-  int store_fd_val;
 } plasma_reply;
 
-typedef struct {
-  plasma_id object_id;
-  uint8_t *data;
-  int64_t data_size;
-  uint8_t *metadata;
-  int64_t metadata_size;
-  int writable;
-} plasma_buffer;
-
-typedef struct {
-  /** Key that uniquely identifies the  memory mapped file. In practice, we
-   *  take the numerical value of the file descriptor in the object store. */
-  int key;
-  /** The result of mmap for this file descriptor. */
-  uint8_t *pointer;
-  /** Handle for the uthash table. */
-  UT_hash_handle hh;
-} client_mmap_table_entry;
-
-/** Information about a connection between a Plasma Client and Plasma Store.
- *  This is used to avoid mapping the same files into memory multiple times. */
-typedef struct {
-  /** File descriptor of the Unix domain socket that connects to the store. */
-  int conn;
-  /** Table of dlmalloc buffer files that have been memory mapped so far. */
-  client_mmap_table_entry *mmap_table;
-} plasma_store_conn;
-
-/**
- * This is used by the Plasma Client to send a request to the Plasma Store or
- * the Plasma Manager.
- *
- * @param conn The file descriptor to use to send the request.
- * @param req The address of the request to send.
- * @return Void.
- */
-void plasma_send_request(int conn, plasma_request *req);
-
-/**
- * This is used by the Plasma Store to send a reply to the Plasma Client.
- *
- * @param conn The file descriptor to use to send the reply.
- * @param req The address of the reply to send.
- * @return Void.
- */
-void plasma_send_reply(int conn, plasma_reply *req);
-
 #endif
diff --git a/src/plasma_client.c b/src/plasma_client.c
index e486e1aa8..bd706ded7 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -12,16 +12,35 @@
 #include <netinet/in.h>
 #include <netdb.h>
 
+#include "common.h"
+#include "io.h"
 #include "plasma.h"
 #include "plasma_client.h"
 #include "fling.h"
+#include "uthash.h"
 
-void plasma_send_request(int fd, plasma_request *req) {
+typedef struct {
+  /** Key that uniquely identifies the memory mapped file. In practice, we
+   *  take the numerical value of the file descriptor in the object store. */
+  int key;
+  /** The result of mmap for this file descriptor. */
+  uint8_t *pointer;
+  /** Handle for the uthash table. */
+  UT_hash_handle hh;
+} client_mmap_table_entry;
+
+/** Information about a connection between a Plasma Client and Plasma Store.
+ *  This is used to avoid mapping the same files into memory multiple times. */
+struct plasma_store_conn {
+  /** File descriptor of the Unix domain socket that connects to the store. */
+  int conn;
+  /** Table of dlmalloc buffer files that have been memory mapped so far. */
+  client_mmap_table_entry *mmap_table;
+};
+
+void plasma_send_request(int fd, int type, plasma_request *req) {
   int req_count = sizeof(plasma_request);
-  if (write(fd, req, req_count) != req_count) {
-    LOG_ERR("write error, fd = %d", fd);
-    exit(-1);
-  }
+  write_message(fd, type, req_count, (uint8_t *) req);
 }
 
 /* If the file descriptor fd has been mmapped in this client process before,
@@ -53,97 +72,93 @@ uint8_t *lookup_or_mmap(plasma_store_conn *conn,
 }
 
 void plasma_create(plasma_store_conn *conn,
-                   plasma_id object_id,
+                   object_id object_id,
                    int64_t data_size,
                    uint8_t *metadata,
                    int64_t metadata_size,
                    uint8_t **data) {
-  LOG_INFO(
-      "called plasma_create on conn %d with size %d and metadata size "
-      "%d" PRId64,
-      conn, size, metadata_size);
-  plasma_request req = {.type = PLASMA_CREATE,
-                        .object_id = object_id,
+  LOG_DEBUG("called plasma_create on conn %d with size %" PRId64
+            " and metadata size "
+            "%" PRId64,
+            conn->conn, data_size, metadata_size);
+  plasma_request req = {.object_id = object_id,
                         .data_size = data_size,
                         .metadata_size = metadata_size};
-  plasma_send_request(conn->conn, &req);
+  plasma_send_request(conn->conn, PLASMA_CREATE, &req);
   plasma_reply reply;
   int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
-  assert(reply.data_size == data_size);
-  assert(reply.metadata_size == metadata_size);
+  plasma_object *object = &reply.object;
+  CHECK(object->data_size == data_size);
+  CHECK(object->metadata_size == metadata_size);
   /* The metadata should come right after the data. */
-  assert(reply.metadata_offset == reply.data_offset + data_size);
-  *data = lookup_or_mmap(conn, fd, reply.store_fd_val, reply.map_size) +
-          reply.data_offset;
+  CHECK(object->metadata_offset == object->data_offset + data_size);
+  *data = lookup_or_mmap(conn, fd, object->handle.store_fd,
+                         object->handle.mmap_size) +
+          object->data_offset;
   /* If plasma_create is being called from a transfer, then we will not copy the
    * metadata here. The metadata will be written along with the data streamed
    * from the transfer. */
   if (metadata != NULL) {
     /* Copy the metadata to the buffer. */
-    memcpy(*data + reply.data_size, metadata, metadata_size);
+    memcpy(*data + object->data_size, metadata, metadata_size);
   }
 }
 
 /* This method is used to get both the data and the metadata. */
 void plasma_get(plasma_store_conn *conn,
-                plasma_id object_id,
+                object_id object_id,
                 int64_t *size,
                 uint8_t **data,
                 int64_t *metadata_size,
                 uint8_t **metadata) {
-  plasma_request req = {.type = PLASMA_GET, .object_id = object_id};
-  plasma_send_request(conn->conn, &req);
+  plasma_request req = {.object_id = object_id};
+  plasma_send_request(conn->conn, PLASMA_GET, &req);
   plasma_reply reply;
   int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
-  *data = lookup_or_mmap(conn, fd, reply.store_fd_val, reply.map_size) +
-          reply.data_offset;
-  *size = reply.data_size;
+  plasma_object *object = &reply.object;
+  *data = lookup_or_mmap(conn, fd, object->handle.store_fd,
+                         object->handle.mmap_size) +
+          object->data_offset;
+  *size = object->data_size;
   /* If requested, return the metadata as well. */
   if (metadata != NULL) {
-    *metadata = *data + reply.data_size;
-    *metadata_size = reply.metadata_size;
+    *metadata = *data + object->data_size;
+    *metadata_size = object->metadata_size;
   }
 }
 
 /* This method is used to query whether the plasma store contains an object. */
 void plasma_contains(plasma_store_conn *conn,
-                     plasma_id object_id,
+                     object_id object_id,
                      int *has_object) {
-  plasma_request req = {.type = PLASMA_CONTAINS, .object_id = object_id};
-  plasma_send_request(conn->conn, &req);
+  plasma_request req = {.object_id = object_id};
+  plasma_send_request(conn->conn, PLASMA_CONTAINS, &req);
   plasma_reply reply;
   int r = read(conn->conn, &reply, sizeof(plasma_reply));
-  PLASMA_CHECK(r != -1, "read error");
-  PLASMA_CHECK(r != 0, "connection disconnected");
+  CHECKM(r != -1, "read error");
+  CHECKM(r != 0, "connection disconnected");
   *has_object = reply.has_object;
 }
 
-void plasma_seal(plasma_store_conn *conn, plasma_id object_id) {
-  plasma_request req = {.type = PLASMA_SEAL, .object_id = object_id};
-  plasma_send_request(conn->conn, &req);
+void plasma_seal(plasma_store_conn *conn, object_id object_id) {
+  plasma_request req = {.object_id = object_id};
+  plasma_send_request(conn->conn, PLASMA_SEAL, &req);
 }
 
-void plasma_delete(plasma_store_conn *conn, plasma_id object_id) {
-  plasma_request req = {.type = PLASMA_DELETE, .object_id = object_id};
-  plasma_send_request(conn->conn, &req);
+void plasma_delete(plasma_store_conn *conn, object_id object_id) {
+  plasma_request req = {.object_id = object_id};
+  plasma_send_request(conn->conn, PLASMA_DELETE, &req);
 }
 
 plasma_store_conn *plasma_store_connect(const char *socket_name) {
   assert(socket_name);
-  struct sockaddr_un addr;
-  int fd;
-  if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
-    LOG_ERR("socket error");
-    exit(-1);
-  }
-  memset(&addr, 0, sizeof(addr));
-  addr.sun_family = AF_UNIX;
-  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
   /* Try to connect to the Plasma store. If unsuccessful, retry several times.
    */
+  int fd = -1;
   int connected_successfully = 0;
   for (int num_attempts = 0; num_attempts < 50; ++num_attempts) {
-    if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == 0) {
+    fd = connect_ipc_sock(socket_name);
+    if (fd >= 0) {
       connected_successfully = 1;
       break;
     }
@@ -162,6 +177,11 @@ plasma_store_conn *plasma_store_connect(const char *socket_name) {
   return result;
 }
 
+void plasma_store_disconnect(plasma_store_conn *conn) {
+  close(conn->conn);
+  free(conn);
+}
+
 #define h_addr h_addr_list[0]
 
 int plasma_manager_connect(const char *ip_addr, int port) {
@@ -196,14 +216,13 @@ int plasma_manager_connect(const char *ip_addr, int port) {
 void plasma_transfer(int manager,
                      const char *addr,
                      int port,
-                     plasma_id object_id) {
-  plasma_request req = {
-      .type = PLASMA_TRANSFER, .object_id = object_id, .port = port};
+                     object_id object_id) {
+  plasma_request req = {.object_id = object_id, .port = port};
   char *end = NULL;
   for (int i = 0; i < 4; ++i) {
     req.addr[i] = strtol(end ? end : addr, &end, 10);
     /* skip the '.' */
     end += 1;
   }
-  plasma_send_request(manager, &req);
+  plasma_send_request(manager, PLASMA_TRANSFER, &req);
 }
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 148b74f93..44af5a1f0 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -1,6 +1,19 @@
 #ifndef PLASMA_CLIENT_H
 #define PLASMA_CLIENT_H
 
+typedef struct plasma_store_conn plasma_store_conn;
+
+/**
+ * This is used by the Plasma Client to send a request to the Plasma Store or
+ * the Plasma Manager.
+ *
+ * @param conn The file descriptor to use to send the request.
+ * @param type The type of request.
+ * @param req The address of the request to send.
+ * @return Void.
+ */
+void plasma_send_request(int conn, int type, plasma_request *req);
+
 /**
  * Connect to the local plasma store UNIX domain socket with path socket_name
  * and return the resulting connection.
@@ -11,6 +24,14 @@
  */
 plasma_store_conn *plasma_store_connect(const char *socket_name);
 
+/**
+ * Disconnect from the local plasma store.
+ *
+ * @param conn The connection to the local plasma store.
+ * @return Void.
+ */
+void plasma_store_disconnect(plasma_store_conn *conn);
+
 /**
  * Connect to a possibly remote Plasma Manager.
  *
@@ -36,7 +57,7 @@ int plasma_manager_connect(const char *addr, int port);
  * @return Void.
  */
 void plasma_create(plasma_store_conn *conn,
-                   plasma_id object_id,
+                   object_id object_id,
                    int64_t size,
                    uint8_t *metadata,
                    int64_t metadata_size,
@@ -58,7 +79,7 @@ void plasma_create(plasma_store_conn *conn,
  * @return Void.
  */
 void plasma_get(plasma_store_conn *conn,
-                plasma_id object_id,
+                object_id object_id,
                 int64_t *size,
                 uint8_t **data,
                 int64_t *metadata_size,
@@ -77,7 +98,7 @@ void plasma_get(plasma_store_conn *conn,
  * @return Void.
  */
 void plasma_contains(plasma_store_conn *conn,
-                     plasma_id object_id,
+                     object_id object_id,
                      int *has_object);
 
 /**
@@ -88,7 +109,7 @@ void plasma_contains(plasma_store_conn *conn,
  * @param object_id The ID of the object to seal.
  * @return Void.
  */
-void plasma_seal(plasma_store_conn *conn, plasma_id object_id);
+void plasma_seal(plasma_store_conn *conn, object_id object_id);
 
 /**
  * Delete an object from the object store. This currently assumes that the
@@ -101,6 +122,6 @@ void plasma_seal(plasma_store_conn *conn, plasma_id object_id);
  * @param object_id The ID of the object to delete.
  * @return Void.
  */
-void plasma_delete(plasma_store_conn *conn, plasma_id object_id);
+void plasma_delete(plasma_store_conn *conn, object_id object_id);
 
 #endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index a4952295d..6bda50d93 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -20,197 +20,290 @@
 #include <netinet/in.h>
 #include <netdb.h>
 
+#include "uthash.h"
+#include "utlist.h"
+#include "utstring.h"
+#include "common.h"
+#include "io.h"
 #include "event_loop.h"
 #include "plasma.h"
 #include "plasma_client.h"
 #include "plasma_manager.h"
 
 typedef struct {
-  /* Connection to local plasma store. */
-  plasma_store_conn *conn;
-  /* Event loop. */
-  event_loop *loop;
+  /** Connection to the local plasma store for reading or writing data. */
+  plasma_store_conn *store_conn;
+  /** Hash table of all contexts for active connections to other plasma
+   * managers. These are used for writing data to other plasma stores. */
+  client_connection *manager_connections;
 } plasma_manager_state;
 
-/* Initialize the plasma manager. This function initializes the event loop
- * of the plasma manager, and stores the address 'store_socket_name' of
- * the local plasma store socket. */
-void init_plasma_manager(plasma_manager_state *s,
-                         const char *store_socket_name) {
-  s->loop = malloc(sizeof(event_loop));
-  event_loop_init(s->loop);
-  s->conn = plasma_store_connect(store_socket_name);
-  LOG_INFO("Connected to object store %s", store_socket_name);
-}
+typedef struct plasma_buffer plasma_buffer;
 
-/* Start transfering data to another object store manager. This establishes
- * a connection to the remote manager and sends the data header to the other
- * object manager. */
-void initiate_transfer(plasma_manager_state *s, plasma_request *req) {
+/* Buffer for reading and writing data between plasma managers. */
+struct plasma_buffer {
+  object_id object_id;
   uint8_t *data;
   int64_t data_size;
   uint8_t *metadata;
   int64_t metadata_size;
-  plasma_get(s->conn, req->object_id, &data_size, &data, &metadata_size,
-             &metadata);
-  assert(metadata == data + data_size);
-  plasma_buffer buf = {.object_id = req->object_id,
-                       .data = data, /* We treat this as a pointer to the
-                                        concatenated data and metadata. */
-                       .data_size = data_size,
-                       .metadata_size = metadata_size,
-                       .writable = 0};
-  char ip_addr[32];
-  snprintf(ip_addr, 32, "%d.%d.%d.%d", req->addr[0], req->addr[1], req->addr[2],
-           req->addr[3]);
+  int writable;
+  /* Pointer to the next buffer that we will write to this plasma manager. This
+   * field is only used if we're transferring data to another plasma manager,
+   * not if we are receiving data. */
+  plasma_buffer *next;
+};
 
-  int fd = plasma_manager_connect(&ip_addr[0], req->port);
-  data_connection conn = {.type = DATA_CONNECTION_WRITE,
-                          .store_conn = s->conn->conn,
-                          .buf = buf,
-                          .cursor = 0};
-  event_loop_attach(s->loop, CONNECTION_DATA, &conn, fd, POLLOUT);
-  plasma_request manager_req = {.type = PLASMA_DATA,
-                                .object_id = req->object_id,
-                                .data_size = buf.data_size,
-                                .metadata_size = buf.metadata_size};
-  plasma_send_request(fd, &manager_req);
-}
+/* Context for a client connection to another plasma manager. */
+struct client_connection {
+  /* Current state for this plasma manager. This is shared between all client
+   * connections to the plasma manager. */
+  plasma_manager_state *manager_state;
+  /* Current position in the buffer. */
+  int64_t cursor;
+  /* Buffer that this connection is reading from. If this is a connection to
+   * write data to another plasma store, then it is a linked list of buffers to
+   * write. */
+  plasma_buffer *transfer_queue;
+  /* File descriptor for the socket connected to the other plasma manager. */
+  int fd;
+  /* Following fields are used only for connections to plasma managers. */
+  /* Key that uniquely identifies the plasma manager that we're connected to.
+   * We will use the string <address>:<port> as an identifier. */
+  char *ip_addr_port;
+  /** Handle for the uthash table. */
+  UT_hash_handle hh;
+};
 
-/* Start reading data from another object manager.
- * Initializes the object we are going to write to in the
- * local plasma store and then switches the data socket to reading mode. */
-void start_reading_data(int64_t index,
-                        plasma_manager_state *s,
-                        plasma_request *req) {
-  plasma_buffer buf = {.object_id = req->object_id,
-                       .data_size = req->data_size,
-                       .metadata_size = req->metadata_size,
-                       .writable = 1};
-  plasma_create(s->conn, req->object_id, req->data_size, NULL,
-                req->metadata_size, &buf.data);
-  data_connection conn = {.type = DATA_CONNECTION_READ,
-                          .store_conn = s->conn->conn,
-                          .buf = buf,
-                          .cursor = 0};
-  event_loop_set_connection(s->loop, index, &conn);
+plasma_manager_state *init_plasma_manager_state(const char *store_socket_name) {
+  plasma_manager_state *state = malloc(sizeof(plasma_manager_state));
+  state->store_conn = plasma_store_connect(store_socket_name);
+  state->manager_connections = NULL;
+  return state;
 }
 
 /* Handle a command request that came in through a socket (transfering data,
  * or accepting incoming data). */
-void process_command(int64_t id,
-                     plasma_manager_state *state,
-                     plasma_request *req) {
-  switch (req->type) {
-  case PLASMA_TRANSFER:
-    LOG_INFO("transfering object to manager with port %d", req->port);
-    initiate_transfer(state, req);
-    break;
-  case PLASMA_DATA:
-    LOG_INFO("starting to stream data");
-    start_reading_data(id, state, req);
-    break;
-  default:
-    LOG_ERR("invalid request %d", req->type);
-    exit(-1);
-  }
-}
+void process_message(event_loop *loop,
+                     int client_sock,
+                     void *context,
+                     int events);
 
-/* Handle data or command event incoming on socket with index "index". */
-void read_from_socket(plasma_manager_state *state,
-                      struct pollfd *waiting,
-                      int64_t index,
-                      plasma_request *req) {
+void write_object_chunk(event_loop *loop,
+                        int data_sock,
+                        void *context,
+                        int events) {
+  client_connection *conn = (client_connection *) context;
+  if (conn->transfer_queue == NULL) {
+    /* If there are no objects to transfer, temporarily remove this connection
+     * from the event loop. It will be reawoken when we receive another
+     * PLASMA_TRANSFER request. */
+    event_loop_remove_file(loop, conn->fd);
+    return;
+  }
+
+  LOG_DEBUG("Writing data");
   ssize_t r, s;
-  data_connection *conn = event_loop_get_connection(state->loop, index);
-  switch (conn->type) {
-  case DATA_CONNECTION_HEADER:
-    r = read(waiting->fd, req, sizeof(plasma_request));
-    if (r == -1) {
-      LOG_ERR("read error");
-    } else if (r == 0) {
-      LOG_INFO("connection with id %" PRId64 " disconnected", index);
-      event_loop_detach(state->loop, index, 1);
-    } else {
-      process_command(index, state, req);
-    }
-    break;
-  case DATA_CONNECTION_READ:
-    LOG_DEBUG("polled DATA_CONNECTION_READ");
-    r = read(waiting->fd, conn->buf.data + conn->cursor, BUFSIZE);
-    if (r == -1) {
-      LOG_ERR("read error");
-    } else if (r == 0) {
-      LOG_INFO("end of file");
-    } else {
-      conn->cursor += r;
-    }
-    if (r == 0) {
-      LOG_DEBUG("reading on channel %" PRId64 " finished", index);
-      plasma_seal(state->conn, conn->buf.object_id);
-      event_loop_detach(state->loop, index, 1);
-    }
-    break;
-  case DATA_CONNECTION_WRITE:
-    LOG_DEBUG("polled DATA_CONNECTION_WRITE");
-    s = conn->buf.data_size + conn->buf.metadata_size - conn->cursor;
-    if (s > BUFSIZE)
-      s = BUFSIZE;
-    r = write(waiting->fd, conn->buf.data + conn->cursor, s);
-    if (r != s) {
-      if (r > 0) {
-        LOG_ERR("partial write on fd %d", waiting->fd);
-      } else {
-        LOG_ERR("write error");
-        exit(-1);
-      }
-    } else {
-      conn->cursor += r;
-    }
-    if (r == 0) {
-      LOG_DEBUG("writing on channel %" PRId64 " finished", index);
-      event_loop_detach(state->loop, index, 1);
-    }
-    break;
-  default:
-    LOG_ERR("invalid connection type");
-    exit(-1);
+  plasma_buffer *buf = conn->transfer_queue;
+  if (conn->cursor == 0) {
+    /* If the cursor is zero, we haven't sent any requests for this object yet,
+     * so send the initial PLASMA_DATA request. */
+    plasma_request manager_req = {.object_id = buf->object_id,
+                                  .data_size = buf->data_size,
+                                  .metadata_size = buf->metadata_size};
+    plasma_send_request(conn->fd, PLASMA_DATA, &manager_req);
   }
-}
 
-/* Main event loop of the plasma manager. */
-void run_event_loop(int sock, plasma_manager_state *s) {
-  /* Add listening socket. */
-  event_loop_attach(s->loop, CONNECTION_LISTENER, NULL, sock, POLLIN);
-  plasma_request req;
-  while (1) {
-    int num_ready = event_loop_poll(s->loop);
-    if (num_ready < 0) {
-      LOG_ERR("poll failed");
+  /* Try to write one BUFSIZE at a time. */
+  s = buf->data_size + buf->metadata_size - conn->cursor;
+  if (s > BUFSIZE)
+    s = BUFSIZE;
+  r = write(conn->fd, buf->data + conn->cursor, s);
+
+  if (r != s) {
+    if (r > 0) {
+      LOG_ERR("partial write on fd %d", conn->fd);
+    } else {
+      LOG_ERR("write error");
       exit(-1);
     }
-    for (int i = 0; i < event_loop_size(s->loop); ++i) {
-      struct pollfd *waiting = event_loop_get(s->loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (waiting->fd == sock) {
-        /* Handle new incoming connections. */
-        int new_socket = accept(sock, NULL, NULL);
-        if (new_socket < 0) {
-          if (errno != EWOULDBLOCK) {
-            LOG_ERR("accept failed");
-            exit(-1);
-          }
-          break;
-        }
-        data_connection conn = {.type = DATA_CONNECTION_HEADER};
-        event_loop_attach(s->loop, CONNECTION_DATA, &conn, new_socket, POLLIN);
-        LOG_INFO("new connection with id %" PRId64, event_loop_size(s->loop));
-      } else {
-        read_from_socket(s, waiting, i, &req);
-      }
-    }
+  } else {
+    conn->cursor += r;
   }
+  if (r == 0) {
+    /* If we've finished writing this buffer, move on to the next transfer
+     * request and reset the cursor to zero. */
+    LOG_DEBUG("writing on channel %d finished", data_sock);
+    conn->cursor = 0;
+    LL_DELETE(conn->transfer_queue, buf);
+    free(buf);
+  }
+}
+
+void read_object_chunk(event_loop *loop,
+                       int data_sock,
+                       void *context,
+                       int events) {
+  LOG_DEBUG("Reading data");
+  ssize_t r, s;
+  client_connection *conn = (client_connection *) context;
+  plasma_buffer *buf = conn->transfer_queue;
+  CHECK(buf != NULL);
+  /* Try to read one BUFSIZE at a time. */
+  s = buf->data_size + buf->metadata_size - conn->cursor;
+  if (s > BUFSIZE) {
+    s = BUFSIZE;
+  }
+  r = read(data_sock, buf->data + conn->cursor, s);
+
+  if (r == -1) {
+    LOG_ERR("read error");
+  } else if (r == 0) {
+    LOG_DEBUG("end of file");
+  } else {
+    conn->cursor += r;
+  }
+  if (conn->cursor == buf->data_size + buf->metadata_size) {
+    LOG_DEBUG("reading on channel %d finished", data_sock);
+    plasma_seal(conn->manager_state->store_conn, buf->object_id);
+    LL_DELETE(conn->transfer_queue, buf);
+    free(buf);
+    /* Switch to listening for requests from this socket, instead of reading
+     * data. */
+    event_loop_remove_file(loop, data_sock);
+    event_loop_add_file(loop, data_sock, EVENT_LOOP_READ, process_message,
+                        conn);
+  }
+  return;
+}
+
+void start_writing_data(event_loop *loop,
+                        object_id object_id,
+                        uint8_t addr[4],
+                        int port,
+                        client_connection *conn) {
+  uint8_t *data;
+  int64_t data_size;
+  uint8_t *metadata;
+  int64_t metadata_size;
+  plasma_get(conn->manager_state->store_conn, object_id, &data_size, &data,
+             &metadata_size, &metadata);
+  assert(metadata == data + data_size);
+  plasma_buffer *buf = malloc(sizeof(plasma_buffer));
+  buf->object_id = object_id;
+  buf->data = data; /* We treat this as a pointer to the
+                       concatenated data and metadata. */
+  buf->data_size = data_size;
+  buf->metadata_size = metadata_size;
+  buf->writable = 0;
+
+  /* Look to see if we already have a connection to this plasma manager. */
+  UT_string *ip_addr;
+  UT_string *ip_addr_port;
+  utstring_new(ip_addr);
+  utstring_new(ip_addr_port);
+  utstring_printf(ip_addr, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
+  utstring_printf(ip_addr_port, "%s:%d", utstring_body(ip_addr), port);
+  client_connection *manager_conn;
+  HASH_FIND_STR(conn->manager_state->manager_connections,
+                utstring_body(ip_addr_port), manager_conn);
+
+  if (!manager_conn) {
+    /* If we don't already have a connection to this manager, start one. */
+    manager_conn = malloc(sizeof(client_connection));
+    manager_conn->fd = plasma_manager_connect(utstring_body(ip_addr), port);
+    manager_conn->manager_state = conn->manager_state;
+    manager_conn->transfer_queue = NULL;
+    manager_conn->cursor = 0;
+
+    manager_conn->ip_addr_port = strdup(utstring_body(ip_addr_port));
+    HASH_ADD_KEYPTR(hh, manager_conn->manager_state->manager_connections,
+                    manager_conn->ip_addr_port,
+                    strlen(manager_conn->ip_addr_port), manager_conn);
+  }
+  utstring_free(ip_addr_port);
+  utstring_free(ip_addr);
+
+  if (manager_conn->transfer_queue == NULL) {
+    /* If this connection is new or currently idle (empty transfer queue),
+     * (re)register it with the event loop. */
+    event_loop_add_file(loop, manager_conn->fd, EVENT_LOOP_WRITE,
+                        write_object_chunk, manager_conn);
+  }
+  /* Add this transfer request to this connection's transfer queue. */
+  LL_APPEND(manager_conn->transfer_queue, buf);
+}
+
+void start_reading_data(event_loop *loop,
+                        int client_sock,
+                        object_id object_id,
+                        int64_t data_size,
+                        int64_t metadata_size,
+                        client_connection *conn) {
+  plasma_buffer *buf = malloc(sizeof(plasma_buffer));
+  buf->object_id = object_id;
+  buf->data_size = data_size;
+  buf->metadata_size = metadata_size;
+  buf->writable = 1;
+
+  plasma_create(conn->manager_state->store_conn, object_id, data_size, NULL,
+                metadata_size, &(buf->data));
+  LL_APPEND(conn->transfer_queue, buf);
+  conn->cursor = 0;
+
+  /* Switch to reading the data from this socket, instead of listening for
+   * other requests. */
+  event_loop_remove_file(loop, client_sock);
+  event_loop_add_file(loop, client_sock, EVENT_LOOP_READ, read_object_chunk,
+                      conn);
+}
+
+void process_message(event_loop *loop,
+                     int client_sock,
+                     void *context,
+                     int events) {
+  client_connection *conn = (client_connection *) context;
+
+  int64_t type;
+  int64_t length;
+  plasma_request *req;
+  read_message(client_sock, &type, &length, (uint8_t **) &req);
+
+  switch (type) {
+  case PLASMA_TRANSFER:
+    LOG_DEBUG("transfering object to manager with port %d", req->port);
+    start_writing_data(loop, req->object_id, req->addr, req->port, conn);
+    break;
+  case PLASMA_DATA:
+    LOG_DEBUG("starting to stream data");
+    start_reading_data(loop, client_sock, req->object_id, req->data_size,
+                       req->metadata_size, conn);
+    break;
+  case DISCONNECT_CLIENT: {
+    LOG_INFO("Disconnecting client on fd %d", client_sock);
+    event_loop_remove_file(loop, client_sock);
+    close(client_sock);
+    free(conn);
+  } break;
+  default:
+    LOG_ERR("invalid request %" PRId64, type);
+    exit(-1);
+  }
+
+  free(req);
+}
+
+void new_client_connection(event_loop *loop,
+                           int listener_sock,
+                           void *context,
+                           int events) {
+  int new_socket = accept_client(listener_sock);
+  /* Create a new data connection context per client. */
+  client_connection *conn = malloc(sizeof(client_connection));
+  conn->manager_state = (plasma_manager_state *) context;
+  conn->transfer_queue = NULL;
+  event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, conn);
+  LOG_DEBUG("new connection with fd %d", new_socket);
 }
 
 void start_server(const char *store_socket_name,
@@ -227,24 +320,27 @@ void start_server(const char *store_socket_name,
   name.sin_addr.s_addr = htonl(INADDR_ANY);
   int on = 1;
   /* TODO(pcm): http://stackoverflow.com/q/1150635 */
-  if (ioctl(sock, FIONBIO, (char*) &on) < 0) {
+  if (ioctl(sock, FIONBIO, (char *) &on) < 0) {
     LOG_ERR("ioctl failed");
     close(sock);
     exit(-1);
   }
   setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
-  if (bind(sock, (struct sockaddr*) &name, sizeof(name)) < 0) {
+  if (bind(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
     LOG_ERR("could not bind socket");
     exit(-1);
   }
-  LOG_INFO("listening on port %d", port);
+  LOG_DEBUG("listening on port %d", port);
   if (listen(sock, 5) == -1) {
     LOG_ERR("could not listen to socket");
     exit(-1);
   }
-  plasma_manager_state state;
-  init_plasma_manager(&state, store_socket_name);
-  run_event_loop(sock, &state);
+
+  event_loop *loop = event_loop_create();
+  plasma_manager_state *state = init_plasma_manager_state(store_socket_name);
+  event_loop_add_file(loop, sock, EVENT_LOOP_READ, new_client_connection,
+                      state);
+  event_loop_run(loop);
 }
 
 int main(int argc, char *argv[]) {
diff --git a/src/plasma_manager.h b/src/plasma_manager.h
index f7cf6b480..27075632e 100644
--- a/src/plasma_manager.h
+++ b/src/plasma_manager.h
@@ -4,30 +4,92 @@
 #include <poll.h>
 #include "utarray.h"
 
+typedef struct client_connection client_connection;
+
+/**
+ * Start transferring data to another object store manager.
+ *
+ * @param loop This is the event loop of the plasma manager.
+ * @param object_id The object_id of the object we will be sending.
+ * @param addr The IP address of the plasma manager we are sending the object
+ * to.
+ * @param port The port of the plasma manager we are sending the object to.
+ * @param conn The client_connection of the client that requested the transfer.
+ *
+ * This looks up or establishes a connection to the remote manager and queues
+ * the object on it; write_object_chunk later sends the data header.
+ */
+void start_writing_data(event_loop *loop,
+                        object_id object_id,
+                        uint8_t addr[4],
+                        int port,
+                        client_connection *conn);
+
+/**
+ * Start reading data from another object manager.
+ *
+ * @param loop This is the event loop of the plasma manager.
+ * @param client_sock The connection to the other plasma manager.
+ * @param object_id The object_id of the object we will be reading.
+ * @param data_size Size of the object.
+ * @param metadata_size Size of the metadata.
+ * @param conn The client_connection to the other plasma manager.
+ *
+ * Initializes the object we are going to write to in the
+ * local plasma store and then switches the data socket to reading mode.
+ */
+void start_reading_data(event_loop *loop,
+                        int client_sock,
+                        object_id object_id,
+                        int64_t data_size,
+                        int64_t metadata_size,
+                        client_connection *conn);
+
+/**
+ * Read the next chunk of the object in transit from the plasma manager
+ * that is connected to the socket "data_sock". Once all data has been read,
+ * the socket switches to listening for the next request.
+ *
+ * @param loop This is the event loop of the plasma manager.
+ * @param data_sock The connection to the other plasma manager.
+ * @param context The client_connection to the other plasma manager.
+ *
+ */
+void read_object_chunk(event_loop *loop,
+                       int data_sock,
+                       void *context,
+                       int events);
+
+/**
+ * Write the next chunk of the object being transferred to the plasma manager
+ * that is connected to the socket "data_sock". If no data has been sent yet,
+ * the initial handshake to transfer the object size is performed.
+ *
+ * @param loop This is the event loop of the plasma manager.
+ * @param data_sock This is the socket the other plasma manager is listening on.
+ * @param context The client_connection to the other plasma manager, contains a
+ *                queue of objects that will be sent.
+ */
+void write_object_chunk(event_loop *loop,
+                        int data_sock,
+                        void *context,
+                        int events);
+
+/**
+ * Register a new client connection with the plasma manager. A client can
+ * either be a worker or another plasma manager.
+ *
+ * @param loop This is the event loop of the plasma manager.
+ * @param listener_sock The socket the plasma manager is listening on.
+ * @param context The plasma manager state.
+ *
+ */
+void new_client_connection(event_loop *loop,
+                           int listener_sock,
+                           void *context,
+                           int events);
+
 /* The buffer size in bytes. Data will get transfered in multiples of this */
 #define BUFSIZE 4096
 
-enum connection_type { CONNECTION_REDIS, CONNECTION_LISTENER, CONNECTION_DATA };
-
-enum data_connection_type {
-  /* Connection to send commands and metadata to the manager. */
-  DATA_CONNECTION_HEADER,
-  /* Connection to send data to another manager. */
-  DATA_CONNECTION_WRITE,
-  /* Connection to receive data from another manager. */
-  DATA_CONNECTION_READ
-};
-
-typedef struct {
-  /* Of type data_connection_type. */
-  int type;
-  /* Local socket of the plasma store that is accessed for reading or writing
-   * data for this connection. */
-  int store_conn;
-  /* Buffer this connection is reading from or writing to. */
-  plasma_buffer buf;
-  /* Current position in the buffer. */
-  int64_t cursor;
-} data_connection;
-
-#endif
+#endif /* PLASMA_MANAGER_H */
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 85c9ad0b9..128a13292 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -21,23 +21,22 @@
 #include <limits.h>
 #include <poll.h>
 
+#include "common.h"
+#include "event_loop.h"
+#include "io.h"
 #include "uthash.h"
+#include "utarray.h"
 #include "fling.h"
 #include "malloc.h"
 #include "plasma.h"
-#include "event_loop.h"
 
-#define MAX_NUM_CLIENTS 100
+void *dlmalloc(size_t);
+void dlfree(void *);
 
-void* dlmalloc(size_t);
-void dlfree(void*);
-
-typedef struct {
-  /* Event loop for the plasma store. */
-  event_loop* loop;
-} plasma_store_state;
-
-void plasma_send_reply(int fd, plasma_reply* reply) {
+/**
+ * This is used by the Plasma Store to send a reply to the Plasma Client.
+ */
+void plasma_send_reply(int fd, plasma_reply *reply) {
   int reply_count = sizeof(plasma_reply);
   if (write(fd, reply, reply_count) != reply_count) {
     LOG_ERR("write error, fd = %d", fd);
@@ -45,14 +44,9 @@ void plasma_send_reply(int fd, plasma_reply* reply) {
   }
 }
 
-void init_state(plasma_store_state* s) {
-  s->loop = malloc(sizeof(event_loop));
-  event_loop_init(s->loop);
-}
-
 typedef struct {
   /* Object id of this object. */
-  plasma_id object_id;
+  object_id object_id;
   /* Object info like size, creation time and owner. */
   plasma_object_info info;
   /* Memory mapped file containing the object. */
@@ -64,39 +58,41 @@ typedef struct {
   /* Handle for the uthash table. */
   UT_hash_handle handle;
   /* Pointer to the object data. Needed to free the object. */
-  uint8_t* pointer;
+  uint8_t *pointer;
 } object_table_entry;
 
 /* Objects that are still being written by their owner process. */
-object_table_entry* open_objects = NULL;
+object_table_entry *open_objects = NULL;
 
 /* Objects that have already been sealed by their owner process and
  * can now be shared with other processes. */
-object_table_entry* sealed_objects = NULL;
+object_table_entry *sealed_objects = NULL;
 
 typedef struct {
   /* Object id of this object. */
-  plasma_id object_id;
-  /* Number of processes waiting for the object. */
-  int num_waiting;
-  /* Socket connections to waiting clients. */
-  int conn[MAX_NUM_CLIENTS];
+  object_id object_id;
+  /* Socket connections of waiting clients. */
+  UT_array *conns;
   /* Handle for the uthash table. */
   UT_hash_handle handle;
 } object_notify_entry;
 
 /* Objects that processes are waiting for. */
-object_notify_entry* objects_notify = NULL;
+object_notify_entry *objects_notify = NULL;
 
 /* Create a new object buffer in the hash table. */
-void create_object(int conn, plasma_request* req) {
-  LOG_INFO("creating object"); /* TODO(pcm): add object_id here */
+plasma_object create_object(int conn,
+                            object_id object_id,
+                            int64_t data_size,
+                            int64_t metadata_size,
+                            plasma_object *result) {
+  LOG_DEBUG("creating object"); /* TODO(pcm): add object_id here */
 
-  object_table_entry* entry;
-  HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
-  PLASMA_CHECK(entry == NULL, "Cannot create object twice.");
+  object_table_entry *entry;
+  HASH_FIND(handle, open_objects, &object_id, sizeof(object_id), entry);
+  CHECKM(entry == NULL, "Cannot create object twice.");
 
-  uint8_t* pointer = dlmalloc(req->data_size + req->metadata_size);
+  uint8_t *pointer = dlmalloc(data_size + metadata_size);
   int fd;
   int64_t map_size;
   ptrdiff_t offset;
@@ -104,197 +100,186 @@ void create_object(int conn, plasma_request* req) {
   assert(fd != -1);
 
   entry = malloc(sizeof(object_table_entry));
-  memcpy(&entry->object_id, &req->object_id, 20);
-  entry->info.data_size = req->data_size;
-  entry->info.metadata_size = req->metadata_size;
+  memcpy(&entry->object_id, &object_id, 20);
+  entry->info.data_size = data_size;
+  entry->info.metadata_size = metadata_size;
   entry->pointer = pointer;
   /* TODO(pcm): set the other fields */
   entry->fd = fd;
   entry->map_size = map_size;
   entry->offset = offset;
-  HASH_ADD(handle, open_objects, object_id, sizeof(plasma_id), entry);
-  plasma_reply reply;
-  memset(&reply, 0, sizeof(reply));
-  reply.data_offset = offset;
-  reply.metadata_offset = offset + req->data_size;
-  reply.map_size = map_size;
-  reply.data_size = req->data_size;
-  reply.metadata_size = req->metadata_size;
-  reply.store_fd_val = fd;
-  send_fd(conn, fd, (char*) &reply, sizeof(reply));
+  HASH_ADD(handle, open_objects, object_id, sizeof(object_id), entry);
+  object_handle handle = {.store_fd = fd, .mmap_size = map_size};
+  result->handle = handle;
+  result->data_offset = offset;
+  result->metadata_offset = offset + data_size;
+  result->data_size = data_size;
+  result->metadata_size = metadata_size;
 }
 
 /* Get an object from the hash table. */
-void get_object(int conn, plasma_request* req) {
-  object_table_entry* entry;
-  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
+int get_object(int conn, object_id object_id, plasma_object *result) {
+  object_table_entry *entry;
+  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
   if (entry) {
-    plasma_reply reply;
-    memset(&reply, 0, sizeof(plasma_reply));
-    reply.data_offset = entry->offset;
-    reply.map_size = entry->map_size;
-    reply.data_size = entry->info.data_size;
-    reply.metadata_size = entry->info.metadata_size;
-    reply.store_fd_val = entry->fd;
-    send_fd(conn, entry->fd, (char*) &reply, sizeof(plasma_reply));
+    object_handle handle = {.store_fd = entry->fd,
+                            .mmap_size = entry->map_size};
+    result->handle = handle;
+    result->data_offset = entry->offset;
+    result->metadata_offset = entry->offset + entry->info.data_size;
+    result->data_size = entry->info.data_size;
+    result->metadata_size = entry->info.metadata_size;
+    return OBJECT_FOUND;
   } else {
-    object_notify_entry* notify_entry;
-    LOG_INFO("object not in hash table of sealed objects");
-    HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id),
+    object_notify_entry *notify_entry;
+    LOG_DEBUG("object not in hash table of sealed objects");
+    HASH_FIND(handle, objects_notify, &object_id, sizeof(object_id),
               notify_entry);
     if (!notify_entry) {
       notify_entry = malloc(sizeof(object_notify_entry));
       memset(notify_entry, 0, sizeof(object_notify_entry));
-      notify_entry->num_waiting = 0;
-      memcpy(&notify_entry->object_id, &req->object_id, 20);
-      HASH_ADD(handle, objects_notify, object_id, sizeof(plasma_id),
+      utarray_new(notify_entry->conns, &ut_int_icd);
+      memcpy(&notify_entry->object_id, &object_id, 20);
+      HASH_ADD(handle, objects_notify, object_id, sizeof(object_id),
                notify_entry);
     }
-    PLASMA_CHECK(notify_entry->num_waiting < MAX_NUM_CLIENTS - 1,
-                 "This exceeds the maximum number of clients.");
-    notify_entry->conn[notify_entry->num_waiting] = conn;
-    notify_entry->num_waiting += 1;
+    utarray_push_back(notify_entry->conns, &conn);
   }
+  return OBJECT_NOT_FOUND;
 }
 
 /* Check if an object is present. */
-void check_if_object_present(int conn, plasma_request* req) {
-  object_table_entry* entry;
-  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
-  plasma_reply reply;
-  memset(&reply, 0, sizeof(plasma_reply));
-  reply.has_object = entry ? 1 : 0;
-  plasma_send_reply(conn, &reply);
+int contains_object(int conn, object_id object_id) {
+  object_table_entry *entry;
+  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
+  return entry ? OBJECT_FOUND : OBJECT_NOT_FOUND;
 }
 
 /* Seal an object that has been created in the hash table. */
-void seal_object(int conn, plasma_request* req) {
-  LOG_INFO("sealing object");  // TODO(pcm): add object_id here
-  object_table_entry* entry;
-  HASH_FIND(handle, open_objects, &req->object_id, sizeof(plasma_id), entry);
+void seal_object(int conn,
+                 object_id object_id,
+                 UT_array **conns,
+                 plasma_object *result) {
+  LOG_DEBUG("sealing object");  // TODO(pcm): add object_id here
+  object_table_entry *entry;
+  HASH_FIND(handle, open_objects, &object_id, sizeof(object_id), entry);
   if (!entry) {
     return; /* TODO(pcm): return error */
   }
-  int fd = entry->fd;
   HASH_DELETE(handle, open_objects, entry);
-  HASH_ADD(handle, sealed_objects, object_id, sizeof(plasma_id), entry);
+  HASH_ADD(handle, sealed_objects, object_id, sizeof(object_id), entry);
   /* Inform processes that the object is ready now. */
-  object_notify_entry* notify_entry;
-  HASH_FIND(handle, objects_notify, &req->object_id, sizeof(plasma_id),
+  object_notify_entry *notify_entry;
+  HASH_FIND(handle, objects_notify, &object_id, sizeof(object_id),
             notify_entry);
   if (!notify_entry) {
+    *conns = NULL;
     return;
   }
-  plasma_reply reply = {.data_offset = entry->offset,
-                        .map_size = entry->map_size,
-                        .data_size = entry->info.data_size,
-                        .metadata_size = entry->info.metadata_size,
-                        .store_fd_val = fd};
-  for (int i = 0; i < notify_entry->num_waiting; ++i) {
-    send_fd(notify_entry->conn[i], entry->fd, (char*) &reply,
-            sizeof(plasma_reply));
-  }
+  object_handle handle = {.store_fd = entry->fd, .mmap_size = entry->map_size};
+  result->handle = handle;
+  result->data_offset = entry->offset;
+  result->metadata_offset = entry->offset + entry->info.data_size;
+  result->data_size = entry->info.data_size;
+  result->metadata_size = entry->info.metadata_size;
   HASH_DELETE(handle, objects_notify, notify_entry);
+  *conns = notify_entry->conns;
   free(notify_entry);
 }
 
 /* Delete an object that has been created in the hash table. */
-void delete_object(int conn, plasma_request* req) {
-  LOG_INFO("deleting object");  // TODO(rkn): add object_id here
-  object_table_entry* entry;
-  HASH_FIND(handle, sealed_objects, &req->object_id, sizeof(plasma_id), entry);
+void delete_object(int conn, object_id object_id) {
+  LOG_DEBUG("deleting object");  // TODO(rkn): add object_id here
+  object_table_entry *entry;
+  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
   /* TODO(rkn): This should probably not fail, but should instead throw an
    * error. Maybe we should also support deleting objects that have been created
    * but not sealed. */
-  PLASMA_CHECK(entry != NULL, "To delete an object it must have been sealed.");
-  uint8_t* pointer = entry->pointer;
+  CHECKM(entry != NULL, "To delete an object it must have been sealed.");
+  uint8_t *pointer = entry->pointer;
   HASH_DELETE(handle, sealed_objects, entry);
   dlfree(pointer);
 }
 
-void process_event(int conn, plasma_request* req) {
-  switch (req->type) {
+void process_message(event_loop *loop,
+                     int client_sock,
+                     void *context,
+                     int events) {
+  int64_t type;
+  int64_t length;
+  plasma_request *req;
+  read_message(client_sock, &type, &length, (uint8_t **) &req);
+  plasma_reply reply;
+  memset(&reply, 0, sizeof(reply));
+  UT_array *conns = NULL; /* seal_object may return without setting it */
+
+  switch (type) {
   case PLASMA_CREATE:
-    create_object(conn, req);
+    create_object(client_sock, req->object_id, req->data_size,
+                  req->metadata_size, &reply.object);
+    send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
+            sizeof(reply));
     break;
   case PLASMA_GET:
-    get_object(conn, req);
+    if (get_object(client_sock, req->object_id, &reply.object) ==
+        OBJECT_FOUND) {
+      send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
+              sizeof(reply));
+    }
     break;
   case PLASMA_CONTAINS:
-    check_if_object_present(conn, req);
+    if (contains_object(client_sock, req->object_id) == OBJECT_FOUND) {
+      reply.has_object = 1;
+    }
+    plasma_send_reply(client_sock, &reply);
     break;
   case PLASMA_SEAL:
-    seal_object(conn, req);
+    seal_object(client_sock, req->object_id, &conns, &reply.object);
+    if (conns) {
+      for (int *c = (int *) utarray_front(conns); c != NULL;
+           c = (int *) utarray_next(conns, c)) {
+        send_fd(*c, reply.object.handle.store_fd, (char *) &reply,
+                sizeof(reply));
+      }
+      utarray_free(conns);
+    }
     break;
   case PLASMA_DELETE:
-    delete_object(conn, req);
+    delete_object(client_sock, req->object_id);
     break;
+  case DISCONNECT_CLIENT: {
+    LOG_DEBUG("Disconnecting client on fd %d", client_sock);
+    event_loop_remove_file(loop, client_sock);
+  } break;
   default:
-    LOG_ERR("invalid request %d", req->type);
-    exit(-1);
+    /* This code should be unreachable. */
+    CHECK(0);
   }
+
+  free(req);
 }
 
-void run_event_loop(int socket) {
-  plasma_store_state state;
-  init_state(&state);
-  event_loop_attach(state.loop, 0, NULL, socket, POLLIN);
-  plasma_request req;
-  while (1) {
-    int num_ready = event_loop_poll(state.loop);
-    if (num_ready < 0) {
-      LOG_ERR("poll failed");
-      exit(-1);
-    }
-    for (int i = 0; i < event_loop_size(state.loop); ++i) {
-      struct pollfd* waiting = event_loop_get(state.loop, i);
-      if (waiting->revents == 0)
-        continue;
-      if (waiting->fd == socket) {
-        /* Handle new incoming connections. */
-        int new_socket = accept(socket, NULL, NULL);
-        event_loop_attach(state.loop, 0, NULL, new_socket, POLLIN);
-        LOG_INFO("adding new client");
-      } else {
-        int r = read(waiting->fd, &req, sizeof(plasma_request));
-        if (r == -1) {
-          LOG_ERR("read error");
-          continue;
-        } else if (r == 0) {
-          LOG_INFO("connection %d disconnected", i);
-          event_loop_detach(state.loop, i, 1);
-        } else {
-          process_event(waiting->fd, &req);
-        }
-      }
-    }
-  }
+void new_client_connection(event_loop *loop,
+                           int listener_sock,
+                           void *context,
+                           int events) {
+  int new_socket = accept_client(listener_sock);
+  event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message,
+                      context);
+  LOG_DEBUG("new connection with fd %d", new_socket);
 }
 
-void start_server(char* socket_name) {
-  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
-  if (fd == -1) {
-    LOG_ERR("socket error");
-    exit(-1);
-  }
-  int on = 1;
-  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*) &on, sizeof(on)) < 0) {
-    LOG_ERR("setsockopt failed");
-    close(fd);
-    exit(-1);
-  }
-  struct sockaddr_un addr;
-  memset(&addr, 0, sizeof(addr));
-  addr.sun_family = AF_UNIX;
-  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
-  unlink(socket_name);
-  bind(fd, (struct sockaddr*) &addr, sizeof(addr));
-  listen(fd, 5);
-  run_event_loop(fd);
+void start_server(char *socket_name) {
+  int socket = bind_ipc_sock(socket_name);
+  CHECK(socket >= 0);
+  event_loop *loop = event_loop_create();
+  event_loop_add_file(loop, socket, EVENT_LOOP_READ, new_client_connection,
+                      NULL);
+  event_loop_run(loop);
 }
 
-int main(int argc, char* argv[]) {
-  char* socket_name = NULL;
+int main(int argc, char *argv[]) {
+  char *socket_name = NULL;
   int c;
   while ((c = getopt(argc, argv, "s:")) != -1) {
     switch (c) {
@@ -309,6 +294,6 @@ int main(int argc, char* argv[]) {
     LOG_ERR("please specify socket for incoming connections with -s switch");
     exit(-1);
   }
-  LOG_INFO("starting server listening on %s", socket_name);
+  LOG_DEBUG("starting server listening on %s", socket_name);
   start_server(socket_name);
 }
diff --git a/src/plasma_store.h b/src/plasma_store.h
new file mode 100644
index 000000000..6b8a0df1d
--- /dev/null
+++ b/src/plasma_store.h
@@ -0,0 +1,49 @@
+#ifndef PLASMA_STORE_H
+#define PLASMA_STORE_H
+
+#include "plasma.h"
+
+/**
+ * Create a new object:
+ *
+ * @param object_id Object ID of the object to be created.
+ * @param data_size Size in bytes of the object to be created.
+ * @param metadata_size Size in bytes of the object metadata.
+ */
+void create_object(int conn,
+                   object_id object_id,
+                   int64_t data_size,
+                   int64_t metadata_size,
+                   plasma_object *result);
+
+/**
+ * Get an object:
+ *
+ * @param object_id Object ID of the object to be gotten.
+ *
+ * Returns the status of the object (object_status in plasma.h).
+ */
+int get_object(int conn, object_id object_id, plasma_object *result);
+
+/**
+ * Seal an object:
+ *
+ * @param object_id Object ID of the object to be sealed.
+ * @param conns Returns the connections that are waiting for this object.
+ *              The caller is responsible for destroying this array.
+ *
+ * The caller should notify all sockets in conns (NULL if nobody is waiting).
+ */
+void seal_object(int conn,
+                 object_id object_id,
+                 UT_array **conns,
+                 plasma_object *result);
+
+/**
+ * Check if the plasma store contains an object:
+ *
+ * @param object_id Object ID that will be checked.
+ */
+int contains_object(int conn, object_id object_id);
+
+#endif /* PLASMA_STORE_H */
diff --git a/src/utarray.h b/src/utarray.h
deleted file mode 100644
index 979e99e98..000000000
--- a/src/utarray.h
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
-Copyright (c) 2008-2016, Troy D. Hanson   http://troydhanson.github.com/uthash/
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* a dynamic array implementation using macros
- */
-#ifndef UTARRAY_H
-#define UTARRAY_H
-
-#define UTARRAY_VERSION 2.0.1
-
-#ifdef __GNUC__
-#define _UNUSED_ __attribute__ ((__unused__))
-#else
-#define _UNUSED_
-#endif
-
-#include <stddef.h>  /* size_t */
-#include <string.h>  /* memset, etc */
-#include <stdlib.h>  /* exit */
-
-#ifndef oom
-#define oom() exit(-1)
-#endif
-
-typedef void (ctor_f)(void *dst, const void *src);
-typedef void (dtor_f)(void *elt);
-typedef void (init_f)(void *elt);
-typedef struct {
-    size_t sz;
-    init_f *init;
-    ctor_f *copy;
-    dtor_f *dtor;
-} UT_icd;
-
-typedef struct {
-    unsigned i,n;/* i: index of next available slot, n: num slots */
-    UT_icd icd;  /* initializer, copy and destructor functions */
-    char *d;     /* n slots of size icd->sz*/
-} UT_array;
-
-#define utarray_init(a,_icd) do {                                             \
-  memset(a,0,sizeof(UT_array));                                               \
-  (a)->icd = *(_icd);                                                         \
-} while(0)
-
-#define utarray_done(a) do {                                                  \
-  if ((a)->n) {                                                               \
-    if ((a)->icd.dtor) {                                                      \
-      unsigned _ut_i;                                                         \
-      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
-        (a)->icd.dtor(utarray_eltptr(a,_ut_i));                               \
-      }                                                                       \
-    }                                                                         \
-    free((a)->d);                                                             \
-  }                                                                           \
-  (a)->n=0;                                                                   \
-} while(0)
-
-#define utarray_new(a,_icd) do {                                              \
-  (a) = (UT_array*)malloc(sizeof(UT_array));                                  \
-  if ((a) == NULL) oom();                                                     \
-  utarray_init(a,_icd);                                                       \
-} while(0)
-
-#define utarray_free(a) do {                                                  \
-  utarray_done(a);                                                            \
-  free(a);                                                                    \
-} while(0)
-
-#define utarray_reserve(a,by) do {                                            \
-  if (((a)->i+(by)) > (a)->n) {                                               \
-    char *utarray_tmp;                                                        \
-    while (((a)->i+(by)) > (a)->n) { (a)->n = ((a)->n ? (2*(a)->n) : 8); }    \
-    utarray_tmp=(char*)realloc((a)->d, (a)->n*(a)->icd.sz);                   \
-    if (utarray_tmp == NULL) oom();                                           \
-    (a)->d=utarray_tmp;                                                       \
-  }                                                                           \
-} while(0)
-
-#define utarray_push_back(a,p) do {                                           \
-  utarray_reserve(a,1);                                                       \
-  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,(a)->i++), p); }      \
-  else { memcpy(_utarray_eltptr(a,(a)->i++), p, (a)->icd.sz); };              \
-} while(0)
-
-#define utarray_pop_back(a) do {                                              \
-  if ((a)->icd.dtor) { (a)->icd.dtor( _utarray_eltptr(a,--((a)->i))); }       \
-  else { (a)->i--; }                                                          \
-} while(0)
-
-#define utarray_extend_back(a) do {                                           \
-  utarray_reserve(a,1);                                                       \
-  if ((a)->icd.init) { (a)->icd.init(_utarray_eltptr(a,(a)->i)); }            \
-  else { memset(_utarray_eltptr(a,(a)->i),0,(a)->icd.sz); }                   \
-  (a)->i++;                                                                   \
-} while(0)
-
-#define utarray_len(a) ((a)->i)
-
-#define utarray_eltptr(a,j) (((j) < (a)->i) ? _utarray_eltptr(a,j) : NULL)
-#define _utarray_eltptr(a,j) ((a)->d + ((a)->icd.sz * (j)))
-
-#define utarray_insert(a,p,j) do {                                            \
-  if ((j) > (a)->i) utarray_resize(a,j);                                      \
-  utarray_reserve(a,1);                                                       \
-  if ((j) < (a)->i) {                                                         \
-    memmove( _utarray_eltptr(a,(j)+1), _utarray_eltptr(a,j),                  \
-             ((a)->i - (j))*((a)->icd.sz));                                   \
-  }                                                                           \
-  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,j), p); }             \
-  else { memcpy(_utarray_eltptr(a,j), p, (a)->icd.sz); };                     \
-  (a)->i++;                                                                   \
-} while(0)
-
-#define utarray_inserta(a,w,j) do {                                           \
-  if (utarray_len(w) == 0) break;                                             \
-  if ((j) > (a)->i) utarray_resize(a,j);                                      \
-  utarray_reserve(a,utarray_len(w));                                          \
-  if ((j) < (a)->i) {                                                         \
-    memmove(_utarray_eltptr(a,(j)+utarray_len(w)),                            \
-            _utarray_eltptr(a,j),                                             \
-            ((a)->i - (j))*((a)->icd.sz));                                    \
-  }                                                                           \
-  if ((a)->icd.copy) {                                                        \
-    unsigned _ut_i;                                                           \
-    for(_ut_i=0;_ut_i<(w)->i;_ut_i++) {                                       \
-      (a)->icd.copy(_utarray_eltptr(a, (j) + _ut_i), _utarray_eltptr(w, _ut_i)); \
-    }                                                                         \
-  } else {                                                                    \
-    memcpy(_utarray_eltptr(a,j), _utarray_eltptr(w,0),                        \
-           utarray_len(w)*((a)->icd.sz));                                     \
-  }                                                                           \
-  (a)->i += utarray_len(w);                                                   \
-} while(0)
-
-#define utarray_resize(dst,num) do {                                          \
-  unsigned _ut_i;                                                             \
-  if ((dst)->i > (unsigned)(num)) {                                           \
-    if ((dst)->icd.dtor) {                                                    \
-      for (_ut_i = (num); _ut_i < (dst)->i; ++_ut_i) {                        \
-        (dst)->icd.dtor(_utarray_eltptr(dst, _ut_i));                         \
-      }                                                                       \
-    }                                                                         \
-  } else if ((dst)->i < (unsigned)(num)) {                                    \
-    utarray_reserve(dst, (num) - (dst)->i);                                   \
-    if ((dst)->icd.init) {                                                    \
-      for (_ut_i = (dst)->i; _ut_i < (unsigned)(num); ++_ut_i) {              \
-        (dst)->icd.init(_utarray_eltptr(dst, _ut_i));                         \
-      }                                                                       \
-    } else {                                                                  \
-      memset(_utarray_eltptr(dst, (dst)->i), 0, (dst)->icd.sz*((num) - (dst)->i)); \
-    }                                                                         \
-  }                                                                           \
-  (dst)->i = (num);                                                           \
-} while(0)
-
-#define utarray_concat(dst,src) do {                                          \
-  utarray_inserta(dst, src, utarray_len(dst));                                \
-} while(0)
-
-#define utarray_erase(a,pos,len) do {                                         \
-  if ((a)->icd.dtor) {                                                        \
-    unsigned _ut_i;                                                           \
-    for (_ut_i = 0; _ut_i < (len); _ut_i++) {                                 \
-      (a)->icd.dtor(utarray_eltptr(a, (pos) + _ut_i));                        \
-    }                                                                         \
-  }                                                                           \
-  if ((a)->i > ((pos) + (len))) {                                             \
-    memmove(_utarray_eltptr(a, pos), _utarray_eltptr(a, (pos) + (len)),       \
-            ((a)->i - ((pos) + (len))) * (a)->icd.sz);                        \
-  }                                                                           \
-  (a)->i -= (len);                                                            \
-} while(0)
-
-#define utarray_renew(a,u) do {                                               \
-  if (a) utarray_clear(a);                                                    \
-  else utarray_new(a, u);                                                     \
-} while(0)
-
-#define utarray_clear(a) do {                                                 \
-  if ((a)->i > 0) {                                                           \
-    if ((a)->icd.dtor) {                                                      \
-      unsigned _ut_i;                                                         \
-      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
-        (a)->icd.dtor(_utarray_eltptr(a, _ut_i));                             \
-      }                                                                       \
-    }                                                                         \
-    (a)->i = 0;                                                               \
-  }                                                                           \
-} while(0)
-
-#define utarray_sort(a,cmp) do {                                              \
-  qsort((a)->d, (a)->i, (a)->icd.sz, cmp);                                    \
-} while(0)
-
-#define utarray_find(a,v,cmp) bsearch((v),(a)->d,(a)->i,(a)->icd.sz,cmp)
-
-#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a,0)) : NULL)
-#define utarray_next(a,e) (((e)==NULL) ? utarray_front(a) : ((((a)->i) > (utarray_eltidx(a,e)+1)) ? _utarray_eltptr(a,utarray_eltidx(a,e)+1) : NULL))
-#define utarray_prev(a,e) (((e)==NULL) ? utarray_back(a) : ((utarray_eltidx(a,e) > 0) ? _utarray_eltptr(a,utarray_eltidx(a,e)-1) : NULL))
-#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a,(a)->i-1)) : NULL)
-#define utarray_eltidx(a,e) (((char*)(e) >= (a)->d) ? (((char*)(e) - (a)->d)/(a)->icd.sz) : -1)
-
-/* last we pre-define a few icd for common utarrays of ints and strings */
-static void utarray_str_cpy(void *dst, const void *src) {
-  char **_src = (char**)src, **_dst = (char**)dst;
-  *_dst = (*_src == NULL) ? NULL : strdup(*_src);
-}
-static void utarray_str_dtor(void *elt) {
-  char **eltc = (char**)elt;
-  if (*eltc != NULL) free(*eltc);
-}
-static const UT_icd ut_str_icd _UNUSED_ = {sizeof(char*),NULL,utarray_str_cpy,utarray_str_dtor};
-static const UT_icd ut_int_icd _UNUSED_ = {sizeof(int),NULL,NULL,NULL};
-static const UT_icd ut_ptr_icd _UNUSED_ = {sizeof(void*),NULL,NULL,NULL};
-
-
-#endif /* UTARRAY_H */
diff --git a/src/uthash.h b/src/uthash.h
deleted file mode 100644
index 45d1f9fc1..000000000
--- a/src/uthash.h
+++ /dev/null
@@ -1,1074 +0,0 @@
-/*
-Copyright (c) 2003-2016, Troy D. Hanson     http://troydhanson.github.com/uthash/
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef UTHASH_H
-#define UTHASH_H
-
-#define UTHASH_VERSION 2.0.1
-
-#include <string.h>   /* memcmp,strlen */
-#include <stddef.h>   /* ptrdiff_t */
-#include <stdlib.h>   /* exit() */
-
-/* These macros use decltype or the earlier __typeof GNU extension.
-   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
-   when compiling c++ source) this code uses whatever method is needed
-   or, for VS2008 where neither is available, uses casting workarounds. */
-#if defined(_MSC_VER)   /* MS compiler */
-#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
-#define DECLTYPE(x) (decltype(x))
-#else                   /* VS2008 or older (or VS2010 in C mode) */
-#define NO_DECLTYPE
-#define DECLTYPE(x)
-#endif
-#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
-#define NO_DECLTYPE
-#define DECLTYPE(x)
-#else                   /* GNU, Sun and other compilers */
-#define DECLTYPE(x) (__typeof(x))
-#endif
-
-#ifdef NO_DECLTYPE
-#define DECLTYPE_ASSIGN(dst,src)                                                 \
-do {                                                                             \
-  char **_da_dst = (char**)(&(dst));                                             \
-  *_da_dst = (char*)(src);                                                       \
-} while (0)
-#else
-#define DECLTYPE_ASSIGN(dst,src)                                                 \
-do {                                                                             \
-  (dst) = DECLTYPE(dst)(src);                                                    \
-} while (0)
-#endif
-
-/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */
-#if defined(_WIN32)
-#if defined(_MSC_VER) && _MSC_VER >= 1600
-#include <stdint.h>
-#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
-#include <stdint.h>
-#else
-typedef unsigned int uint32_t;
-typedef unsigned char uint8_t;
-#endif
-#elif defined(__GNUC__) && !defined(__VXWORKS__)
-#include <stdint.h>
-#else
-typedef unsigned int uint32_t;
-typedef unsigned char uint8_t;
-#endif
-
-#ifndef uthash_fatal
-#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
-#endif
-#ifndef uthash_malloc
-#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
-#endif
-#ifndef uthash_free
-#define uthash_free(ptr,sz) free(ptr)     /* free fcn                        */
-#endif
-#ifndef uthash_strlen
-#define uthash_strlen(s) strlen(s)
-#endif
-#ifndef uthash_memcmp
-#define uthash_memcmp(a,b,n) memcmp(a,b,n)
-#endif
-
-#ifndef uthash_noexpand_fyi
-#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
-#endif
-#ifndef uthash_expand_fyi
-#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
-#endif
-
-/* initial number of buckets */
-#define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
-#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
-#define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
-
-/* calculate the element whose hash handle address is hhp */
-#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
-/* calculate the hash handle from element address elp */
-#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho)))
-
-#define HASH_VALUE(keyptr,keylen,hashv)                                          \
-do {                                                                             \
-  HASH_FCN(keyptr, keylen, hashv);                                               \
-} while (0)
-
-#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out)                 \
-do {                                                                             \
-  (out) = NULL;                                                                  \
-  if (head) {                                                                    \
-    unsigned _hf_bkt;                                                            \
-    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt);                  \
-    if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) {                         \
-      HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \
-    }                                                                            \
-  }                                                                              \
-} while (0)
-
-#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
-do {                                                                             \
-  unsigned _hf_hashv;                                                            \
-  HASH_VALUE(keyptr, keylen, _hf_hashv);                                         \
-  HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out);               \
-} while (0)
-
-#ifdef HASH_BLOOM
-#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM)
-#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL)
-#define HASH_BLOOM_MAKE(tbl)                                                     \
-do {                                                                             \
-  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
-  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
-  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
-  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
-  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
-} while (0)
-
-#define HASH_BLOOM_FREE(tbl)                                                     \
-do {                                                                             \
-  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
-} while (0)
-
-#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U)))
-#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U)))
-
-#define HASH_BLOOM_ADD(tbl,hashv)                                                \
-  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
-
-#define HASH_BLOOM_TEST(tbl,hashv)                                               \
-  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
-
-#else
-#define HASH_BLOOM_MAKE(tbl)
-#define HASH_BLOOM_FREE(tbl)
-#define HASH_BLOOM_ADD(tbl,hashv)
-#define HASH_BLOOM_TEST(tbl,hashv) (1)
-#define HASH_BLOOM_BYTELEN 0U
-#endif
-
-#define HASH_MAKE_TABLE(hh,head)                                                 \
-do {                                                                             \
-  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
-                  sizeof(UT_hash_table));                                        \
-  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
-  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
-  (head)->hh.tbl->tail = &((head)->hh);                                          \
-  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
-  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
-  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
-  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
-          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
-  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
-  memset((head)->hh.tbl->buckets, 0,                                             \
-          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
-  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
-  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
-} while (0)
-
-#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \
-do {                                                                             \
-  (replaced) = NULL;                                                             \
-  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
-  if (replaced) {                                                                \
-     HASH_DELETE(hh, head, replaced);                                            \
-  }                                                                              \
-  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \
-} while (0)
-
-#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \
-do {                                                                             \
-  (replaced) = NULL;                                                             \
-  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
-  if (replaced) {                                                                \
-     HASH_DELETE(hh, head, replaced);                                            \
-  }                                                                              \
-  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \
-} while (0)
-
-#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
-do {                                                                             \
-  unsigned _hr_hashv;                                                            \
-  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
-  HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \
-} while (0)
-
-#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn)    \
-do {                                                                             \
-  unsigned _hr_hashv;                                                            \
-  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
-  HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \
-} while (0)
-
-#define HASH_APPEND_LIST(hh, head, add)                                          \
-do {                                                                             \
-  (add)->hh.next = NULL;                                                         \
-  (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);           \
-  (head)->hh.tbl->tail->next = (add);                                            \
-  (head)->hh.tbl->tail = &((add)->hh);                                           \
-} while (0)
-
-#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \
-do {                                                                             \
-  unsigned _ha_bkt;                                                              \
-  (add)->hh.hashv = (hashval);                                                   \
-  (add)->hh.key = (char*) (keyptr);                                              \
-  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
-  if (!(head)) {                                                                 \
-    (add)->hh.next = NULL;                                                       \
-    (add)->hh.prev = NULL;                                                       \
-    (head) = (add);                                                              \
-    HASH_MAKE_TABLE(hh, head);                                                   \
-  } else {                                                                       \
-    struct UT_hash_handle *_hs_iter = &(head)->hh;                               \
-    (add)->hh.tbl = (head)->hh.tbl;                                              \
-    do {                                                                         \
-      if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \
-        break;                                                                   \
-    } while ((_hs_iter = _hs_iter->next));                                       \
-    if (_hs_iter) {                                                              \
-      (add)->hh.next = _hs_iter;                                                 \
-      if (((add)->hh.prev = _hs_iter->prev)) {                                   \
-        HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add);              \
-      } else {                                                                   \
-        (head) = (add);                                                          \
-      }                                                                          \
-      _hs_iter->prev = (add);                                                    \
-    } else {                                                                     \
-      HASH_APPEND_LIST(hh, head, add);                                           \
-    }                                                                            \
-  }                                                                              \
-  (head)->hh.tbl->num_items++;                                                   \
-  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
-  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
-  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
-  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
-  HASH_FSCK(hh, head);                                                           \
-} while (0)
-
-#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn)             \
-do {                                                                             \
-  unsigned _hs_hashv;                                                            \
-  HASH_VALUE(keyptr, keylen_in, _hs_hashv);                                      \
-  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \
-} while (0)
-
-#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \
-  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn)
-
-#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn)                 \
-  HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn)
-
-#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add)        \
-do {                                                                             \
-  unsigned _ha_bkt;                                                              \
-  (add)->hh.hashv = (hashval);                                                   \
-  (add)->hh.key = (char*) (keyptr);                                              \
-  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
-  if (!(head)) {                                                                 \
-    (add)->hh.next = NULL;                                                       \
-    (add)->hh.prev = NULL;                                                       \
-    (head) = (add);                                                              \
-    HASH_MAKE_TABLE(hh, head);                                                   \
-  } else {                                                                       \
-    (add)->hh.tbl = (head)->hh.tbl;                                              \
-    HASH_APPEND_LIST(hh, head, add);                                             \
-  }                                                                              \
-  (head)->hh.tbl->num_items++;                                                   \
-  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
-  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh);                 \
-  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
-  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
-  HASH_FSCK(hh, head);                                                           \
-} while (0)
-
-#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
-do {                                                                             \
-  unsigned _ha_hashv;                                                            \
-  HASH_VALUE(keyptr, keylen_in, _ha_hashv);                                      \
-  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add);      \
-} while (0)
-
-#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add)            \
-  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add)
-
-#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
-  HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add)
-
-#define HASH_TO_BKT(hashv,num_bkts,bkt)                                          \
-do {                                                                             \
-  bkt = ((hashv) & ((num_bkts) - 1U));                                           \
-} while (0)
-
-/* delete "delptr" from the hash table.
- * "the usual" patch-up process for the app-order doubly-linked-list.
- * The use of _hd_hh_del below deserves special explanation.
- * These used to be expressed using (delptr) but that led to a bug
- * if someone used the same symbol for the head and deletee, like
- *  HASH_DELETE(hh,users,users);
- * We want that to work, but by changing the head (users) below
- * we were forfeiting our ability to further refer to the deletee (users)
- * in the patch-up process. Solution: use scratch space to
- * copy the deletee pointer, then the latter references are via that
- * scratch pointer rather than through the repointed (users) symbol.
- */
-#define HASH_DELETE(hh,head,delptr)                                              \
-do {                                                                             \
-    struct UT_hash_handle *_hd_hh_del;                                           \
-    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
-        uthash_free((head)->hh.tbl->buckets,                                     \
-                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
-        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
-        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
-        head = NULL;                                                             \
-    } else {                                                                     \
-        unsigned _hd_bkt;                                                        \
-        _hd_hh_del = &((delptr)->hh);                                            \
-        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
-            (head)->hh.tbl->tail =                                               \
-                (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +               \
-                (head)->hh.tbl->hho);                                            \
-        }                                                                        \
-        if ((delptr)->hh.prev != NULL) {                                         \
-            ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +                  \
-                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
-        } else {                                                                 \
-            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
-        }                                                                        \
-        if (_hd_hh_del->next != NULL) {                                          \
-            ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next +                     \
-                    (head)->hh.tbl->hho))->prev =                                \
-                    _hd_hh_del->prev;                                            \
-        }                                                                        \
-        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
-        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
-        (head)->hh.tbl->num_items--;                                             \
-    }                                                                            \
-    HASH_FSCK(hh,head);                                                          \
-} while (0)
-
-
-/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
-#define HASH_FIND_STR(head,findstr,out)                                          \
-    HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out)
-#define HASH_ADD_STR(head,strfield,add)                                          \
-    HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add)
-#define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
-    HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced)
-#define HASH_FIND_INT(head,findint,out)                                          \
-    HASH_FIND(hh,head,findint,sizeof(int),out)
-#define HASH_ADD_INT(head,intfield,add)                                          \
-    HASH_ADD(hh,head,intfield,sizeof(int),add)
-#define HASH_REPLACE_INT(head,intfield,add,replaced)                             \
-    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
-#define HASH_FIND_PTR(head,findptr,out)                                          \
-    HASH_FIND(hh,head,findptr,sizeof(void *),out)
-#define HASH_ADD_PTR(head,ptrfield,add)                                          \
-    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
-#define HASH_REPLACE_PTR(head,ptrfield,add,replaced)                             \
-    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
-#define HASH_DEL(head,delptr)                                                    \
-    HASH_DELETE(hh,head,delptr)
-
-/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
- * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
- */
-#ifdef HASH_DEBUG
-#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
-#define HASH_FSCK(hh,head)                                                       \
-do {                                                                             \
-    struct UT_hash_handle *_thh;                                                 \
-    if (head) {                                                                  \
-        unsigned _bkt_i;                                                         \
-        unsigned _count;                                                         \
-        char *_prev;                                                             \
-        _count = 0;                                                              \
-        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
-            unsigned _bkt_count = 0;                                             \
-            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
-            _prev = NULL;                                                        \
-            while (_thh) {                                                       \
-               if (_prev != (char*)(_thh->hh_prev)) {                            \
-                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
-                    _thh->hh_prev, _prev );                                      \
-               }                                                                 \
-               _bkt_count++;                                                     \
-               _prev = (char*)(_thh);                                            \
-               _thh = _thh->hh_next;                                             \
-            }                                                                    \
-            _count += _bkt_count;                                                \
-            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
-               HASH_OOPS("invalid bucket count %u, actual %u\n",                 \
-                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
-            }                                                                    \
-        }                                                                        \
-        if (_count != (head)->hh.tbl->num_items) {                               \
-            HASH_OOPS("invalid hh item count %u, actual %u\n",                   \
-                (head)->hh.tbl->num_items, _count );                             \
-        }                                                                        \
-        /* traverse hh in app order; check next/prev integrity, count */         \
-        _count = 0;                                                              \
-        _prev = NULL;                                                            \
-        _thh =  &(head)->hh;                                                     \
-        while (_thh) {                                                           \
-           _count++;                                                             \
-           if (_prev !=(char*)(_thh->prev)) {                                    \
-              HASH_OOPS("invalid prev %p, actual %p\n",                          \
-                    _thh->prev, _prev );                                         \
-           }                                                                     \
-           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
-           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
-                                  (head)->hh.tbl->hho) : NULL );                 \
-        }                                                                        \
-        if (_count != (head)->hh.tbl->num_items) {                               \
-            HASH_OOPS("invalid app item count %u, actual %u\n",                  \
-                (head)->hh.tbl->num_items, _count );                             \
-        }                                                                        \
-    }                                                                            \
-} while (0)
-#else
-#define HASH_FSCK(hh,head)
-#endif
-
-/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
- * the descriptor to which this macro is defined for tuning the hash function.
- * The app can #include <unistd.h> to get the prototype for write(2). */
-#ifdef HASH_EMIT_KEYS
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
-do {                                                                             \
-    unsigned _klen = fieldlen;                                                   \
-    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
-    write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen);                      \
-} while (0)
-#else
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
-#endif
-
-/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
-#ifdef HASH_FUNCTION
-#define HASH_FCN HASH_FUNCTION
-#else
-#define HASH_FCN HASH_JEN
-#endif
-
-/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */
-#define HASH_BER(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned _hb_keylen=(unsigned)keylen;                                          \
-  const unsigned char *_hb_key=(const unsigned char*)(key);                      \
-  (hashv) = 0;                                                                   \
-  while (_hb_keylen-- != 0U) {                                                   \
-      (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                         \
-  }                                                                              \
-} while (0)
-
-
-/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
- * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
-#define HASH_SAX(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned _sx_i;                                                                \
-  const unsigned char *_hs_key=(const unsigned char*)(key);                      \
-  hashv = 0;                                                                     \
-  for(_sx_i=0; _sx_i < keylen; _sx_i++) {                                        \
-      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
-  }                                                                              \
-} while (0)
-/* FNV-1a variation */
-#define HASH_FNV(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned _fn_i;                                                                \
-  const unsigned char *_hf_key=(const unsigned char*)(key);                      \
-  hashv = 2166136261U;                                                           \
-  for(_fn_i=0; _fn_i < keylen; _fn_i++) {                                        \
-      hashv = hashv ^ _hf_key[_fn_i];                                            \
-      hashv = hashv * 16777619U;                                                 \
-  }                                                                              \
-} while (0)
-
-#define HASH_OAT(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned _ho_i;                                                                \
-  const unsigned char *_ho_key=(const unsigned char*)(key);                      \
-  hashv = 0;                                                                     \
-  for(_ho_i=0; _ho_i < keylen; _ho_i++) {                                        \
-      hashv += _ho_key[_ho_i];                                                   \
-      hashv += (hashv << 10);                                                    \
-      hashv ^= (hashv >> 6);                                                     \
-  }                                                                              \
-  hashv += (hashv << 3);                                                         \
-  hashv ^= (hashv >> 11);                                                        \
-  hashv += (hashv << 15);                                                        \
-} while (0)
-
-#define HASH_JEN_MIX(a,b,c)                                                      \
-do {                                                                             \
-  a -= b; a -= c; a ^= ( c >> 13 );                                              \
-  b -= c; b -= a; b ^= ( a << 8 );                                               \
-  c -= a; c -= b; c ^= ( b >> 13 );                                              \
-  a -= b; a -= c; a ^= ( c >> 12 );                                              \
-  b -= c; b -= a; b ^= ( a << 16 );                                              \
-  c -= a; c -= b; c ^= ( b >> 5 );                                               \
-  a -= b; a -= c; a ^= ( c >> 3 );                                               \
-  b -= c; b -= a; b ^= ( a << 10 );                                              \
-  c -= a; c -= b; c ^= ( b >> 15 );                                              \
-} while (0)
-
-#define HASH_JEN(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned _hj_i,_hj_j,_hj_k;                                                    \
-  unsigned const char *_hj_key=(unsigned const char*)(key);                      \
-  hashv = 0xfeedbeefu;                                                           \
-  _hj_i = _hj_j = 0x9e3779b9u;                                                   \
-  _hj_k = (unsigned)(keylen);                                                    \
-  while (_hj_k >= 12U) {                                                         \
-    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
-        + ( (unsigned)_hj_key[2] << 16 )                                         \
-        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
-    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
-        + ( (unsigned)_hj_key[6] << 16 )                                         \
-        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
-    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
-        + ( (unsigned)_hj_key[10] << 16 )                                        \
-        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
-                                                                                 \
-     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
-                                                                                 \
-     _hj_key += 12;                                                              \
-     _hj_k -= 12U;                                                               \
-  }                                                                              \
-  hashv += (unsigned)(keylen);                                                   \
-  switch ( _hj_k ) {                                                             \
-     case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */        \
-     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */        \
-     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */        \
-     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */        \
-     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */        \
-     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */        \
-     case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */        \
-     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */        \
-     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */        \
-     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */        \
-     case 1:  _hj_i += _hj_key[0];                                               \
-  }                                                                              \
-  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
-} while (0)
-
-/* The Paul Hsieh hash function */
-#undef get16bits
-#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
-  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
-#define get16bits(d) (*((const uint16_t *) (d)))
-#endif
-
-#if !defined (get16bits)
-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
-                       +(uint32_t)(((const uint8_t *)(d))[0]) )
-#endif
-#define HASH_SFH(key,keylen,hashv)                                               \
-do {                                                                             \
-  unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
-  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen;                                \
-                                                                                 \
-  unsigned _sfh_rem = _sfh_len & 3U;                                             \
-  _sfh_len >>= 2;                                                                \
-  hashv = 0xcafebabeu;                                                           \
-                                                                                 \
-  /* Main loop */                                                                \
-  for (;_sfh_len > 0U; _sfh_len--) {                                             \
-    hashv    += get16bits (_sfh_key);                                            \
-    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
-    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
-    _sfh_key += 2U*sizeof (uint16_t);                                            \
-    hashv    += hashv >> 11;                                                     \
-  }                                                                              \
-                                                                                 \
-  /* Handle end cases */                                                         \
-  switch (_sfh_rem) {                                                            \
-    case 3: hashv += get16bits (_sfh_key);                                       \
-            hashv ^= hashv << 16;                                                \
-            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18;              \
-            hashv += hashv >> 11;                                                \
-            break;                                                               \
-    case 2: hashv += get16bits (_sfh_key);                                       \
-            hashv ^= hashv << 11;                                                \
-            hashv += hashv >> 17;                                                \
-            break;                                                               \
-    case 1: hashv += *_sfh_key;                                                  \
-            hashv ^= hashv << 10;                                                \
-            hashv += hashv >> 1;                                                 \
-  }                                                                              \
-                                                                                 \
-    /* Force "avalanching" of final 127 bits */                                  \
-    hashv ^= hashv << 3;                                                         \
-    hashv += hashv >> 5;                                                         \
-    hashv ^= hashv << 4;                                                         \
-    hashv += hashv >> 17;                                                        \
-    hashv ^= hashv << 25;                                                        \
-    hashv += hashv >> 6;                                                         \
-} while (0)
-
-#ifdef HASH_USING_NO_STRICT_ALIASING
-/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
- * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
- * MurmurHash uses the faster approach only on CPU's where we know it's safe.
- *
- * Note the preprocessor built-in defines can be emitted using:
- *
- *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
- *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
- */
-#if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86))
-#define MUR_GETBLOCK(p,i) p[i]
-#else /* non intel */
-#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
-#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
-#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
-#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
-#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
-#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
-#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
-#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
-#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
-#else /* assume little endian non-intel */
-#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
-#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
-#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
-#endif
-#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :           \
-                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
-                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) :  \
-                                                      MUR_ONE_THREE(p))))
-#endif
-#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
-#define MUR_FMIX(_h) \
-do {                 \
-  _h ^= _h >> 16;    \
-  _h *= 0x85ebca6bu; \
-  _h ^= _h >> 13;    \
-  _h *= 0xc2b2ae35u; \
-  _h ^= _h >> 16;    \
-} while (0)
-
-#define HASH_MUR(key,keylen,hashv)                                     \
-do {                                                                   \
-  const uint8_t *_mur_data = (const uint8_t*)(key);                    \
-  const int _mur_nblocks = (int)(keylen) / 4;                          \
-  uint32_t _mur_h1 = 0xf88D5353u;                                      \
-  uint32_t _mur_c1 = 0xcc9e2d51u;                                      \
-  uint32_t _mur_c2 = 0x1b873593u;                                      \
-  uint32_t _mur_k1 = 0;                                                \
-  const uint8_t *_mur_tail;                                            \
-  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
-  int _mur_i;                                                          \
-  for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) {                   \
-    _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                        \
-    _mur_k1 *= _mur_c1;                                                \
-    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
-    _mur_k1 *= _mur_c2;                                                \
-                                                                       \
-    _mur_h1 ^= _mur_k1;                                                \
-    _mur_h1 = MUR_ROTL32(_mur_h1,13);                                  \
-    _mur_h1 = (_mur_h1*5U) + 0xe6546b64u;                              \
-  }                                                                    \
-  _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4));          \
-  _mur_k1=0;                                                           \
-  switch((keylen) & 3U) {                                              \
-    case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
-    case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8;  /* FALLTHROUGH */ \
-    case 1: _mur_k1 ^= (uint32_t)_mur_tail[0];                         \
-    _mur_k1 *= _mur_c1;                                                \
-    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
-    _mur_k1 *= _mur_c2;                                                \
-    _mur_h1 ^= _mur_k1;                                                \
-  }                                                                    \
-  _mur_h1 ^= (uint32_t)(keylen);                                       \
-  MUR_FMIX(_mur_h1);                                                   \
-  hashv = _mur_h1;                                                     \
-} while (0)
-#endif  /* HASH_USING_NO_STRICT_ALIASING */
-
-/* iterate over items in a known bucket to find desired item */
-#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out)               \
-do {                                                                             \
-  if ((head).hh_head != NULL) {                                                  \
-    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head));                     \
-  } else {                                                                       \
-    (out) = NULL;                                                                \
-  }                                                                              \
-  while ((out) != NULL) {                                                        \
-    if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) {       \
-      if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) {                \
-        break;                                                                   \
-      }                                                                          \
-    }                                                                            \
-    if ((out)->hh.hh_next != NULL) {                                             \
-      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next));                \
-    } else {                                                                     \
-      (out) = NULL;                                                              \
-    }                                                                            \
-  }                                                                              \
-} while (0)
-
-/* add an item to a bucket  */
-#define HASH_ADD_TO_BKT(head,addhh)                                              \
-do {                                                                             \
- head.count++;                                                                   \
- (addhh)->hh_next = head.hh_head;                                                \
- (addhh)->hh_prev = NULL;                                                        \
- if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); }                \
- (head).hh_head=addhh;                                                           \
- if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH))          \
-     && ((addhh)->tbl->noexpand != 1U)) {                                        \
-       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
- }                                                                               \
-} while (0)
-
-/* remove an item from a given bucket */
-#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
-    (head).count--;                                                              \
-    if ((head).hh_head == hh_del) {                                              \
-      (head).hh_head = hh_del->hh_next;                                          \
-    }                                                                            \
-    if (hh_del->hh_prev) {                                                       \
-        hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
-    }                                                                            \
-    if (hh_del->hh_next) {                                                       \
-        hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
-    }
-
-/* Bucket expansion has the effect of doubling the number of buckets
- * and redistributing the items into the new buckets. Ideally the
- * items will distribute more or less evenly into the new buckets
- * (the extent to which this is true is a measure of the quality of
- * the hash function as it applies to the key domain).
- *
- * With the items distributed into more buckets, the chain length
- * (item count) in each bucket is reduced. Thus by expanding buckets
- * the hash keeps a bound on the chain length. This bounded chain
- * length is the essence of how a hash provides constant time lookup.
- *
- * The calculation of tbl->ideal_chain_maxlen below deserves some
- * explanation. First, keep in mind that we're calculating the ideal
- * maximum chain length based on the *new* (doubled) bucket count.
- * In fractions this is just n/b (n=number of items,b=new num buckets).
- * Since the ideal chain length is an integer, we want to calculate
- * ceil(n/b). We don't depend on floating point arithmetic in this
- * hash, so to calculate ceil(n/b) with integers we could write
- *
- *      ceil(n/b) = (n/b) + ((n%b)?1:0)
- *
- * and in fact a previous version of this hash did just that.
- * But now we have improved things a bit by recognizing that b is
- * always a power of two. We keep its base 2 log handy (call it lb),
- * so now we can write this with a bit shift and logical AND:
- *
- *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
- *
- */
-#define HASH_EXPAND_BUCKETS(tbl)                                                 \
-do {                                                                             \
-    unsigned _he_bkt;                                                            \
-    unsigned _he_bkt_i;                                                          \
-    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
-    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
-    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
-             2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));            \
-    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
-    memset(_he_new_buckets, 0,                                                   \
-            2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));             \
-    tbl->ideal_chain_maxlen =                                                    \
-       (tbl->num_items >> (tbl->log2_num_buckets+1U)) +                          \
-       (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);        \
-    tbl->nonideal_items = 0;                                                     \
-    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
-    {                                                                            \
-        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
-        while (_he_thh != NULL) {                                                \
-           _he_hh_nxt = _he_thh->hh_next;                                        \
-           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt);           \
-           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
-           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
-             tbl->nonideal_items++;                                              \
-             _he_newbkt->expand_mult = _he_newbkt->count /                       \
-                                        tbl->ideal_chain_maxlen;                 \
-           }                                                                     \
-           _he_thh->hh_prev = NULL;                                              \
-           _he_thh->hh_next = _he_newbkt->hh_head;                               \
-           if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev =     \
-                _he_thh; }                                                       \
-           _he_newbkt->hh_head = _he_thh;                                        \
-           _he_thh = _he_hh_nxt;                                                 \
-        }                                                                        \
-    }                                                                            \
-    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
-    tbl->num_buckets *= 2U;                                                      \
-    tbl->log2_num_buckets++;                                                     \
-    tbl->buckets = _he_new_buckets;                                              \
-    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
-        (tbl->ineff_expands+1U) : 0U;                                            \
-    if (tbl->ineff_expands > 1U) {                                               \
-        tbl->noexpand=1;                                                         \
-        uthash_noexpand_fyi(tbl);                                                \
-    }                                                                            \
-    uthash_expand_fyi(tbl);                                                      \
-} while (0)
-
-
-/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
-/* Note that HASH_SORT assumes the hash handle name to be hh.
- * HASH_SRT was added to allow the hash handle name to be passed in. */
-#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
-#define HASH_SRT(hh,head,cmpfcn)                                                 \
-do {                                                                             \
-  unsigned _hs_i;                                                                \
-  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
-  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
-  if (head != NULL) {                                                            \
-      _hs_insize = 1;                                                            \
-      _hs_looping = 1;                                                           \
-      _hs_list = &((head)->hh);                                                  \
-      while (_hs_looping != 0U) {                                                \
-          _hs_p = _hs_list;                                                      \
-          _hs_list = NULL;                                                       \
-          _hs_tail = NULL;                                                       \
-          _hs_nmerges = 0;                                                       \
-          while (_hs_p != NULL) {                                                \
-              _hs_nmerges++;                                                     \
-              _hs_q = _hs_p;                                                     \
-              _hs_psize = 0;                                                     \
-              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
-                  _hs_psize++;                                                   \
-                  _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?              \
-                          ((void*)((char*)(_hs_q->next) +                        \
-                          (head)->hh.tbl->hho)) : NULL);                         \
-                  if (! (_hs_q) ) { break; }                                     \
-              }                                                                  \
-              _hs_qsize = _hs_insize;                                            \
-              while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
-                  if (_hs_psize == 0U) {                                         \
-                      _hs_e = _hs_q;                                             \
-                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
-                              ((void*)((char*)(_hs_q->next) +                    \
-                              (head)->hh.tbl->hho)) : NULL);                     \
-                      _hs_qsize--;                                               \
-                  } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) {           \
-                      _hs_e = _hs_p;                                             \
-                      if (_hs_p != NULL){                                        \
-                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
-                                ((void*)((char*)(_hs_p->next) +                  \
-                                (head)->hh.tbl->hho)) : NULL);                   \
-                       }                                                         \
-                      _hs_psize--;                                               \
-                  } else if ((                                                   \
-                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
-                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
-                             ) <= 0) {                                           \
-                      _hs_e = _hs_p;                                             \
-                      if (_hs_p != NULL){                                        \
-                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
-                               ((void*)((char*)(_hs_p->next) +                   \
-                               (head)->hh.tbl->hho)) : NULL);                    \
-                       }                                                         \
-                      _hs_psize--;                                               \
-                  } else {                                                       \
-                      _hs_e = _hs_q;                                             \
-                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
-                              ((void*)((char*)(_hs_q->next) +                    \
-                              (head)->hh.tbl->hho)) : NULL);                     \
-                      _hs_qsize--;                                               \
-                  }                                                              \
-                  if ( _hs_tail != NULL ) {                                      \
-                      _hs_tail->next = ((_hs_e != NULL) ?                        \
-                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
-                  } else {                                                       \
-                      _hs_list = _hs_e;                                          \
-                  }                                                              \
-                  if (_hs_e != NULL) {                                           \
-                  _hs_e->prev = ((_hs_tail != NULL) ?                            \
-                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
-                  }                                                              \
-                  _hs_tail = _hs_e;                                              \
-              }                                                                  \
-              _hs_p = _hs_q;                                                     \
-          }                                                                      \
-          if (_hs_tail != NULL){                                                 \
-            _hs_tail->next = NULL;                                               \
-          }                                                                      \
-          if ( _hs_nmerges <= 1U ) {                                             \
-              _hs_looping=0;                                                     \
-              (head)->hh.tbl->tail = _hs_tail;                                   \
-              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
-          }                                                                      \
-          _hs_insize *= 2U;                                                      \
-      }                                                                          \
-      HASH_FSCK(hh,head);                                                        \
- }                                                                               \
-} while (0)
-
-/* This function selects items from one hash into another hash.
- * The end result is that the selected items have dual presence
- * in both hashes. There is no copy of the items made; rather
- * they are added into the new hash through a secondary hash
- * hash handle that must be present in the structure. */
-#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
-do {                                                                             \
-  unsigned _src_bkt, _dst_bkt;                                                   \
-  void *_last_elt=NULL, *_elt;                                                   \
-  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
-  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
-  if (src != NULL) {                                                             \
-    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
-      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
-          _src_hh != NULL;                                                       \
-          _src_hh = _src_hh->hh_next) {                                          \
-          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
-          if (cond(_elt)) {                                                      \
-            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
-            _dst_hh->key = _src_hh->key;                                         \
-            _dst_hh->keylen = _src_hh->keylen;                                   \
-            _dst_hh->hashv = _src_hh->hashv;                                     \
-            _dst_hh->prev = _last_elt;                                           \
-            _dst_hh->next = NULL;                                                \
-            if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; }             \
-            if (dst == NULL) {                                                   \
-              DECLTYPE_ASSIGN(dst,_elt);                                         \
-              HASH_MAKE_TABLE(hh_dst,dst);                                       \
-            } else {                                                             \
-              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
-            }                                                                    \
-            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
-            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
-            (dst)->hh_dst.tbl->num_items++;                                      \
-            _last_elt = _elt;                                                    \
-            _last_elt_hh = _dst_hh;                                              \
-          }                                                                      \
-      }                                                                          \
-    }                                                                            \
-  }                                                                              \
-  HASH_FSCK(hh_dst,dst);                                                         \
-} while (0)
-
-#define HASH_CLEAR(hh,head)                                                      \
-do {                                                                             \
-  if (head != NULL) {                                                            \
-    uthash_free((head)->hh.tbl->buckets,                                         \
-                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
-    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
-    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
-    (head)=NULL;                                                                 \
-  }                                                                              \
-} while (0)
-
-#define HASH_OVERHEAD(hh,head)                                                   \
- ((head != NULL) ? (                                                             \
- (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
-          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
-           sizeof(UT_hash_table)                                   +             \
-           (HASH_BLOOM_BYTELEN))) : 0U)
-
-#ifdef NO_DECLTYPE
-#define HASH_ITER(hh,head,el,tmp)                                                \
-for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
-  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
-#else
-#define HASH_ITER(hh,head,el,tmp)                                                \
-for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL));      \
-  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
-#endif
-
-/* obtain a count of items in the hash */
-#define HASH_COUNT(head) HASH_CNT(hh,head)
-#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)
-
-typedef struct UT_hash_bucket {
-   struct UT_hash_handle *hh_head;
-   unsigned count;
-
-   /* expand_mult is normally set to 0. In this situation, the max chain length
-    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
-    * the bucket's chain exceeds this length, bucket expansion is triggered).
-    * However, setting expand_mult to a non-zero value delays bucket expansion
-    * (that would be triggered by additions to this particular bucket)
-    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
-    * (The multiplier is simply expand_mult+1). The whole idea of this
-    * multiplier is to reduce bucket expansions, since they are expensive, in
-    * situations where we know that a particular bucket tends to be overused.
-    * It is better to let its chain length grow to a longer yet-still-bounded
-    * value, than to do an O(n) bucket expansion too often.
-    */
-   unsigned expand_mult;
-
-} UT_hash_bucket;
-
-/* random signature used only to find hash tables in external analysis */
-#define HASH_SIGNATURE 0xa0111fe1u
-#define HASH_BLOOM_SIGNATURE 0xb12220f2u
-
-typedef struct UT_hash_table {
-   UT_hash_bucket *buckets;
-   unsigned num_buckets, log2_num_buckets;
-   unsigned num_items;
-   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
-   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
-
-   /* in an ideal situation (all buckets used equally), no bucket would have
-    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
-   unsigned ideal_chain_maxlen;
-
-   /* nonideal_items is the number of items in the hash whose chain position
-    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
-    * hash distribution; reaching them in a chain traversal takes >ideal steps */
-   unsigned nonideal_items;
-
-   /* ineffective expands occur when a bucket doubling was performed, but
-    * afterward, more than half the items in the hash had nonideal chain
-    * positions. If this happens on two consecutive expansions we inhibit any
-    * further expansion, as it's not helping; this happens when the hash
-    * function isn't a good fit for the key domain. When expansion is inhibited
-    * the hash will still work, albeit no longer in constant time. */
-   unsigned ineff_expands, noexpand;
-
-   uint32_t signature; /* used only to find hash tables in external analysis */
-#ifdef HASH_BLOOM
-   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
-   uint8_t *bloom_bv;
-   uint8_t bloom_nbits;
-#endif
-
-} UT_hash_table;
-
-typedef struct UT_hash_handle {
-   struct UT_hash_table *tbl;
-   void *prev;                       /* prev element in app order      */
-   void *next;                       /* next element in app order      */
-   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
-   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
-   void *key;                        /* ptr to enclosing struct's key  */
-   unsigned keylen;                  /* enclosing struct's key len     */
-   unsigned hashv;                   /* result of hash-fcn(key)        */
-} UT_hash_handle;
-
-#endif /* UTHASH_H */

From 5a0725ce9448946a517628ab2d4631d49b39c40d Mon Sep 17 00:00:00 2001
From: Ujval Misra <misraujval@gmail.com>
Date: Mon, 3 Oct 2016 18:35:13 -0700
Subject: [PATCH 57/91] Increase dlmalloc threshold along with granularity
 (#33)

* Increase allocation granularity dynamically with each MMAP call

* Fewer MMAP calls required when workload contains several objects.

* Delay hitting the per-process file descriptor constraint.

* Change type of GRANULARITY_MULTIPLIER

* Make granularity update more concise.

* Increase dlmalloc threshold along with granularity

* Eventually resolve issue of objects being allocated their own file if larger than dlmalloc threshold

* Avoid dlmalloc threshold and granularity integer overflow

* Update granularity directly without invoking dlmallopt

* Set the threshold to a fixed size (MAX_SIZE_T)

* Removed trailing whitespace
---
 src/malloc.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/malloc.c b/src/malloc.c
index 230fee38d..7dfd186ce 100644
--- a/src/malloc.c
+++ b/src/malloc.c
@@ -13,15 +13,14 @@
 void *fake_mmap(size_t);
 int fake_munmap(void *, size_t);
 
-size_t dlmalloc_granularity = ((size_t) 128U * 1024U);
-
 #define MMAP(s) fake_mmap(s)
 #define MUNMAP(a, s) fake_munmap(a, s)
 #define DIRECT_MMAP(s) fake_mmap(s)
 #define DIRECT_MUNMAP(a, s) fake_munmap(a, s)
 #define USE_DL_PREFIX
 #define HAVE_MORECORE 0
-#define DEFAULT_GRANULARITY (dlmalloc_granularity)
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+#define DEFAULT_GRANULARITY ((size_t) 128U * 1024U)
 
 #include "thirdparty/dlmalloc.c"
 
@@ -84,9 +83,8 @@ void *fake_mmap(size_t size) {
     return pointer;
   }
 
-  /* Update dlmalloc's allocation granularity for future calls */
-  dlmalloc_granularity *= GRANULARITY_MULTIPLIER;
-  dlmallopt(M_GRANULARITY, dlmalloc_granularity);
+  /* Increase dlmalloc's allocation granularity directly. */
+  mparams.granularity *= GRANULARITY_MULTIPLIER;
 
   struct mmap_record *record = malloc(sizeof(struct mmap_record));
   record->fd = fd;

From da5ec3b5e0244d29eb63ea5ac1d464b1c89504ed Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 4 Oct 2016 12:11:52 -0700
Subject: [PATCH 58/91] add documentation for tasks (#30)

---
 doc/tasks.md | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 doc/tasks.md

diff --git a/doc/tasks.md b/doc/tasks.md
new file mode 100644
index 000000000..b56f89783
--- /dev/null
+++ b/doc/tasks.md
@@ -0,0 +1,33 @@
+# Task specifications, task instances and task logs
+
+A *task specification* contains all information that is needed for computing
+the results of a task:
+
+- The function ID of the function that executes the task
+- The arguments (either object IDs for pass by reference
+or values for pass by value)
+- The IDs of the result objects
+
+From these, a task ID can be computed which is also stored in the task
+specification.
+
+A *task instance* represents one execution of a task specification.
+It consists of:
+
+- A scheduling state (WAITING, SCHEDULED, RUNNING, DONE)
+- The target node where the task is scheduled or executed
+- A unique task instance ID that identifies the particular execution
+  of the task.
+
+The task data structures are defined in `common/task.h`.
+
+The *task log* is a mapping from the task instance ID to a sequence of
+updates to the status of the task instance. It is updated by various parts
+of the system:
+
+1. The local scheduler writes it with status WAITING when it submits a task to the global scheduler
+2. The global scheduler appends an update WAITING -> SCHEDULED together with the node ID when assigning the task to a local scheduler
+3. The local scheduler appends an update SCHEDULED -> RUNNING when it assigns a task to a worker
+4. The local scheduler appends an update RUNNING -> DONE when the task finishes execution
+
+The task log is defined in `common/state/task_log.h`.

From 64c5e0880e9db4cca8f58954bc9d87c11e2f4ce2 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 4 Oct 2016 12:55:10 -0700
Subject: [PATCH 59/91] Let workers get tasks from local scheduler. (#5)

* Restructure to have separate client and scheduler files. Shared stuff is in photon.h.

* Let workers get tasks from local scheduler.
---
 Makefile             |   4 +-
 lib/python/photon.py |  69 ++++++++++++++++-
 photon.c             | 109 --------------------------
 photon.h             |  10 +--
 photon_client.c      |  14 ++++
 photon_client.h      |  53 +++++++++++--
 photon_scheduler.c   | 181 +++++++++++++++++++++++++++++++++++++++++++
 test/test.py         |   8 +-
 8 files changed, 319 insertions(+), 129 deletions(-)
 delete mode 100644 photon.c
 create mode 100644 photon_scheduler.c

diff --git a/Makefile b/Makefile
index 7c494a74e..ef5de50a7 100644
--- a/Makefile
+++ b/Makefile
@@ -7,8 +7,8 @@ all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.so
 $(BUILD)/photon_client.so: photon_client.h photon_client.c common
 	$(CC) $(CFLAGS) photon_client.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/photon_client.so
 
-$(BUILD)/photon_scheduler: photon.h photon.c common
-	$(CC) $(CFLAGS) -o $@ photon.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty -Icommon/
+$(BUILD)/photon_scheduler: photon.h photon_scheduler.c common
+	$(CC) $(CFLAGS) -o $@ photon_scheduler.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty -Icommon/
 
 common: FORCE
 	git submodule update --init --recursive
diff --git a/lib/python/photon.py b/lib/python/photon.py
index b0fc263a5..36f06ff2a 100644
--- a/lib/python/photon.py
+++ b/lib/python/photon.py
@@ -6,9 +6,57 @@ photon_client_library = ctypes.cdll.LoadLibrary(photon_client_library_path)
 photon_client_library.alloc_task_spec.restype = ctypes.c_void_p
 photon_client_library.photon_connect.restype = ctypes.c_void_p
 photon_client_library.photon_submit.restype = None
+photon_client_library.photon_get_task.restype = ctypes.c_void_p
 
 ID = ctypes.c_ubyte * 20
 
+buffer_from_read_write_memory = ctypes.pythonapi.PyBuffer_FromReadWriteMemory
+buffer_from_read_write_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
+buffer_from_read_write_memory.restype = ctypes.py_object
+
+buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
+buffer_from_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
+buffer_from_memory.restype = ctypes.py_object
+
+photon_client_library.task_function.restype = ctypes.c_void_p
+photon_client_library.task_num_args.restype = ctypes.c_int64
+photon_client_library.task_num_returns.restype = ctypes.c_int64
+photon_client_library.task_arg_type.restype = ctypes.c_int8
+photon_client_library.task_arg_id.restype = ctypes.c_void_p
+photon_client_library.task_arg_val.restype = ctypes.c_void_p
+photon_client_library.task_arg_length.restype = ctypes.c_void_p
+photon_client_library.task_return.restype = ctypes.c_void_p
+
+
+class TaskInfo(object):
+  def __init__(self, function_id, args, return_ids):
+    self.function_id = function_id
+    self.args = args
+    self.return_ids = return_ids
+
+def extract_task(c_task):
+  function_id = buffer_from_memory(photon_client_library.task_function(c_task), 20)[:]
+  num_args = photon_client_library.task_num_args(c_task)
+  num_returns = photon_client_library.task_num_returns(c_task)
+  arg_vals_and_ids = []
+  for i in range(num_args):
+    arg_type = photon_client_library.task_arg_type(c_task, i)
+    if arg_type == 0:
+      arg_id = buffer_from_memory(photon_client_library.task_arg_id(c_task, i), 20)
+      arg_vals_and_ids.append((arg_type, arg_id))
+    elif arg_type == 1:
+      arg_val = photon_client_library.task_arg_val(c_task, i)[:]
+      arg_length = photon_client_library.task_arg_length(c_task, i)
+      arg_value = buffer_from_memory(arg_val, arg_length)[:]
+      arg_vals_and_ids.append((arg_type, arg_value))
+    else:
+      raise Exception("arg_type must be 0 or 1")
+  return_ids = []
+  for i in range(num_returns):
+    ret_id = buffer_from_memory(photon_client_library.task_return(c_task, i), 20)
+    return_ids.append(ret_id[:])
+  return TaskInfo(function_id, arg_vals_and_ids, return_ids)
+
 class UniqueID(ctypes.Structure):
   _fields_ = [("unique_id", ID)]
 
@@ -19,11 +67,18 @@ def make_id(string):
   return UniqueID(unique_id=ID(*unique_id))
 
 class Task(object):
-  def __init__(self, function_id, args):
+  def __init__(self, function_id, args, return_ids):
     function_id = make_id(function_id)
     self.task_spec = ctypes.c_void_p(photon_client_library.alloc_task_spec(function_id, len(args), 1, 0))
     for arg in args:
-      photon_client_library.task_args_add_ref(self.task_spec, arg)
+      photon_client_library.task_args_add_ref(self.task_spec, make_id(arg))
+
+    # Add return IDs. This may not be the appropriate place for this.
+    num_returns = photon_client_library.task_num_returns(self.task_spec)
+    for i in range(num_returns):
+      ret_id = buffer_from_read_write_memory(photon_client_library.task_return(self.task_spec, i), 20)
+      for j in range(20):
+        ret_id[j] = return_ids[i][j]
 
   def __del__(self):
     photon_client_library.free_task_spec(self.task_spec)
@@ -33,6 +88,12 @@ class PhotonClient(object):
   def __init__(self, socket_name):
     self.photon_conn = ctypes.c_void_p(photon_client_library.photon_connect(socket_name))
 
-  def submit(self, function_id, args):
-    task = Task(function_id, args)
+  def submit(self, function_id, args, return_ids):
+    task = Task(function_id, args, return_ids)
     photon_client_library.photon_submit(self.photon_conn, task.task_spec)
+
+  def get_task(self):
+    c_task = ctypes.c_void_p(photon_client_library.photon_get_task(self.photon_conn))
+    task = c_task # TODO Extract the actual task. EXTRACT...(c_task)
+    # photon_client_library.free_task_spec(c_task)
+    return extract_task(task)
diff --git a/photon.c b/photon.c
deleted file mode 100644
index 8c84c2b95..000000000
--- a/photon.c
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <unistd.h>
-
-#include "common.h"
-#include "event_loop.h"
-#include "io.h"
-#include "photon.h"
-#include "state/db.h"
-#include "state/task_queue.h"
-#include "task.h"
-#include "utarray.h"
-
-typedef struct {
-  db_handle *db;
-  UT_array *task_queue;
-} local_scheduler_state;
-
-event_loop *init_local_scheduler() { return event_loop_create(); };
-
-void process_message(event_loop *loop, int client_sock, void *context,
-                     int events) {
-  local_scheduler_state *s = context;
-
-  uint8_t *message;
-  int64_t type;
-  int64_t length;
-  read_message(client_sock, &type, &length, &message);
-
-  switch (type) {
-  case SUBMIT_TASK: {
-    task_spec *task = (task_spec *)message;
-    CHECK(task_size(task) == length);
-    unique_id id = globally_unique_id();
-    task_queue_submit_task(s->db, id, task);
-  } break;
-  case TASK_DONE: {
-  } break;
-  case DISCONNECT_CLIENT: {
-    LOG_INFO("Disconnecting client on fd %d", client_sock);
-    event_loop_remove_file(loop, client_sock);
-  } break;
-  case LOG_MESSAGE: {
-  } break;
-  default:
-    /* This code should be unreachable. */
-    CHECK(0);
-  }
-  free(message);
-}
-
-void new_client_connection(event_loop *loop, int listener_sock, void *context,
-                           int events) {
-  local_scheduler_state *s = context;
-  int new_socket = accept_client(listener_sock);
-  event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, s);
-  LOG_INFO("new connection with fd %d", new_socket);
-}
-
-void start_server(const char *socket_name, const char *redis_addr,
-                  int redis_port) {
-  int fd = bind_ipc_sock(socket_name);
-  local_scheduler_state state;
-  event_loop *loop = init_local_scheduler();
-
-  state.db = db_connect(redis_addr, redis_port, "photon", "", -1);
-  db_attach(state.db, loop);
-
-  /* Run event loop. */
-  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, &state);
-  event_loop_run(loop);
-}
-
-int main(int argc, char *argv[]) {
-  /* Path of the listening socket of the local scheduler. */
-  char *scheduler_socket_name = NULL;
-  /* IP address and port of redis. */
-  char *redis_addr_port = NULL;
-  int c;
-  while ((c = getopt(argc, argv, "s:r:")) != -1) {
-    switch (c) {
-    case 's':
-      scheduler_socket_name = optarg;
-      break;
-    case 'r':
-      redis_addr_port = optarg;
-      break;
-    default:
-      LOG_ERR("unknown option %c", c);
-      exit(-1);
-    }
-  }
-  if (!scheduler_socket_name) {
-    LOG_ERR("please specify socket for incoming connections with -s switch");
-    exit(-1);
-  }
-  char redis_addr[16] = {0};
-  char redis_port[6] = {0};
-  if (!redis_addr_port ||
-      sscanf(redis_addr_port, "%15[0-9.]:%5[0-9]", redis_addr, redis_port) !=
-          2) {
-    LOG_ERR("need to specify redis address like 127.0.0.1:6379 with -r switch");
-    exit(-1);
-  }
-  start_server(scheduler_socket_name, &redis_addr[0], atoi(redis_port));
-}
diff --git a/photon.h b/photon.h
index 6a213c4a5..a59e5566f 100644
--- a/photon.h
+++ b/photon.h
@@ -4,11 +4,11 @@
 enum photon_message_type {
   /** Notify the local scheduler that a task has finished. */
   TASK_DONE = 64,
-};
-
-struct photon_conn_impl {
-  /* File descriptor of the Unix domain socket that connects to photon. */
-  int conn;
+  /** Get a new task from the local scheduler. */
+  GET_TASK,
+  /** This is sent from the local scheduler to a worker to tell the worker to
+   *  execute a task. */
+  EXECUTE_TASK,
 };
 
 #endif
diff --git a/photon_client.c b/photon_client.c
index a33b25631..1bf87f491 100644
--- a/photon_client.c
+++ b/photon_client.c
@@ -14,6 +14,20 @@ void photon_submit(photon_conn *conn, task_spec *task) {
   write_message(conn->conn, SUBMIT_TASK, task_size(task), (uint8_t *)task);
 }
 
+task_spec *photon_get_task(photon_conn *conn) {
+  write_message(conn->conn, GET_TASK, 0, NULL);
+  int64_t type;
+  int64_t length;
+  uint8_t *message;
+  /* Receive a task from the local scheduler. This will block until the local
+   * scheduler gives this client a task. */
+  read_message(conn->conn, &type, &length, &message);
+  CHECK(type == EXECUTE_TASK);
+  task_spec *task = (task_spec *)message;
+  CHECK(length == task_size(task));
+  return task;
+}
+
 void photon_task_done(photon_conn *conn) {
   write_message(conn->conn, TASK_DONE, 0, NULL);
 }
diff --git a/photon_client.h b/photon_client.h
index 3163d8b7d..76b09455c 100644
--- a/photon_client.h
+++ b/photon_client.h
@@ -4,24 +4,63 @@
 #include "common/task.h"
 #include "photon.h"
 
-typedef struct photon_conn_impl photon_conn;
+typedef struct {
+  /* File descriptor of the Unix domain socket that connects to photon. */
+  int conn;
+} photon_conn;
 
-/* Connect to the local scheduler. */
+/**
+ * Connect to the local scheduler.
+ *
+ * @param photon_socket The name of the socket to use to connect to the local
+ *        scheduler.
+ * @return The connection information.
+ */
 photon_conn *photon_connect(const char *photon_socket);
 
-/* Submit a task to the local scheduler. */
+/**
+ * Submit a task to the local scheduler.
+ *
+ * @param conn The connection information.
+ * @param task The address of the task to submit.
+ * @return Void.
+ */
 void photon_submit(photon_conn *conn, task_spec *task);
 
-/* Get next task for this client. */
+/**
+ * Get next task for this client. This will block until the scheduler assigns
+ * a task to this worker. This allocates and returns a task, and so the task
+ * must be freed by the caller.
+ *
+ * @todo When does this actually get freed?
+ *
+ * @param conn The connection information.
+ * @return The address of the assigned task.
+ */
 task_spec *photon_get_task(photon_conn *conn);
 
-/* Tell the local scheduler that the client has finished executing a task. */
+/**
+ * Tell the local scheduler that the client has finished executing a task.
+ *
+ * @param conn The connection information.
+ * @return Void.
+ */
 void photon_task_done(photon_conn *conn);
 
-/* Disconnect from the local scheduler. */
+/**
+ * Disconnect from the local scheduler.
+ *
+ * @param conn The connection information.
+ * @return Void.
+ */
 void photon_disconnect(photon_conn *conn);
 
-/* Send a log message to the local scheduler. */
+/**
+ * Send a log message to the local scheduler.
+ *
+ * @param conn The connection information.
+ * @return Void.
+ */
 void photon_log_message(photon_conn *conn);
 
 #endif
diff --git a/photon_scheduler.c b/photon_scheduler.c
new file mode 100644
index 000000000..bf5672bcd
--- /dev/null
+++ b/photon_scheduler.c
@@ -0,0 +1,181 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "event_loop.h"
+#include "io.h"
+#include "photon.h"
+#include "state/db.h"
+#include "state/task_queue.h"
+#include "task.h"
+#include "utarray.h"
+
+typedef struct {
+  /** The file descriptor used to communicate with the worker. */
+  int client_sock;
+} available_worker;
+
+/* These are needed to define the UT_arrays. */
+UT_icd task_ptr_icd = {sizeof(task_spec *), NULL, NULL, NULL};
+UT_icd worker_icd = {sizeof(available_worker), NULL, NULL, NULL};
+
+typedef struct {
+  db_handle *db;
+  /** This is an array of pointers to tasks that are waiting to be scheduled. */
+  UT_array *task_queue;
+  /** This is an array of file descriptors corresponding to clients that are
+   *  waiting for tasks. */
+  UT_array *available_worker_queue;
+} local_scheduler_state;
+
+void try_to_assign_task(task_spec *task, local_scheduler_state *s);
+void try_to_assign_task_to_worker(int client_sock, local_scheduler_state *s);
+
+event_loop *init_local_scheduler() { return event_loop_create(); };
+
+void process_message(event_loop *loop, int client_sock, void *context,
+                     int events) {
+  local_scheduler_state *s = context;
+
+  uint8_t *message;
+  int64_t type;
+  int64_t length;
+  read_message(client_sock, &type, &length, &message);
+
+  switch (type) {
+  case SUBMIT_TASK: {
+    task_spec *task = (task_spec *)message;
+    CHECK(task_size(task) == length);
+    /* Create a unique task instance ID. This is different from the task ID and
+     * is used to distinguish between potentially multiple executions of the
+     * task. */
+    unique_id id = globally_unique_id();
+    // task_queue_submit_task(s->db, id, task);
+    /* Try to assign the task to a worker locally. TODO(rkn): This should
+     * probably go somewhere else. */
+    try_to_assign_task(task, s);
+  } break;
+  case TASK_DONE: {
+  } break;
+  case GET_TASK: {
+    try_to_assign_task_to_worker(client_sock, s);
+  } break;
+  case DISCONNECT_CLIENT: {
+    LOG_INFO("Disconnecting client on fd %d", client_sock);
+    event_loop_remove_file(loop, client_sock);
+  } break;
+  case LOG_MESSAGE: {
+  } break;
+  default:
+    /* This code should be unreachable. */
+    CHECK(0);
+  }
+  free(message);
+}
+
+void try_to_assign_task(task_spec *task, local_scheduler_state *s) {
+  /* Assign this task to an available worker. If there are no available workers,
+   * then add this task to the local task queue. */
+  if (utarray_len(s->available_worker_queue) > 0) {
+    /* Get the last available worker in the available worker queue. */
+    available_worker *worker =
+        (available_worker *)utarray_back(s->available_worker_queue);
+    /* Tell the available worker to execute the task. */
+    write_message(worker->client_sock, EXECUTE_TASK, task_size(task),
+                  (uint8_t *)task);
+    utarray_pop_back(s->available_worker_queue);
+    /* TODO: Do we need to free the available_worker struct? */
+  } else {
+    /* Add the task to the task queue. */
+    task_spec *task_copy = malloc(task_size(task));
+    memcpy(task_copy, task, task_size(task));
+    utarray_push_back(s->task_queue, &task_copy);
+  }
+}
+
+void try_to_assign_task_to_worker(int client_sock, local_scheduler_state *s) {
+  if (utarray_len(s->task_queue) > 0) {
+    /* Get the last task in the task queue. */
+    task_spec **task_ptr = (task_spec **)utarray_back(s->task_queue);
+    task_spec *task = *task_ptr;
+    /* Send a task to the worker. */
+    write_message(client_sock, EXECUTE_TASK, task_size(task), (uint8_t *)task);
+    /* Update the task queue data structure and free the task. */
+    utarray_pop_back(s->task_queue);
+    free(task);
+  } else {
+    /* Check that client_sock is not already in the available workers. */
+    for (available_worker *p =
+             (available_worker *)utarray_front(s->available_worker_queue);
+         p != NULL;
+         p = (available_worker *)utarray_next(s->available_worker_queue, p)) {
+      CHECK(p->client_sock != client_sock);
+    }
+    /* Add client_sock to a list of available workers. */
+    available_worker worker_info = {.client_sock = client_sock};
+    utarray_push_back(s->available_worker_queue, &worker_info);
+    LOG_INFO("Adding client_sock %d to available workers.\n", client_sock);
+  }
+}
+
+void new_client_connection(event_loop *loop, int listener_sock, void *context,
+                           int events) {
+  local_scheduler_state *s = context;
+  int new_socket = accept_client(listener_sock);
+  event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, s);
+  LOG_INFO("new connection with fd %d", new_socket);
+}
+
+void start_server(const char *socket_name, const char *redis_addr,
+                  int redis_port) {
+  int fd = bind_ipc_sock(socket_name);
+  local_scheduler_state state;
+  event_loop *loop = init_local_scheduler();
+
+  state.db = db_connect(redis_addr, redis_port, "photon", "", -1);
+  db_attach(state.db, loop);
+  utarray_new(state.task_queue, &task_ptr_icd);
+  utarray_new(state.available_worker_queue, &worker_icd);
+
+  /* Run event loop. */
+  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, &state);
+  event_loop_run(loop);
+}
+
+int main(int argc, char *argv[]) {
+  /* Path of the listening socket of the local scheduler. */
+  char *scheduler_socket_name = NULL;
+  /* IP address and port of redis. */
+  char *redis_addr_port = NULL;
+  int c;
+  while ((c = getopt(argc, argv, "s:r:")) != -1) {
+    switch (c) {
+    case 's':
+      scheduler_socket_name = optarg;
+      break;
+    case 'r':
+      redis_addr_port = optarg;
+      break;
+    default:
+      LOG_ERR("unknown option %c", c);
+      exit(-1);
+    }
+  }
+  if (!scheduler_socket_name) {
+    LOG_ERR("please specify socket for incoming connections with -s switch");
+    exit(-1);
+  }
+  char redis_addr[16] = {0};
+  char redis_port[6] = {0};
+  if (!redis_addr_port ||
+      sscanf(redis_addr_port, "%15[0-9.]:%5[0-9]", redis_addr, redis_port) !=
+          2) {
+    LOG_ERR("need to specify redis address like 127.0.0.1:6379 with -r switch");
+    exit(-1);
+  }
+  start_server(scheduler_socket_name, &redis_addr[0], atoi(redis_port));
+}
diff --git a/test/test.py b/test/test.py
index 2307f2276..0517c40a5 100644
--- a/test/test.py
+++ b/test/test.py
@@ -30,8 +30,12 @@ class TestPhotonClient(unittest.TestCase):
     self.p2.kill()
 
   def test_create(self):
-    l = [photon.make_id(20 * "a"), photon.make_id(20 * "b"), photon.make_id(20 * "c")]
-    self.photon_client.submit(20 * "a", l)
+    l = [20 * "a", 20 * "b", 20 * "c"]
+    r = [20 * "e", 20 * "f"]
+    # Submit a task.
+    self.photon_client.submit(20 * "d", l, r)
+    # Get the task.
+    task = self.photon_client.get_task()
 
 if __name__ == "__main__":
   unittest.main(verbosity=2)

From 8e044535e2624c53a2d4fca974fdeceaf2b9be1c Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 4 Oct 2016 15:56:24 -0700
Subject: [PATCH 60/91] Python API for constructing tasks (#28)

* Python API for constructing tasks

* Fixes.
---
 .clang-format              |   5 +-
 .travis.yml                |  15 ++++
 install-dependencies.sh    |  21 +++++
 lib/python/object_id.c     |  91 +++++++++++++++++++
 lib/python/serialization.c |  80 +++++++++++++++++
 lib/python/setup.py        |  12 +++
 lib/python/task.c          | 177 +++++++++++++++++++++++++++++++++++++
 lib/python/types.h         |  33 +++++++
 task.h                     |   1 -
 test/test.py               |  51 +++++++++++
 10 files changed, 484 insertions(+), 2 deletions(-)
 create mode 100755 install-dependencies.sh
 create mode 100644 lib/python/object_id.c
 create mode 100644 lib/python/serialization.c
 create mode 100644 lib/python/setup.py
 create mode 100644 lib/python/task.c
 create mode 100644 lib/python/types.h
 create mode 100644 test/test.py

diff --git a/.clang-format b/.clang-format
index 90d254290..89b87e25d 100644
--- a/.clang-format
+++ b/.clang-format
@@ -3,4 +3,7 @@ DerivePointerAlignment: true
 IndentCaseLabels: false
 PointerAlignment: Right
 SpaceAfterCStyleCast: true
-
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
diff --git a/.travis.yml b/.travis.yml
index 220df4b86..174fee43f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,16 @@ matrix:
   include:
     - os: linux
       dist: trusty
+      python: "2.7"
+    - os: linux
+      dist: trusty
+      python: "3.5"
     - os: osx
       osx_image: xcode7
+      python: "2.7"
+    - os: osx
+      osx_image: xcode7
+      python: "3.5"
     - os: linux
       dist: trusty
       env: LINT=1
@@ -22,5 +30,12 @@ matrix:
         - .travis/check-git-clang-format-output.sh
 
 install:
+  - ./install-dependencies.sh
   - make
   - make test
+  - cd lib/python
+  - python setup.py install --user
+  - cd ../..
+
+script:
+  - python test/test.py
diff --git a/install-dependencies.sh b/install-dependencies.sh
new file mode 100755
index 000000000..f84da1684
--- /dev/null
+++ b/install-dependencies.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+
+platform="unknown"
+unamestr="$(uname)"
+if [[ "$unamestr" == "Linux" ]]; then
+  echo "Platform is linux."
+  platform="linux"
+elif [[ "$unamestr" == "Darwin" ]]; then
+  echo "Platform is macosx."
+  platform="macosx"
+else
+  echo "Unrecognized platform."
+  exit 1
+fi
+
+if [[ $platform == "linux" ]]; then
+  sudo apt-get update
+  sudo apt-get install -y git python-dev
+fi
diff --git a/lib/python/object_id.c b/lib/python/object_id.c
new file mode 100644
index 000000000..bd7db9bc7
--- /dev/null
+++ b/lib/python/object_id.c
@@ -0,0 +1,91 @@
+#include "types.h"
+
+int PyObjectToUniqueID(PyObject *object, object_id *objectid) {
+  /* "O&" converter; checks the C type because the struct is read directly. */
+  if (!PyObject_TypeCheck(object, &PyObjectIDType)) {
+    PyErr_SetString(PyExc_TypeError, "must be an ObjectID");
+    return 0;
+  }
+  *objectid = ((PyObjectID *) object)->object_id;
+  return 1;
+}
+
+static int PyObjectID_init(PyObjectID *self, PyObject *args, PyObject *kwds) {
+  const char *bytes; /* points into the parsed argument; not owned here */
+  int num_bytes;
+  if (!PyArg_ParseTuple(args, "s#", &bytes, &num_bytes)) {
+    return -1;
+  }
+  if (num_bytes == UNIQUE_ID_SIZE) {
+    memcpy(&self->object_id.id[0], bytes, UNIQUE_ID_SIZE);
+    return 0;
+  }
+  PyErr_SetString(CommonError,
+                  "ObjectID: object id string needs to have length 20");
+  return -1;
+}
+
+/* Create a PyObjectID from C; returns NULL (error set) if allocation fails. */
+PyObject *PyObjectID_make(object_id object_id) {
+  PyObjectID *result = PyObject_New(PyObjectID, &PyObjectIDType);
+  if (result != NULL) /* PyObject_New already initialized the header */
+    result->object_id = object_id;
+  return (PyObject *) result;
+}
+
+static PyObject *PyObjectID_id(PyObject *self) {
+  object_id *oid = &((PyObjectID *) self)->object_id;
+  /* Hand the raw id bytes back to Python as a byte string. */
+  return PyString_FromStringAndSize((char *) &oid->id[0], UNIQUE_ID_SIZE);
+}
+
+static PyMethodDef PyObjectID_methods[] = {
+    {"id", (PyCFunction) PyObjectID_id, METH_NOARGS,
+     "Return the hash associated with this ObjectID"},
+    {NULL} /* Sentinel */
+};
+
+static PyMemberDef PyObjectID_members[] = {
+    {NULL} /* Sentinel */
+};
+
+PyTypeObject PyObjectIDType = {
+    PyObject_HEAD_INIT(NULL) 0, /* ob_size */
+    "common.ObjectID",          /* tp_name */
+    sizeof(PyObjectID),         /* tp_basicsize */
+    0,                          /* tp_itemsize */
+    0,                          /* tp_dealloc */
+    0,                          /* tp_print */
+    0,                          /* tp_getattr */
+    0,                          /* tp_setattr */
+    0,                          /* tp_compare */
+    0,                          /* tp_repr */
+    0,                          /* tp_as_number */
+    0,                          /* tp_as_sequence */
+    0,                          /* tp_as_mapping */
+    0,                          /* tp_hash */
+    0,                          /* tp_call */
+    0,                          /* tp_str */
+    0,                          /* tp_getattro */
+    0,                          /* tp_setattro */
+    0,                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,         /* tp_flags */
+    "ObjectID object",          /* tp_doc */
+    0,                          /* tp_traverse */
+    0,                          /* tp_clear */
+    0,                          /* tp_richcompare */
+    0,                          /* tp_weaklistoffset */
+    0,                          /* tp_iter */
+    0,                          /* tp_iternext */
+    PyObjectID_methods,         /* tp_methods */
+    PyObjectID_members,         /* tp_members */
+    0,                          /* tp_getset */
+    0,                          /* tp_base */
+    0,                          /* tp_dict */
+    0,                          /* tp_descr_get */
+    0,                          /* tp_descr_set */
+    0,                          /* tp_dictoffset */
+    (initproc) PyObjectID_init, /* tp_init */
+    0,                          /* tp_alloc */
+    PyType_GenericNew,          /* tp_new */
+};
diff --git a/lib/python/serialization.c b/lib/python/serialization.c
new file mode 100644
index 000000000..82cf6417c
--- /dev/null
+++ b/lib/python/serialization.c
@@ -0,0 +1,80 @@
+#include "types.h"
+
+/* TODO(pcm): Add limit on total number of elements. */
+
+#define SIZE_LIMIT 100
+#define NUM_ELEMENTS_LIMIT 1000
+
+/**
+ * Decide whether a Python object is simple enough to be serialized and passed
+ * by value as an argument to a task (without being put in the object store).
+ * Which objects count as simple is defined by this function; for performance
+ * it is better to keep "small" objects in the task itself and "large" objects
+ * in the object store.
+ *
+ * @param value The Python object in question.
+ * @param num_elements_contained Running count of visited objects and string
+ *        characters; every call adds its own contribution. Once it reaches
+ *        NUM_ELEMENTS_LIMIT the object is rejected.
+ * @return 0 if the object cannot be serialized in the task and 1 if it can.
+ */
+int is_simple_value(PyObject *value, int *num_elements_contained) {
+  *num_elements_contained += 1;
+  if (*num_elements_contained >= NUM_ELEMENTS_LIMIT) {
+    return 0;
+  }
+  if (PyInt_Check(value) || PyLong_Check(value) || value == Py_False ||
+      value == Py_True || PyFloat_Check(value) || value == Py_None) {
+    return 1;
+  }
+  if (PyString_CheckExact(value) || PyUnicode_CheckExact(value)) {
+    Py_ssize_t length = PyString_CheckExact(value) ? PyString_Size(value)
+                                                   : PyUnicode_GET_SIZE(value);
+    /* Compare before narrowing to int so that huge strings cannot wrap. */
+    if (length >= NUM_ELEMENTS_LIMIT) {
+      return 0;
+    }
+    *num_elements_contained += (int) length;
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyList_CheckExact(value) && PyList_Size(value) < SIZE_LIMIT) {
+    for (Py_ssize_t i = 0; i < PyList_Size(value); ++i) {
+      if (!is_simple_value(PyList_GetItem(value, i), num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyDict_CheckExact(value) && PyDict_Size(value) < SIZE_LIMIT) {
+    PyObject *key, *val;
+    Py_ssize_t pos = 0;
+    while (PyDict_Next(value, &pos, &key, &val)) {
+      if (!is_simple_value(key, num_elements_contained) ||
+          !is_simple_value(val, num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyTuple_CheckExact(value) && PyTuple_Size(value) < SIZE_LIMIT) {
+    for (Py_ssize_t i = 0; i < PyTuple_Size(value); ++i) {
+      if (!is_simple_value(PyTuple_GetItem(value, i), num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  return 0;
+}
+
+PyObject *check_simple_value(PyObject *self, PyObject *args) {
+  PyObject *candidate;
+  int visited = 0; /* running element count shared by the recursion */
+  if (!PyArg_ParseTuple(args, "O", &candidate)) {
+    return NULL;
+  }
+  if (!is_simple_value(candidate, &visited)) {
+    Py_RETURN_FALSE;
+  }
+  Py_RETURN_TRUE;
+}
diff --git a/lib/python/setup.py b/lib/python/setup.py
new file mode 100644
index 000000000..38af43f34
--- /dev/null
+++ b/lib/python/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup, find_packages, Extension
+
+common_module = Extension("common",
+                          sources=["object_id.c", "serialization.c", "task.c"],
+                          include_dirs=["../../", "../../thirdparty"],
+                          extra_objects=["../../build/libcommon.a"],
+                          extra_compile_args=["--std=c99", "-Werror"])
+
+setup(name="Common",
+      version="0.1",
+      description="Common library for Ray",
+      ext_modules=[common_module])
diff --git a/lib/python/task.c b/lib/python/task.c
new file mode 100644
index 000000000..ef685b493
--- /dev/null
+++ b/lib/python/task.c
@@ -0,0 +1,190 @@
+#include <Python.h>
+#include "node.h"
+
+#include "types.h"
+#include "task.h"
+#include "utarray.h"
+#include "utstring.h"
+
+PyObject *CommonError;
+
+#define MARSHAL_VERSION 2
+
+static int PyTask_init(PyTask *self, PyObject *args, PyObject *kwds) {
+  function_id function_id;
+  /* Arguments of the task (a list of PyObjectIDs or Python values). */
+  PyObject *arguments;
+  int num_returns;
+  /* "O!" makes the parser reject anything that is not a list, so the
+   * PyList_* calls below always operate on a real list. */
+  if (!PyArg_ParseTuple(args, "O&O!i", &PyObjectToUniqueID, &function_id,
+                        &PyList_Type, &arguments, &num_returns)) {
+    return -1;
+  }
+  /* Array of pointers to string representations of pass-by-value args. It is
+   * created after parsing succeeded so the return above cannot leak it. */
+  UT_array *val_repr_ptrs;
+  utarray_new(val_repr_ptrs, &ut_ptr_icd);
+  size_t size = PyList_Size(arguments);
+  /* Determine the size of pass by value data in bytes. */
+  size_t value_data_bytes = 0;
+  for (size_t i = 0; i < size; ++i) {
+    PyObject *arg = PyList_GetItem(arguments, i);
+    if (!PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
+      PyObject *data = PyMarshal_WriteObjectToString(arg, MARSHAL_VERSION);
+      if (data == NULL) {
+        /* The value cannot be marshalled and the Python error is set.
+         * Release the strings collected so far and report the failure. */
+        PyObject **p = NULL;
+        while ((p = (PyObject **) utarray_next(val_repr_ptrs, p)) != NULL) {
+          Py_DECREF(*p);
+        }
+        utarray_free(val_repr_ptrs);
+        return -1;
+      }
+      value_data_bytes += PyString_Size(data);
+      utarray_push_back(val_repr_ptrs, &data);
+    }
+  }
+  /* Construct the task specification. */
+  int val_repr_index = 0;
+  self->spec =
+      alloc_task_spec(function_id, size, num_returns, value_data_bytes);
+  for (size_t i = 0; i < size; ++i) {
+    PyObject *arg = PyList_GetItem(arguments, i);
+    if (PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
+      task_args_add_ref(self->spec, ((PyObjectID *) arg)->object_id);
+    } else {
+      PyObject *data =
+          *((PyObject **) utarray_eltptr(val_repr_ptrs, val_repr_index));
+      task_args_add_val(self->spec, (uint8_t *) PyString_AS_STRING(data),
+                        PyString_GET_SIZE(data));
+      Py_DECREF(data);
+      val_repr_index += 1;
+    }
+  }
+  utarray_free(val_repr_ptrs);
+  return 0;
+}
+
+static void PyTask_dealloc(PyTask *self) {
+  free_task_spec(self->spec); /* NOTE(review): spec may still be NULL here */
+  Py_TYPE(self)->tp_free((PyObject *) self); /* release the Python object */
+}
+
+static PyObject *PyTask_function_id(PyObject *self) {
+  task_spec *spec = ((PyTask *) self)->spec;
+  return PyObjectID_make(*task_function(spec)); /* wraps a copy of the id */
+}
+
+static PyObject *PyTask_arguments(PyObject *self, PyObject *args) {
+  int arg_index;
+  task_spec *spec = ((PyTask *) self)->spec;
+  if (!PyArg_ParseTuple(args, "i", &arg_index)) {
+    return NULL;
+  }
+  /* NOTE(review): arg_index is not range-checked against the task spec. */
+  if (task_arg_type(spec, arg_index) == ARG_BY_REF) {
+    object_id object_id = *task_arg_id(spec, arg_index);
+    return PyObjectID_make(object_id);
+  }
+  /* Pass-by-value argument: return the unmarshalled value itself. On failure
+   * this is NULL with the Python error set, which is what callers expect. */
+  return PyMarshal_ReadObjectFromString(
+      (char *) task_arg_val(spec, arg_index),
+      (Py_ssize_t) task_arg_length(spec, arg_index));
+}
+
+static PyObject *PyTask_returns(PyObject *self, PyObject *args) {
+  task_spec *spec = ((PyTask *) self)->spec;
+  int index; /* position of the requested return value */
+  if (!PyArg_ParseTuple(args, "i", &index)) {
+    return NULL;
+  }
+  return PyObjectID_make(*task_return(spec, index));
+}
+
+static PyMethodDef PyTask_methods[] = {
+    {"function_id", (PyCFunction) PyTask_function_id, METH_NOARGS,
+     "Return the function id associated with this task."},
+    {"arguments", (PyCFunction) PyTask_arguments, METH_VARARGS,
+     "Return the i-th argument of the task."},
+    {"returns", (PyCFunction) PyTask_returns, METH_VARARGS,
+     "Return the i-th object reference of the task."},
+    {NULL} /* Sentinel */
+};
+
+static PyTypeObject PyTaskType = {
+    PyObject_HEAD_INIT(NULL) 0,  /* ob_size */
+    "common.Task",               /* tp_name (module-qualified) */
+    sizeof(PyTask),              /* tp_basicsize */
+    0,                           /* tp_itemsize */
+    (destructor) PyTask_dealloc, /* tp_dealloc */
+    0,                           /* tp_print */
+    0,                           /* tp_getattr */
+    0,                           /* tp_setattr */
+    0,                           /* tp_compare */
+    0,                           /* tp_repr */
+    0,                           /* tp_as_number */
+    0,                           /* tp_as_sequence */
+    0,                           /* tp_as_mapping */
+    0,                           /* tp_hash */
+    0,                           /* tp_call */
+    0,                           /* tp_str */
+    0,                           /* tp_getattro */
+    0,                           /* tp_setattro */
+    0,                           /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,          /* tp_flags */
+    "Task object",               /* tp_doc */
+    0,                           /* tp_traverse */
+    0,                           /* tp_clear */
+    0,                           /* tp_richcompare */
+    0,                           /* tp_weaklistoffset */
+    0,                           /* tp_iter */
+    0,                           /* tp_iternext */
+    PyTask_methods,              /* tp_methods */
+    0,                           /* tp_members */
+    0,                           /* tp_getset */
+    0,                           /* tp_base */
+    0,                           /* tp_dict */
+    0,                           /* tp_descr_get */
+    0,                           /* tp_descr_set */
+    0,                           /* tp_dictoffset */
+    (initproc) PyTask_init,      /* tp_init */
+    0,                           /* tp_alloc */
+    PyType_GenericNew,           /* tp_new */
+};
+
+static PyMethodDef common_methods[] = {
+    {"check_simple_value", check_simple_value, METH_VARARGS,
+     "Should the object be passed by value?"},
+    {NULL} /* Sentinel */
+};
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+
+PyMODINIT_FUNC initcommon(void) {
+  PyObject *m;
+  if (PyType_Ready(&PyTaskType) < 0 || PyType_Ready(&PyObjectIDType) < 0) {
+    return; /* The Python error is already set. */
+  }
+  m = Py_InitModule3("common", common_methods,
+                     "Example module that creates an extension type.");
+  if (m == NULL) {
+    return;
+  }
+  Py_INCREF(&PyTaskType); /* PyModule_AddObject steals a reference. */
+  PyModule_AddObject(m, "Task", (PyObject *) &PyTaskType);
+  Py_INCREF(&PyObjectIDType);
+  PyModule_AddObject(m, "ObjectID", (PyObject *) &PyObjectIDType);
+
+  char common_error[] = "common.error";
+  CommonError = PyErr_NewException(common_error, NULL, NULL);
+  if (CommonError == NULL) {
+    return;
+  }
+  Py_INCREF(CommonError); /* Keep one reference for the C global. */
+  PyModule_AddObject(m, "common_error", CommonError);
+}
diff --git a/lib/python/types.h b/lib/python/types.h
new file mode 100644
index 000000000..9e30581b5
--- /dev/null
+++ b/lib/python/types.h
@@ -0,0 +1,33 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+#include <Python.h>
+#include "marshal.h"
+#include "structmember.h"
+
+#include "common.h"
+#include "task.h"
+
+extern PyObject *CommonError;
+
+// clang-format off
+typedef struct {
+  PyObject_HEAD
+  object_id object_id;
+} PyObjectID;
+
+typedef struct {
+  PyObject_HEAD
+  task_spec *spec;
+} PyTask;
+// clang-format on
+
+extern PyTypeObject PyObjectIDType;
+
+int PyObjectToUniqueID(PyObject *object, object_id *objectid);
+
+PyObject *PyObjectID_make(object_id object_id);
+
+PyObject *check_simple_value(PyObject *self, PyObject *args);
+
+#endif /* TYPES_H */
diff --git a/task.h b/task.h
index 9267edb65..28886bf14 100644
--- a/task.h
+++ b/task.h
@@ -13,7 +13,6 @@
 #include "utstring.h"
 
 typedef unique_id function_id;
-typedef unique_id object_id;
 
 /* The task ID is a deterministic hash of the function ID that
  * the task executes and the argument IDs or argument values */
diff --git a/test/test.py b/test/test.py
new file mode 100644
index 000000000..bbed21376
--- /dev/null
+++ b/test/test.py
@@ -0,0 +1,51 @@
+from __future__ import print_function
+
+import unittest
+
+import common
+
+BASE_SIMPLE_OBJECTS = [
+  0, 1, 100000, 0L, 1L, 100000L, 1L << 100, 0.0, 0.5, 0.9, 100000.1, (), [], {},
+  "", 990 * "h", u"", 990 * u"h"
+]
+
+LIST_SIMPLE_OBJECTS = [[obj] for obj in BASE_SIMPLE_OBJECTS]
+TUPLE_SIMPLE_OBJECTS = [(obj,) for obj in BASE_SIMPLE_OBJECTS]
+DICT_SIMPLE_OBJECTS = [{(): obj} for obj in BASE_SIMPLE_OBJECTS]
+
+SIMPLE_OBJECTS = (BASE_SIMPLE_OBJECTS +
+                  LIST_SIMPLE_OBJECTS +
+                  TUPLE_SIMPLE_OBJECTS +
+                  DICT_SIMPLE_OBJECTS)
+
+# Create some complex objects that cannot be serialized by value in tasks.
+
+l = []
+l.append(l)
+
+class Foo(object):
+  def __init__(self):
+    pass
+
+BASE_COMPLEX_OBJECTS = [999 * "h", 999 * u"h", l, Foo(), 10 * [10 * [10 * [1]]]]
+
+LIST_COMPLEX_OBJECTS = [[obj] for obj in BASE_COMPLEX_OBJECTS]
+TUPLE_COMPLEX_OBJECTS = [(obj,) for obj in BASE_COMPLEX_OBJECTS]
+DICT_COMPLEX_OBJECTS = [{(): obj} for obj in BASE_COMPLEX_OBJECTS]
+
+COMPLEX_OBJECTS = (BASE_COMPLEX_OBJECTS +
+                   LIST_COMPLEX_OBJECTS +
+                   TUPLE_COMPLEX_OBJECTS +
+                   DICT_COMPLEX_OBJECTS)
+
+class TestPlasmaClient(unittest.TestCase):
+
+  def test_serialize_by_value(self):
+
+    for val in SIMPLE_OBJECTS:
+      self.assertTrue(common.check_simple_value(val))
+    for val in COMPLEX_OBJECTS:
+      self.assertFalse(common.check_simple_value(val))
+
+if __name__ == "__main__":
+  unittest.main(verbosity=2)

From 872e68b5b004cbde78aee18325cee187387fac9c Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 4 Oct 2016 16:25:11 -0700
Subject: [PATCH 61/91] submit task to redis

---
 common             |   2 +-
 photon_scheduler.c | 130 +++++++++++++++++++++++----------------------
 photon_scheduler.h |  23 ++++----
 3 files changed, 81 insertions(+), 74 deletions(-)

diff --git a/common b/common
index 084220b0e..49ac871ef 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 084220b0e70de6bed466e97e08f4b6909133aafb
+Subproject commit 49ac871ef6a6751835dcd8448f1d67ef4d6c82ad
diff --git a/photon_scheduler.c b/photon_scheduler.c
index bf5672bcd..e6b6cdad2 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -9,9 +9,9 @@
 #include "event_loop.h"
 #include "io.h"
 #include "photon.h"
+#include "photon_scheduler.h"
 #include "state/db.h"
-#include "state/task_queue.h"
-#include "task.h"
+#include "state/task_log.h"
 #include "utarray.h"
 
 typedef struct {
@@ -20,22 +20,76 @@ typedef struct {
 } available_worker;
 
 /* These are needed to define the UT_arrays. */
-UT_icd task_ptr_icd = {sizeof(task_spec *), NULL, NULL, NULL};
+UT_icd task_ptr_icd = {sizeof(task_instance *), NULL, NULL, NULL};
 UT_icd worker_icd = {sizeof(available_worker), NULL, NULL, NULL};
 
-typedef struct {
+struct local_scheduler_state {
   db_handle *db;
   /** This is an array of pointers to tasks that are waiting to be scheduled. */
   UT_array *task_queue;
   /** This is an array of file descriptors corresponding to clients that are
    *  waiting for tasks. */
   UT_array *available_worker_queue;
-} local_scheduler_state;
+};
 
-void try_to_assign_task(task_spec *task, local_scheduler_state *s);
-void try_to_assign_task_to_worker(int client_sock, local_scheduler_state *s);
+local_scheduler_state *init_local_scheduler(event_loop *loop,
+                                            const char *redis_addr,
+                                            int redis_port) {
+  local_scheduler_state *state = malloc(sizeof(local_scheduler_state));
+  state->db = db_connect(redis_addr, redis_port, "photon", "", -1);
+  db_attach(state->db, loop);
+  utarray_new(state->task_queue, &task_ptr_icd);
+  utarray_new(state->available_worker_queue, &worker_icd);
+  return state;
+};
 
-event_loop *init_local_scheduler() { return event_loop_create(); };
+void handle_submit_task(local_scheduler_state *s, task_spec *task) {
+  /* Assign this task to an available worker. If there are no available workers,
+   * then add this task to the local task queue. */
+  task_iid task_iid = globally_unique_id();
+  task_instance *instance = make_task_instance(task_iid, task, TASK_WAITING, NIL_ID);
+  if (utarray_len(s->available_worker_queue) > 0) {
+    /* Get the last available worker in the available worker queue. */
+    available_worker *worker =
+        (available_worker *)utarray_back(s->available_worker_queue);
+    /* Tell the available worker to execute the task. */
+    write_message(worker->client_sock, EXECUTE_TASK, task_size(task),
+                  (uint8_t *)task);
+    utarray_pop_back(s->available_worker_queue);
+    /* TODO: Do we need to free the available_worker struct? */
+  } else {
+    /* Add the task to the task queue. */
+    utarray_push_back(s->task_queue, &instance);
+  }
+  /* Submit task to redis. */
+  task_log_add_task(s->db, instance);
+  // free(instance);
+}
+
+void handle_get_task(local_scheduler_state *s, int client_sock) {
+  if (utarray_len(s->task_queue) > 0) {
+    /* Get the last task in the task queue. */
+    task_instance **back = (task_instance **)utarray_back(s->task_queue);
+    task_spec *task = task_instance_task_spec(*back);
+    /* Send a task to the worker. */
+    write_message(client_sock, EXECUTE_TASK, task_size(task), (uint8_t *)task);
+    /* Update the task queue data structure and free the task. */
+    utarray_pop_back(s->task_queue);
+    free(*back);
+  } else {
+    /* Check that client_sock is not already in the available workers. */
+    for (available_worker *p =
+             (available_worker *)utarray_front(s->available_worker_queue);
+         p != NULL;
+         p = (available_worker *)utarray_next(s->available_worker_queue, p)) {
+      CHECK(p->client_sock != client_sock);
+    }
+    /* Add client_sock to a list of available workers. */
+    available_worker worker_info = {.client_sock = client_sock};
+    utarray_push_back(s->available_worker_queue, &worker_info);
+    LOG_INFO("Adding client_sock %d to available workers.\n", client_sock);
+  }
+}
 
 void process_message(event_loop *loop, int client_sock, void *context,
                      int events) {
@@ -57,12 +111,12 @@ void process_message(event_loop *loop, int client_sock, void *context,
     // task_queue_submit_task(s->db, id, task);
     /* Try to assign the task to a worker locally. TODO(rkn): This should
      * probably go somewhere else. */
-    try_to_assign_task(task, s);
+    handle_submit_task(s, task);
   } break;
   case TASK_DONE: {
   } break;
   case GET_TASK: {
-    try_to_assign_task_to_worker(client_sock, s);
+    handle_get_task(s, client_sock);
   } break;
   case DISCONNECT_CLIENT: {
     LOG_INFO("Disconnecting client on fd %d", client_sock);
@@ -77,51 +131,6 @@ void process_message(event_loop *loop, int client_sock, void *context,
   free(message);
 }
 
-void try_to_assign_task(task_spec *task, local_scheduler_state *s) {
-  /* Assign this task to an available worker. If there are no available workers,
-   * then add this task to the local task queue. */
-  if (utarray_len(s->available_worker_queue) > 0) {
-    /* Get the last available worker in the available worker queue. */
-    available_worker *worker =
-        (available_worker *)utarray_back(s->available_worker_queue);
-    /* Tell the available worker to execute the task. */
-    write_message(worker->client_sock, EXECUTE_TASK, task_size(task),
-                  (uint8_t *)task);
-    utarray_pop_back(s->available_worker_queue);
-    /* TODO: Do we need to free the available_worker struct? */
-  } else {
-    /* Add the task to the task queue. */
-    task_spec *task_copy = malloc(task_size(task));
-    memcpy(task_copy, task, task_size(task));
-    utarray_push_back(s->task_queue, &task_copy);
-  }
-}
-
-void try_to_assign_task_to_worker(int client_sock, local_scheduler_state *s) {
-  if (utarray_len(s->task_queue) > 0) {
-    /* Get the last task in the task queue. */
-    task_spec **task_ptr = (task_spec **)utarray_back(s->task_queue);
-    task_spec *task = *task_ptr;
-    /* Send a task to the worker. */
-    write_message(client_sock, EXECUTE_TASK, task_size(task), (uint8_t *)task);
-    /* Update the task queue data structure and free the task. */
-    utarray_pop_back(s->task_queue);
-    free(task);
-  } else {
-    /* Check that client_sock is not already in the available workers. */
-    for (available_worker *p =
-             (available_worker *)utarray_front(s->available_worker_queue);
-         p != NULL;
-         p = (available_worker *)utarray_next(s->available_worker_queue, p)) {
-      CHECK(p->client_sock != client_sock);
-    }
-    /* Add client_sock to a list of available workers. */
-    available_worker worker_info = {.client_sock = client_sock};
-    utarray_push_back(s->available_worker_queue, &worker_info);
-    LOG_INFO("Adding client_sock %d to available workers.\n", client_sock);
-  }
-}
-
 void new_client_connection(event_loop *loop, int listener_sock, void *context,
                            int events) {
   local_scheduler_state *s = context;
@@ -133,16 +142,11 @@ void new_client_connection(event_loop *loop, int listener_sock, void *context,
 void start_server(const char *socket_name, const char *redis_addr,
                   int redis_port) {
   int fd = bind_ipc_sock(socket_name);
-  local_scheduler_state state;
-  event_loop *loop = init_local_scheduler();
-
-  state.db = db_connect(redis_addr, redis_port, "photon", "", -1);
-  db_attach(state.db, loop);
-  utarray_new(state.task_queue, &task_ptr_icd);
-  utarray_new(state.available_worker_queue, &worker_icd);
+  event_loop *loop = event_loop_create();
+  local_scheduler_state *state = init_local_scheduler(loop, redis_addr, redis_port);
 
   /* Run event loop. */
-  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, &state);
+  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, state);
   event_loop_run(loop);
 }
 
diff --git a/photon_scheduler.h b/photon_scheduler.h
index cce91155d..bef1f42d9 100644
--- a/photon_scheduler.h
+++ b/photon_scheduler.h
@@ -1,15 +1,18 @@
-#ifndef PHOTON_SCHEDULER
-#define PHOTON_SCHEDULER
+#ifndef PHOTON_SCHEDULER_H
+#define PHOTON_SCHEDULER_H
+
+#include "task.h"
+
+typedef struct local_scheduler_state local_scheduler_state;
 
 /* Establish a connection to a new client. */
-void new_client_connection(local_scheduler_state *s, int listener_sock);
+void new_client_connection(event_loop *loop, int listener_sock, void *context,
+                           int events);
 
-/* schedule a task on a given worker. */
-void schedule_on_worker(local_scheduler_state *s, task_spec *task,
-                        int client_id);
+/* Assign a task to a worker. */
+void handle_get_task(local_scheduler_state *s, int client_sock);
 
-/* Handle new incoming task that was scheduled by the globl scheduler on
- * this local scheduler. */
-void schedule_task(local_scheduler_state *s, task_spec *task)
+/* Handle incoming submit request by a worker. */
+void handle_submit_task(local_scheduler_state *s, task_spec *task);
 
-#endif
+#endif /* PHOTON_SCHEDULER_H */

From 4329afbd53449412b08e64d4057a2857393c2212 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 4 Oct 2016 16:59:44 -0700
Subject: [PATCH 62/91] rename TASK_* -> TASK_STATUS_* (#31)

---
 task.h          |  8 ++++----
 test/db_tests.c | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/task.h b/task.h
index 28886bf14..782bf685b 100644
--- a/task.h
+++ b/task.h
@@ -93,10 +93,10 @@ void print_task(task_spec *spec, UT_string *output);
 /* The scheduling_state can be used as a flag when we are listening
  * for an event, for example TASK_WAITING | TASK_SCHEDULED. */
 enum scheduling_state {
-  TASK_WAITING = 1,
-  TASK_SCHEDULED = 2,
-  TASK_RUNNING = 4,
-  TASK_DONE = 8
+  TASK_STATUS_WAITING = 1,
+  TASK_STATUS_SCHEDULED = 2,
+  TASK_STATUS_RUNNING = 4,
+  TASK_STATUS_DONE = 8
 };
 
 /* A task instance is one execution of a task specification.
diff --git a/test/db_tests.c b/test/db_tests.c
index 96f16b528..be09ad28e 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -74,7 +74,7 @@ TEST object_table_lookup_test(void) {
 
 void task_log_test_callback(task_instance *instance, void *userdata) {
   task_instance *other = userdata;
-  CHECK(*task_instance_state(instance) == TASK_SCHEDULED);
+  CHECK(*task_instance_state(instance) == TASK_STATUS_SCHEDULED);
   CHECK(task_instance_size(instance) == task_instance_size(other));
   CHECK(memcmp(instance, other, task_instance_size(instance)) == 0);
 }
@@ -86,9 +86,9 @@ TEST task_log_test(void) {
   node_id node = globally_unique_id();
   task_spec *task = example_task();
   task_instance *instance =
-      make_task_instance(globally_unique_id(), task, TASK_SCHEDULED, node);
-  task_log_register_callback(db, task_log_test_callback, node, TASK_SCHEDULED,
-                             instance);
+      make_task_instance(globally_unique_id(), task, TASK_STATUS_SCHEDULED, node);
+  task_log_register_callback(db, task_log_test_callback, node,
+                             TASK_STATUS_SCHEDULED, instance);
   task_log_add_task(db, instance);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
@@ -112,11 +112,11 @@ TEST task_log_all_test(void) {
   task_spec *task = example_task();
   /* Schedule two tasks on different nodes. */
   task_instance *instance1 = make_task_instance(
-      globally_unique_id(), task, TASK_SCHEDULED, globally_unique_id());
+      globally_unique_id(), task, TASK_STATUS_SCHEDULED, globally_unique_id());
   task_instance *instance2 = make_task_instance(
-      globally_unique_id(), task, TASK_SCHEDULED, globally_unique_id());
+      globally_unique_id(), task, TASK_STATUS_SCHEDULED, globally_unique_id());
   task_log_register_callback(db, task_log_all_test_callback, NIL_ID,
-                             TASK_SCHEDULED, NULL);
+                             TASK_STATUS_SCHEDULED, NULL);
   task_log_add_task(db, instance1);
   task_log_add_task(db, instance2);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);

From 67677c3c923f6e3e949532b642952997a951cc4d Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 4 Oct 2016 17:06:52 -0700
Subject: [PATCH 63/91] update documentation and common

---
 common             |  2 +-
 photon_scheduler.c | 11 ++++++-----
 photon_scheduler.h | 24 +++++++++++++++++++++---
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/common b/common
index 49ac871ef..4329afbd5 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 49ac871ef6a6751835dcd8448f1d67ef4d6c82ad
+Subproject commit 4329afbd53449412b08e64d4057a2857393c2212
diff --git a/photon_scheduler.c b/photon_scheduler.c
index e6b6cdad2..7d9b5f7ef 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -32,9 +32,8 @@ struct local_scheduler_state {
   UT_array *available_worker_queue;
 };
 
-local_scheduler_state *init_local_scheduler(event_loop *loop,
-                                            const char *redis_addr,
-                                            int redis_port) {
+local_scheduler_state *
+init_local_scheduler(event_loop *loop, const char *redis_addr, int redis_port) {
   local_scheduler_state *state = malloc(sizeof(local_scheduler_state));
   state->db = db_connect(redis_addr, redis_port, "photon", "", -1);
   db_attach(state->db, loop);
@@ -47,7 +46,8 @@ void handle_submit_task(local_scheduler_state *s, task_spec *task) {
   /* Assign this task to an available worker. If there are no available workers,
    * then add this task to the local task queue. */
   task_iid task_iid = globally_unique_id();
-  task_instance *instance = make_task_instance(task_iid, task, TASK_WAITING, NIL_ID);
+  task_instance *instance =
+      make_task_instance(task_iid, task, TASK_STATUS_WAITING, NIL_ID);
   if (utarray_len(s->available_worker_queue) > 0) {
     /* Get the last available worker in the available worker queue. */
     available_worker *worker =
@@ -143,7 +143,8 @@ void start_server(const char *socket_name, const char *redis_addr,
                   int redis_port) {
   int fd = bind_ipc_sock(socket_name);
   event_loop *loop = event_loop_create();
-  local_scheduler_state *state = init_local_scheduler(loop, redis_addr, redis_port);
+  local_scheduler_state *state =
+      init_local_scheduler(loop, redis_addr, redis_port);
 
   /* Run event loop. */
   event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, state);
diff --git a/photon_scheduler.h b/photon_scheduler.h
index bef1f42d9..54905bb88 100644
--- a/photon_scheduler.h
+++ b/photon_scheduler.h
@@ -5,14 +5,32 @@
 
 typedef struct local_scheduler_state local_scheduler_state;
 
-/* Establish a connection to a new client. */
+/**
+ * Establish a connection to a new client.
+ *
+ * @param loop Event loop of the local scheduler.
+ * @param listener_socket Socket the local scheduler is listening on for new
+ *                        client requests.
+ * @param context State of the local scheduler.
+ * @param events Flag for events that are available on the listener socket.
+ */
 void new_client_connection(event_loop *loop, int listener_sock, void *context,
                            int events);
 
-/* Assign a task to a worker. */
+/**
+ * Assign a task to a worker.
+ *
+ * @param s State of the local scheduler.
+ * @param client_sock Socket by which the worker is connected.
+ */
 void handle_get_task(local_scheduler_state *s, int client_sock);
 
-/* Handle incoming submit request by a worker. */
+/**
+ * Handle incoming submit request by a worker.
+ *
+ * @param s State of the local scheduler.
+ * @param task Task specification of the task to be submitted.
+ */
 void handle_submit_task(local_scheduler_state *s, task_spec *task);
 
 #endif /* PHOTON_SCHEDULER_H */

From 4204500d23be7726e27598badb691d29d08a0ad7 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 5 Oct 2016 09:17:08 -0700
Subject: [PATCH 64/91] Fix formatting. (#32)

---
 test/db_tests.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/db_tests.c b/test/db_tests.c
index be09ad28e..6eb592e45 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -85,8 +85,8 @@ TEST task_log_test(void) {
   db_attach(db, loop);
   node_id node = globally_unique_id();
   task_spec *task = example_task();
-  task_instance *instance =
-      make_task_instance(globally_unique_id(), task, TASK_STATUS_SCHEDULED, node);
+  task_instance *instance = make_task_instance(globally_unique_id(), task,
+                                               TASK_STATUS_SCHEDULED, node);
   task_log_register_callback(db, task_log_test_callback, node,
                              TASK_STATUS_SCHEDULED, instance);
   task_log_add_task(db, instance);

From a7a963445d50fa0c53ef771404a40da06abc3cc7 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 5 Oct 2016 13:30:10 -0700
Subject: [PATCH 65/91] fixes

---
 common             | 2 +-
 photon_scheduler.h | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/common b/common
index 4329afbd5..4204500d2 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 4329afbd53449412b08e64d4057a2857393c2212
+Subproject commit 4204500d23be7726e27598badb691d29d08a0ad7
diff --git a/photon_scheduler.h b/photon_scheduler.h
index 54905bb88..591ffe0f5 100644
--- a/photon_scheduler.h
+++ b/photon_scheduler.h
@@ -10,9 +10,10 @@ typedef struct local_scheduler_state local_scheduler_state;
  *
  * @param loop Event loop of the local scheduler.
  * @param listener_socket Socket the local scheduler is listening on for new
- *                        client requests.
+ *        client requests.
  * @param context State of the local scheduler.
  * @param events Flag for events that are available on the listener socket.
+ * @return Void.
  */
 void new_client_connection(event_loop *loop, int listener_sock, void *context,
                            int events);
@@ -22,6 +23,7 @@ void new_client_connection(event_loop *loop, int listener_sock, void *context,
  *
  * @param s State of the local scheduler.
  * @param client_sock Socket by which the worker is connected.
+ * @return Void.
  */
 void handle_get_task(local_scheduler_state *s, int client_sock);
 
@@ -30,6 +32,7 @@ void handle_get_task(local_scheduler_state *s, int client_sock);
  *
  * @param s State of the local scheduler.
  * @param task Task specification of the task to be submitted.
+ * @return Void.
  */
 void handle_submit_task(local_scheduler_state *s, task_spec *task);
 

From 0f97855333370f99e31391442a4ef9b97969032b Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 5 Oct 2016 14:11:02 -0700
Subject: [PATCH 66/91] More fixes.

---
 photon_scheduler.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/photon_scheduler.c b/photon_scheduler.c
index 7d9b5f7ef..44022451c 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -43,30 +43,43 @@ init_local_scheduler(event_loop *loop, const char *redis_addr, int redis_port) {
 };
 
 void handle_submit_task(local_scheduler_state *s, task_spec *task) {
-  /* Assign this task to an available worker. If there are no available workers,
-   * then add this task to the local task queue. */
+  /* Create a unique task instance ID. This is different from the task ID and
+   * is used to distinguish between potentially multiple executions of the
+   * task. */
   task_iid task_iid = globally_unique_id();
   task_instance *instance =
       make_task_instance(task_iid, task, TASK_STATUS_WAITING, NIL_ID);
-  if (utarray_len(s->available_worker_queue) > 0) {
+  /* Assign this task to an available worker. If there are no available workers,
+   * then add this task to the local task queue. */
+  int schedule_locally = utarray_len(s->available_worker_queue) > 0;
+  if (schedule_locally) {
     /* Get the last available worker in the available worker queue. */
     available_worker *worker =
         (available_worker *)utarray_back(s->available_worker_queue);
     /* Tell the available worker to execute the task. */
     write_message(worker->client_sock, EXECUTE_TASK, task_size(task),
                   (uint8_t *)task);
+    /* Remove the available worker from the queue and free the struct. */
     utarray_pop_back(s->available_worker_queue);
-    /* TODO: Do we need to free the available_worker struct? */
+    free(worker);
   } else {
-    /* Add the task to the task queue. */
+    /* Add the task to the task queue. This passes ownership of the task to the
+     * task queue, and it will be freed when it is assigned to a worker. */
     utarray_push_back(s->task_queue, &instance);
   }
-  /* Submit task to redis. */
+  /* Submit the task to redis. */
   task_log_add_task(s->db, instance);
-  // free(instance);
+  if (schedule_locally) {
+    /* If the task was scheduled locally, we need to free it. Otherwise,
+     * ownership of the task is passed to the task_queue, and it will be freed
+     * when it is assigned to a worker. */
+    free(instance);
+  }
 }
 
 void handle_get_task(local_scheduler_state *s, int client_sock) {
+  /* If there is an available task, assign that task to this worker. Otherwise
+   * add the worker to the queue of available workers. */
   if (utarray_len(s->task_queue) > 0) {
     /* Get the last task in the task queue. */
     task_instance **back = (task_instance **)utarray_back(s->task_queue);
@@ -84,7 +97,8 @@ void handle_get_task(local_scheduler_state *s, int client_sock) {
          p = (available_worker *)utarray_next(s->available_worker_queue, p)) {
       CHECK(p->client_sock != client_sock);
     }
-    /* Add client_sock to a list of available workers. */
+    /* Add client_sock to a list of available workers. This struct will be freed
+     * when a task is assigned to this worker. */
     available_worker worker_info = {.client_sock = client_sock};
     utarray_push_back(s->available_worker_queue, &worker_info);
     LOG_INFO("Adding client_sock %d to available workers.\n", client_sock);
@@ -104,13 +118,6 @@ void process_message(event_loop *loop, int client_sock, void *context,
   case SUBMIT_TASK: {
     task_spec *task = (task_spec *)message;
     CHECK(task_size(task) == length);
-    /* Create a unique task instance ID. This is different from the task ID and
-     * is used to distinguish between potentially multiple executions of the
-     * task. */
-    unique_id id = globally_unique_id();
-    // task_queue_submit_task(s->db, id, task);
-    /* Try to assign the task to a worker locally. TODO(rkn): This should
-     * probably go somewhere else. */
     handle_submit_task(s, task);
   } break;
   case TASK_DONE: {

From 75441a180d4935917f0072059de5ddf8d3a298fd Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 5 Oct 2016 16:09:40 -0700
Subject: [PATCH 67/91] add valgrind tests (#33)

* add valgrind

* install valgrind
---
 .travis.yml | 9 +++++++++
 Makefile    | 7 +++++++
 2 files changed, 16 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 174fee43f..d0e14edf5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,6 +28,15 @@ matrix:
       install: []
       script:
         - .travis/check-git-clang-format-output.sh
+    - os: linux
+      dist: trusty
+      python: "2.7"
+      env: VALGRIND=1
+      before_install:
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq valgrind
+      script:
+        - make valgrind
 
 install:
   - ./install-dependencies.sh
diff --git a/Makefile b/Makefile
index ad955de98..dae76e87b 100644
--- a/Makefile
+++ b/Makefile
@@ -36,4 +36,11 @@ test: hiredis redis $(BUILD)/common_tests $(BUILD)/db_tests $(BUILD)/io_tests $(
 	./thirdparty/redis-3.2.3/src/redis-server &
 	sleep 1s ; ./build/common_tests ; ./build/db_tests ; ./build/io_tests ; ./build/task_tests ; ./build/redis_tests
 
+valgrind: test
+	valgrind --leak-check=full --error-exitcode=1 ./build/common_tests
+	valgrind --leak-check=full --error-exitcode=1 ./build/db_tests
+	valgrind --leak-check=full --error-exitcode=1 ./build/io_tests
+	valgrind --leak-check=full --error-exitcode=1 ./build/task_tests
+	valgrind --leak-check=full --error-exitcode=1 ./build/redis_tests
+
 FORCE:

From e8e4aa6d8e79cd85cde002dc2a8fc850c3ed89fb Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 5 Oct 2016 18:07:08 -0700
Subject: [PATCH 68/91] add valgrind check

---
 .travis.yml        | 13 +++++++++++++
 photon_scheduler.c | 33 ++++++++++++++++++++++++++++++---
 test/test.py       | 28 +++++++++++++++++++++++++---
 3 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 45ef9a286..caab2e356 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,6 +29,19 @@ matrix:
       install: []
       script:
         - .travis/check-git-clang-format-output.sh
+    - os: linux
+      dist: trusty
+      python: "2.7"
+      env: VALGRIND=1
+      before_install:
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq valgrind
+      script:
+        - cd common
+        - make test
+        - cd ..
+        - source setup-env.sh
+        - python test/test.py valgrind
 
 install:
   - make
diff --git a/photon_scheduler.c b/photon_scheduler.c
index 44022451c..b876bc9f2 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -1,4 +1,5 @@
 #include <inttypes.h>
+#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -24,6 +25,9 @@ UT_icd task_ptr_icd = {sizeof(task_instance *), NULL, NULL, NULL};
 UT_icd worker_icd = {sizeof(available_worker), NULL, NULL, NULL};
 
 struct local_scheduler_state {
+  /* The local scheduler event loop. */
+  event_loop *loop;
+  /* The handle to the database. */
   db_handle *db;
   /** This is an array of pointers to tasks that are waiting to be scheduled. */
   UT_array *task_queue;
@@ -35,6 +39,7 @@ struct local_scheduler_state {
 local_scheduler_state *
 init_local_scheduler(event_loop *loop, const char *redis_addr, int redis_port) {
   local_scheduler_state *state = malloc(sizeof(local_scheduler_state));
+  state->loop = loop;
   state->db = db_connect(redis_addr, redis_port, "photon", "", -1);
   db_attach(state->db, loop);
   utarray_new(state->task_queue, &task_ptr_icd);
@@ -42,6 +47,14 @@ init_local_scheduler(event_loop *loop, const char *redis_addr, int redis_port) {
   return state;
 };
 
+void free_local_scheduler(local_scheduler_state *s) {
+  db_disconnect(s->db);
+  utarray_free(s->task_queue);
+  utarray_free(s->available_worker_queue);
+  event_loop_destroy(s->loop);
+  free(s);
+}
+
 void handle_submit_task(local_scheduler_state *s, task_spec *task) {
   /* Create a unique task instance ID. This is different from the task ID and
    * is used to distinguish between potentially multiple executions of the
@@ -146,19 +159,33 @@ void new_client_connection(event_loop *loop, int listener_sock, void *context,
   LOG_INFO("new connection with fd %d", new_socket);
 }
 
+/* We need this code so we can clean up when we get a SIGTERM signal. */
+
+local_scheduler_state *g_state;
+
+void signal_handler(int signal) {
+  if (signal == SIGTERM) {
+    free_local_scheduler(g_state);
+    exit(0);
+  }
+}
+
+/* End of the cleanup code. */
+
 void start_server(const char *socket_name, const char *redis_addr,
                   int redis_port) {
   int fd = bind_ipc_sock(socket_name);
   event_loop *loop = event_loop_create();
-  local_scheduler_state *state =
-      init_local_scheduler(loop, redis_addr, redis_port);
+  g_state = init_local_scheduler(loop, redis_addr, redis_port);
 
   /* Run event loop. */
-  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection, state);
+  event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection,
+                      g_state);
   event_loop_run(loop);
 }
 
 int main(int argc, char *argv[]) {
+  signal(SIGTERM, signal_handler);
   /* Path of the listening socket of the local scheduler. */
   char *scheduler_socket_name = NULL;
   /* IP address and port of redis. */
diff --git a/test/test.py b/test/test.py
index 0517c40a5..bbd164c37 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 
 import os
+import signal
 import subprocess
 import sys
 import unittest
@@ -9,6 +10,8 @@ import time
 
 import photon
 
+USE_VALGRIND = False
+
 class TestPhotonClient(unittest.TestCase):
 
   def setUp(self):
@@ -18,8 +21,15 @@ class TestPhotonClient(unittest.TestCase):
     time.sleep(0.1)
     scheduler_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/photon_scheduler")
     scheduler_name = "/tmp/scheduler{}".format(random.randint(0, 10000))
-    self.p2 = subprocess.Popen([scheduler_executable, "-s", scheduler_name, "-r", "127.0.0.1:6379"])
-    time.sleep(0.1)
+    command = [scheduler_executable, "-s", scheduler_name, "-r", "127.0.0.1:6379"]
+    if USE_VALGRIND:
+      self.p2 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--show-leak-kinds=all"] + command)
+    else:
+      self.p2 = subprocess.Popen(command)
+    if USE_VALGRIND:
+      time.sleep(1.0)
+    else:
+      time.sleep(0.1)
     # Connect to the scheduler.
     self.photon_client = photon.PhotonClient(scheduler_name)
 
@@ -27,7 +37,13 @@ class TestPhotonClient(unittest.TestCase):
     # Kill the Redis server.
     self.p1.kill()
     # Kill the local scheduler.
-    self.p2.kill()
+    if USE_VALGRIND:
+      self.p2.send_signal(signal.SIGTERM)
+      self.p2.wait()
+      os._exit(self.p2.returncode)
+    else:
+      self.p2.kill()
+    
 
   def test_create(self):
     l = [20 * "a", 20 * "b", 20 * "c"]
@@ -38,4 +54,10 @@ class TestPhotonClient(unittest.TestCase):
     task = self.photon_client.get_task()
 
 if __name__ == "__main__":
+  if len(sys.argv) > 1:
+    # pop the argument so we don't mess with unittest's own argument parser
+    arg = sys.argv.pop()
+    if arg == "valgrind":
+      USE_VALGRIND = True
+      print("Using valgrind for tests")
   unittest.main(verbosity=2)

From 90a6a99b0380a7b5243b18c1f2360a75209b0fa2 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Thu, 6 Oct 2016 16:25:04 -0700
Subject: [PATCH 69/91] Clean up task interface and add testing. (#34)

* Update c extensions and python tests.

* Updates.

* Documentation fix.

* Move c extensions into common_extension.c
---
 lib/python/common_extension.c              | 361 +++++++++++++++++++++
 lib/python/{types.h => common_extension.h} |   8 +-
 lib/python/object_id.c                     |  91 ------
 lib/python/serialization.c                 |  80 -----
 lib/python/setup.py                        |   2 +-
 lib/python/task.c                          | 177 ----------
 test/test.py                               |  52 ++-
 7 files changed, 418 insertions(+), 353 deletions(-)
 create mode 100644 lib/python/common_extension.c
 rename lib/python/{types.h => common_extension.h} (79%)
 delete mode 100644 lib/python/object_id.c
 delete mode 100644 lib/python/serialization.c
 delete mode 100644 lib/python/task.c

diff --git a/lib/python/common_extension.c b/lib/python/common_extension.c
new file mode 100644
index 000000000..556fea443
--- /dev/null
+++ b/lib/python/common_extension.c
@@ -0,0 +1,361 @@
+#include <Python.h>
+#include "node.h"
+
+#include "common_extension.h"
+#include "task.h"
+#include "utarray.h"
+#include "utstring.h"
+
+PyObject *CommonError;
+
+#define MARSHAL_VERSION 2
+
+/* Define the PyObjectID class. */
+
+int PyObjectToUniqueID(PyObject *object, object_id *objectid) {
+  if (PyObject_IsInstance(object, (PyObject *) &PyObjectIDType)) {
+    *objectid = ((PyObjectID *) object)->object_id;
+    return 1;
+  } else {
+    PyErr_SetString(PyExc_TypeError, "must be an ObjectID");
+    return 0;
+  }
+}
+
+static int PyObjectID_init(PyObjectID *self, PyObject *args, PyObject *kwds) {
+  const char *data;
+  int size;
+  if (!PyArg_ParseTuple(args, "s#", &data, &size)) {
+    return -1;
+  }
+  if (size != UNIQUE_ID_SIZE) {
+    PyErr_SetString(CommonError,
+                    "ObjectID: object id string needs to have length 20");
+    return -1;
+  }
+  memcpy(&self->object_id.id[0], data, UNIQUE_ID_SIZE);
+  return 0;
+}
+
+/* Create a PyObjectID from C. */
+PyObject *PyObjectID_make(object_id object_id) {
+  PyObjectID *result = PyObject_New(PyObjectID, &PyObjectIDType);
+  result = (PyObjectID *) PyObject_Init((PyObject *) result, &PyObjectIDType);
+  result->object_id = object_id;
+  return (PyObject *) result;
+}
+
+static PyObject *PyObjectID_id(PyObject *self) {
+  PyObjectID *s = (PyObjectID *) self;
+  return PyString_FromStringAndSize((char *) &s->object_id.id[0],
+                                    UNIQUE_ID_SIZE);
+}
+
+static PyMethodDef PyObjectID_methods[] = {
+    {"id", (PyCFunction) PyObjectID_id, METH_NOARGS,
+     "Return the hash associated with this ObjectID"},
+    {NULL} /* Sentinel */
+};
+
+static PyMemberDef PyObjectID_members[] = {
+    {NULL} /* Sentinel */
+};
+
+PyTypeObject PyObjectIDType = {
+    PyObject_HEAD_INIT(NULL) 0, /* ob_size */
+    "common.ObjectID",          /* tp_name */
+    sizeof(PyObjectID),         /* tp_basicsize */
+    0,                          /* tp_itemsize */
+    0,                          /* tp_dealloc */
+    0,                          /* tp_print */
+    0,                          /* tp_getattr */
+    0,                          /* tp_setattr */
+    0,                          /* tp_compare */
+    0,                          /* tp_repr */
+    0,                          /* tp_as_number */
+    0,                          /* tp_as_sequence */
+    0,                          /* tp_as_mapping */
+    0,                          /* tp_hash */
+    0,                          /* tp_call */
+    0,                          /* tp_str */
+    0,                          /* tp_getattro */
+    0,                          /* tp_setattro */
+    0,                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,         /* tp_flags */
+    "ObjectID object",          /* tp_doc */
+    0,                          /* tp_traverse */
+    0,                          /* tp_clear */
+    0,                          /* tp_richcompare */
+    0,                          /* tp_weaklistoffset */
+    0,                          /* tp_iter */
+    0,                          /* tp_iternext */
+    PyObjectID_methods,         /* tp_methods */
+    PyObjectID_members,         /* tp_members */
+    0,                          /* tp_getset */
+    0,                          /* tp_base */
+    0,                          /* tp_dict */
+    0,                          /* tp_descr_get */
+    0,                          /* tp_descr_set */
+    0,                          /* tp_dictoffset */
+    (initproc) PyObjectID_init, /* tp_init */
+    0,                          /* tp_alloc */
+    PyType_GenericNew,          /* tp_new */
+};
+
+/* Define the PyTask class. */
+
+static int PyTask_init(PyTask *self, PyObject *args, PyObject *kwds) {
+  function_id function_id;
+  /* Arguments of the task (can be PyObjectIDs or Python values). */
+  PyObject *arguments;
+  /* Array of pointers to string representations of pass-by-value args. */
+  UT_array *val_repr_ptrs;
+  utarray_new(val_repr_ptrs, &ut_ptr_icd);
+  int num_returns;
+  if (!PyArg_ParseTuple(args, "O&Oi", &PyObjectToUniqueID, &function_id,
+                        &arguments, &num_returns)) {
+    return -1;
+  }
+  size_t size = PyList_Size(arguments);
+  /* Determine the size of pass by value data in bytes. */
+  size_t value_data_bytes = 0;
+  for (size_t i = 0; i < size; ++i) {
+    PyObject *arg = PyList_GetItem(arguments, i);
+    if (!PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
+      PyObject *data = PyMarshal_WriteObjectToString(arg, MARSHAL_VERSION);
+      value_data_bytes += PyString_Size(data);
+      utarray_push_back(val_repr_ptrs, &data);
+    }
+  }
+  /* Construct the task specification. */
+  int val_repr_index = 0;
+  self->spec =
+      alloc_task_spec(function_id, size, num_returns, value_data_bytes);
+  for (size_t i = 0; i < size; ++i) {
+    PyObject *arg = PyList_GetItem(arguments, i);
+    if (PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
+      task_args_add_ref(self->spec, ((PyObjectID *) arg)->object_id);
+    } else {
+      PyObject *data =
+          *((PyObject **) utarray_eltptr(val_repr_ptrs, val_repr_index));
+      task_args_add_val(self->spec, (uint8_t *) PyString_AS_STRING(data),
+                        PyString_GET_SIZE(data));
+      Py_DECREF(data);
+      val_repr_index += 1;
+    }
+  }
+  utarray_free(val_repr_ptrs);
+  return 0;
+}
+
+static void PyTask_dealloc(PyTask *self) {
+  free_task_spec(self->spec);
+  Py_TYPE(self)->tp_free((PyObject *) self);
+}
+
+static PyObject *PyTask_function_id(PyObject *self) {
+  function_id function_id = *task_function(((PyTask *) self)->spec);
+  return PyObjectID_make(function_id);
+}
+
+static PyObject *PyTask_arguments(PyObject *self) {
+  int64_t num_args = task_num_args(((PyTask *) self)->spec);
+  PyObject *arg_list = PyList_New((Py_ssize_t) num_args);
+  task_spec *task = ((PyTask *) self)->spec;
+  for (int i = 0; i < num_args; ++i) {
+    if (task_arg_type(task, i) == ARG_BY_REF) {
+      object_id object_id = *task_arg_id(task, i);
+      PyList_SetItem(arg_list, i, PyObjectID_make(object_id));
+    } else {
+      PyObject *s =
+          PyMarshal_ReadObjectFromString((char *) task_arg_val(task, i),
+                                         (Py_ssize_t) task_arg_length(task, i));
+      PyList_SetItem(arg_list, i, s);
+    }
+  }
+  return arg_list;
+}
+
+static PyObject *PyTask_returns(PyObject *self) {
+  int64_t num_returns = task_num_returns(((PyTask *) self)->spec);
+  PyObject *return_id_list = PyList_New((Py_ssize_t) num_returns);
+  task_spec *task = ((PyTask *) self)->spec;
+  for (int i = 0; i < num_returns; ++i) {
+    object_id object_id = *task_return(task, i);
+    PyList_SetItem(return_id_list, i, PyObjectID_make(object_id));
+  }
+  return return_id_list;
+}
+
+static PyMethodDef PyTask_methods[] = {
+    {"function_id", (PyCFunction) PyTask_function_id, METH_NOARGS,
+     "Return the function ID for this task."},
+    {"arguments", (PyCFunction) PyTask_arguments, METH_NOARGS,
+     "Return the arguments for the task."},
+    {"returns", (PyCFunction) PyTask_returns, METH_NOARGS,
+     "Return the object IDs for the return values of the task."},
+    {NULL} /* Sentinel */
+};
+
+static PyTypeObject PyTaskType = {
+    PyObject_HEAD_INIT(NULL) 0,  /* ob_size */
+    "task.Task",                 /* tp_name */
+    sizeof(PyTask),              /* tp_basicsize */
+    0,                           /* tp_itemsize */
+    (destructor) PyTask_dealloc, /* tp_dealloc */
+    0,                           /* tp_print */
+    0,                           /* tp_getattr */
+    0,                           /* tp_setattr */
+    0,                           /* tp_compare */
+    0,                           /* tp_repr */
+    0,                           /* tp_as_number */
+    0,                           /* tp_as_sequence */
+    0,                           /* tp_as_mapping */
+    0,                           /* tp_hash */
+    0,                           /* tp_call */
+    0,                           /* tp_str */
+    0,                           /* tp_getattro */
+    0,                           /* tp_setattro */
+    0,                           /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,          /* tp_flags */
+    "Task object",               /* tp_doc */
+    0,                           /* tp_traverse */
+    0,                           /* tp_clear */
+    0,                           /* tp_richcompare */
+    0,                           /* tp_weaklistoffset */
+    0,                           /* tp_iter */
+    0,                           /* tp_iternext */
+    PyTask_methods,              /* tp_methods */
+    0,                           /* tp_members */
+    0,                           /* tp_getset */
+    0,                           /* tp_base */
+    0,                           /* tp_dict */
+    0,                           /* tp_descr_get */
+    0,                           /* tp_descr_set */
+    0,                           /* tp_dictoffset */
+    (initproc) PyTask_init,      /* tp_init */
+    0,                           /* tp_alloc */
+    PyType_GenericNew,           /* tp_new */
+};
+
+/* Create a PyTask from a C struct. The resulting PyTask takes ownership of the
+ * task_spec and will deallocate the task_spec in the PyTask destructor. */
+PyObject *PyTask_make(task_spec *task_spec) {
+  PyTask *result = PyObject_New(PyTask, &PyTaskType);
+  result = (PyTask *) PyObject_Init((PyObject *) result, &PyTaskType);
+  result->spec = task_spec;
+  return (PyObject *) result;
+}
+
+/* Define the methods for the module. */
+
+#define SIZE_LIMIT 100
+#define NUM_ELEMENTS_LIMIT 1000
+
+/**
+ * This method checks if a Python object is sufficiently simple that it can be
+ * serialized and passed by value as an argument to a task (without being put in
+ * the object store). The details of which objects are sufficiently simple are
+ * defined by this method and are not particularly important. But for
+ * performance reasons, it is better to place "small" objects in the task itself
+ * and "large" objects in the object store.
+ *
+ * @param value The Python object in question.
+ * @param num_elements_contained If this method returns 1, then the number of
+ *        objects recursively contained within this object will be added to the
+ *        value at this address. This is used to make sure that we do not
+ *        serialize objects that are too large.
+ * @return 0 if the object cannot be serialized in the task and 1 if it can.
+ */
+int is_simple_value(PyObject *value, int *num_elements_contained) {
+  *num_elements_contained += 1;
+  if (*num_elements_contained >= NUM_ELEMENTS_LIMIT) {
+    return 0;
+  }
+  if (PyInt_Check(value) || PyLong_Check(value) || value == Py_False ||
+      value == Py_True || PyFloat_Check(value) || value == Py_None) {
+    return 1;
+  }
+  if (PyString_CheckExact(value)) {
+    *num_elements_contained += PyString_Size(value);
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyUnicode_CheckExact(value)) {
+    *num_elements_contained += PyUnicode_GET_SIZE(value);
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyList_CheckExact(value) && PyList_Size(value) < SIZE_LIMIT) {
+    for (size_t i = 0; i < PyList_Size(value); ++i) {
+      if (!is_simple_value(PyList_GetItem(value, i), num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyDict_CheckExact(value) && PyDict_Size(value) < SIZE_LIMIT) {
+    PyObject *key, *val;
+    Py_ssize_t pos = 0;
+    while (PyDict_Next(value, &pos, &key, &val)) {
+      if (!is_simple_value(key, num_elements_contained) ||
+          !is_simple_value(val, num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  if (PyTuple_CheckExact(value) && PyTuple_Size(value) < SIZE_LIMIT) {
+    for (size_t i = 0; i < PyTuple_Size(value); ++i) {
+      if (!is_simple_value(PyTuple_GetItem(value, i), num_elements_contained)) {
+        return 0;
+      }
+    }
+    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
+  }
+  return 0;
+}
+
+PyObject *check_simple_value(PyObject *self, PyObject *args) {
+  PyObject *value;
+  if (!PyArg_ParseTuple(args, "O", &value)) {
+    return NULL;
+  }
+  int num_elements_contained = 0;
+  if (is_simple_value(value, &num_elements_contained)) {
+    Py_RETURN_TRUE;
+  }
+  Py_RETURN_FALSE;
+}
+
+static PyMethodDef common_methods[] = {
+    {"check_simple_value", check_simple_value, METH_VARARGS,
+     "Should the object be passed by value?"},
+    {NULL} /* Sentinel */
+};
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+
+PyMODINIT_FUNC initcommon(void) {
+  PyObject *m;
+
+  if (PyType_Ready(&PyTaskType) < 0)
+    return;
+
+  if (PyType_Ready(&PyObjectIDType) < 0)
+    return;
+
+  m = Py_InitModule3("common", common_methods,
+                     "Example module that creates an extension type.");
+
+  Py_INCREF(&PyTaskType);
+  PyModule_AddObject(m, "Task", (PyObject *) &PyTaskType);
+
+  Py_INCREF(&PyObjectIDType);
+  PyModule_AddObject(m, "ObjectID", (PyObject *) &PyObjectIDType);
+
+  char common_error[] = "common.error";
+  CommonError = PyErr_NewException(common_error, NULL, NULL);
+  Py_INCREF(CommonError);
+  PyModule_AddObject(m, "common_error", CommonError);
+}
diff --git a/lib/python/types.h b/lib/python/common_extension.h
similarity index 79%
rename from lib/python/types.h
rename to lib/python/common_extension.h
index 9e30581b5..578c548d0 100644
--- a/lib/python/types.h
+++ b/lib/python/common_extension.h
@@ -1,5 +1,5 @@
-#ifndef TYPES_H
-#define TYPES_H
+#ifndef COMMON_EXTENSION_H
+#define COMMON_EXTENSION_H
 
 #include <Python.h>
 #include "marshal.h"
@@ -30,4 +30,6 @@ PyObject *PyObjectID_make(object_id object_id);
 
 PyObject *check_simple_value(PyObject *self, PyObject *args);
 
-#endif /* TYPES_H */
+PyObject *PyTask_make(task_spec *task_spec);
+
+#endif /* COMMON_EXTENSION_H */
diff --git a/lib/python/object_id.c b/lib/python/object_id.c
deleted file mode 100644
index bd7db9bc7..000000000
--- a/lib/python/object_id.c
+++ /dev/null
@@ -1,91 +0,0 @@
-#include "types.h"
-
-int PyObjectToUniqueID(PyObject *object, object_id *objectid) {
-  if (PyObject_IsInstance(object, (PyObject *) &PyObjectIDType)) {
-    *objectid = ((PyObjectID *) object)->object_id;
-    return 1;
-  } else {
-    PyErr_SetString(PyExc_TypeError, "must be an ObjectID");
-    return 0;
-  }
-}
-
-static int PyObjectID_init(PyObjectID *self, PyObject *args, PyObject *kwds) {
-  const char *data;
-  int size;
-  if (!PyArg_ParseTuple(args, "s#", &data, &size)) {
-    return -1;
-  }
-  if (size != UNIQUE_ID_SIZE) {
-    PyErr_SetString(CommonError,
-                    "ObjectID: object id string needs to have length 20");
-    return -1;
-  }
-  memcpy(&self->object_id.id[0], data, UNIQUE_ID_SIZE);
-  return 0;
-}
-
-/* create PyObjectID from C */
-PyObject *PyObjectID_make(object_id object_id) {
-  PyObjectID *result = PyObject_New(PyObjectID, &PyObjectIDType);
-  result = (PyObjectID *) PyObject_Init((PyObject *) result, &PyObjectIDType);
-  result->object_id = object_id;
-  return (PyObject *) result;
-}
-
-static PyObject *PyObjectID_id(PyObject *self) {
-  PyObjectID *s = (PyObjectID *) self;
-  return PyString_FromStringAndSize((char *) &s->object_id.id[0],
-                                    UNIQUE_ID_SIZE);
-}
-
-static PyMethodDef PyObjectID_methods[] = {
-    {"id", (PyCFunction) PyObjectID_id, METH_NOARGS,
-     "Return the hash associated with this ObjectID"},
-    {NULL} /* Sentinel */
-};
-
-static PyMemberDef PyObjectID_members[] = {
-    {NULL} /* Sentinel */
-};
-
-PyTypeObject PyObjectIDType = {
-    PyObject_HEAD_INIT(NULL) 0, /* ob_size */
-    "common.ObjectID",          /* tp_name */
-    sizeof(PyObjectID),         /* tp_basicsize */
-    0,                          /* tp_itemsize */
-    0,                          /* tp_dealloc */
-    0,                          /* tp_print */
-    0,                          /* tp_getattr */
-    0,                          /* tp_setattr */
-    0,                          /* tp_compare */
-    0,                          /* tp_repr */
-    0,                          /* tp_as_number */
-    0,                          /* tp_as_sequence */
-    0,                          /* tp_as_mapping */
-    0,                          /* tp_hash */
-    0,                          /* tp_call */
-    0,                          /* tp_str */
-    0,                          /* tp_getattro */
-    0,                          /* tp_setattro */
-    0,                          /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,         /* tp_flags */
-    "ObjectID object",          /* tp_doc */
-    0,                          /* tp_traverse */
-    0,                          /* tp_clear */
-    0,                          /* tp_richcompare */
-    0,                          /* tp_weaklistoffset */
-    0,                          /* tp_iter */
-    0,                          /* tp_iternext */
-    PyObjectID_methods,         /* tp_methods */
-    PyObjectID_members,         /* tp_members */
-    0,                          /* tp_getset */
-    0,                          /* tp_base */
-    0,                          /* tp_dict */
-    0,                          /* tp_descr_get */
-    0,                          /* tp_descr_set */
-    0,                          /* tp_dictoffset */
-    (initproc) PyObjectID_init, /* tp_init */
-    0,                          /* tp_alloc */
-    PyType_GenericNew,          /* tp_new */
-};
diff --git a/lib/python/serialization.c b/lib/python/serialization.c
deleted file mode 100644
index 82cf6417c..000000000
--- a/lib/python/serialization.c
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "types.h"
-
-/* TODO(pcm): Add limit on total number of elements. */
-
-#define SIZE_LIMIT 100
-#define NUM_ELEMENTS_LIMIT 1000
-
-/**
- * This method checks if a Python object is sufficiently simple that it can be
- * serialized and passed by value as an argument to a task (without being put in
- * the object store). The details of which objects are sufficiently simple are
- * defined by this method and are not particularly important. But for
- * performance reasons, it is better to place "small" objects in the task itself
- * and "large" objects in the object store.
- *
- * @param value The Python object in question.
- * @param num_elements_contained If this method returns 1, then the number of
- *        objects recursively contained within this object will be added to the
- *        value at this address. This is used to make sure that we do not
- *        serialize objects that are too large.
- * @return 0 if the object cannot be serialized in the task and 1 if it can.
- */
-int is_simple_value(PyObject *value, int *num_elements_contained) {
-  *num_elements_contained += 1;
-  if (*num_elements_contained >= NUM_ELEMENTS_LIMIT) {
-    return 0;
-  }
-  if (PyInt_Check(value) || PyLong_Check(value) || value == Py_False ||
-      value == Py_True || PyFloat_Check(value) || value == Py_None) {
-    return 1;
-  }
-  if (PyString_CheckExact(value)) {
-    *num_elements_contained += PyString_Size(value);
-    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
-  }
-  if (PyUnicode_CheckExact(value)) {
-    *num_elements_contained += PyUnicode_GET_SIZE(value);
-    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
-  }
-  if (PyList_CheckExact(value) && PyList_Size(value) < SIZE_LIMIT) {
-    for (size_t i = 0; i < PyList_Size(value); ++i) {
-      if (!is_simple_value(PyList_GetItem(value, i), num_elements_contained)) {
-        return 0;
-      }
-    }
-    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
-  }
-  if (PyDict_CheckExact(value) && PyDict_Size(value) < SIZE_LIMIT) {
-    PyObject *key, *val;
-    Py_ssize_t pos = 0;
-    while (PyDict_Next(value, &pos, &key, &val)) {
-      if (!is_simple_value(key, num_elements_contained) ||
-          !is_simple_value(val, num_elements_contained)) {
-        return 0;
-      }
-    }
-    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
-  }
-  if (PyTuple_CheckExact(value) && PyTuple_Size(value) < SIZE_LIMIT) {
-    for (size_t i = 0; i < PyTuple_Size(value); ++i) {
-      if (!is_simple_value(PyTuple_GetItem(value, i), num_elements_contained)) {
-        return 0;
-      }
-    }
-    return (*num_elements_contained < NUM_ELEMENTS_LIMIT);
-  }
-  return 0;
-}
-
-PyObject *check_simple_value(PyObject *self, PyObject *args) {
-  PyObject *value;
-  if (!PyArg_ParseTuple(args, "O", &value)) {
-    return NULL;
-  }
-  int num_elements_contained = 0;
-  if (is_simple_value(value, &num_elements_contained)) {
-    Py_RETURN_TRUE;
-  }
-  Py_RETURN_FALSE;
-}
diff --git a/lib/python/setup.py b/lib/python/setup.py
index 38af43f34..db915b06b 100644
--- a/lib/python/setup.py
+++ b/lib/python/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages, Extension
 
 common_module = Extension("common",
-                          sources=["object_id.c", "serialization.c", "task.c"],
+                          sources=["common_extension.c"],
                           include_dirs=["../../", "../../thirdparty"],
                           extra_objects=["../../build/libcommon.a"],
                           extra_compile_args=["--std=c99", "-Werror"])
diff --git a/lib/python/task.c b/lib/python/task.c
deleted file mode 100644
index ef685b493..000000000
--- a/lib/python/task.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <Python.h>
-#include "node.h"
-
-#include "types.h"
-#include "task.h"
-#include "utarray.h"
-#include "utstring.h"
-
-PyObject *CommonError;
-
-#define MARSHAL_VERSION 2
-
-static int PyTask_init(PyTask *self, PyObject *args, PyObject *kwds) {
-  function_id function_id;
-  /* Arguments of the task (can be PyObjectIDs or Python values). */
-  PyObject *arguments;
-  /* Array of pointers to string representations of pass-by-value args. */
-  UT_array *val_repr_ptrs;
-  utarray_new(val_repr_ptrs, &ut_ptr_icd);
-  int num_returns;
-  if (!PyArg_ParseTuple(args, "O&Oi", &PyObjectToUniqueID, &function_id,
-                        &arguments, &num_returns)) {
-    return -1;
-  }
-  size_t size = PyList_Size(arguments);
-  /* Determine the size of pass by value data in bytes. */
-  size_t value_data_bytes = 0;
-  for (size_t i = 0; i < size; ++i) {
-    PyObject *arg = PyList_GetItem(arguments, i);
-    if (!PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
-      PyObject *data = PyMarshal_WriteObjectToString(arg, MARSHAL_VERSION);
-      value_data_bytes += PyString_Size(data);
-      utarray_push_back(val_repr_ptrs, &data);
-    }
-  }
-  /* Construct the task specification. */
-  int val_repr_index = 0;
-  self->spec =
-      alloc_task_spec(function_id, size, num_returns, value_data_bytes);
-  for (size_t i = 0; i < size; ++i) {
-    PyObject *arg = PyList_GetItem(arguments, i);
-    if (PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
-      task_args_add_ref(self->spec, ((PyObjectID *) arg)->object_id);
-    } else {
-      PyObject *data =
-          *((PyObject **) utarray_eltptr(val_repr_ptrs, val_repr_index));
-      task_args_add_val(self->spec, (uint8_t *) PyString_AS_STRING(data),
-                        PyString_GET_SIZE(data));
-      Py_DECREF(data);
-      val_repr_index += 1;
-    }
-  }
-  utarray_free(val_repr_ptrs);
-  return 0;
-}
-
-static void PyTask_dealloc(PyTask *self) {
-  free_task_spec(self->spec);
-  Py_TYPE(self)->tp_free((PyObject *) self);
-}
-
-static PyObject *PyTask_function_id(PyObject *self) {
-  function_id function_id = *task_function(((PyTask *) self)->spec);
-  return PyObjectID_make(function_id);
-}
-
-static PyObject *PyTask_arguments(PyObject *self, PyObject *args) {
-  int arg_index;
-  task_spec *spec = ((PyTask *) self)->spec;
-  if (!PyArg_ParseTuple(args, "i", &arg_index)) {
-    return NULL;
-  }
-  if (task_arg_type(spec, arg_index) == ARG_BY_REF) {
-    object_id object_id = *task_arg_id(spec, arg_index);
-    return PyObjectID_make(object_id);
-  } else {
-    PyObject *s = PyMarshal_ReadObjectFromString(
-        (char *) task_arg_val(spec, arg_index),
-        (Py_ssize_t) task_arg_length(spec, arg_index));
-    Py_DECREF(s);
-    Py_RETURN_NONE;
-  }
-}
-
-static PyObject *PyTask_returns(PyObject *self, PyObject *args) {
-  int ret_index;
-  if (!PyArg_ParseTuple(args, "i", &ret_index)) {
-    return NULL;
-  }
-  object_id object_id = *task_return(((PyTask *) self)->spec, ret_index);
-  return PyObjectID_make(object_id);
-}
-
-static PyMethodDef PyTask_methods[] = {
-    {"function_id", (PyCFunction) PyTask_function_id, METH_NOARGS,
-     "Return the function id associated with this task."},
-    {"arguments", (PyCFunction) PyTask_arguments, METH_VARARGS,
-     "Return the i-th argument of the task."},
-    {"returns", (PyCFunction) PyTask_returns, METH_VARARGS,
-     "Return the i-th object reference of the task."},
-    {NULL} /* Sentinel */
-};
-
-static PyTypeObject PyTaskType = {
-    PyObject_HEAD_INIT(NULL) 0,  /* ob_size */
-    "task.Task",                 /* tp_name */
-    sizeof(PyTask),              /* tp_basicsize */
-    0,                           /* tp_itemsize */
-    (destructor) PyTask_dealloc, /* tp_dealloc */
-    0,                           /* tp_print */
-    0,                           /* tp_getattr */
-    0,                           /* tp_setattr */
-    0,                           /* tp_compare */
-    0,                           /* tp_repr */
-    0,                           /* tp_as_number */
-    0,                           /* tp_as_sequence */
-    0,                           /* tp_as_mapping */
-    0,                           /* tp_hash */
-    0,                           /* tp_call */
-    0,                           /* tp_str */
-    0,                           /* tp_getattro */
-    0,                           /* tp_setattro */
-    0,                           /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,          /* tp_flags */
-    "Task object",               /* tp_doc */
-    0,                           /* tp_traverse */
-    0,                           /* tp_clear */
-    0,                           /* tp_richcompare */
-    0,                           /* tp_weaklistoffset */
-    0,                           /* tp_iter */
-    0,                           /* tp_iternext */
-    PyTask_methods,              /* tp_methods */
-    0,                           /* tp_members */
-    0,                           /* tp_getset */
-    0,                           /* tp_base */
-    0,                           /* tp_dict */
-    0,                           /* tp_descr_get */
-    0,                           /* tp_descr_set */
-    0,                           /* tp_dictoffset */
-    (initproc) PyTask_init,      /* tp_init */
-    0,                           /* tp_alloc */
-    PyType_GenericNew,           /* tp_new */
-};
-
-static PyMethodDef common_methods[] = {
-    {"check_simple_value", check_simple_value, METH_VARARGS,
-     "Should the object be passed by value?"},
-    {NULL} /* Sentinel */
-};
-
-#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
-#define PyMODINIT_FUNC void
-#endif
-
-PyMODINIT_FUNC initcommon(void) {
-  PyObject *m;
-
-  if (PyType_Ready(&PyTaskType) < 0)
-    return;
-
-  if (PyType_Ready(&PyObjectIDType) < 0)
-    return;
-
-  m = Py_InitModule3("common", common_methods,
-                     "Example module that creates an extension type.");
-
-  Py_INCREF(&PyTaskType);
-  PyModule_AddObject(m, "Task", (PyObject *) &PyTaskType);
-
-  Py_INCREF(&PyObjectIDType);
-  PyModule_AddObject(m, "ObjectID", (PyObject *) &PyObjectIDType);
-
-  char common_error[] = "common.error";
-  CommonError = PyErr_NewException(common_error, NULL, NULL);
-  Py_INCREF(CommonError);
-  PyModule_AddObject(m, "common_error", CommonError);
-}
diff --git a/test/test.py b/test/test.py
index bbed21376..a40d2045c 100644
--- a/test/test.py
+++ b/test/test.py
@@ -38,7 +38,7 @@ COMPLEX_OBJECTS = (BASE_COMPLEX_OBJECTS +
                    TUPLE_COMPLEX_OBJECTS +
                    DICT_COMPLEX_OBJECTS)
 
-class TestPlasmaClient(unittest.TestCase):
+class TestSerialization(unittest.TestCase):
 
   def test_serialize_by_value(self):
 
@@ -47,5 +47,55 @@ class TestPlasmaClient(unittest.TestCase):
     for val in COMPLEX_OBJECTS:
       self.assertFalse(common.check_simple_value(val))
 
+class TestObjectID(unittest.TestCase):
+
+  def test_create_object_id(self):
+    object_id = common.ObjectID(20 * "a")
+
+class TestTask(unittest.TestCase):
+
+  def test_create_task(self):
+    # TODO(rkn): The function ID should be a FunctionID object, not an ObjectID.
+    function_id = common.ObjectID(20 * "a")
+    object_ids = [common.ObjectID(20 * chr(i)) for i in range(256)]
+    args_list = [
+      [],
+      1 * [1],
+      10 * [1],
+      100 * [1],
+      1000 * [1],
+      1 * ["a"],
+      10 * ["a"],
+      100 * ["a"],
+      1000 * ["a"],
+      [1, 1.3, 2L, 1L << 100, "hi", u"hi", [1, 2]],
+      object_ids[:1],
+      object_ids[:2],
+      object_ids[:3],
+      object_ids[:4],
+      object_ids[:5],
+      object_ids[:10],
+      object_ids[:100],
+      object_ids[:256],
+      [1, object_ids[0]],
+      [object_ids[0], "a"],
+      [1, object_ids[0], "a"],
+      [object_ids[0], 1, object_ids[1], "a"],
+      object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
+      object_ids + 100 * ["a"] + object_ids
+    ]
+    for args in args_list:
+      for num_return_vals in [0, 1, 2, 3, 5, 10, 100]:
+        task = common.Task(function_id, args, num_return_vals)
+        self.assertEqual(function_id.id(), task.function_id().id())
+        retrieved_args = task.arguments()
+        self.assertEqual(num_return_vals, len(task.returns()))
+        self.assertEqual(len(args), len(retrieved_args))
+        for i in range(len(retrieved_args)):
+          if isinstance(retrieved_args[i], common.ObjectID):
+            self.assertEqual(retrieved_args[i].id(), args[i].id())
+          else:
+            self.assertEqual(retrieved_args[i], args[i])
+
 if __name__ == "__main__":
   unittest.main(verbosity=2)

From 7be1a93d64ca36fc639e11f81de1483e0bd17b8c Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Thu, 6 Oct 2016 19:16:09 -0700
Subject: [PATCH 70/91] Move common C extension module into a different C file.
 (#35)

* Move common C extension module into a different C file so that the actual definitions can be more easily included in other C extensions.

* Rename common_extension_module -> common_module.
---
 lib/python/common_extension.c | 36 +--------------------------------
 lib/python/common_extension.h |  2 ++
 lib/python/common_module.c    | 38 +++++++++++++++++++++++++++++++++++
 lib/python/setup.py           |  2 +-
 4 files changed, 42 insertions(+), 36 deletions(-)
 create mode 100644 lib/python/common_module.c

diff --git a/lib/python/common_extension.c b/lib/python/common_extension.c
index 556fea443..46ae9e560 100644
--- a/lib/python/common_extension.c
+++ b/lib/python/common_extension.c
@@ -197,7 +197,7 @@ static PyMethodDef PyTask_methods[] = {
     {NULL} /* Sentinel */
 };
 
-static PyTypeObject PyTaskType = {
+PyTypeObject PyTaskType = {
     PyObject_HEAD_INIT(NULL) 0,  /* ob_size */
     "task.Task",                 /* tp_name */
     sizeof(PyTask),              /* tp_basicsize */
@@ -325,37 +325,3 @@ PyObject *check_simple_value(PyObject *self, PyObject *args) {
   }
   Py_RETURN_FALSE;
 }
-
-static PyMethodDef common_methods[] = {
-    {"check_simple_value", check_simple_value, METH_VARARGS,
-     "Should the object be passed by value?"},
-    {NULL} /* Sentinel */
-};
-
-#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
-#define PyMODINIT_FUNC void
-#endif
-
-PyMODINIT_FUNC initcommon(void) {
-  PyObject *m;
-
-  if (PyType_Ready(&PyTaskType) < 0)
-    return;
-
-  if (PyType_Ready(&PyObjectIDType) < 0)
-    return;
-
-  m = Py_InitModule3("common", common_methods,
-                     "Example module that creates an extension type.");
-
-  Py_INCREF(&PyTaskType);
-  PyModule_AddObject(m, "Task", (PyObject *) &PyTaskType);
-
-  Py_INCREF(&PyObjectIDType);
-  PyModule_AddObject(m, "ObjectID", (PyObject *) &PyObjectIDType);
-
-  char common_error[] = "common.error";
-  CommonError = PyErr_NewException(common_error, NULL, NULL);
-  Py_INCREF(CommonError);
-  PyModule_AddObject(m, "common_error", CommonError);
-}
diff --git a/lib/python/common_extension.h b/lib/python/common_extension.h
index 578c548d0..1fce38e42 100644
--- a/lib/python/common_extension.h
+++ b/lib/python/common_extension.h
@@ -24,6 +24,8 @@ typedef struct {
 
 extern PyTypeObject PyObjectIDType;
 
+extern PyTypeObject PyTaskType;
+
 int PyObjectToUniqueID(PyObject *object, object_id *objectid);
 
 PyObject *PyObjectID_make(object_id object_id);
diff --git a/lib/python/common_module.c b/lib/python/common_module.c
new file mode 100644
index 000000000..d5222cd87
--- /dev/null
+++ b/lib/python/common_module.c
@@ -0,0 +1,38 @@
+#include <Python.h>
+#include "node.h"
+
+#include "common_extension.h"
+
+static PyMethodDef common_methods[] = {
+    {"check_simple_value", check_simple_value, METH_VARARGS,
+     "Should the object be passed by value?"},
+    {NULL} /* Sentinel */
+};
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+
+PyMODINIT_FUNC initcommon(void) {
+  PyObject *m;
+
+  if (PyType_Ready(&PyTaskType) < 0)
+    return;
+
+  if (PyType_Ready(&PyObjectIDType) < 0)
+    return;
+
+  m = Py_InitModule3("common", common_methods,
+                     "A module for common types. This is used for testing.");
+
+  Py_INCREF(&PyTaskType);
+  PyModule_AddObject(m, "Task", (PyObject *) &PyTaskType);
+
+  Py_INCREF(&PyObjectIDType);
+  PyModule_AddObject(m, "ObjectID", (PyObject *) &PyObjectIDType);
+
+  char common_error[] = "common.error";
+  CommonError = PyErr_NewException(common_error, NULL, NULL);
+  Py_INCREF(CommonError);
+  PyModule_AddObject(m, "common_error", CommonError);
+}
diff --git a/lib/python/setup.py b/lib/python/setup.py
index db915b06b..8ef8a1abd 100644
--- a/lib/python/setup.py
+++ b/lib/python/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages, Extension
 
 common_module = Extension("common",
-                          sources=["common_extension.c"],
+                          sources=["common_module.c", "common_extension.c"],
                           include_dirs=["../../", "../../thirdparty"],
                           extra_objects=["../../build/libcommon.a"],
                           extra_compile_args=["--std=c99", "-Werror"])

From 18934c3a4566a201f1f5dcf7425420d699fb6ab1 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 7 Oct 2016 11:00:46 -0700
Subject: [PATCH 71/91] Make photon client into a C extension. (#8)

* Make photon client into a C extension.

* Fix formatting.

* Rename extension from PhotonClient to Photon.

* Update common submodule.

* Fix Makefile to compile with fPIC.

* Update common submodule.

* Compile C extension against common.

* Fix formatting.

* Remove unnecessary include.

* Update common submodule and rename Photon -> PhotonClient.

* Drop global interpreter lock during get_task.
---
 .travis.yml                   |  12 ++-
 Makefile                      |   9 ++-
 common                        |   2 +-
 install-dependencies.sh       |  21 +++++
 lib/python/photon.py          |  99 ------------------------
 lib/python/photon_extension.c | 140 ++++++++++++++++++++++++++++++++++
 lib/python/setup.py           |  14 ++++
 test/test.py                  |  55 +++++++++++--
 8 files changed, 238 insertions(+), 114 deletions(-)
 create mode 100755 install-dependencies.sh
 delete mode 100644 lib/python/photon.py
 create mode 100644 lib/python/photon_extension.c
 create mode 100644 lib/python/setup.py

diff --git a/.travis.yml b/.travis.yml
index caab2e356..360027785 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -44,11 +44,17 @@ matrix:
         - python test/test.py valgrind
 
 install:
+  - ./install-dependencies.sh
   - make
-
-script:
+  - cd common/lib/python
+  - python setup.py install --user
+  - cd ../../..
+  - cd lib/python
+  - python setup.py install --user
+  - cd ../..
   - cd common
   - make test
   - cd ..
-  - source setup-env.sh
+
+script:
   - python test/test.py
diff --git a/Makefile b/Makefile
index ef5de50a7..436502d6a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,11 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Icommon/thirdparty
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Icommon/thirdparty -fPIC
 BUILD = build
 
-all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.so
+all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.a
 
-$(BUILD)/photon_client.so: photon_client.h photon_client.c common
-	$(CC) $(CFLAGS) photon_client.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/photon_client.so
+$(BUILD)/photon_client.a: photon_client.o
+	ar rcs $(BUILD)/photon_client.a photon_client.o
 
 $(BUILD)/photon_scheduler: photon.h photon_scheduler.c common
 	$(CC) $(CFLAGS) -o $@ photon_scheduler.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty -Icommon/
@@ -17,5 +17,6 @@ common: FORCE
 clean:
 	cd common; make clean
 	rm -r $(BUILD)/*
+	rm *.o
 
 FORCE:
diff --git a/common b/common
index 4204500d2..7be1a93d6 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 4204500d23be7726e27598badb691d29d08a0ad7
+Subproject commit 7be1a93d64ca36fc639e11f81de1483e0bd17b8c
diff --git a/install-dependencies.sh b/install-dependencies.sh
new file mode 100755
index 000000000..f84da1684
--- /dev/null
+++ b/install-dependencies.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+
+platform="unknown"
+unamestr="$(uname)"
+if [[ "$unamestr" == "Linux" ]]; then
+  echo "Platform is linux."
+  platform="linux"
+elif [[ "$unamestr" == "Darwin" ]]; then
+  echo "Platform is macosx."
+  platform="macosx"
+else
+  echo "Unrecognized platform."
+  exit 1
+fi
+
+if [[ $platform == "linux" ]]; then
+  sudo apt-get update
+  sudo apt-get install -y git python-dev
+fi
diff --git a/lib/python/photon.py b/lib/python/photon.py
deleted file mode 100644
index 36f06ff2a..000000000
--- a/lib/python/photon.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import ctypes
-import os
-
-photon_client_library_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/photon_client.so")
-photon_client_library = ctypes.cdll.LoadLibrary(photon_client_library_path)
-photon_client_library.alloc_task_spec.restype = ctypes.c_void_p
-photon_client_library.photon_connect.restype = ctypes.c_void_p
-photon_client_library.photon_submit.restype = None
-photon_client_library.photon_get_task.restype = ctypes.c_void_p
-
-ID = ctypes.c_ubyte * 20
-
-buffer_from_read_write_memory = ctypes.pythonapi.PyBuffer_FromReadWriteMemory
-buffer_from_read_write_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
-buffer_from_read_write_memory.restype = ctypes.py_object
-
-buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
-buffer_from_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
-buffer_from_memory.restype = ctypes.py_object
-
-photon_client_library.task_function.restype = ctypes.c_void_p
-photon_client_library.task_num_args.restype = ctypes.c_int64
-photon_client_library.task_num_returns.restype = ctypes.c_int64
-photon_client_library.task_arg_type.restype = ctypes.c_int8
-photon_client_library.task_arg_id.restype = ctypes.c_void_p
-photon_client_library.task_arg_val.restype = ctypes.c_void_p
-photon_client_library.task_arg_length.restype = ctypes.c_void_p
-photon_client_library.task_return.restype = ctypes.c_void_p
-
-
-class TaskInfo(object):
-  def __init__(self, function_id, args, return_ids):
-    self.function_id = function_id
-    self.args = args
-    self.return_ids = return_ids
-
-def extract_task(c_task):
-  function_id = buffer_from_memory(photon_client_library.task_function(c_task), 20)[:]
-  num_args = photon_client_library.task_num_args(c_task)
-  num_returns = photon_client_library.task_num_returns(c_task)
-  arg_vals_and_ids = []
-  for i in range(num_args):
-    arg_type = photon_client_library.task_arg_type(c_task, i)
-    if arg_type == 0:
-      arg_id = buffer_from_memory(photon_client_library.task_arg_id(c_task, i), 20)
-      arg_vals_and_ids.append((arg_type, arg_id))
-    elif arg_type == 1:
-      arg_val = photon_client_library.task_arg_val(c_task, i)[:]
-      arg_length = photon_client_library.task_arg_length(c_task, i)
-      arg_value = buffer_from_memory(arg_val, arg_length)[:]
-      arg_vals_and_ids.append((arg_type, arg_value))
-    else:
-      raise Exception("arg_type must be 0 or 1")
-  return_ids = []
-  for i in range(num_returns):
-    ret_id = buffer_from_memory(photon_client_library.task_return(c_task, i), 20)
-    return_ids.append(ret_id[:])
-  return TaskInfo(function_id, arg_vals_and_ids, return_ids)
-
-class UniqueID(ctypes.Structure):
-  _fields_ = [("unique_id", ID)]
-
-def make_id(string):
-  if len(string) != 20:
-    raise Exception("PlasmaIDs must be 20 characters long")
-  unique_id = map(ord, string)
-  return UniqueID(unique_id=ID(*unique_id))
-
-class Task(object):
-  def __init__(self, function_id, args, return_ids):
-    function_id = make_id(function_id)
-    self.task_spec = ctypes.c_void_p(photon_client_library.alloc_task_spec(function_id, len(args), 1, 0))
-    for arg in args:
-      photon_client_library.task_args_add_ref(self.task_spec, make_id(arg))
-
-    # Add return IDs. This may not be the appropriate place for this.
-    num_returns = photon_client_library.task_num_returns(self.task_spec)
-    for i in range(num_returns):
-      ret_id = buffer_from_read_write_memory(photon_client_library.task_return(self.task_spec, i), 20)
-      for j in range(20):
-        ret_id[j] = return_ids[i][j]
-
-  def __del__(self):
-    photon_client_library.free_task_spec(self.task_spec)
-
-class PhotonClient(object):
-
-  def __init__(self, socket_name):
-    self.photon_conn = ctypes.c_void_p(photon_client_library.photon_connect(socket_name))
-
-  def submit(self, function_id, args, return_ids):
-    task = Task(function_id, args, return_ids)
-    photon_client_library.photon_submit(self.photon_conn, task.task_spec)
-
-  def get_task(self):
-    c_task = ctypes.c_void_p(photon_client_library.photon_get_task(self.photon_conn))
-    task = c_task # TODO Extract the actual task. EXTRACT...(c_task)
-    # photon_client_library.free_task_spec(c_task)
-    return extract_task(task)
diff --git a/lib/python/photon_extension.c b/lib/python/photon_extension.c
new file mode 100644
index 000000000..4aa199887
--- /dev/null
+++ b/lib/python/photon_extension.c
@@ -0,0 +1,140 @@
+#include <Python.h>
+
+#include "common_extension.h"
+#include "photon_client.h"
+#include "task.h"
+
+PyObject *PhotonError;
+
+// clang-format off
+typedef struct {
+  PyObject_HEAD
+  photon_conn *photon_connection;
+} PyPhotonClient;
+// clang-format on
+
+static int PyPhotonClient_init(PyPhotonClient *self, PyObject *args,
+                               PyObject *kwds) {
+  char *socket_name;
+  if (!PyArg_ParseTuple(args, "s", &socket_name)) {
+    return -1;
+  }
+  self->photon_connection = photon_connect(socket_name);
+  return 0;
+}
+
+static void PyPhotonClient_dealloc(PyPhotonClient *self) {
+  free(((PyPhotonClient *)self)->photon_connection);
+  Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+static PyObject *PyPhotonClient_submit(PyObject *self, PyObject *args) {
+  PyObject *py_task;
+  if (!PyArg_ParseTuple(args, "O", &py_task)) {
+    return NULL;
+  }
+  photon_submit(((PyPhotonClient *)self)->photon_connection,
+                ((PyTask *)py_task)->spec);
+  Py_RETURN_NONE;
+}
+
+// clang-format off
+static PyObject *PyPhotonClient_get_task(PyObject *self) {
+  task_spec *task_spec;
+  /* Drop the global interpreter lock while we get a task because
+   * photon_get_task may block for a long time. */
+  Py_BEGIN_ALLOW_THREADS
+  task_spec = photon_get_task(((PyPhotonClient *)self)->photon_connection);
+  Py_END_ALLOW_THREADS
+  return PyTask_make(task_spec);
+}
+// clang-format on
+
+static PyMethodDef PyPhotonClient_methods[] = {
+    {"submit", (PyCFunction)PyPhotonClient_submit, METH_VARARGS,
+     "Submit a task to the local scheduler."},
+    {"get_task", (PyCFunction)PyPhotonClient_get_task, METH_NOARGS,
+     "Get a task from the local scheduler."},
+    {NULL} /* Sentinel */
+};
+
+static PyTypeObject PyPhotonClientType = {
+    PyObject_HEAD_INIT(NULL) 0,         /* ob_size */
+    "photon.PhotonClient",              /* tp_name */
+    sizeof(PyPhotonClient),             /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    (destructor)PyPhotonClient_dealloc, /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_compare */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    0,                                  /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    "PhotonClient object",              /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    0,                                  /* tp_iter */
+    0,                                  /* tp_iternext */
+    PyPhotonClient_methods,             /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    0,                                  /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    (initproc)PyPhotonClient_init,      /* tp_init */
+    0,                                  /* tp_alloc */
+    PyType_GenericNew,                  /* tp_new */
+};
+
+static PyMethodDef photon_methods[] = {
+    {"check_simple_value", check_simple_value, METH_VARARGS,
+     "Should the object be passed by value?"},
+    {NULL} /* Sentinel */
+};
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+
+PyMODINIT_FUNC initphoton(void) {
+  PyObject *m;
+
+  if (PyType_Ready(&PyTaskType) < 0)
+    return;
+
+  if (PyType_Ready(&PyObjectIDType) < 0)
+    return;
+
+  if (PyType_Ready(&PyPhotonClientType) < 0)
+    return;
+
+  m = Py_InitModule3("photon", photon_methods,
+                     "A module for the local scheduler.");
+
+  Py_INCREF(&PyTaskType);
+  PyModule_AddObject(m, "Task", (PyObject *)&PyTaskType);
+
+  Py_INCREF(&PyObjectIDType);
+  PyModule_AddObject(m, "ObjectID", (PyObject *)&PyObjectIDType);
+
+  Py_INCREF(&PyPhotonClientType);
+  PyModule_AddObject(m, "PhotonClient", (PyObject *)&PyPhotonClientType);
+
+  char photon_error[] = "photon.error";
+  PhotonError = PyErr_NewException(photon_error, NULL, NULL);
+  Py_INCREF(PhotonError);
+  PyModule_AddObject(m, "photon_error", PhotonError);
+}
diff --git a/lib/python/setup.py b/lib/python/setup.py
new file mode 100644
index 000000000..d94fd5a83
--- /dev/null
+++ b/lib/python/setup.py
@@ -0,0 +1,14 @@
+from setuptools import setup, find_packages, Extension
+
+photon_module = Extension("photon",
+                          sources=["photon_extension.c", "../../common/lib/python/common_extension.c"],
+                          include_dirs=["../../", "../../common/",
+                                        "../../common/thirdparty/",
+                                        "../../common/lib/python"],
+                          extra_objects=["../../build/photon_client.a", "../../common/build/libcommon.a"],
+                          extra_compile_args=["--std=c99", "-Werror"])
+
+setup(name="Photon",
+      version="0.1",
+      description="Photon library for Ray",
+      ext_modules=[photon_module])
diff --git a/test/test.py b/test/test.py
index bbd164c37..1d4cbc5af 100644
--- a/test/test.py
+++ b/test/test.py
@@ -45,13 +45,54 @@ class TestPhotonClient(unittest.TestCase):
       self.p2.kill()
     
 
-  def test_create(self):
-    l = [20 * "a", 20 * "b", 20 * "c"]
-    r = [20 * "e", 20 * "f"]
-    # Submit a task.
-    self.photon_client.submit(20 * "d", l, r)
-    # Get the task.
-    task = self.photon_client.get_task()
+  def test_submit_and_get_task(self):
+    # TODO(rkn): This should be a FunctionID.
+    function_id = photon.ObjectID(20 * "a")
+    object_ids = [photon.ObjectID(20 * chr(i)) for i in range(256)]
+    args_list = [
+      [],
+      1 * [1],
+      10 * [1],
+      100 * [1],
+      1000 * [1],
+      1 * ["a"],
+      10 * ["a"],
+      100 * ["a"],
+      1000 * ["a"],
+      [1, 1.3, 2L, 1L << 100, "hi", u"hi", [1, 2]],
+      object_ids[:1],
+      object_ids[:2],
+      object_ids[:3],
+      object_ids[:4],
+      object_ids[:5],
+      object_ids[:10],
+      object_ids[:100],
+      object_ids[:256],
+      [1, object_ids[0]],
+      [object_ids[0], "a"],
+      [1, object_ids[0], "a"],
+      [object_ids[0], 1, object_ids[1], "a"],
+      object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
+      object_ids + 100 * ["a"] + object_ids
+    ]
+
+    for args in args_list:
+      for num_return_vals in [0, 1, 2, 3, 5, 10, 100]:
+        task = photon.Task(function_id, args, num_return_vals)
+        # Submit a task.
+        self.photon_client.submit(task)
+        # Get the task.
+        new_task = self.photon_client.get_task()
+        self.assertEqual(task.function_id().id(), new_task.function_id().id())
+        retrieved_args = new_task.arguments()
+        returns = new_task.returns()
+        self.assertEqual(len(args), len(retrieved_args))
+        self.assertEqual(num_return_vals, len(returns))
+        for i in range(len(retrieved_args)):
+          if isinstance(args[i], photon.ObjectID):
+            self.assertEqual(args[i].id(), retrieved_args[i].id())
+          else:
+            self.assertEqual(args[i], retrieved_args[i])
 
 if __name__ == "__main__":
   if len(sys.argv) > 1:

From e9a336a34473113df2186ae16a38ec7b6baddcd8 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 7 Oct 2016 12:31:57 -0700
Subject: [PATCH 72/91] Fix incorrect free. (#9)

---
 photon_scheduler.c |  1 -
 test/test.py       | 12 +++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/photon_scheduler.c b/photon_scheduler.c
index b876bc9f2..1ce5fb916 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -74,7 +74,6 @@ void handle_submit_task(local_scheduler_state *s, task_spec *task) {
                   (uint8_t *)task);
     /* Remove the available worker from the queue and free the struct. */
     utarray_pop_back(s->available_worker_queue);
-    free(worker);
   } else {
     /* Add the task to the task queue. This passes ownership of the task queue.
      * And the task will be freed when it is assigned to a worker. */
diff --git a/test/test.py b/test/test.py
index 1d4cbc5af..2db6df314 100644
--- a/test/test.py
+++ b/test/test.py
@@ -43,7 +43,7 @@ class TestPhotonClient(unittest.TestCase):
       os._exit(self.p2.returncode)
     else:
       self.p2.kill()
-    
+
 
   def test_submit_and_get_task(self):
     # TODO(rkn): This should be a FunctionID.
@@ -94,6 +94,16 @@ class TestPhotonClient(unittest.TestCase):
           else:
             self.assertEqual(args[i], retrieved_args[i])
 
+    # Submit all of the tasks.
+    for args in args_list:
+      for num_return_vals in [0, 1, 2, 3, 5, 10, 100]:
+        task = photon.Task(function_id, args, num_return_vals)
+        self.photon_client.submit(task)
+    # Get all of the tasks.
+    for args in args_list:
+      for num_return_vals in [0, 1, 2, 3, 5, 10, 100]:
+        new_task = self.photon_client.get_task()
+
 if __name__ == "__main__":
   if len(sys.argv) > 1:
     # pop the argument so we don't mess with unittest's own argument parser

From 7f515113fa3a465a5af6ed5e87fab84788683f0c Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 7 Oct 2016 15:20:56 -0700
Subject: [PATCH 73/91] Generate return object IDs in the task constructor.
 (#36)

---
 lib/python/common_extension.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lib/python/common_extension.c b/lib/python/common_extension.c
index 46ae9e560..efc4b9e01 100644
--- a/lib/python/common_extension.c
+++ b/lib/python/common_extension.c
@@ -131,6 +131,7 @@ static int PyTask_init(PyTask *self, PyObject *args, PyObject *kwds) {
   int val_repr_index = 0;
   self->spec =
       alloc_task_spec(function_id, size, num_returns, value_data_bytes);
+  /* Add the task arguments. */
   for (size_t i = 0; i < size; ++i) {
     PyObject *arg = PyList_GetItem(arguments, i);
     if (PyObject_IsInstance(arg, (PyObject *) &PyObjectIDType)) {
@@ -145,6 +146,14 @@ static int PyTask_init(PyTask *self, PyObject *args, PyObject *kwds) {
     }
   }
   utarray_free(val_repr_ptrs);
+  /* Generate and add the object IDs for the return values. */
+  for (size_t i = 0; i < num_returns; ++i) {
+    /* TODO(rkn): Later, this should be computed as a deterministic hash of (1)
+     * the contents of the task, (2) the index i, and (3) a counter of the
+     * number of tasks launched so far by the parent task. For now, we generate
+     * it randomly. */
+    *task_return(self->spec, i) = globally_unique_id();
+  }
   return 0;
 }
 

From 94ad12ff646f84d4f663ce836d41a55253617ab8 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Tue, 11 Oct 2016 17:58:14 -0700
Subject: [PATCH 74/91] Get valgrind in place for plasma (#35)

* plasma store: refactor and get valgrind in place

* fix valgrind errors
---
 .travis.yml          |  11 +++++
 src/plasma_client.c  |   1 +
 src/plasma_manager.c |   9 ++++
 src/plasma_store.c   | 109 ++++++++++++++++++++++++++-----------------
 src/plasma_store.h   |  49 +++++++++++++------
 test/test.py         |  74 ++++++++++++++++++++++++-----
 6 files changed, 184 insertions(+), 69 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5873bfca2..464724a27 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,6 +29,17 @@ matrix:
       install: []
       script:
         - .travis/check-git-clang-format-output.sh
+    - os: linux
+      dist: trusty
+      python: "2.7"
+      env: VALGRIND=1
+      before_install:
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq valgrind
+      script:
+        - make
+        - source setup-env.sh
+        - python test/test.py valgrind
 
 install:
   - make
diff --git a/src/plasma_client.c b/src/plasma_client.c
index bd706ded7..43c448e40 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -115,6 +115,7 @@ void plasma_get(plasma_store_conn *conn,
   plasma_send_request(conn->conn, PLASMA_GET, &req);
   plasma_reply reply;
   int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
+  CHECKM(fd != -1, "recv not successful");
   plasma_object *object = &reply.object;
   *data = lookup_or_mmap(conn, fd, object->handle.store_fd,
                          object->handle.mmap_size) +
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 6bda50d93..921d328ee 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -8,6 +8,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -343,7 +344,15 @@ void start_server(const char *store_socket_name,
   event_loop_run(loop);
 }
 
+/* Report "success" to valgrind. */
+void signal_handler(int signal) {
+  if (signal == SIGTERM) {
+    exit(0);
+  }
+}
+
 int main(int argc, char *argv[]) {
+  signal(SIGTERM, signal_handler);
   /* Socket name of the plasma store this manager is connected to. */
   char *store_socket_name = NULL;
   /* IP address of this node. */
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 128a13292..9edc2c258 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -18,6 +18,7 @@
 #include <sys/un.h>
 #include <getopt.h>
 #include <string.h>
+#include <signal.h>
 #include <limits.h>
 #include <poll.h>
 
@@ -28,7 +29,7 @@
 #include "utarray.h"
 #include "fling.h"
 #include "malloc.h"
-#include "plasma.h"
+#include "plasma_store.h"
 
 void *dlmalloc(size_t);
 void dlfree(void *);
@@ -61,13 +62,6 @@ typedef struct {
   uint8_t *pointer;
 } object_table_entry;
 
-/* Objects that are still being written by their owner process. */
-object_table_entry *open_objects = NULL;
-
-/* Objects that have already been sealed by their owner process and
- * can now be shared with other processes. */
-object_table_entry *sealed_objects = NULL;
-
 typedef struct {
   /* Object id of this object. */
   object_id object_id;
@@ -77,11 +71,29 @@ typedef struct {
   UT_hash_handle handle;
 } object_notify_entry;
 
-/* Objects that processes are waiting for. */
-object_notify_entry *objects_notify = NULL;
+struct plasma_store_state {
+  /* Event loop of the plasma store. */
+  event_loop *loop;
+  /* Objects that are still being written by their owner process. */
+  object_table_entry *open_objects;
+  /* Objects that have already been sealed by their owner process and
+   * can now be shared with other processes. */
+  object_table_entry *sealed_objects;
+  /* Objects that processes are waiting for. */
+  object_notify_entry *objects_notify;
+};
+
+plasma_store_state *init_plasma_store(event_loop *loop) {
+  plasma_store_state *state = malloc(sizeof(plasma_store_state));
+  state->loop = loop;
+  state->open_objects = NULL;
+  state->sealed_objects = NULL;
+  state->objects_notify = NULL;
+  return state;
+}
 
 /* Create a new object buffer in the hash table. */
-plasma_object create_object(int conn,
+plasma_object create_object(plasma_store_state *s,
                             object_id object_id,
                             int64_t data_size,
                             int64_t metadata_size,
@@ -89,7 +101,7 @@ plasma_object create_object(int conn,
   LOG_DEBUG("creating object"); /* TODO(pcm): add object_id here */
 
   object_table_entry *entry;
-  HASH_FIND(handle, open_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, s->open_objects, &object_id, sizeof(object_id), entry);
   CHECKM(entry == NULL, "Cannot create object twice.");
 
   uint8_t *pointer = dlmalloc(data_size + metadata_size);
@@ -108,9 +120,9 @@ plasma_object create_object(int conn,
   entry->fd = fd;
   entry->map_size = map_size;
   entry->offset = offset;
-  HASH_ADD(handle, open_objects, object_id, sizeof(object_id), entry);
-  object_handle handle = {.store_fd = fd, .mmap_size = map_size};
-  result->handle = handle;
+  HASH_ADD(handle, s->open_objects, object_id, sizeof(object_id), entry);
+  result->handle.store_fd = fd;
+  result->handle.mmap_size = map_size;
   result->data_offset = offset;
   result->metadata_offset = offset + data_size;
   result->data_size = data_size;
@@ -118,13 +130,15 @@ plasma_object create_object(int conn,
 }
 
 /* Get an object from the hash table. */
-int get_object(int conn, object_id object_id, plasma_object *result) {
+int get_object(plasma_store_state *s,
+               int conn,
+               object_id object_id,
+               plasma_object *result) {
   object_table_entry *entry;
-  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
   if (entry) {
-    object_handle handle = {.store_fd = entry->fd,
-                            .mmap_size = entry->map_size};
-    result->handle = handle;
+    result->handle.store_fd = entry->fd;
+    result->handle.mmap_size = entry->map_size;
     result->data_offset = entry->offset;
     result->metadata_offset = entry->offset + entry->info.data_size;
     result->data_size = entry->info.data_size;
@@ -133,14 +147,14 @@ int get_object(int conn, object_id object_id, plasma_object *result) {
   } else {
     object_notify_entry *notify_entry;
     LOG_DEBUG("object not in hash table of sealed objects");
-    HASH_FIND(handle, objects_notify, &object_id, sizeof(object_id),
+    HASH_FIND(handle, s->objects_notify, &object_id, sizeof(object_id),
               notify_entry);
     if (!notify_entry) {
       notify_entry = malloc(sizeof(object_notify_entry));
       memset(notify_entry, 0, sizeof(object_notify_entry));
       utarray_new(notify_entry->conns, &ut_int_icd);
       memcpy(&notify_entry->object_id, &object_id, 20);
-      HASH_ADD(handle, objects_notify, object_id, sizeof(object_id),
+      HASH_ADD(handle, s->objects_notify, object_id, sizeof(object_id),
                notify_entry);
     }
     utarray_push_back(notify_entry->conns, &conn);
@@ -149,62 +163,64 @@ int get_object(int conn, object_id object_id, plasma_object *result) {
 }
 
 /* Check if an object is present. */
-int contains_object(int conn, object_id object_id) {
+int contains_object(plasma_store_state *s, object_id object_id) {
   object_table_entry *entry;
-  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
   return entry ? OBJECT_FOUND : OBJECT_NOT_FOUND;
 }
 
 /* Seal an object that has been created in the hash table. */
-void seal_object(int conn,
+void seal_object(plasma_store_state *s,
                  object_id object_id,
                  UT_array **conns,
                  plasma_object *result) {
   LOG_DEBUG("sealing object");  // TODO(pcm): add object_id here
   object_table_entry *entry;
-  HASH_FIND(handle, open_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, s->open_objects, &object_id, sizeof(object_id), entry);
   if (!entry) {
     return; /* TODO(pcm): return error */
   }
-  HASH_DELETE(handle, open_objects, entry);
-  HASH_ADD(handle, sealed_objects, object_id, sizeof(object_id), entry);
+  HASH_DELETE(handle, s->open_objects, entry);
+  HASH_ADD(handle, s->sealed_objects, object_id, sizeof(object_id), entry);
   /* Inform processes that the object is ready now. */
   object_notify_entry *notify_entry;
-  HASH_FIND(handle, objects_notify, &object_id, sizeof(object_id),
+  HASH_FIND(handle, s->objects_notify, &object_id, sizeof(object_id),
             notify_entry);
   if (!notify_entry) {
     *conns = NULL;
     return;
   }
-  object_handle handle = {.store_fd = entry->fd, .mmap_size = entry->map_size};
-  result->handle = handle;
+  result->handle.store_fd = entry->fd;
+  result->handle.mmap_size = entry->map_size;
   result->data_offset = entry->offset;
   result->metadata_offset = entry->offset + entry->info.data_size;
   result->data_size = entry->info.data_size;
   result->metadata_size = entry->info.metadata_size;
-  HASH_DELETE(handle, objects_notify, notify_entry);
+  HASH_DELETE(handle, s->objects_notify, notify_entry);
   *conns = notify_entry->conns;
   free(notify_entry);
 }
 
 /* Delete an object that has been created in the hash table. */
-void delete_object(int conn, object_id object_id) {
+void delete_object(plasma_store_state *s, object_id object_id) {
   LOG_DEBUG("deleting object");  // TODO(rkn): add object_id here
   object_table_entry *entry;
-  HASH_FIND(handle, sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
   /* TODO(rkn): This should probably not fail, but should instead throw an
    * error. Maybe we should also support deleting objects that have been created
    * but not sealed. */
   CHECKM(entry != NULL, "To delete an object it must have been sealed.");
   uint8_t *pointer = entry->pointer;
-  HASH_DELETE(handle, sealed_objects, entry);
+  HASH_DELETE(handle, s->sealed_objects, entry);
   dlfree(pointer);
+  free(entry);
 }
 
 void process_message(event_loop *loop,
                      int client_sock,
                      void *context,
                      int events) {
+  plasma_store_state *s = context;
   int64_t type;
   int64_t length;
   plasma_request *req;
@@ -215,26 +231,26 @@ void process_message(event_loop *loop,
 
   switch (type) {
   case PLASMA_CREATE:
-    create_object(client_sock, req->object_id, req->data_size,
-                  req->metadata_size, &reply.object);
+    create_object(s, req->object_id, req->data_size, req->metadata_size,
+                  &reply.object);
     send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
             sizeof(reply));
     break;
   case PLASMA_GET:
-    if (get_object(client_sock, req->object_id, &reply.object) ==
+    if (get_object(s, client_sock, req->object_id, &reply.object) ==
         OBJECT_FOUND) {
       send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
               sizeof(reply));
     }
     break;
   case PLASMA_CONTAINS:
-    if (contains_object(client_sock, req->object_id) == OBJECT_FOUND) {
+    if (contains_object(s, req->object_id) == OBJECT_FOUND) {
       reply.has_object = 1;
     }
     plasma_send_reply(client_sock, &reply);
     break;
   case PLASMA_SEAL:
-    seal_object(client_sock, req->object_id, &conns, &reply.object);
+    seal_object(s, req->object_id, &conns, &reply.object);
     if (conns) {
       for (int *c = (int *) utarray_front(conns); c != NULL;
            c = (int *) utarray_next(conns, c)) {
@@ -245,7 +261,7 @@ void process_message(event_loop *loop,
     }
     break;
   case PLASMA_DELETE:
-    delete_object(client_sock, req->object_id);
+    delete_object(s, req->object_id);
     break;
   case DISCONNECT_CLIENT: {
     LOG_DEBUG("Disconnecting client on fd %d", client_sock);
@@ -269,16 +285,25 @@ void new_client_connection(event_loop *loop,
   LOG_DEBUG("new connection with fd %d", new_socket);
 }
 
+/* Report "success" to valgrind. */
+void signal_handler(int signal) {
+  if (signal == SIGTERM) {
+    exit(0);
+  }
+}
+
 void start_server(char *socket_name) {
   int socket = bind_ipc_sock(socket_name);
   CHECK(socket >= 0);
   event_loop *loop = event_loop_create();
+  plasma_store_state *state = init_plasma_store(loop);
   event_loop_add_file(loop, socket, EVENT_LOOP_READ, new_client_connection,
-                      NULL);
+                      state);
   event_loop_run(loop);
 }
 
 int main(int argc, char *argv[]) {
+  signal(SIGTERM, signal_handler);
   char *socket_name = NULL;
   int c;
   while ((c = getopt(argc, argv, "s:")) != -1) {
diff --git a/src/plasma_store.h b/src/plasma_store.h
index 6b8a0df1d..ae3f03e25 100644
--- a/src/plasma_store.h
+++ b/src/plasma_store.h
@@ -3,47 +3,66 @@
 
 #include "plasma.h"
 
+typedef struct plasma_store_state plasma_store_state;
+
 /**
  * Create a new object:
  *
+ * @param s The plasma store state.
  * @param object_id Object ID of the object to be created.
  * @param data_size Size in bytes of the object to be created.
  * @param metadata_size Size in bytes of the object metadata.
+ * @return The new plasma object.
  */
-void create_object(int conn,
-                   object_id object_id,
-                   int64_t data_size,
-                   int64_t metadata_size,
-                   plasma_object *result);
+plasma_object create_object(plasma_store_state *s,
+                            object_id object_id,
+                            int64_t data_size,
+                            int64_t metadata_size,
+                            plasma_object *result);
 
 /**
  * Get an object:
  *
+ * @param s The plasma store state.
+ * @param conn The client connection that requests the object.
  * @param object_id Object ID of the object to be gotten.
- *
- * Returns the status of the object (object_status in plasma.h).
+ * @return The status of the object (object_status in plasma.h).
  */
-int get_object(int conn, object_id object_id, plasma_object *result);
+int get_object(plasma_store_state *s,
+               int conn,
+               object_id object_id,
+               plasma_object *result);
 
 /**
  * Seal an object:
  *
+ * @param s The plasma store state.
  * @param object_id Object ID of the object to be sealed.
  * @param conns Returns the connection that are waiting for this object.
                 The caller is responsible for destroying this array.
- *
- * Should notify all the sockets waiting for the object.
+ * @return Void.
  */
-plasma_object seal_object(int conn,
-                          object_id object_id,
-                          UT_array **conns,
-                          plasma_object *result);
+void seal_object(plasma_store_state *s,
+                 object_id object_id,
+                 UT_array **conns,
+                 plasma_object *result);
 
 /**
  * Check if the plasma store contains an object:
  *
+ * @param s The plasma store state.
  * @param object_id Object ID that will be checked.
+ * @return OBJECT_FOUND if the object is in the store, OBJECT_NOT_FOUND if not
  */
-int contains_object(int conn, object_id object_id);
+int contains_object(plasma_store_state *s, object_id object_id);
+
+/**
+ * Delete an object from the plasma store:
+ *
+ * @param s The plasma store state.
+ * @param object_id Object ID of the object to be deleted.
+ * @return Void.
+ */
+void delete_object(plasma_store_state *s, object_id object_id);
 
 #endif /* PLASMA_STORE_H */
diff --git a/test/test.py b/test/test.py
index fcf0be437..75c072c65 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 
 import os
+import signal
 import socket
 import subprocess
 import sys
@@ -11,6 +12,8 @@ import tempfile
 
 import plasma
 
+USE_VALGRIND = False
+
 def random_object_id():
   return "".join([chr(random.randint(0, 255)) for _ in range(20)])
 
@@ -45,13 +48,24 @@ class TestPlasmaClient(unittest.TestCase):
     # Start Plasma.
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
     store_name = "/tmp/store{}".format(random.randint(0, 10000))
-    self.p = subprocess.Popen([plasma_store_executable, "-s", store_name])
+    command = [plasma_store_executable, "-s", store_name]
+    if USE_VALGRIND:
+      self.p = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--error-exitcode=1"] + command)
+      time.sleep(2.0)
+    else:
+      self.p = subprocess.Popen(command)
     # Connect to Plasma.
     self.plasma_client = plasma.PlasmaClient(store_name)
 
   def tearDown(self):
     # Kill the plasma store process.
-    self.p.kill()
+    if USE_VALGRIND:
+      self.p.send_signal(signal.SIGTERM)
+      self.p.wait()
+      if self.p.returncode != 0:
+        os._exit(-1)
+    else:
+      self.p.kill()
 
   def test_create(self):
     # Create an object id string.
@@ -174,15 +188,32 @@ class TestPlasmaManager(unittest.TestCase):
     plasma_store_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_store")
     store_name1 = "/tmp/store{}".format(random.randint(0, 10000))
     store_name2 = "/tmp/store{}".format(random.randint(0, 10000))
-    self.p2 = subprocess.Popen([plasma_store_executable, "-s", store_name1])
-    self.p3 = subprocess.Popen([plasma_store_executable, "-s", store_name2])
+    plasma_store_command1 = [plasma_store_executable, "-s", store_name1]
+    plasma_store_command2 = [plasma_store_executable, "-s", store_name2]
+
+    if USE_VALGRIND:
+      self.p2 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_store_command1)
+      self.p3 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_store_command2)
+    else:
+      self.p2 = subprocess.Popen(plasma_store_command1)
+      self.p3 = subprocess.Popen(plasma_store_command2)
+
     # Start two PlasmaManagers.
     self.port1 = random.randint(10000, 50000)
     self.port2 = random.randint(10000, 50000)
     plasma_manager_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_manager")
-    self.p4 = subprocess.Popen([plasma_manager_executable, "-s", store_name1, "-m", "127.0.0.1", "-p", str(self.port1)])
-    self.p5 = subprocess.Popen([plasma_manager_executable, "-s", store_name2, "-m", "127.0.0.1", "-p", str(self.port2)])
-    time.sleep(0.1)
+    plasma_manager_command1 = [plasma_manager_executable, "-s", store_name1, "-m", "127.0.0.1", "-p", str(self.port1)]
+    plasma_manager_command2 = [plasma_manager_executable, "-s", store_name2, "-m", "127.0.0.1", "-p", str(self.port2)]
+
+    if USE_VALGRIND:
+      self.p4 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_manager_command1)
+      self.p5 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_manager_command2)
+      time.sleep(2.0)
+    else:
+      self.p4 = subprocess.Popen(plasma_manager_command1)
+      self.p5 = subprocess.Popen(plasma_manager_command2)
+      time.sleep(0.1)
+
     # Connect two PlasmaClients.
     self.client1 = plasma.PlasmaClient(store_name1, "127.0.0.1", self.port1)
     self.client2 = plasma.PlasmaClient(store_name2, "127.0.0.1", self.port2)
@@ -190,10 +221,23 @@ class TestPlasmaManager(unittest.TestCase):
 
   def tearDown(self):
     # Kill the PlasmaStore and PlasmaManager processes.
-    self.p2.kill()
-    self.p3.kill()
-    self.p4.kill()
-    self.p5.kill()
+    if USE_VALGRIND:
+      self.p4.send_signal(signal.SIGTERM)
+      self.p4.wait()
+      self.p5.send_signal(signal.SIGTERM)
+      self.p5.wait()
+      self.p2.send_signal(signal.SIGTERM)
+      self.p2.wait()
+      self.p3.send_signal(signal.SIGTERM)
+      self.p3.wait()
+      if self.p2.returncode != 0 or self.p3.returncode != 0 or self.p4.returncode != 0 or self.p5.returncode != 0:
+        print("aborting due to valgrind error")
+        os._exit(-1)
+    else:
+      self.p2.kill()
+      self.p3.kill()
+      self.p4.kill()
+      self.p5.kill()
 
   def test_transfer(self):
     for _ in range(100):
@@ -226,7 +270,7 @@ class TestPlasmaManager(unittest.TestCase):
     # Create an object id string.
     object_id = random_object_id()
     # Create a new buffer.
-    memory_buffer = self.client1.create(object_id, 20000)
+    # memory_buffer = self.client1.create(object_id, 20000)
     # This test is commented out because it currently fails.
     # # Transferring the buffer before sealing it should fail.
     # self.assertRaises(Exception, lambda : self.manager1.transfer(1, object_id))
@@ -246,4 +290,10 @@ class TestPlasmaManager(unittest.TestCase):
     print("it took", b, "seconds to put and transfer the objects")
 
 if __name__ == "__main__":
+  if len(sys.argv) > 1:
+    # pop the argument so we don't mess with unittest's own argument parser
+    arg = sys.argv.pop()
+    if arg == "valgrind":
+      USE_VALGRIND = True
+      print("Using valgrind for tests")
   unittest.main(verbosity=2)

From 6290cab750566bbb65449122e767484649aee2d7 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 12 Oct 2016 00:23:40 -0700
Subject: [PATCH 75/91] fix compiler warning for linux

---
 event_loop.c       |  4 ++--
 event_loop.h       | 21 ++++++++++++---------
 logging.c          |  2 +-
 test/db_tests.c    |  4 ++--
 test/redis_tests.c |  4 ++--
 test/task_tests.c  |  2 +-
 6 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/event_loop.c b/event_loop.c
index 6928705e7..7e7835445 100644
--- a/event_loop.c
+++ b/event_loop.c
@@ -48,8 +48,8 @@ int64_t event_loop_add_timer(event_loop *loop,
   return aeCreateTimeEvent(loop, milliseconds, handler, context, NULL);
 }
 
-void event_loop_remove_timer(event_loop *loop, int64_t id) {
-  int err = aeDeleteTimeEvent(loop, id);
+void event_loop_remove_timer(event_loop *loop, timer_id timer_id) {
+  int err = aeDeleteTimeEvent(loop, timer_id);
   CHECK(err == AE_OK); /* timer id found? */
 }
 
diff --git a/event_loop.h b/event_loop.h
index bb6afdb93..10762ee89 100644
--- a/event_loop.h
+++ b/event_loop.h
@@ -4,6 +4,8 @@
 #include <stdint.h>
 #include "ae/ae.h"
 
+typedef long long timer_id;
+
 typedef aeEventLoop event_loop;
 
 /* File descriptor is readable. */
@@ -12,6 +14,9 @@ typedef aeEventLoop event_loop;
 /* File descriptor is writable. */
 #define EVENT_LOOP_WRITE AE_WRITABLE
 
+/* Constant specifying that the timer is done and it will be removed. */
+#define EVENT_LOOP_TIMER_DONE AE_NOMORE
+
 /* Signature of the handler that will be called when there is a new event
  * on the file descriptor that this handler has been registered for. The
  * context is the one that was passed into add_file by the user. The
@@ -24,10 +29,12 @@ typedef void (*event_loop_file_handler)(event_loop *loop,
 
 /* This handler will be called when a timer times out. The id of the timer
  * as well as the context that was specified when registering this handler
- * are passed as arguments. */
-typedef int64_t (*event_loop_timer_handler)(event_loop *loop,
-                                            int64_t id,
-                                            void *context);
+ * are passed as arguments. The return value is the number of milliseconds
+ * the timer shall be reset to, or EVENT_LOOP_TIMER_DONE if the timer shall
+ * not be triggered again. */
+typedef int (*event_loop_timer_handler)(event_loop *loop,
+                                        timer_id timer_id,
+                                        void *context);
 
 /* Create and return a new event loop. */
 event_loop *event_loop_create();
@@ -58,12 +65,8 @@ int64_t event_loop_add_timer(event_loop *loop,
                              event_loop_timer_handler handler,
                              void *context);
 
-/* Reset the timer timeout to a given number of milliseconds.
- * NOTE: This is not implemented yet. */
-void event_loop_reset_timer(event_loop *loop, int64_t id, int64_t milliseconds);
-
 /* Remove a registered time event handler from the event loop. */
-void event_loop_remove_timer(event_loop *loop, int64_t id);
+void event_loop_remove_timer(event_loop *loop, timer_id timer_id);
 
 /* Run the event loop. */
 void event_loop_run(event_loop *loop);
diff --git a/logging.c b/logging.c
index 1a8e96820..301ed4b11 100644
--- a/logging.c
+++ b/logging.c
@@ -12,7 +12,7 @@ static const char *log_fmt =
 
 struct ray_logger_impl {
   /* String that identifies this client type. */
-  char *client_type;
+  const char *client_type;
   /* Suppress all log messages below this level. */
   int log_level;
   /* Whether or not we have a direct connection to Redis. */
diff --git a/test/db_tests.c b/test/db_tests.c
index 6eb592e45..95e986e33 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -38,9 +38,9 @@ void test_callback(object_id object_id,
   free(manager_vector);
 }
 
-int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
+int timeout_handler(event_loop *loop, timer_id timer_id, void *context) {
   event_loop_stop(loop);
-  return -1;
+  return EVENT_LOOP_TIMER_DONE;
 }
 
 TEST object_table_lookup_test(void) {
diff --git a/test/redis_tests.c b/test/redis_tests.c
index 2277174c0..0a50c7d1b 100644
--- a/test/redis_tests.c
+++ b/test/redis_tests.c
@@ -86,9 +86,9 @@ void redis_accept_callback(event_loop *loop,
                       context);
 }
 
-int64_t timeout_handler(event_loop *loop, int64_t id, void *context) {
+int timeout_handler(event_loop *loop, timer_id timer_id, void *context) {
   event_loop_stop(loop);
-  return -1;
+  return EVENT_LOOP_TIMER_DONE;
 }
 
 TEST async_redis_socket_test(void) {
diff --git a/test/task_tests.c b/test/task_tests.c
index f72a0e2c2..759e9c8b7 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -49,7 +49,7 @@ TEST send_task(void) {
   *task_return(task, 1) = globally_unique_id();
   int fd[2];
   socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
-  write_message(fd[0], SUBMIT_TASK, task_size(task), task);
+  write_message(fd[0], SUBMIT_TASK, task_size(task), (uint8_t*) task);
   int64_t type;
   int64_t length;
   uint8_t *message;

From 50fb53fd915618070795de3a1e47757a74178dfb Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 12 Oct 2016 00:56:04 -0700
Subject: [PATCH 76/91] fix compiler warnings on macOS

---
 Makefile  | 2 +-
 logging.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index dae76e87b..9a0f75e06 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae -Wno-typedef-redefinition
 BUILD = build
 
 all: hiredis $(BUILD)/libcommon.a
diff --git a/logging.c b/logging.c
index 301ed4b11..09cad7856 100644
--- a/logging.c
+++ b/logging.c
@@ -1,5 +1,7 @@
 #include "logging.h"
 
+#include <stdint.h>
+#include <inttypes.h>
 #include <hiredis/hiredis.h>
 #include <utstring.h>
 
@@ -52,13 +54,13 @@ void ray_log(ray_logger *logger,
   UT_string *timestamp;
   utstring_new(timestamp);
   gettimeofday(&tv, NULL);
-  utstring_printf(timestamp, "%ld.%ld", tv.tv_sec, tv.tv_usec);
+  utstring_printf(timestamp, "%ld.%ld", tv.tv_sec, (long) tv.tv_usec);
 
   UT_string *origin_id;
   utstring_new(origin_id);
   if (logger->is_direct) {
     db_handle *db = (db_handle *) logger->conn;
-    utstring_printf(origin_id, "%ld:%s", db->client_id, "");
+    utstring_printf(origin_id, "%" PRId64 ":%s", db->client_id, "");
     redisAsyncCommand(db->context, NULL, NULL, log_fmt,
                       utstring_body(timestamp), logger->client_type,
                       utstring_body(origin_id), log_levels[log_level],

From 832888d4736da0133d91b6e1cacb1ebfef77c6e9 Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 12 Oct 2016 00:59:22 -0700
Subject: [PATCH 77/91] fix formatting

---
 event_loop.h      |  2 +-
 test/task_tests.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/event_loop.h b/event_loop.h
index 10762ee89..7f0659ba7 100644
--- a/event_loop.h
+++ b/event_loop.h
@@ -31,7 +31,7 @@ typedef void (*event_loop_file_handler)(event_loop *loop,
  * as well as the context that was specified when registering this handler
  * are passed as arguments. The return is the number of milliseconds the
  * timer shall be reset to or EVENT_LOOP_TIMER_DONE if the timer shall
- * not triggered again. */
+ * not be triggered again. */
 typedef int (*event_loop_timer_handler)(event_loop *loop,
                                         timer_id timer_id,
                                         void *context);
diff --git a/test/task_tests.c b/test/task_tests.c
index 759e9c8b7..77f43bbf0 100644
--- a/test/task_tests.c
+++ b/test/task_tests.c
@@ -19,10 +19,10 @@ TEST task_test(void) {
 
   unique_id arg1 = globally_unique_id();
   ASSERT(task_args_add_ref(task, arg1) == 0);
-  ASSERT(task_args_add_val(task, (uint8_t*) "hello", 5) == 1);
+  ASSERT(task_args_add_val(task, (uint8_t *) "hello", 5) == 1);
   unique_id arg2 = globally_unique_id();
   ASSERT(task_args_add_ref(task, arg2) == 2);
-  ASSERT(task_args_add_val(task, (uint8_t*) "world", 5) == 3);
+  ASSERT(task_args_add_val(task, (uint8_t *) "world", 5) == 3);
 
   unique_id ret0 = globally_unique_id();
   unique_id ret1 = globally_unique_id();
@@ -30,10 +30,10 @@ TEST task_test(void) {
   memcpy(task_return(task, 1), &ret1, sizeof(ret1));
 
   ASSERT(memcmp(task_arg_id(task, 0), &arg1, sizeof(arg1)) == 0);
-  ASSERT(memcmp(task_arg_val(task, 1), (uint8_t*) "hello",
+  ASSERT(memcmp(task_arg_val(task, 1), (uint8_t *) "hello",
                 task_arg_length(task, 1)) == 0);
   ASSERT(memcmp(task_arg_id(task, 2), &arg2, sizeof(arg2)) == 0);
-  ASSERT(memcmp(task_arg_val(task, 3), (uint8_t*) "world",
+  ASSERT(memcmp(task_arg_val(task, 3), (uint8_t *) "world",
                 task_arg_length(task, 3)) == 0);
 
   ASSERT(memcmp(task_return(task, 0), &ret0, sizeof(unique_id)) == 0);
@@ -49,7 +49,7 @@ TEST send_task(void) {
   *task_return(task, 1) = globally_unique_id();
   int fd[2];
   socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
-  write_message(fd[0], SUBMIT_TASK, task_size(task), (uint8_t*) task);
+  write_message(fd[0], SUBMIT_TASK, task_size(task), (uint8_t *) task);
   int64_t type;
   int64_t length;
   uint8_t *message;

From 1adafee6d33879f3f07712511c07e2f5ae3fa3c5 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Wed, 12 Oct 2016 12:58:15 -0700
Subject: [PATCH 78/91] Fix signature of create_object. (#39)

---
 src/plasma_store.c | 10 +++++-----
 src/plasma_store.h | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/plasma_store.c b/src/plasma_store.c
index 9edc2c258..ebb3bac0b 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -93,11 +93,11 @@ plasma_store_state *init_plasma_store(event_loop *loop) {
 }
 
 /* Create a new object buffer in the hash table. */
-plasma_object create_object(plasma_store_state *s,
-                            object_id object_id,
-                            int64_t data_size,
-                            int64_t metadata_size,
-                            plasma_object *result) {
+void create_object(plasma_store_state *s,
+                   object_id object_id,
+                   int64_t data_size,
+                   int64_t metadata_size,
+                   plasma_object *result) {
   LOG_DEBUG("creating object"); /* TODO(pcm): add object_id here */
 
   object_table_entry *entry;
diff --git a/src/plasma_store.h b/src/plasma_store.h
index ae3f03e25..0f4bfd82a 100644
--- a/src/plasma_store.h
+++ b/src/plasma_store.h
@@ -12,13 +12,13 @@ typedef struct plasma_store_state plasma_store_state;
  * @param object_id Object ID of the object to be created.
  * @param data_size Size in bytes of the object to be created.
  * @param metadata_size Size in bytes of the object metadata.
- * @return The new plasma object.
+ * @return Void.
  */
-plasma_object create_object(plasma_store_state *s,
-                            object_id object_id,
-                            int64_t data_size,
-                            int64_t metadata_size,
-                            plasma_object *result);
+void create_object(plasma_store_state *s,
+                   object_id object_id,
+                   int64_t data_size,
+                   int64_t metadata_size,
+                   plasma_object *result);
 
 /**
  * Get an object:

From 182985015cb751fde6cb61e236afaf496d5939aa Mon Sep 17 00:00:00 2001
From: Philipp Moritz <pcmoritz@gmail.com>
Date: Wed, 12 Oct 2016 13:20:57 -0700
Subject: [PATCH 79/91] Make warnings errors

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9a0f75e06..6982b7945 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae -Wno-typedef-redefinition
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -I. -Ithirdparty -Ithirdparty/ae -Wno-typedef-redefinition -Werror
 BUILD = build
 
 all: hiredis $(BUILD)/libcommon.a

From 5ad8e145ae08e69a83205da24c188c636e48f4aa Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 14 Oct 2016 19:27:17 -0700
Subject: [PATCH 80/91] Allow clients to subscribe to notifications about
 sealed objects. (#40)

* Create libplasma_client.a in Makefile.

* Implement plasma subscribe.

* Fixes and tests.

* Buffer notifications in the Plasma store when the socket send buffer is full.

* Fix formatting.

* Turn off -Werror in Makefile.

* Fixes.

* Fix formatting.
---
 Makefile             |  5 ++-
 lib/python/plasma.py | 31 +++++++++++++--
 src/plasma.h         |  2 +
 src/plasma_client.c  | 20 ++++++++++
 src/plasma_client.h  | 13 +++++++
 src/plasma_manager.c | 11 ++----
 src/plasma_store.c   | 89 +++++++++++++++++++++++++++++++++++++++++++-
 src/plasma_store.h   | 17 +++++++++
 test/test.py         | 17 ++++++++-
 9 files changed, 192 insertions(+), 13 deletions(-)

diff --git a/Makefile b/Makefile
index a91249059..c36d99e02 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
 CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I. -Icommon -Icommon/thirdparty
 BUILD = build
 
-all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example
+all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example $(BUILD)/libplasma_client.a
 
 debug: FORCE
 debug: CFLAGS += -DDEBUG=1
@@ -21,6 +21,9 @@ $(BUILD)/plasma_manager: src/plasma_manager.c src/plasma.h src/plasma_client.c s
 $(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c common
 	$(CC) $(CFLAGS) src/plasma_client.c src/fling.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/plasma_client.so
 
+$(BUILD)/libplasma_client.a: src/plasma_client.o src/fling.o
+	ar rcs $@ $^
+
 $(BUILD)/example: src/plasma_client.c src/plasma.h src/example.c src/fling.h src/fling.c common
 	$(CC) $(CFLAGS) src/plasma_client.c src/example.c src/fling.c common/build/libcommon.a -o $(BUILD)/example
 
diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 37fa8f72c..347715118 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -1,17 +1,19 @@
 import os
 import socket
 import ctypes
+import time
 
 Addr = ctypes.c_ubyte * 4
 
-ID = ctypes.c_ubyte * 20
+PLASMA_ID_SIZE = 20
+ID = ctypes.c_ubyte * PLASMA_ID_SIZE
 
 class PlasmaID(ctypes.Structure):
   _fields_ = [("plasma_id", ID)]
 
 def make_plasma_id(string):
-  if len(string) != 20:
-    raise Exception("PlasmaIDs must be 20 characters long")
+  if len(string) != PLASMA_ID_SIZE:
+    raise Exception("PlasmaIDs must be {} characters long".format(PLASMA_ID_SIZE))
   object_id = map(ord, string)
   return PlasmaID(plasma_id=ID(*object_id))
 
@@ -46,6 +48,7 @@ class PlasmaClient(object):
     self.client.plasma_contains.restype = None
     self.client.plasma_seal.restype = None
     self.client.plasma_delete.restype = None
+    self.client.plasma_subscribe.restype = ctypes.c_int
 
     self.buffer_from_memory = ctypes.pythonapi.PyBuffer_FromMemory
     self.buffer_from_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
@@ -161,3 +164,25 @@ class PlasmaClient(object):
     if self.manager_conn == -1:
       raise Exception("Not connected to the plasma manager socket")
     self.client.plasma_transfer(self.manager_conn, addr, port, make_plasma_id(object_id))
+
+  def subscribe(self):
+    """Subscribe to notifications about sealed objects."""
+    fd = self.client.plasma_subscribe(self.store_conn)
+    self.notification_sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM)
+    # Make the socket non-blocking.
+    self.notification_sock.setblocking(0)
+
+  def get_next_notification(self):
+    """Get the next notification from the notification socket."""
+    if not self.notification_sock:
+      raise Exception("To get notifications, first call subscribe.")
+    # Loop until we've read PLASMA_ID_SIZE bytes from the socket.
+    while True:
+      try:
+        message_data = self.notification_sock.recv(PLASMA_ID_SIZE)
+      except socket.error:
+        time.sleep(0.001)
+      else:
+        assert len(message_data) == PLASMA_ID_SIZE
+        break
+    return message_data
diff --git a/src/plasma.h b/src/plasma.h
index 6a39deb6c..8b6852f65 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -52,6 +52,8 @@ enum plasma_message_type {
   PLASMA_SEAL,
   /** Delete an object. */
   PLASMA_DELETE,
+  /** Subscribe to notifications about sealed objects. */
+  PLASMA_SUBSCRIBE,
   /** Request transfer to another store. */
   PLASMA_TRANSFER,
   /** Header for sending data. */
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 43c448e40..6c4ee4f1d 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -1,6 +1,7 @@
 /* PLASMA CLIENT: Client library for using the plasma store and manager */
 
 #include <assert.h>
+#include <fcntl.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -151,6 +152,25 @@ void plasma_delete(plasma_store_conn *conn, object_id object_id) {
   plasma_send_request(conn->conn, PLASMA_DELETE, &req);
 }
 
+int plasma_subscribe(plasma_store_conn *conn) {
+  int fd[2];
+  /* Create a non-blocking socket pair. This will only be used to send
+   * notifications from the Plasma store to the client. */
+  socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
+  /* Make the socket non-blocking. */
+  int flags = fcntl(fd[1], F_GETFL, 0);
+  CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
+  /* Tell the Plasma store about the subscription. */
+  plasma_request req = {};
+  plasma_send_request(conn->conn, PLASMA_SUBSCRIBE, &req);
+  /* Send the file descriptor that the Plasma store should use to push
+   * notifications about sealed objects to this client. */
+  send_fd(conn->conn, fd[1], NULL, 0);
+  /* Return the file descriptor that the client should use to read notifications
+   * about sealed objects. */
+  return fd[0];
+}
+
 plasma_store_conn *plasma_store_connect(const char *socket_name) {
   assert(socket_name);
   /* Try to connect to the Plasma store. If unsuccessful, retry several times.
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 44af5a1f0..36ecb1061 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -1,6 +1,8 @@
 #ifndef PLASMA_CLIENT_H
 #define PLASMA_CLIENT_H
 
+#include "plasma.h"
+
 typedef struct plasma_store_conn plasma_store_conn;
 
 /**
@@ -124,4 +126,15 @@ void plasma_seal(plasma_store_conn *conn, object_id object_id);
  */
 void plasma_delete(plasma_store_conn *conn, object_id object_id);
 
+/**
+ * Subscribe to notifications when objects are sealed in the object store.
+ * Whenever an object is sealed, a message will be written to the client socket
+ * that is returned by this method.
+ *
+ * @param conn The object containing the connection state.
+ * @return The file descriptor that the client should use to read notifications
+           from the object store about sealed objects.
+ */
+int plasma_subscribe(plasma_store_conn *conn);
+
 #endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 921d328ee..c7bdc7627 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -5,6 +5,7 @@
  * transfering an object to another object store comes in, it ships the data
  * using a new connection to the target object manager. */
 
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -12,7 +13,6 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <sys/types.h>
-#include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <strings.h>
@@ -319,13 +319,10 @@ void start_server(const char *store_socket_name,
   name.sin_family = AF_INET;
   name.sin_port = htons(port);
   name.sin_addr.s_addr = htonl(INADDR_ANY);
+  /* Make the socket non-blocking. */
+  int flags = fcntl(sock, F_GETFL, 0);
+  CHECK(fcntl(sock, F_SETFL, flags | O_NONBLOCK) == 0);
   int on = 1;
-  /* TODO(pcm): http://stackoverflow.com/q/1150635 */
-  if (ioctl(sock, FIONBIO, (char *) &on) < 0) {
-    LOG_ERR("ioctl failed");
-    close(sock);
-    exit(-1);
-  }
   setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
   if (bind(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
     LOG_ERR("could not bind socket");
diff --git a/src/plasma_store.c b/src/plasma_store.c
index ebb3bac0b..9419d9dd2 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
+#include <sys/types.h>
 #include <sys/un.h>
 #include <getopt.h>
 #include <string.h>
@@ -71,6 +72,20 @@ typedef struct {
   UT_hash_handle handle;
 } object_notify_entry;
 
+/* This is used to define the array of object IDs used to define the
+ * notification_queue type. */
+UT_icd object_id_icd = {sizeof(object_id), NULL, NULL, NULL};
+
+typedef struct {
+  /** Client file descriptor. This is used as a key for the hash table. */
+  int subscriber_fd;
+  /** The object IDs to notify the client about. We notify the client about the
+   *  IDs in the order that the objects were sealed. */
+  UT_array *object_ids;
+  /** Handle for the uthash table. */
+  UT_hash_handle hh;
+} notification_queue;
+
 struct plasma_store_state {
   /* Event loop of the plasma store. */
   event_loop *loop;
@@ -81,6 +96,10 @@ struct plasma_store_state {
   object_table_entry *sealed_objects;
   /* Objects that processes are waiting for. */
   object_notify_entry *objects_notify;
+  /** The pending notifications that have not been sent to subscribers because
+   *  the socket send buffers were full. This is a hash table from client file
+   *  descriptor to an array of object_ids to send to that client. */
+  notification_queue *pending_notifications;
 };
 
 plasma_store_state *init_plasma_store(event_loop *loop) {
@@ -89,6 +108,7 @@ plasma_store_state *init_plasma_store(event_loop *loop) {
   state->open_objects = NULL;
   state->sealed_objects = NULL;
   state->objects_notify = NULL;
+  state->pending_notifications = NULL;
   return state;
 }
 
@@ -182,7 +202,15 @@ void seal_object(plasma_store_state *s,
   }
   HASH_DELETE(handle, s->open_objects, entry);
   HASH_ADD(handle, s->sealed_objects, object_id, sizeof(object_id), entry);
-  /* Inform processes that the object is ready now. */
+
+  /* Inform all subscribers that a new object has been sealed. */
+  notification_queue *queue, *temp_queue;
+  HASH_ITER(hh, s->pending_notifications, queue, temp_queue) {
+    utarray_push_back(queue->object_ids, &object_id);
+    send_notifications(s->loop, queue->subscriber_fd, s, 0);
+  }
+
+  /* Inform processes getting this object that the object is ready now. */
   object_notify_entry *notify_entry;
   HASH_FIND(handle, s->objects_notify, &object_id, sizeof(object_id),
             notify_entry);
@@ -216,6 +244,62 @@ void delete_object(plasma_store_state *s, object_id object_id) {
   free(entry);
 }
 
+/* Send more notifications to a subscriber. */
+void send_notifications(event_loop *loop,
+                        int client_sock,
+                        void *context,
+                        int events) {
+  plasma_store_state *s = context;
+
+  notification_queue *queue;
+  HASH_FIND_INT(s->pending_notifications, &client_sock, queue);
+  CHECK(queue != NULL);
+
+  int num_processed = 0;
+  /* Loop over the array of pending notifications and send as many of them as
+   * possible. */
+  for (object_id *obj_id = (object_id *) utarray_front(queue->object_ids);
+       obj_id != NULL;
+       obj_id = (object_id *) utarray_next(queue->object_ids, obj_id)) {
+    /* Attempt to send a notification about this object ID. */
+    int nbytes = send(client_sock, obj_id, sizeof(object_id), 0);
+    if (nbytes >= 0) {
+      CHECK(nbytes == sizeof(object_id));
+    } else if (nbytes == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
+      LOG_DEBUG(
+          "The socket's send buffer is full, so we are caching this "
+          "notification and will send it later.");
+      break;
+    } else {
+      CHECKM(0, "This code should be unreachable.");
+    }
+    num_processed += 1;
+  }
+  /* Remove the sent notifications from the array. */
+  utarray_erase(queue->object_ids, 0, num_processed);
+}
+
+/* Subscribe to notifications about sealed objects. */
+void subscribe_to_updates(plasma_store_state *s, int conn) {
+  LOG_DEBUG("subscribing to updates");
+  int fd = recv_fd(conn, NULL, 0);
+  CHECKM(HASH_CNT(handle, s->open_objects) == 0,
+         "plasma_subscribe should be called before any objects are created.");
+  CHECKM(HASH_CNT(handle, s->sealed_objects) == 0,
+         "plasma_subscribe should be called before any objects are created.");
+  /* Create a new array to buffer notifications that can't be sent to the
+   * subscriber yet because the socket send buffer is full. TODO(rkn): the queue
+   * never gets freed. */
+  notification_queue *queue =
+      (notification_queue *) malloc(sizeof(notification_queue));
+  queue->subscriber_fd = fd;
+  utarray_new(queue->object_ids, &object_id_icd);
+  HASH_ADD_INT(s->pending_notifications, subscriber_fd, queue);
+  /* Add a callback to the event loop to send queued notifications whenever
+   * there is room in the socket's send buffer. */
+  event_loop_add_file(s->loop, fd, EVENT_LOOP_WRITE, send_notifications, s);
+}
+
 void process_message(event_loop *loop,
                      int client_sock,
                      void *context,
@@ -263,6 +347,9 @@ void process_message(event_loop *loop,
   case PLASMA_DELETE:
     delete_object(s, req->object_id);
     break;
+  case PLASMA_SUBSCRIBE:
+    subscribe_to_updates(s, client_sock);
+    break;
   case DISCONNECT_CLIENT: {
     LOG_DEBUG("Disconnecting client on fd %d", client_sock);
     event_loop_remove_file(loop, client_sock);
diff --git a/src/plasma_store.h b/src/plasma_store.h
index 0f4bfd82a..dd5e963fb 100644
--- a/src/plasma_store.h
+++ b/src/plasma_store.h
@@ -65,4 +65,21 @@ int contains_object(plasma_store_state *s, object_id object_id);
  */
 void delete_object(plasma_store_state *s, object_id object_id);
 
+/**
+ * Send notifications about sealed objects to the subscribers. This is called
+ * in seal_object. If the socket's send buffer is full, the notification will be
+ * buffered, and this will be called again when the send buffer has room.
+ *
+ * @param loop The Plasma store event loop.
+ * @param client_sock The file descriptor to send the notification to.
+ * @param context The plasma store global state.
+ * @param events This is needed for this function to have the signature of a
+          callback.
+ * @return Void.
+ */
+void send_notifications(event_loop *loop,
+                        int client_sock,
+                        void *context,
+                        int events);
+
 #endif /* PLASMA_STORE_H */
diff --git a/test/test.py b/test/test.py
index 75c072c65..4e08ed67e 100644
--- a/test/test.py
+++ b/test/test.py
@@ -3,6 +3,7 @@ from __future__ import print_function
 import os
 import signal
 import socket
+import struct
 import subprocess
 import sys
 import unittest
@@ -15,7 +16,7 @@ import plasma
 USE_VALGRIND = False
 
 def random_object_id():
-  return "".join([chr(random.randint(0, 255)) for _ in range(20)])
+  return "".join([chr(random.randint(0, 255)) for _ in range(plasma.PLASMA_ID_SIZE)])
 
 def generate_metadata(length):
   metadata = length * ["\x00"]
@@ -181,6 +182,20 @@ class TestPlasmaClient(unittest.TestCase):
       memory_buffer[0] = chr(0)
     self.assertRaises(Exception, illegal_assignment)
 
+  def test_subscribe(self):
+    # Subscribe to notifications from the Plasma Store.
+    sock = self.plasma_client.subscribe()
+    for i in [1, 10, 100, 1000, 10000, 100000]:
+      object_ids = [random_object_id() for _ in range(i)]
+      for object_id in object_ids:
+        # Create an object and seal it to trigger a notification.
+        self.plasma_client.create(object_id, 1000)
+        self.plasma_client.seal(object_id)
+      # Check that we received notifications for all of the objects.
+      for object_id in object_ids:
+        message_data = self.plasma_client.get_next_notification()
+        self.assertEqual(object_id, message_data)
+
 class TestPlasmaManager(unittest.TestCase):
 
   def setUp(self):

From f189ca746b57f22371ef10077aa535492bbd8421 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sat, 15 Oct 2016 11:44:51 -0700
Subject: [PATCH 81/91] Send a one byte message in send_fd because otherwise it
 sometimes hangs on Linux. (#41)

---
 src/plasma_client.c | 6 ++++--
 src/plasma_store.c  | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/plasma_client.c b/src/plasma_client.c
index 6c4ee4f1d..1cae73607 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -164,8 +164,10 @@ int plasma_subscribe(plasma_store_conn *conn) {
   plasma_request req = {};
   plasma_send_request(conn->conn, PLASMA_SUBSCRIBE, &req);
   /* Send the file descriptor that the Plasma store should use to push
-   * notifications about sealed objects to this client. */
-  send_fd(conn->conn, fd[1], NULL, 0);
+   * notifications about sealed objects to this client. We include a one byte
+   * message because otherwise it seems to hang on Linux. */
+  char dummy = '\0';
+  send_fd(conn->conn, fd[1], &dummy, 1);
   /* Return the file descriptor that the client should use to read notifications
    * about sealed objects. */
   return fd[0];
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 9419d9dd2..3bdc1364a 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -282,7 +282,8 @@ void send_notifications(event_loop *loop,
 /* Subscribe to notifications about sealed objects. */
 void subscribe_to_updates(plasma_store_state *s, int conn) {
   LOG_DEBUG("subscribing to updates");
-  int fd = recv_fd(conn, NULL, 0);
+  char dummy;
+  int fd = recv_fd(conn, &dummy, 1);
   CHECKM(HASH_CNT(handle, s->open_objects) == 0,
          "plasma_subscribe should be called before any objects are created.");
   CHECKM(HASH_CNT(handle, s->sealed_objects) == 0,

From e57b87928c7566137030136861a7a652e877d115 Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Tue, 18 Oct 2016 12:38:30 -0700
Subject: [PATCH 82/91] Fixes for implementing Plasma fetch (#39)

* Add ability to pass callback context to object table lookup

* Propagate errors during socket writes up to caller.

* Use recv and MSG_WAITALL flag instead of looping read

* Error checking in write_bytes

* Method to listen on a network port

* Revert "Use recv and MSG_WAITALL flag instead of looping read"

This reverts commit 32d9333bc6a185729aadb4b41b70b3d7f150a9c2.

* Some documentation

* Clearer documentation

* Fix bug where database clients were getting assigned the same ID

* Regression test for unique client IDs
---
 io.c                 | 161 ++++++++++++++++++++++++++++++++++---------
 io.h                 |   3 +-
 state/object_table.h |   6 +-
 state/redis.c        |  18 ++++-
 state/redis.h        |  11 +++
 test/db_tests.c      |  31 ++++++++-
 6 files changed, 188 insertions(+), 42 deletions(-)

diff --git a/io.c b/io.c
index 32f3ed4b9..1f1125bc3 100644
--- a/io.c
+++ b/io.c
@@ -8,14 +8,69 @@
 #include <stdio.h>
 #include <inttypes.h>
 #include <stdarg.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
 #include <utstring.h>
 
 #include "common.h"
 
-/* Binds to a Unix domain streaming socket at the given
- * pathname. Removes any existing file at the pathname. Returns
- * a file descriptor for the socket, or -1 if an error
- * occurred. */
+/**
+ * Binds to an Internet socket at the given port. Removes any existing file at
+ * the pathname. Returns a non-blocking file descriptor for the socket, or -1
+ * if an error occurred.
+ *
+ * @note Since the returned file descriptor is non-blocking, it is not
+ * recommended to use the Linux read and write calls directly, since these
+ * might read or write a partial message. Instead, use the provided
+ * write_message and read_message methods.
+ *
+ * @param port The port to bind to.
+ * @return A non-blocking file descriptor for the socket, or -1 if an error
+ *         occurs.
+ */
+int bind_inet_sock(const int port) {
+  struct sockaddr_in name;
+  int socket_fd = socket(PF_INET, SOCK_STREAM, 0);
+  if (socket_fd < 0) {
+    LOG_ERR("socket() failed for port %d.", port);
+    return -1;
+  }
+  name.sin_family = AF_INET;
+  name.sin_port = htons(port);
+  name.sin_addr.s_addr = htonl(INADDR_ANY);
+  int on = 1;
+  /* TODO(pcm): http://stackoverflow.com/q/1150635 */
+  if (ioctl(socket_fd, FIONBIO, (char *) &on) < 0) {
+    LOG_ERR("ioctl failed");
+    close(socket_fd);
+    return -1;
+  }
+  if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
+    LOG_ERR("setsockopt failed for port %d", port);
+    close(socket_fd);
+    return -1;
+  }
+  if (bind(socket_fd, (struct sockaddr *) &name, sizeof(name)) < 0) {
+    LOG_ERR("Bind failed for port %d", port);
+    close(socket_fd);
+    return -1;
+  }
+  if (listen(socket_fd, 5) == -1) {
+    LOG_ERR("Could not listen to socket %d", port);
+    close(socket_fd);
+    return -1;
+  }
+  return socket_fd;
+}
+
+/**
+ * Binds to a Unix domain streaming socket at the given
+ * pathname. Removes any existing file at the pathname.
+ *
+ * @param socket_pathname The pathname for the socket.
+ * @return A blocking file descriptor for the socket, or -1 if an error
+ *         occurs.
+ */
 int bind_ipc_sock(const char *socket_pathname) {
   struct sockaddr_un socket_address;
   int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -27,9 +82,9 @@ int bind_ipc_sock(const char *socket_pathname) {
   int on = 1;
   if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on,
                  sizeof(on)) < 0) {
-    LOG_ERR("setsockopt failed");
+    LOG_ERR("setsockopt failed for pathname %s", socket_pathname);
     close(socket_fd);
-    exit(-1);
+    return -1;
   }
 
   unlink(socket_pathname);
@@ -37,6 +92,7 @@ int bind_ipc_sock(const char *socket_pathname) {
   socket_address.sun_family = AF_UNIX;
   if (strlen(socket_pathname) + 1 > sizeof(socket_address.sun_path)) {
     LOG_ERR("Socket pathname is too long.");
+    close(socket_fd);
     return -1;
   }
   strncpy(socket_address.sun_path, socket_pathname,
@@ -45,16 +101,22 @@ int bind_ipc_sock(const char *socket_pathname) {
   if (bind(socket_fd, (struct sockaddr *) &socket_address,
            sizeof(struct sockaddr_un)) != 0) {
     LOG_ERR("Bind failed for pathname %s.", socket_pathname);
+    close(socket_fd);
+    return -1;
+  }
+  if (listen(socket_fd, 5) == -1) {
+    LOG_ERR("Could not listen to socket %s", socket_pathname);
+    close(socket_fd);
     return -1;
   }
-  listen(socket_fd, 5);
-
   return socket_fd;
 }
 
-/* Connects to a Unix domain streaming socket at the given
+/**
+ * Connects to a Unix domain streaming socket at the given
  * pathname. Returns a file descriptor for the socket, or -1 if
- * an error occurred. */
+ * an error occurred.
+ */
 int connect_ipc_sock(const char *socket_pathname) {
   struct sockaddr_un socket_address;
   int socket_fd;
@@ -83,8 +145,10 @@ int connect_ipc_sock(const char *socket_pathname) {
   return socket_fd;
 }
 
-/* Accept a new client connection on the given socket
- * descriptor. Returns a descriptor for the new socket. */
+/**
+ * Accept a new client connection on the given socket
+ * descriptor. Returns a descriptor for the new socket.
+ */
 int accept_client(int socket_fd) {
   int client_fd = accept(socket_fd, NULL, NULL);
   if (client_fd < 0) {
@@ -95,55 +159,80 @@ int accept_client(int socket_fd) {
 }
 
 /**
- * Reliably write a sequence of bytes into a file descriptor. This will block
- * until one of the following happens: (1) there is an error (2) end of file,
- * or (3) all length bytes have been written.
+ * Write a sequence of bytes into a file descriptor. This will block until one
+ * of the following happens: (1) there is an error (2) end of file, or (3) all
+ * length bytes have been written.
  *
- * @param fd The file descriptor to write to.
+ * @param fd The file descriptor to write to. It can be non-blocking.
  * @param cursor The cursor pointing to the beginning of the bytes to send.
  * @param length The size of the bytes sequence to write.
- * @return Void.
+ * @return int Whether there was an error while writing. 0 corresponds to
+ *         success and -1 corresponds to an error (errno will be set).
  */
-void write_bytes(int fd, uint8_t *cursor, size_t length) {
+int write_bytes(int fd, uint8_t *cursor, size_t length) {
   ssize_t nbytes = 0;
   while (length > 0) {
     /* While we haven't written the whole message, write to the file
      * descriptor, advance the cursor, and decrease the amount left to write. */
     nbytes = write(fd, cursor, length);
-    CHECK(nbytes > 0);
+    if (nbytes < 0) {
+      if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+        continue;
+      }
+      /* Any other error is reported to the caller with errno left as set by
+       * write(); never fall through and advance the cursor by -1. */
+      return -1;
+    }
+    if (nbytes == 0) {
+      return -1;
+    }
     cursor += nbytes;
     length -= nbytes;
   }
+  return 0;
 }
 
 /**
  * Write a sequence of bytes on a file descriptor. The bytes should then be read
  * by read_message.
  *
- * @param fd The file descriptor to write to.
+ * @param fd The file descriptor to write to. It can be non-blocking.
  * @param type The type of the message to send.
  * @param length The size in bytes of the bytes parameter.
  * @param bytes The address of the message to send.
- * @return Void.
+ * @return int Whether there was an error while writing. 0 corresponds to
+ *         success and -1 corresponds to an error (errno will be set).
  */
-void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes) {
-  write_bytes(fd, (uint8_t *) &type, sizeof(type));
-  write_bytes(fd, (uint8_t *) &length, sizeof(length));
-  write_bytes(fd, bytes, length * sizeof(char));
+int write_message(int fd, int64_t type, int64_t length, uint8_t *bytes) {
+  /* Send the header (type, then payload length) followed by the payload,
+   * stopping at the first write that fails. */
+  int status = write_bytes(fd, (uint8_t *) &type, sizeof(type));
+  if (status != 0) {
+    return status;
+  }
+  status = write_bytes(fd, (uint8_t *) &length, sizeof(length));
+  if (status != 0) {
+    return status;
+  }
+  /* The status of the payload write is also the overall result: 0 on
+   * success, nonzero otherwise. */
+  status = write_bytes(fd, bytes, length * sizeof(char));
+  return status;
 }
 
 /**
- * Reliably read a sequence of bytes from a file descriptor into a buffer. This
- * will block until one of the following happens: (1) there is an error (2) end
- * of file, or (3) all length bytes have been written.
+ * Read a sequence of bytes from a file descriptor into a buffer. This will
+ * block until one of the following happens: (1) there is an error (2) end of
+ * file, or (3) all length bytes have been read.
  *
  * @note The buffer pointed to by cursor must already have length number of
  * bytes allocated before calling this method.
  *
- * @param fd The file descriptor to read from.
+ * @param fd The file descriptor to read from. It can be non-blocking.
  * @param cursor The cursor pointing to the beginning of the buffer.
  * @param length The size of the byte sequence to read.
- * @return Void.
+ * @return int Whether there was an error while reading. 0 corresponds to
+ *         success and -1 corresponds to an error (errno will be set).
  */
 int read_bytes(int fd, uint8_t *cursor, size_t length) {
   ssize_t nbytes = 0;
@@ -173,14 +262,18 @@ int read_bytes(int fd, uint8_t *cursor, size_t length) {
  *
  * @note The caller must free the memory.
  *
- * @param fd The file descriptor to read from.
+ * @param fd The file descriptor to read from. It can be non-blocking.
  * @param type The type of the message that is read will be written at this
-          address.
+          address. If there was an error while reading, this will be
+          DISCONNECT_CLIENT.
  * @param length The size in bytes of the message that is read will be written
           at this address. This size does not include the bytes used to encode
-          the type and length.
+          the type and length. If there was an error while reading, this will
+          be 0.
  * @param bytes The address at which to write the pointer to the bytes that are
-          read and allocated by this function.
+          read and allocated by this function. If there was an error while
+          reading, this will be NULL.
+
  * @return Void.
  */
 void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes) {
diff --git a/io.h b/io.h
index 2299806f7..362b85fc1 100644
--- a/io.h
+++ b/io.h
@@ -14,6 +14,7 @@ enum common_message_type {
 
 /* Helper functions for socket communication. */
 
+int bind_inet_sock(const int port);
 int bind_ipc_sock(const char *socket_pathname);
 int connect_ipc_sock(const char *socket_pathname);
 
@@ -21,7 +22,7 @@ int accept_client(int socket_fd);
 
 /* Reading and writing data */
 
-void write_message(int fd, int64_t type, int64_t length, uint8_t *bytes);
+int write_message(int fd, int64_t type, int64_t length, uint8_t *bytes);
 void read_message(int fd, int64_t *type, int64_t *length, uint8_t **bytes);
 
 void write_log_message(int fd, char *message);
diff --git a/state/object_table.h b/state/object_table.h
index e2eb89433..bab54bc7b 100644
--- a/state/object_table.h
+++ b/state/object_table.h
@@ -6,7 +6,8 @@
  * the manager_vector array, but NOT the strings they are pointing to. */
 typedef void (*lookup_callback)(object_id object_id,
                                 int manager_count,
-                                const char *manager_vector[]);
+                                const char *manager_vector[],
+                                void *context);
 
 /* Register a new object with the directory. */
 /* TODO(pcm): Retry, print for each attempt. */
@@ -20,4 +21,5 @@ void object_table_remove(db_handle *db,
 /* Look up entry from the directory */
 void object_table_lookup(db_handle *db,
                          object_id object_id,
-                         lookup_callback callback);
+                         lookup_callback callback,
+                         void *context);
diff --git a/state/redis.c b/state/redis.c
index db008cb5d..d53b0bf15 100644
--- a/state/redis.c
+++ b/state/redis.c
@@ -56,7 +56,8 @@ db_handle *db_connect(const char *address,
                          num_clients, client_addr, client_port);
     freeReplyObject(reply);
     reply = redisCommand(context, "EXEC");
-    if (reply) {
+    CHECK(reply);
+    if (reply->type != REDIS_REPLY_NIL) {
       freeReplyObject(reply);
       break;
     }
@@ -150,17 +151,20 @@ void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata) {
     HASH_FIND_INT(db->service_cache, &result[j], entry);
     manager_vector[j] = entry->addr;
   }
-  cb_data->callback(cb_data->object_id, manager_count, manager_vector);
+  cb_data->callback(cb_data->object_id, manager_count, manager_vector,
+                    cb_data->context);
   free(privdata);
   free(result);
 }
 
 void object_table_lookup(db_handle *db,
                          object_id object_id,
-                         lookup_callback callback) {
+                         lookup_callback callback,
+                         void *context) {
   lookup_callback_data *cb_data = malloc(sizeof(lookup_callback_data));
   cb_data->callback = callback;
   cb_data->object_id = object_id;
+  cb_data->context = context;
   redisAsyncCommand(db->context, object_table_get_entry, cb_data,
                     "SMEMBERS obj:%b", &object_id.id[0], UNIQUE_ID_SIZE);
   if (db->context->err) {
@@ -230,3 +234,11 @@ void task_log_register_callback(db_handle *db,
     LOG_REDIS_ERR(db->sub_context, "error in task_log_register_callback");
   }
 }
+
+int get_client_id(db_handle *db) {
+  /* A NULL handle means there is no database connection to ask. */
+  if (!db) {
+    return -1;
+  }
+  return db->client_id;
+}
diff --git a/state/redis.h b/state/redis.h
index 51479f8f0..a3e0555e7 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -55,6 +55,8 @@ typedef struct {
   lookup_callback callback;
   /* Object ID that is looked up. */
   object_id object_id;
+  /* Data context for the callback. */
+  void *context;
 } lookup_callback_data;
 
 void object_table_get_entry(redisAsyncContext *c, void *r, void *privdata);
@@ -63,4 +65,13 @@ void object_table_lookup_callback(redisAsyncContext *c,
                                   void *r,
                                   void *privdata);
 
+/**
+ * Returns the client ID, according to Redis.
+ *
+ * @param db The handle to the Redis database.
+ * @returns int The client ID for this connection to Redis. If
+ *          this client has no connection to Redis, returns -1.
+ */
+int get_client_id(db_handle *db);
+
 #endif
diff --git a/test/db_tests.c b/test/db_tests.c
index 95e986e33..74dd9a850 100644
--- a/test/db_tests.c
+++ b/test/db_tests.c
@@ -1,6 +1,8 @@
 #include "greatest.h"
 
 #include <assert.h>
+#include <unistd.h>
+#include <sys/wait.h>
 
 #include "event_loop.h"
 #include "test/example_task.h"
@@ -23,7 +25,8 @@ char received_port2[6] = {0};
 /* Test if entries have been written to the database. */
 void test_callback(object_id object_id,
                    int manager_count,
-                   const char *manager_vector[]) {
+                   const char *manager_vector[],
+                   void *context) {
   CHECK(manager_count == 2);
   if (!manager_vector[0] ||
       sscanf(manager_vector[0], "%15[0-9.]:%5[0-9]", received_addr1,
@@ -56,7 +59,7 @@ TEST object_table_lookup_test(void) {
   object_table_add(db2, id);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
-  object_table_lookup(db1, id, test_callback);
+  object_table_lookup(db1, id, test_callback, NULL);
   event_loop_add_timer(loop, 100, timeout_handler, NULL);
   event_loop_run(loop);
   int port1 = atoi(received_port1);
@@ -130,12 +133,36 @@ TEST task_log_all_test(void) {
   PASS();
 }
 
+TEST unique_client_id_test(void) {
+  const int num_conns = 50;
+
+  db_handle *db;
+  pid_t pid = fork();
+  for (int i = 0; i < num_conns; ++i) {
+    db = db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr,
+                    manager_port1);
+    db_disconnect(db);
+  }
+  if (pid == 0) {
+    exit(0);
+  } else {
+    wait(NULL);
+  }
+
+  db = db_connect("127.0.0.1", 6379, "plasma_manager", manager_addr,
+                  manager_port1);
+  ASSERT_EQ(get_client_id(db), num_conns * 2);
+  db_disconnect(db);
+  PASS();
+}
+
 SUITE(db_tests) {
   redisContext *context = redisConnect("127.0.0.1", 6379);
   freeReplyObject(redisCommand(context, "FLUSHALL"));
   RUN_REDIS_TEST(context, object_table_lookup_test);
   RUN_REDIS_TEST(context, task_log_test);
   RUN_REDIS_TEST(context, task_log_all_test);
+  RUN_REDIS_TEST(context, unique_client_id_test);
   redisFree(context);
 }
 

From da3a3127e095f679651119f0debfafcade1b0b94 Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Tue, 18 Oct 2016 15:12:41 -0700
Subject: [PATCH 83/91] Move get_client_id to db.h (#40)

---
 state/db.h    | 9 +++++++++
 state/redis.h | 9 ---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/state/db.h b/state/db.h
index e6ca089a0..50536e271 100644
--- a/state/db.h
+++ b/state/db.h
@@ -20,4 +20,13 @@ void db_attach(db_handle *db, event_loop *loop);
 /* Disconnect from the global system store. */
 void db_disconnect(db_handle *db);
 
+/**
+ * Returns the client ID, according to the database.
+ *
+ * @param db The handle to the database.
+ * @returns int The client ID for this connection to the database. If
+ *          this client has no connection to the database, returns -1.
+ */
+int get_client_id(db_handle *db);
+
 #endif
diff --git a/state/redis.h b/state/redis.h
index a3e0555e7..cf368b992 100644
--- a/state/redis.h
+++ b/state/redis.h
@@ -65,13 +65,4 @@ void object_table_lookup_callback(redisAsyncContext *c,
                                   void *r,
                                   void *privdata);
 
-/**
- * Returns the client ID, according to Redis.
- *
- * @param db The handle to the Redis database.
- * @returns int The client ID for this connection to Redis. If
- *          this client has no connection to Redis, returns -1.
- */
-int get_client_id(db_handle *db);
-
 #endif

From ddfbd70dadf522d413504a536686b5771587311d Mon Sep 17 00:00:00 2001
From: Stephanie Wang <swang93@mit.edu>
Date: Tue, 18 Oct 2016 18:20:59 -0700
Subject: [PATCH 84/91] Plasma fetch (#36)

* Fetch objects from remote plasma instances. Includes:
  - Configurable number of retries, with a timeout per retry
  - Support for multiple-object fetch
  - Test cases (integration only)

Update ray common

Remove attempts to retry object table lookup during fetch

lint

Fix a couple valgrind errors

Fix valgrind errors

Address Robert and Philipp's comments.

* Add fix from ray common and some TODOs.

* Update ray common

* Update ray common again

* Remove unused file
---
 .travis.yml          |   1 +
 Makefile             |  10 +-
 common               |   2 +-
 lib/python/plasma.py |  46 ++-
 src/example.c        |   6 +-
 src/plasma.h         |  16 +-
 src/plasma_client.c  | 158 +++++++---
 src/plasma_client.h  |  78 ++++-
 src/plasma_manager.c | 694 ++++++++++++++++++++++++++++++++++---------
 src/plasma_manager.h | 104 +++++--
 src/plasma_store.c   |  88 +++---
 src/plasma_store.h   |   4 +-
 test/test.py         | 118 ++++++--
 13 files changed, 1012 insertions(+), 313 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 464724a27..d592044fe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,6 +43,7 @@ matrix:
 
 install:
   - make
+  - make test
 
 script:
   - source setup-env.sh
diff --git a/Makefile b/Makefile
index c36d99e02..0ad78eb74 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BUILD = build
 all: $(BUILD)/plasma_store $(BUILD)/plasma_manager $(BUILD)/plasma_client.so $(BUILD)/example $(BUILD)/libplasma_client.a
 
 debug: FORCE
-debug: CFLAGS += -DDEBUG=1
+debug: CFLAGS += -DRAY_COMMON_DEBUG=1
 debug: all
 
 clean:
@@ -16,7 +16,7 @@ $(BUILD)/plasma_store: src/plasma_store.c src/plasma.h src/fling.h src/fling.c s
 	$(CC) $(CFLAGS) src/plasma_store.c src/fling.c src/malloc.c common/build/libcommon.a -o $(BUILD)/plasma_store
 
 $(BUILD)/plasma_manager: src/plasma_manager.c src/plasma.h src/plasma_client.c src/fling.h src/fling.c common
-	$(CC) $(CFLAGS) src/plasma_manager.c src/plasma_client.c src/fling.c common/build/libcommon.a -o $(BUILD)/plasma_manager
+	$(CC) $(CFLAGS) src/plasma_manager.c src/plasma_client.c src/fling.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -o $(BUILD)/plasma_manager
 
 $(BUILD)/plasma_client.so: src/plasma_client.c src/fling.h src/fling.c common
 	$(CC) $(CFLAGS) src/plasma_client.c src/fling.c common/build/libcommon.a -fPIC -shared -o $(BUILD)/plasma_client.so
@@ -31,4 +31,10 @@ common: FORCE
 		git submodule update --init --recursive
 		cd common; make
 
+# Set the request timeout low for testing purposes.
+test: CFLAGS += -DRAY_TIMEOUT=50
+test: FORCE
+		cd common; make redis
+test: all
+
 FORCE:
diff --git a/common b/common
index f4037ad19..da3a3127e 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit f4037ad19f38dc68b186c9338d3f67c9058c556c
+Subproject commit da3a3127e095f679651119f0debfafcade1b0b94
diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 347715118..8200471d5 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -42,7 +42,7 @@ class PlasmaClient(object):
     plasma_client_library = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../build/plasma_client.so")
     self.client = ctypes.cdll.LoadLibrary(plasma_client_library)
 
-    self.client.plasma_store_connect.restype = ctypes.c_void_p
+    self.client.plasma_connect.restype = ctypes.c_void_p
     self.client.plasma_create.restype = None
     self.client.plasma_get.restype = None
     self.client.plasma_contains.restype = None
@@ -58,12 +58,12 @@ class PlasmaClient(object):
     self.buffer_from_read_write_memory.argtypes = [ctypes.c_void_p, ctypes.c_int64]
     self.buffer_from_read_write_memory.restype = ctypes.py_object
 
-    self.store_conn = ctypes.c_void_p(self.client.plasma_store_connect(socket_name))
-
     if addr is not None and port is not None:
-      self.manager_conn = self.client.plasma_manager_connect(addr, port)
+      self.has_manager_conn = True
+      self.plasma_conn = ctypes.c_void_p(self.client.plasma_connect(socket_name, addr, port))
     else:
-      self.manager_conn = -1 # not connected
+      self.has_manager_conn = False
+      self.plasma_conn = ctypes.c_void_p(self.client.plasma_connect(socket_name, None, 0))
 
   def create(self, object_id, size, metadata=None):
     """Create a new buffer in the PlasmaStore for a particular object ID.
@@ -81,7 +81,7 @@ class PlasmaClient(object):
     # Turn the metadata into the right type.
     metadata = buffer("") if metadata is None else metadata
     metadata = (ctypes.c_ubyte * len(metadata)).from_buffer_copy(metadata)
-    self.client.plasma_create(self.store_conn, make_plasma_id(object_id), size, ctypes.cast(metadata, ctypes.POINTER(ctypes.c_ubyte * len(metadata))), len(metadata), ctypes.byref(data))
+    self.client.plasma_create(self.plasma_conn, make_plasma_id(object_id), size, ctypes.cast(metadata, ctypes.POINTER(ctypes.c_ubyte * len(metadata))), len(metadata), ctypes.byref(data))
     return self.buffer_from_read_write_memory(data, size)
 
   def get(self, object_id):
@@ -97,7 +97,7 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.store_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    buf = self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(data, size)
 
   def get_metadata(self, object_id):
@@ -113,7 +113,7 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.store_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    buf = self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
     return self.buffer_from_memory(metadata, metadata_size)
 
   def contains(self, object_id):
@@ -123,7 +123,7 @@ class PlasmaClient(object):
       object_id (str): A string used to identify an object.
     """
     has_object = ctypes.c_int()
-    self.client.plasma_contains(self.store_conn, make_plasma_id(object_id), ctypes.byref(has_object))
+    self.client.plasma_contains(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(has_object))
     has_object = has_object.value
     if has_object == 1:
       return True
@@ -141,7 +141,7 @@ class PlasmaClient(object):
     Args:
       object_id (str): A string used to identify an object.
     """
-    self.client.plasma_seal(self.store_conn, make_plasma_id(object_id))
+    self.client.plasma_seal(self.plasma_conn, make_plasma_id(object_id))
 
   def delete(self, object_id):
     """Delete the buffer in the PlasmaStore for a particular object ID.
@@ -151,7 +151,7 @@ class PlasmaClient(object):
     Args:
       object_id (str): A string used to identify an object.
     """
-    self.client.plasma_delete(self.store_conn, make_plasma_id(object_id))
+    self.client.plasma_delete(self.plasma_conn, make_plasma_id(object_id))
 
   def transfer(self, addr, port, object_id):
     """Transfer local object with id object_id to another plasma instance
@@ -161,13 +161,31 @@ class PlasmaClient(object):
       port (int): Port number of the plasma instance the object is sent to.
       object_id (str): A string used to identify an object.
     """
-    if self.manager_conn == -1:
+    if not self.has_manager_conn:
       raise Exception("Not connected to the plasma manager socket")
-    self.client.plasma_transfer(self.manager_conn, addr, port, make_plasma_id(object_id))
+    self.client.plasma_transfer(self.plasma_conn, addr, port, make_plasma_id(object_id))
+
+  def fetch(self, object_ids):
+    """Fetch the given objects from other plasma manager instances.
+
+    Args:
+      object_ids (List[str]): Strings used to identify the objects to fetch.
+    Returns:
+      A list of bools, one per object ID, taken from plasma_fetch's results.
+    """
+    if not self.has_manager_conn:
+      raise Exception("Not connected to the plasma manager socket")
+    object_id_array = (len(object_ids) * PlasmaID)()
+    for i, object_id in enumerate(object_ids):
+      object_id_array[i] = make_plasma_id(object_id)
+    success_array = (len(object_ids) * ctypes.c_int)()
+    self.client.plasma_fetch(self.plasma_conn, object_id_array._length_,
+                             object_id_array, success_array)
+    return [bool(success) for success in success_array]
 
   def subscribe(self):
     """Subscribe to notifications about sealed objects."""
-    fd = self.client.plasma_subscribe(self.store_conn)
+    fd = self.client.plasma_subscribe(self.plasma_conn)
     self.notification_sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM)
     # Make the socket non-blocking.
     self.notification_sock.setblocking(0)
diff --git a/src/example.c b/src/example.c
index 81763dac7..922d10583 100644
--- a/src/example.c
+++ b/src/example.c
@@ -16,7 +16,7 @@
 #include "plasma_client.h"
 
 int main(int argc, char *argv[]) {
-  plasma_store_conn *conn = NULL;
+  plasma_connection *conn = NULL;
   int64_t size;
   uint8_t *data;
   int c;
@@ -25,7 +25,7 @@ int main(int argc, char *argv[]) {
   while ((c = getopt(argc, argv, "s:cfg")) != -1) {
     switch (c) {
     case 's':
-      conn = plasma_store_connect(optarg);
+      conn = plasma_connect(optarg, NULL, 0);
       break;
     case 'c':
       assert(conn != NULL);
@@ -43,5 +43,5 @@ int main(int argc, char *argv[]) {
     }
   }
   assert(conn != NULL);
-  plasma_store_disconnect(conn);
+  plasma_disconnect(conn);
 }
diff --git a/src/plasma.h b/src/plasma.h
index 8b6852f65..35e4f0db3 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -58,11 +58,11 @@ enum plasma_message_type {
   PLASMA_TRANSFER,
   /** Header for sending data. */
   PLASMA_DATA,
+  /** Request a fetch of an object in another store. */
+  PLASMA_FETCH,
 };
 
 typedef struct {
-  /** The ID of the object that the request is about. */
-  object_id object_id;
   /** The size of the object's data. */
   int64_t data_size;
   /** The size of the object's metadata. */
@@ -73,13 +73,21 @@ typedef struct {
   /** In a transfer request, this is the port of the Plasma Manager to transfer
    *  the object to. */
   int port;
+  /** The number of object IDs that will be included in this request. */
+  int num_object_ids;
+  /** The IDs of the objects that the request is about. */
+  object_id object_ids[1];
 } plasma_request;
 
 typedef struct {
+  /** The object ID that this reply refers to. */
+  object_id object_id;
   /** The object that is returned with this reply. */
   plasma_object object;
-  /** This is used only to respond to requests of type PLASMA_CONTAINS. It is 1
-   *  if the object is present and 0 otherwise. Used for plasma_contains. */
+  /** This is used only to respond to requests of type
+   *  PLASMA_CONTAINS or PLASMA_FETCH. It is 1 if the object is
+   *  present and 0 otherwise. Used for plasma_contains and
+   *  plasma_fetch. */
   int has_object;
 } plasma_reply;
 
diff --git a/src/plasma_client.c b/src/plasma_client.c
index 1cae73607..b01204a1c 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -32,22 +32,45 @@ typedef struct {
 
 /** Information about a connection between a Plasma Client and Plasma Store.
  *  This is used to avoid mapping the same files into memory multiple times. */
-struct plasma_store_conn {
+struct plasma_connection {
   /** File descriptor of the Unix domain socket that connects to the store. */
-  int conn;
+  int store_conn;
+  /** File descriptor of the Unix domain socket that connects to the manager. */
+  int manager_conn;
   /** Table of dlmalloc buffer files that have been memory mapped so far. */
   client_mmap_table_entry *mmap_table;
 };
 
+int plasma_request_size(int num_object_ids) {
+  int object_ids_size = (num_object_ids - 1) * sizeof(object_id);
+  return sizeof(plasma_request) + object_ids_size;
+}
+
 void plasma_send_request(int fd, int type, plasma_request *req) {
-  int req_count = sizeof(plasma_request);
-  write_message(fd, type, req_count, (uint8_t *) req);
+  int req_size = plasma_request_size(req->num_object_ids);
+  int error = write_message(fd, type, req_size, (uint8_t *) req);
+  /* TODO(swang): Actually handle the write error. */
+  CHECK(!error);
+}
+
+plasma_request make_plasma_request(object_id object_id) {
+  plasma_request req = {.num_object_ids = 1, .object_ids = {object_id}};
+  return req;
+}
+
+plasma_request *make_plasma_multiple_request(int num_object_ids,
+                                             object_id object_ids[]) {
+  /* Zero the request: the whole struct is sent, including unused fields. */
+  plasma_request *req = calloc(1, plasma_request_size(num_object_ids));
+  req->num_object_ids = num_object_ids;
+  memcpy(&req->object_ids, object_ids, num_object_ids * sizeof(object_id));
+  return req;
 }
 
 /* If the file descriptor fd has been mmapped in this client process before,
  * return the pointer that was returned by mmap, otherwise mmap it and store the
  * pointer in a hash table. */
-uint8_t *lookup_or_mmap(plasma_store_conn *conn,
+uint8_t *lookup_or_mmap(plasma_connection *conn,
                         int fd,
                         int store_fd_val,
                         int64_t map_size) {
@@ -72,7 +95,7 @@ uint8_t *lookup_or_mmap(plasma_store_conn *conn,
   }
 }
 
-void plasma_create(plasma_store_conn *conn,
+void plasma_create(plasma_connection *conn,
                    object_id object_id,
                    int64_t data_size,
                    uint8_t *metadata,
@@ -81,13 +104,13 @@ void plasma_create(plasma_store_conn *conn,
   LOG_DEBUG("called plasma_create on conn %d with size %" PRId64
             " and metadata size "
             "%" PRId64,
-            conn->conn, data_size, metadata_size);
-  plasma_request req = {.object_id = object_id,
-                        .data_size = data_size,
-                        .metadata_size = metadata_size};
-  plasma_send_request(conn->conn, PLASMA_CREATE, &req);
+            conn->store_conn, data_size, metadata_size);
+  plasma_request req = make_plasma_request(object_id);
+  req.data_size = data_size;
+  req.metadata_size = metadata_size;
+  plasma_send_request(conn->store_conn, PLASMA_CREATE, &req);
   plasma_reply reply;
-  int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
+  int fd = recv_fd(conn->store_conn, (char *) &reply, sizeof(plasma_reply));
   plasma_object *object = &reply.object;
   CHECK(object->data_size == data_size);
   CHECK(object->metadata_size == metadata_size);
@@ -106,16 +129,16 @@ void plasma_create(plasma_store_conn *conn,
 }
 
 /* This method is used to get both the data and the metadata. */
-void plasma_get(plasma_store_conn *conn,
+void plasma_get(plasma_connection *conn,
                 object_id object_id,
                 int64_t *size,
                 uint8_t **data,
                 int64_t *metadata_size,
                 uint8_t **metadata) {
-  plasma_request req = {.object_id = object_id};
-  plasma_send_request(conn->conn, PLASMA_GET, &req);
+  plasma_request req = make_plasma_request(object_id);
+  plasma_send_request(conn->store_conn, PLASMA_GET, &req);
   plasma_reply reply;
-  int fd = recv_fd(conn->conn, (char *) &reply, sizeof(plasma_reply));
+  int fd = recv_fd(conn->store_conn, (char *) &reply, sizeof(plasma_reply));
   CHECKM(fd != -1, "recv not successful");
   plasma_object *object = &reply.object;
   *data = lookup_or_mmap(conn, fd, object->handle.store_fd,
@@ -130,29 +153,32 @@ void plasma_get(plasma_store_conn *conn,
 }
 
 /* This method is used to query whether the plasma store contains an object. */
-void plasma_contains(plasma_store_conn *conn,
+void plasma_contains(plasma_connection *conn,
                      object_id object_id,
                      int *has_object) {
-  plasma_request req = {.object_id = object_id};
-  plasma_send_request(conn->conn, PLASMA_CONTAINS, &req);
+  plasma_request req = make_plasma_request(object_id);
+  plasma_send_request(conn->store_conn, PLASMA_CONTAINS, &req);
   plasma_reply reply;
-  int r = read(conn->conn, &reply, sizeof(plasma_reply));
+  int r = read(conn->store_conn, &reply, sizeof(plasma_reply));
   CHECKM(r != -1, "read error");
   CHECKM(r != 0, "connection disconnected");
   *has_object = reply.has_object;
 }
 
-void plasma_seal(plasma_store_conn *conn, object_id object_id) {
-  plasma_request req = {.object_id = object_id};
-  plasma_send_request(conn->conn, PLASMA_SEAL, &req);
+void plasma_seal(plasma_connection *conn, object_id object_id) {
+  plasma_request req = make_plasma_request(object_id);
+  plasma_send_request(conn->store_conn, PLASMA_SEAL, &req);
+  if (conn->manager_conn >= 0) {
+    plasma_send_request(conn->manager_conn, PLASMA_SEAL, &req);
+  }
 }
 
-void plasma_delete(plasma_store_conn *conn, object_id object_id) {
-  plasma_request req = {.object_id = object_id};
-  plasma_send_request(conn->conn, PLASMA_DELETE, &req);
+void plasma_delete(plasma_connection *conn, object_id object_id) {
+  plasma_request req = make_plasma_request(object_id);
+  plasma_send_request(conn->store_conn, PLASMA_DELETE, &req);
 }
 
-int plasma_subscribe(plasma_store_conn *conn) {
+int plasma_subscribe(plasma_connection *conn) {
   int fd[2];
   /* Create a non-blocking socket pair. This will only be used to send
    * notifications from the Plasma store to the client. */
@@ -162,25 +188,27 @@ int plasma_subscribe(plasma_store_conn *conn) {
   CHECK(fcntl(fd[1], F_SETFL, flags | O_NONBLOCK) == 0);
   /* Tell the Plasma store about the subscription. */
   plasma_request req = {};
-  plasma_send_request(conn->conn, PLASMA_SUBSCRIBE, &req);
+  plasma_send_request(conn->store_conn, PLASMA_SUBSCRIBE, &req);
   /* Send the file descriptor that the Plasma store should use to push
    * notifications about sealed objects to this client. We include a one byte
    * message because otherwise it seems to hang on Linux. */
   char dummy = '\0';
-  send_fd(conn->conn, fd[1], &dummy, 1);
+  send_fd(conn->store_conn, fd[1], &dummy, 1);
   /* Return the file descriptor that the client should use to read notifications
    * about sealed objects. */
   return fd[0];
 }
 
-plasma_store_conn *plasma_store_connect(const char *socket_name) {
-  assert(socket_name);
+plasma_connection *plasma_connect(const char *store_socket_name,
+                                  const char *manager_addr,
+                                  int manager_port) {
+  CHECK(store_socket_name);
   /* Try to connect to the Plasma store. If unsuccessful, retry several times.
    */
   int fd = -1;
   int connected_successfully = 0;
   for (int num_attempts = 0; num_attempts < 50; ++num_attempts) {
-    fd = connect_ipc_sock(socket_name);
+    fd = connect_ipc_sock(store_socket_name);
     if (fd >= 0) {
       connected_successfully = 1;
       break;
@@ -190,23 +218,32 @@ plasma_store_conn *plasma_store_connect(const char *socket_name) {
   }
   /* If we could not connect to the Plasma store, exit. */
   if (!connected_successfully) {
-    LOG_ERR("could not connect to store %s", socket_name);
+    LOG_ERR("could not connect to store %s", store_socket_name);
     exit(-1);
   }
   /* Initialize the store connection struct */
-  plasma_store_conn *result = malloc(sizeof(plasma_store_conn));
-  result->conn = fd;
+  plasma_connection *result = malloc(sizeof(plasma_connection));
+  result->store_conn = fd;
+  if (manager_addr != NULL) {
+    result->manager_conn = plasma_manager_connect(manager_addr, manager_port);
+  } else {
+    result->manager_conn = -1;
+  }
   result->mmap_table = NULL;
   return result;
 }
 
-void plasma_store_disconnect(plasma_store_conn *conn) {
-  close(conn->conn);
+void plasma_disconnect(plasma_connection *conn) {
+  close(conn->store_conn);
+  if (conn->manager_conn >= 0) {
+    close(conn->manager_conn);
+  }
   free(conn);
 }
 
 #define h_addr h_addr_list[0]
 
+/* TODO(swang): Return the error to the caller. */
 int plasma_manager_connect(const char *ip_addr, int port) {
   int fd = socket(PF_INET, SOCK_STREAM, 0);
   if (fd < 0) {
@@ -236,16 +273,57 @@ int plasma_manager_connect(const char *ip_addr, int port) {
   return fd;
 }
 
-void plasma_transfer(int manager,
+void plasma_transfer(plasma_connection *conn,
                      const char *addr,
                      int port,
                      object_id object_id) {
-  plasma_request req = {.object_id = object_id, .port = port};
+  plasma_request req = make_plasma_request(object_id);
+  req.port = port;
   char *end = NULL;
   for (int i = 0; i < 4; ++i) {
     req.addr[i] = strtol(end ? end : addr, &end, 10);
     /* skip the '.' */
     end += 1;
   }
-  plasma_send_request(manager, PLASMA_TRANSFER, &req);
+  plasma_send_request(conn->manager_conn, PLASMA_TRANSFER, &req);
+}
+
+void plasma_fetch(plasma_connection *conn,
+                  int num_object_ids,
+                  object_id object_ids[],
+                  int is_fetched[]) {
+  CHECK(conn->manager_conn >= 0);
+  plasma_request *req =
+      make_plasma_multiple_request(num_object_ids, object_ids);
+  LOG_DEBUG("Requesting fetch");
+  plasma_send_request(conn->manager_conn, PLASMA_FETCH, req);
+  free(req);
+
+  plasma_reply reply;
+  int nbytes, success;
+  for (int received = 0; received < num_object_ids; ++received) {
+    nbytes = recv(conn->manager_conn, (uint8_t *) &reply, sizeof(reply),
+                  MSG_WAITALL);
+    if (nbytes < 0) {
+      LOG_ERR("Error while waiting for manager response in fetch");
+      success = 0;
+    } else if (nbytes == 0) {
+      success = 0;
+    } else {
+      CHECK(nbytes == sizeof(reply));
+      success = reply.has_object;
+    }
+    /* Update the correct index in is_fetched. */
+    int i = 0;
+    for (; i < num_object_ids; i++) {
+      if (memcmp(&object_ids[i], &reply.object_id, sizeof(object_id)) == 0) {
+        /* Check that this isn't a duplicate response. */
+        CHECK(!is_fetched[i]);
+        is_fetched[i] = success;
+        break;
+      }
+    }
+    CHECKM(i != num_object_ids,
+           "Received unexpected object ID from manager during fetch.");
+  }
 }
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 36ecb1061..9468397cc 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -3,7 +3,7 @@
 
 #include "plasma.h"
 
-typedef struct plasma_store_conn plasma_store_conn;
+typedef struct plasma_connection plasma_connection;
 
 /**
  * This is used by the Plasma Client to send a request to the Plasma Store or
@@ -14,25 +14,52 @@ typedef struct plasma_store_conn plasma_store_conn;
  * @param req The address of the request to send.
  * @return Void.
  */
-void plasma_send_request(int conn, int type, plasma_request *req);
+void plasma_send_request(int fd, int type, plasma_request *req);
 
 /**
- * Connect to the local plasma store UNIX domain socket with path socket_name
- * and return the resulting connection.
+ * Create a plasma request to be sent with a single object ID.
  *
- * @param socket_name The name of the socket to use to connect to the Plasma
- *        Store.
+ * @param object_id The object ID to include in the request.
+ * @return The plasma request.
+ */
+plasma_request make_plasma_request(object_id object_id);
+
+/**
+ * Create a plasma request to be sent with multiple object IDs. Caller must free
+ * the returned plasma request pointer.
+ *
+ * @param num_object_ids The number of object IDs to include in the request.
+ * @param object_ids The array of object IDs to include in the request. It must
+ *        have length at least equal to num_object_ids.
+ * @return A pointer to the newly created plasma request.
+ */
+plasma_request *make_plasma_multiple_request(int num_object_ids,
+                                             object_id object_ids[]);
+
+/**
+ * Connect to the local plasma store and plasma manager. Return
+ * the resulting connection.
+ *
+ * @param store_socket_name The name of the UNIX domain socket to use
+ *        to connect to the Plasma Store.
+ * @param manager_addr The IP address of the plasma manager to
+ *        connect to. If NULL, no manager connection is opened.
+ * @param manager_port The port of the plasma manager to connect
+ *        to.
  * @return The object containing the connection state.
  */
-plasma_store_conn *plasma_store_connect(const char *socket_name);
+plasma_connection *plasma_connect(const char *store_socket_name,
+                                  const char *manager_addr,
+                                  int manager_port);
 
 /**
- * Disconnect from the local plasma store.
+ * Disconnect from the local plasma instance, including the local store and
+ * manager.
  *
- * @param conn The connection to the local plasma store.
+ * @param conn The connection to the local plasma store and plasma manager.
  * @return Void.
  */
-void plasma_store_disconnect(plasma_store_conn *conn);
+void plasma_disconnect(plasma_connection *conn);
 
 /**
  * Connect to a possibly remote Plasma Manager.
@@ -58,7 +85,7 @@ int plasma_manager_connect(const char *addr, int port);
  * @param data The address of the newly created object will be written here.
  * @return Void.
  */
-void plasma_create(plasma_store_conn *conn,
+void plasma_create(plasma_connection *conn,
                    object_id object_id,
                    int64_t size,
                    uint8_t *metadata,
@@ -80,7 +107,7 @@ void plasma_create(plasma_store_conn *conn,
  *        address.
  * @return Void.
  */
-void plasma_get(plasma_store_conn *conn,
+void plasma_get(plasma_connection *conn,
                 object_id object_id,
                 int64_t *size,
                 uint8_t **data,
@@ -99,7 +126,7 @@ void plasma_get(plasma_store_conn *conn,
  *        present and 0 if it is not present.
  * @return Void.
  */
-void plasma_contains(plasma_store_conn *conn,
+void plasma_contains(plasma_connection *conn,
                      object_id object_id,
                      int *has_object);
 
@@ -111,7 +138,7 @@ void plasma_contains(plasma_store_conn *conn,
  * @param object_id The ID of the object to seal.
  * @return Void.
  */
-void plasma_seal(plasma_store_conn *conn, object_id object_id);
+void plasma_seal(plasma_connection *conn, object_id object_id);
 
 /**
  * Delete an object from the object store. This currently assumes that the
@@ -124,7 +151,26 @@ void plasma_seal(plasma_store_conn *conn, object_id object_id);
  * @param object_id The ID of the object to delete.
  * @return Void.
  */
-void plasma_delete(plasma_store_conn *conn, object_id object_id);
+void plasma_delete(plasma_connection *conn, object_id object_id);
+
+/**
+ * Fetch objects from remote plasma stores that have the
+ * objects stored.
+ *
+ * @param conn The plasma connection. It must include an open
+ *        connection to the local manager.
+ * @param num_object_ids The number of object IDs requested.
+ * @param object_ids[] The vector of object IDs requested. Length must be at
+ *        least num_object_ids.
+ * @param is_fetched[] The vector in which to return the success of each
+ *        object's fetch operation, in the same order as object_ids. Length
+ *        must be at least num_object_ids; entries must be 0 on entry.
+ * @return Void.
+ */
+void plasma_fetch(plasma_connection *conn,
+                  int num_object_ids,
+                  object_id object_ids[],
+                  int is_fetched[]);
 
 /**
  * Subscribe to notifications when objects are sealed in the object store.
@@ -135,6 +181,6 @@ void plasma_delete(plasma_store_conn *conn, object_id object_id);
  * @return The file descriptor that the client should use to read notifications
            from the object store about sealed objects.
  */
-int plasma_subscribe(plasma_store_conn *conn);
+int plasma_subscribe(plasma_connection *conn);
 
 #endif
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index c7bdc7627..3dfe21735 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -23,6 +23,7 @@
 
 #include "uthash.h"
 #include "utlist.h"
+#include "utarray.h"
 #include "utstring.h"
 #include "common.h"
 #include "io.h"
@@ -30,56 +31,255 @@
 #include "plasma.h"
 #include "plasma_client.h"
 #include "plasma_manager.h"
+#include "state/db.h"
+#include "state/object_table.h"
+
+#define NUM_RETRIES 5
+
+/* Timeouts are in milliseconds. */
+#ifndef RAY_TIMEOUT
+#define MANAGER_TIMEOUT 1000
+#else
+#define MANAGER_TIMEOUT RAY_TIMEOUT
+#endif
+
+typedef struct client_object_connection client_object_connection;
 
 typedef struct {
+  /** Event loop. */
+  event_loop *loop;
   /** Connection to the local plasma store for reading or writing data. */
-  plasma_store_conn *store_conn;
-  /** Hash table of all contexts for active connections to other plasma
-   * managers. These are used for writing data to other plasma stores. */
+  plasma_connection *plasma_conn;
+  /** Hash table of all contexts for active connections to
+   *  other plasma managers. These are used for writing data to
+   *  other plasma stores. */
   client_connection *manager_connections;
+  db_handle *db;
+  /** Our address. */
+  uint8_t addr[4];
+  /** Our port. */
+  int port;
+  /** Hash table of outstanding fetch requests. The key is
+   *  object id, value is a list of connections to the clients
+   *  who are blocking on a fetch of this object. */
+  client_object_connection *fetch_connections;
 } plasma_manager_state;
 
-typedef struct plasma_buffer plasma_buffer;
+plasma_manager_state *g_manager_state = NULL;
 
-/* Buffer for reading and writing data between plasma managers. */
-struct plasma_buffer {
+typedef struct plasma_request_buffer plasma_request_buffer;
+
+/* Buffer for requests between plasma managers. */
+struct plasma_request_buffer {
+  int type;
   object_id object_id;
   uint8_t *data;
   int64_t data_size;
   uint8_t *metadata;
   int64_t metadata_size;
-  int writable;
   /* Pointer to the next buffer that we will write to this plasma manager. This
-   * field is only used if we're transferring data to another plasma manager,
+   * field is only used if we're pushing requests to another plasma manager,
    * not if we are receiving data. */
-  plasma_buffer *next;
+  plasma_request_buffer *next;
+};
+
+/* The context for fetch and wait requests. These are per client, per object. */
+struct client_object_connection {
+  /** The ID of the object we are fetching or waiting for. */
+  object_id object_id;
+  /** The client connection context, shared between other
+   *  client_object_connections for the same client. */
+  client_connection *client_conn;
+  /** The ID for the timer that will time out the current request to the state
+   *  database or another plasma manager. */
+  int64_t timer;
+  /** How many retries we have left for the request. Decremented on every
+   *  timeout. */
+  int num_retries;
+  /** Handle for a linked list. */
+  client_object_connection *next;
+  /** Pointer to the array containing the manager locations of
+   *  this object. */
+  char **manager_vector;
+  /** The number of manager locations in the array manager_vector. */
+  int manager_count;
+  /** Handle for the uthash table in the client connection
+   *  context that keeps track of active object connection
+   *  contexts. */
+  UT_hash_handle active_hh;
+  /** Handle for the uthash table in the manager state that
+   *  keeps track of outstanding fetch requests. */
+  UT_hash_handle fetch_hh;
 };
 
 /* Context for a client connection to another plasma manager. */
 struct client_connection {
-  /* Current state for this plasma manager. This is shared between all client
-   * connections to the plasma manager. */
+  /** Current state for this plasma manager. This is shared
+   *  between all client connections to the plasma manager. */
   plasma_manager_state *manager_state;
-  /* Current position in the buffer. */
+  /** Current position in the buffer. */
   int64_t cursor;
-  /* Buffer that this connection is reading from. If this is a connection to
-   * write data to another plasma store, then it is a linked list of buffers to
-   * write. */
-  plasma_buffer *transfer_queue;
-  /* File descriptor for the socket connected to the other plasma manager. */
+  /** Buffer that this connection is reading from. If this is a connection to
+   *  write data to another plasma store, then it is a linked
+   *  list of buffers to write. */
+  /* TODO(swang): Split into two queues, data transfers and data requests. */
+  plasma_request_buffer *transfer_queue;
+  /** File descriptor for the socket connected to the other
+   *  plasma manager. */
   int fd;
-  /* Following fields are used only for connections to plasma managers. */
-  /* Key that uniquely identifies the plasma manager that we're connected to.
-   * We will use the string <address>:<port> as an identifier. */
+  /** The objects that we are waiting for and their callback
+   *  contexts, for either a fetch or a wait operation. */
+  client_object_connection *active_objects;
+  /** The number of objects that we have left to return for
+   *  this fetch or wait operation. */
+  int num_return_objects;
+  /** Fields specific to connections to plasma managers.  Key that uniquely
+   * identifies the plasma manager that we're connected to. We will use the
+   * string <address>:<port> as an identifier. */
   char *ip_addr_port;
   /** Handle for the uthash table. */
   UT_hash_handle hh;
 };
 
-plasma_manager_state *init_plasma_manager_state(const char *store_socket_name) {
+void free_client_object_connection(client_object_connection *object_conn) {
+  for (int i = 0; i < object_conn->manager_count; ++i) {
+    free(object_conn->manager_vector[i]);
+  }
+  free(object_conn->manager_vector);
+  free(object_conn);
+}
+
+int send_client_reply(client_connection *conn, plasma_reply *reply) {
+  conn->num_return_objects--;
+  CHECK(conn->num_return_objects >= 0);
+  /* TODO(swang): Handle errors in write. */
+  int n = write(conn->fd, (uint8_t *) reply, sizeof(plasma_reply));
+  return (n != sizeof(plasma_reply));
+}
+
+/**
+ * Get the context for the given object ID for the given client
+ * connection, if there is one active.
+ *
+ * @param client_conn The client connection context.
+ * @param object_id The object ID whose context we want.
+ * @return A pointer to the active object context, or NULL if
+ *         there isn't one.
+ */
+client_object_connection *get_object_connection(client_connection *client_conn,
+                                                object_id object_id) {
+  client_object_connection *object_conn;
+  HASH_FIND(active_hh, client_conn->active_objects, &object_id,
+            sizeof(object_id), object_conn);
+  return object_conn;
+}
+
+/**
+ * Create a new context for the given object ID with the given
+ * client connection and register it with the manager's
+ * outstanding fetch or wait requests and the client
+ * connection's active object contexts.
+ *
+ * @param client_conn The client connection context.
+ * @param object_id The object ID whose context we want to
+ *        create.
+ * @return A pointer to the newly created object context.
+ */
+client_object_connection *add_object_connection(client_connection *client_conn,
+                                                object_id object_id) {
+  /* TODO(swang): Support registration of wait operations. */
+  /* Create a new context for this client connection and object. */
+  client_object_connection *object_conn =
+      malloc(sizeof(client_object_connection));
+  if (!object_conn) {
+    return NULL;
+  }
+  object_conn->object_id = object_id;
+  object_conn->client_conn = client_conn;
+  object_conn->manager_count = 0;
+  object_conn->manager_vector = NULL;
+  /* Register the object context with the client context. */
+  HASH_ADD(active_hh, client_conn->active_objects, object_id, sizeof(object_id),
+           object_conn);
+  /* Register the object context with the manager state. */
+  client_object_connection *fetch_connections;
+  HASH_FIND(fetch_hh, client_conn->manager_state->fetch_connections, &object_id,
+            sizeof(object_id), fetch_connections);
+  LOG_DEBUG("Registering fd %d for fetch.", client_conn->fd);
+  if (!fetch_connections) {
+    fetch_connections = NULL;
+    LL_APPEND(fetch_connections, object_conn);
+    HASH_ADD(fetch_hh, client_conn->manager_state->fetch_connections, object_id,
+             sizeof(object_id), fetch_connections);
+  } else {
+    LL_APPEND(fetch_connections, object_conn);
+  }
+  return object_conn;
+}
+
+/**
+ * Clean up and free an active object context. Deregister it from the
+ * associated client connection and from the manager state.
+ *
+ * @param client_conn The client connection context.
+ * @param object_conn The object context to deregister and free.
+ */
+void remove_object_connection(client_connection *client_conn,
+                              client_object_connection *object_conn) {
+  /* Deregister the object context with the client context. */
+  HASH_DELETE(active_hh, client_conn->active_objects, object_conn);
+  /* Deregister the object context with the manager state. */
+  client_object_connection *object_conns;
+  HASH_FIND(fetch_hh, client_conn->manager_state->fetch_connections,
+            &(object_conn->object_id), sizeof(object_conn->object_id),
+            object_conns);
+  CHECK(object_conns);
+  int len;
+  client_object_connection *tmp;
+  LL_COUNT(object_conns, tmp, len);
+  if (len == 1) {
+    HASH_DELETE(fetch_hh, client_conn->manager_state->fetch_connections,
+                object_conns);
+  }
+  LL_DELETE(object_conns, object_conn);
+  /* Free the object. */
+  free_client_object_connection(object_conn);
+}
+
+/* Helper function to parse a string of the form <IP address>:<port> into the
+ * given ip_addr and port pointers. The ip_addr buffer must already be
+ * allocated with room for at least 16 bytes (15 characters plus NUL). */
+/* TODO(swang): Move this function to Ray common. */
+void parse_ip_addr_port(const char *ip_addr_port, char *ip_addr, int *port) {
+  char port_str[6];
+  int parsed = sscanf(ip_addr_port, "%15[0-9.]:%5[0-9]", ip_addr, port_str);
+  CHECK(parsed == 2);
+  *port = atoi(port_str);
+}
+
+plasma_manager_state *init_plasma_manager_state(const char *store_socket_name,
+                                                const char *manager_addr,
+                                                int manager_port,
+                                                const char *db_addr,
+                                                int db_port) {
   plasma_manager_state *state = malloc(sizeof(plasma_manager_state));
-  state->store_conn = plasma_store_connect(store_socket_name);
+  state->loop = event_loop_create();
+  state->plasma_conn = plasma_connect(store_socket_name, NULL, 0);
   state->manager_connections = NULL;
+  state->fetch_connections = NULL;
+  if (db_addr) {
+    state->db = db_connect(db_addr, db_port, "plasma_manager", manager_addr,
+                           manager_port);
+    db_attach(state->db, state->loop);
+    LOG_DEBUG("Connected to db at %s:%d, assigned client ID %d", db_addr,
+              db_port, get_client_id(state->db));
+  } else {
+    state->db = NULL;
+    LOG_DEBUG("No db connection specified");
+  }
+  sscanf(manager_addr, "%hhu.%hhu.%hhu.%hhu", &state->addr[0], &state->addr[1],
+         &state->addr[2], &state->addr[3]);
+  state->port = manager_port;
   return state;
 }
 
@@ -90,31 +290,8 @@ void process_message(event_loop *loop,
                      void *context,
                      int events);
 
-void write_object_chunk(event_loop *loop,
-                        int data_sock,
-                        void *context,
-                        int events) {
-  client_connection *conn = (client_connection *) context;
-  if (conn->transfer_queue == NULL) {
-    /* If there are no objects to transfer, temporarily remove this connection
-     * from the event loop. It will be reawoken when we receive another
-     * PLASMA_TRANSFER request. */
-    event_loop_remove_file(loop, conn->fd);
-    return;
-  }
-
-  LOG_DEBUG("Writing data");
+void write_object_chunk(client_connection *conn, plasma_request_buffer *buf) {
   ssize_t r, s;
-  plasma_buffer *buf = conn->transfer_queue;
-  if (conn->cursor == 0) {
-    /* If the cursor is zero, we haven't sent any requests for this object yet,
-     * so send the initial PLASMA_DATA request. */
-    plasma_request manager_req = {.object_id = buf->object_id,
-                                  .data_size = buf->data_size,
-                                  .metadata_size = buf->metadata_size};
-    plasma_send_request(conn->fd, PLASMA_DATA, &manager_req);
-  }
-
   /* Try to write one BUFSIZE at a time. */
   s = buf->data_size + buf->metadata_size - conn->cursor;
   if (s > BUFSIZE)
@@ -132,23 +309,67 @@ void write_object_chunk(event_loop *loop,
     conn->cursor += r;
   }
   if (r == 0) {
-    /* If we've finished writing this buffer, move on to the next transfer
-     * request and reset the cursor to zero. */
-    LOG_DEBUG("writing on channel %d finished", data_sock);
+    /* If we've finished writing this buffer, reset the cursor to zero. */
+    LOG_DEBUG("writing on channel %d finished", conn->fd);
     conn->cursor = 0;
+  }
+}
+
+void send_queued_request(event_loop *loop,
+                         int data_sock,
+                         void *context,
+                         int events) {
+  client_connection *conn = (client_connection *) context;
+  if (conn->transfer_queue == NULL) {
+    /* If there are no objects to transfer, temporarily remove this connection
+     * from the event loop. It will be reawoken when we receive another
+     * PLASMA_TRANSFER request. */
+    event_loop_remove_file(loop, conn->fd);
+    return;
+  }
+
+  plasma_request_buffer *buf = conn->transfer_queue;
+  plasma_request manager_req = make_plasma_request(buf->object_id);
+  switch (buf->type) {
+  case PLASMA_TRANSFER:
+    LOG_DEBUG("Requesting transfer on DB client %d",
+              get_client_id(conn->manager_state->db));
+    memcpy(manager_req.addr, conn->manager_state->addr,
+           sizeof(manager_req.addr));
+    manager_req.port = conn->manager_state->port;
+    plasma_send_request(conn->fd, buf->type, &manager_req);
+    break;
+  case PLASMA_DATA:
+    LOG_DEBUG("Transferring object to manager");
+    if (conn->cursor == 0) {
+      /* If the cursor is zero, we haven't sent any requests for this
+       * object yet, so first send the initial PLASMA_DATA request,
+       * which carries the data and metadata sizes. */
+      manager_req.data_size = buf->data_size;
+      manager_req.metadata_size = buf->metadata_size;
+      plasma_send_request(conn->fd, PLASMA_DATA, &manager_req);
+    }
+    write_object_chunk(conn, buf);
+    break;
+  default:
+    LOG_ERR("Buffered request has unknown type.");
+  }
+
+  /* We are done sending this request. */
+  if (conn->cursor == 0) {
     LL_DELETE(conn->transfer_queue, buf);
     free(buf);
   }
 }
 
-void read_object_chunk(event_loop *loop,
-                       int data_sock,
-                       void *context,
-                       int events) {
+void process_data_chunk(event_loop *loop,
+                        int data_sock,
+                        void *context,
+                        int events) {
   LOG_DEBUG("Reading data");
   ssize_t r, s;
   client_connection *conn = (client_connection *) context;
-  plasma_buffer *buf = conn->transfer_queue;
+  plasma_request_buffer *buf = conn->transfer_queue;
   CHECK(buf != NULL);
   /* Try to read one BUFSIZE at a time. */
   s = buf->data_size + buf->metadata_size - conn->cursor;
@@ -164,90 +385,120 @@ void read_object_chunk(event_loop *loop,
   } else {
     conn->cursor += r;
   }
-  if (conn->cursor == buf->data_size + buf->metadata_size) {
-    LOG_DEBUG("reading on channel %d finished", data_sock);
-    plasma_seal(conn->manager_state->store_conn, buf->object_id);
-    LL_DELETE(conn->transfer_queue, buf);
-    free(buf);
-    /* Switch to listening for requests from this socket, instead of reading
-     * data. */
-    event_loop_remove_file(loop, data_sock);
-    event_loop_add_file(loop, data_sock, EVENT_LOOP_READ, process_message,
-                        conn);
+
+  if (conn->cursor != buf->data_size + buf->metadata_size) {
+    /* If we haven't finished reading all the data for this object yet, we're
+     * done for now. */
+    return;
   }
-  return;
+
+  /* Seal the object.*/
+  LOG_DEBUG("reading on channel %d finished", data_sock);
+  plasma_seal(conn->manager_state->plasma_conn, buf->object_id);
+  /* Notify any clients who were waiting on a fetch to this object. */
+  client_object_connection *object_conn, *next;
+  client_connection *client_conn;
+  HASH_FIND(fetch_hh, conn->manager_state->fetch_connections, &(buf->object_id),
+            sizeof(buf->object_id), object_conn);
+  plasma_reply reply = {.object_id = buf->object_id, .has_object = 1};
+  while (object_conn) {
+    next = object_conn->next;
+    client_conn = object_conn->client_conn;
+    send_client_reply(client_conn, &reply);
+    event_loop_remove_timer(client_conn->manager_state->loop,
+                            object_conn->timer);
+    remove_object_connection(client_conn, object_conn);
+    object_conn = next;
+  }
+  /* Remove the request buffer used for reading this object's data. */
+  LL_DELETE(conn->transfer_queue, buf);
+  free(buf);
+  /* Switch to listening for requests from this socket, instead of reading
+   * object data. */
+  event_loop_remove_file(loop, data_sock);
+  event_loop_add_file(loop, data_sock, EVENT_LOOP_READ, process_message, conn);
 }
 
-void start_writing_data(event_loop *loop,
-                        object_id object_id,
-                        uint8_t addr[4],
-                        int port,
-                        client_connection *conn) {
-  uint8_t *data;
-  int64_t data_size;
-  uint8_t *metadata;
-  int64_t metadata_size;
-  plasma_get(conn->manager_state->store_conn, object_id, &data_size, &data,
-             &metadata_size, &metadata);
-  assert(metadata == data + data_size);
-  plasma_buffer *buf = malloc(sizeof(plasma_buffer));
-  buf->object_id = object_id;
-  buf->data = data; /* We treat this as a pointer to the
-                       concatenated data and metadata. */
-  buf->data_size = data_size;
-  buf->metadata_size = metadata_size;
-  buf->writable = 0;
-
-  /* Look to see if we already have a connection to this plasma manager. */
-  UT_string *ip_addr;
+client_connection *get_manager_connection(plasma_manager_state *state,
+                                          const char *ip_addr,
+                                          int port) {
+  /* TODO(swang): Should probably check whether ip_addr and port belong to us.
+   */
   UT_string *ip_addr_port;
-  utstring_new(ip_addr);
   utstring_new(ip_addr_port);
-  utstring_printf(ip_addr, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
-  utstring_printf(ip_addr_port, "%s:%d", utstring_body(ip_addr), port);
+  utstring_printf(ip_addr_port, "%s:%d", ip_addr, port);
   client_connection *manager_conn;
-  HASH_FIND_STR(conn->manager_state->manager_connections,
-                utstring_body(ip_addr_port), manager_conn);
-
+  HASH_FIND_STR(state->manager_connections, utstring_body(ip_addr_port),
+                manager_conn);
+  LOG_DEBUG("Getting manager connection to %s on DB client %d",
+            utstring_body(ip_addr_port), get_client_id(state->db));
   if (!manager_conn) {
     /* If we don't already have a connection to this manager, start one. */
     manager_conn = malloc(sizeof(client_connection));
-    manager_conn->fd = plasma_manager_connect(utstring_body(ip_addr), port);
-    manager_conn->manager_state = conn->manager_state;
+    manager_conn->fd = plasma_manager_connect(ip_addr, port);
+    manager_conn->manager_state = state;
     manager_conn->transfer_queue = NULL;
     manager_conn->cursor = 0;
-
     manager_conn->ip_addr_port = strdup(utstring_body(ip_addr_port));
     HASH_ADD_KEYPTR(hh, manager_conn->manager_state->manager_connections,
                     manager_conn->ip_addr_port,
                     strlen(manager_conn->ip_addr_port), manager_conn);
   }
   utstring_free(ip_addr_port);
+  return manager_conn;
+}
+
+void process_transfer_request(event_loop *loop,
+                              object_id object_id,
+                              uint8_t addr[4],
+                              int port,
+                              client_connection *conn) {
+  uint8_t *data;
+  int64_t data_size;
+  uint8_t *metadata;
+  int64_t metadata_size;
+  /* TODO(swang): A non-blocking plasma_get, or else we could block here
+   * forever if we don't end up sealing this object. */
+  plasma_get(conn->manager_state->plasma_conn, object_id, &data_size, &data,
+             &metadata_size, &metadata);
+  assert(metadata == data + data_size);
+  plasma_request_buffer *buf = malloc(sizeof(plasma_request_buffer));
+  buf->type = PLASMA_DATA;
+  buf->object_id = object_id;
+  buf->data = data; /* We treat this as a pointer to the
+                       concatenated data and metadata. */
+  buf->data_size = data_size;
+  buf->metadata_size = metadata_size;
+
+  UT_string *ip_addr;
+  utstring_new(ip_addr);
+  utstring_printf(ip_addr, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
+  client_connection *manager_conn =
+      get_manager_connection(conn->manager_state, utstring_body(ip_addr), port);
   utstring_free(ip_addr);
 
   if (manager_conn->transfer_queue == NULL) {
     /* If we already have a connection to this manager and its inactive,
      * (re)register it with the event loop again. */
     event_loop_add_file(loop, manager_conn->fd, EVENT_LOOP_WRITE,
-                        write_object_chunk, manager_conn);
+                        send_queued_request, manager_conn);
   }
   /* Add this transfer request to this connection's transfer queue. */
   LL_APPEND(manager_conn->transfer_queue, buf);
 }
 
-void start_reading_data(event_loop *loop,
-                        int client_sock,
-                        object_id object_id,
-                        int64_t data_size,
-                        int64_t metadata_size,
-                        client_connection *conn) {
-  plasma_buffer *buf = malloc(sizeof(plasma_buffer));
+void process_data_request(event_loop *loop,
+                          int client_sock,
+                          object_id object_id,
+                          int64_t data_size,
+                          int64_t metadata_size,
+                          client_connection *conn) {
+  plasma_request_buffer *buf = malloc(sizeof(plasma_request_buffer));
   buf->object_id = object_id;
   buf->data_size = data_size;
   buf->metadata_size = metadata_size;
-  buf->writable = 1;
 
-  plasma_create(conn->manager_state->store_conn, object_id, data_size, NULL,
+  plasma_create(conn->manager_state->plasma_conn, object_id, data_size, NULL,
                 metadata_size, &(buf->data));
   LL_APPEND(conn->transfer_queue, buf);
   conn->cursor = 0;
@@ -255,10 +506,152 @@ void start_reading_data(event_loop *loop,
   /* Switch to reading the data from this socket, instead of listening for
    * other requests. */
   event_loop_remove_file(loop, client_sock);
-  event_loop_add_file(loop, client_sock, EVENT_LOOP_READ, read_object_chunk,
+  event_loop_add_file(loop, client_sock, EVENT_LOOP_READ, process_data_chunk,
                       conn);
 }
 
+/**
+ * Request a transfer of the given object ID from one of the managers believed
+ * to have a copy, picked by the retry count modulo the number of managers.
+ * The request is appended to that manager's queue of outgoing requests.
+ *
+ * @param client_conn The context for the connection to this client.
+ * @param object_id The object ID we want to request a transfer of.
+ * @return Void.
+ */
+void request_transfer_from(client_connection *client_conn,
+                           object_id object_id) {
+  client_object_connection *object_conn =
+      get_object_connection(client_conn, object_id);
+  CHECK(object_conn);
+  CHECK(object_conn->manager_count > 0);
+  char addr[16];
+  int port;
+  int i = object_conn->num_retries % object_conn->manager_count;
+  parse_ip_addr_port(object_conn->manager_vector[i], addr, &port);
+
+  client_connection *manager_conn =
+      get_manager_connection(client_conn->manager_state, addr, port);
+  plasma_request_buffer *transfer_request = malloc(sizeof(*transfer_request));
+  CHECK(transfer_request);
+  transfer_request->type = PLASMA_TRANSFER;
+  transfer_request->object_id = object_conn->object_id;
+
+  if (manager_conn->transfer_queue == NULL) {
+    /* If we already have a connection to this manager and it's inactive,
+     * (re)register it with the event loop. */
+    event_loop_add_file(client_conn->manager_state->loop, manager_conn->fd,
+                        EVENT_LOOP_WRITE, send_queued_request, manager_conn);
+  }
+  /* Add this transfer request to this connection's transfer queue. */
+  LL_APPEND(manager_conn->transfer_queue, transfer_request);
+}
+
+int manager_timeout_handler(event_loop *loop, timer_id id, void *context) {
+  client_object_connection *object_conn = context;
+  client_connection *client_conn = object_conn->client_conn;
+  LOG_DEBUG("Timer went off, %d tries left", object_conn->num_retries);
+  if (object_conn->num_retries > 0) {
+    request_transfer_from(client_conn, object_conn->object_id);
+    object_conn->num_retries--;
+    return MANAGER_TIMEOUT;
+  }
+  plasma_reply reply = {.object_id = object_conn->object_id, .has_object = 0};
+  send_client_reply(client_conn, &reply);
+  remove_object_connection(client_conn, object_conn);
+  return AE_NOMORE;
+}
+
+/**
+ * Given an object ID and the managers it can be found on, start requesting a
+ * transfer from the managers.
+ *
+ * @param object_id The object ID we want to request a transfer of.
+ * @param manager_count The number of managers the object can be found on.
+ * @param manager_vector A vector of the IP addresses of the managers that the
+ *        object can be found on.
+ * @param context The context for the connection to this client.
+ *
+ * Copies the addresses into this object's context, frees manager_vector, and
+ * tries the managers in turn until the data arrives or retries run out.
+ */
+void request_transfer(object_id object_id,
+                      int manager_count,
+                      const char *manager_vector[],
+                      void *context) {
+  client_connection *client_conn = (client_connection *) context;
+  client_object_connection *object_conn =
+      get_object_connection(client_conn, object_id);
+  CHECK(object_conn);
+  LOG_DEBUG("Object is on %d managers", manager_count);
+  if (manager_count == 0) {
+    /* TODO(swang): Instead of immediately counting this as a failure, maybe
+     * register a Redis callback for changes to this object table entry. */
+    free(manager_vector);
+    plasma_reply reply = {.object_id = object_conn->object_id, .has_object = 0};
+    send_client_reply(client_conn, &reply);
+    remove_object_connection(client_conn, object_conn);
+    return;
+  }
+  /* Pick a different manager to request a transfer from on every attempt. */
+  object_conn->manager_count = manager_count;
+  object_conn->manager_vector = calloc(manager_count, sizeof(char *));
+  CHECK(object_conn->manager_vector);
+  for (int i = 0; i < manager_count; ++i) {
+    size_t len = strlen(manager_vector[i]);
+    object_conn->manager_vector[i] = malloc(len + 1);
+    CHECK(object_conn->manager_vector[i]);
+    memcpy(object_conn->manager_vector[i], manager_vector[i], len + 1);
+  }
+  free(manager_vector);
+  /* Wait for the object data for the default number of retries, each of which
+   * times out after a default interval. */
+  object_conn->num_retries = NUM_RETRIES;
+  object_conn->timer =
+      event_loop_add_timer(client_conn->manager_state->loop, MANAGER_TIMEOUT,
+                           manager_timeout_handler, object_conn);
+  request_transfer_from(client_conn, object_id);
+}
+
+void process_fetch_request(client_connection *client_conn,
+                           object_id object_id) {
+  plasma_reply reply = {.object_id = object_id};
+  if (client_conn->manager_state->db == NULL) {
+    reply.has_object = 0;
+    send_client_reply(client_conn, &reply);
+    return;
+  }
+  /* Return success immediately if we already have this object. */
+  int is_local = 0;
+  plasma_contains(client_conn->manager_state->plasma_conn, object_id,
+                  &is_local);
+  if (is_local) {
+    reply.has_object = 1;
+    send_client_reply(client_conn, &reply);
+    return;
+  }
+  /* Register the new context with the current client connection. */
+  client_object_connection *object_conn =
+      add_object_connection(client_conn, object_id);
+  if (!object_conn) {
+    LOG_DEBUG("Unable to allocate memory for object context.");
+    send_client_reply(client_conn, &reply); /* has_object is still 0. */
+    return; /* Do not start a lookup whose callback CHECKs the context. */
+  }
+  /* Request a transfer from a plasma manager that has this object. */
+  object_table_lookup(client_conn->manager_state->db, object_id,
+                      request_transfer, client_conn);
+}
+
+void process_fetch_requests(client_connection *client_conn,
+                            int num_object_ids,
+                            object_id object_ids[]) {
+  for (int i = 0; i < num_object_ids; ++i) {
+    client_conn->num_return_objects++;
+    process_fetch_request(client_conn, object_ids[i]);
+  }
+}
+
 void process_message(event_loop *loop,
                      int client_sock,
                      void *context,
@@ -272,16 +665,27 @@ void process_message(event_loop *loop,
 
   switch (type) {
   case PLASMA_TRANSFER:
-    LOG_DEBUG("transfering object to manager with port %d", req->port);
-    start_writing_data(loop, req->object_id, req->addr, req->port, conn);
+    process_transfer_request(loop, req->object_ids[0], req->addr, req->port,
+                             conn);
     break;
   case PLASMA_DATA:
-    LOG_DEBUG("starting to stream data");
-    start_reading_data(loop, client_sock, req->object_id, req->data_size,
-                       req->metadata_size, conn);
+    LOG_DEBUG("Starting to stream data");
+    process_data_request(loop, client_sock, req->object_ids[0], req->data_size,
+                         req->metadata_size, conn);
+    break;
+  case PLASMA_FETCH:
+    LOG_DEBUG("Processing fetch");
+    process_fetch_requests(conn, req->num_object_ids, req->object_ids);
+    break;
+  case PLASMA_SEAL:
+    LOG_DEBUG("Publishing to object table from DB client %d.",
+              get_client_id(conn->manager_state->db));
+    object_table_add(conn->manager_state->db, req->object_ids[0]);
     break;
   case DISCONNECT_CLIENT: {
     LOG_INFO("Disconnecting client on fd %d", client_sock);
+    /* TODO(swang): Check if this connection was to a plasma manager. If so,
+     * delete it. */
     event_loop_remove_file(loop, client_sock);
     close(client_sock);
     free(conn);
@@ -303,47 +707,37 @@ void new_client_connection(event_loop *loop,
   client_connection *conn = malloc(sizeof(client_connection));
   conn->manager_state = (plasma_manager_state *) context;
   conn->transfer_queue = NULL;
+  conn->fd = new_socket;
+  conn->active_objects = NULL;
+  conn->num_return_objects = 0;
   event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, conn);
-  LOG_DEBUG("new connection with fd %d", new_socket);
+  LOG_DEBUG("New plasma manager connection with fd %d", new_socket);
 }
 
 void start_server(const char *store_socket_name,
                   const char *master_addr,
-                  int port) {
-  struct sockaddr_in name;
-  int sock = socket(PF_INET, SOCK_STREAM, 0);
-  if (sock < 0) {
-    LOG_ERR("could not create socket");
-    exit(-1);
-  }
-  name.sin_family = AF_INET;
-  name.sin_port = htons(port);
-  name.sin_addr.s_addr = htonl(INADDR_ANY);
-  /* Make the socket non-blocking. */
-  int flags = fcntl(sock, F_GETFL, 0);
-  CHECK(fcntl(sock, F_SETFL, flags | O_NONBLOCK) == 0);
-  int on = 1;
-  setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
-  if (bind(sock, (struct sockaddr *) &name, sizeof(name)) < 0) {
-    LOG_ERR("could not bind socket");
-    exit(-1);
-  }
-  LOG_DEBUG("listening on port %d", port);
-  if (listen(sock, 5) == -1) {
-    LOG_ERR("could not listen to socket");
-    exit(-1);
-  }
+                  int port,
+                  const char *db_addr,
+                  int db_port) {
+  int sock = bind_inet_sock(port);
+  CHECKM(sock >= 0, "Unable to bind to manager port");
 
-  event_loop *loop = event_loop_create();
-  plasma_manager_state *state = init_plasma_manager_state(store_socket_name);
-  event_loop_add_file(loop, sock, EVENT_LOOP_READ, new_client_connection,
-                      state);
-  event_loop_run(loop);
+  g_manager_state = init_plasma_manager_state(store_socket_name, master_addr,
+                                              port, db_addr, db_port);
+  CHECK(g_manager_state);
+  LOG_DEBUG("Started server connected to store %s, listening on port %d",
+            store_socket_name, port);
+  event_loop_add_file(g_manager_state->loop, sock, EVENT_LOOP_READ,
+                      new_client_connection, g_manager_state);
+  event_loop_run(g_manager_state->loop);
 }
 
 /* Report "success" to valgrind. */
 void signal_handler(int signal) {
   if (signal == SIGTERM) {
+    if (g_manager_state && g_manager_state->db) { /* db may be NULL. */
+      db_disconnect(g_manager_state->db);
+    }
     exit(0);
   }
 }
@@ -356,8 +750,10 @@ int main(int argc, char *argv[]) {
   char *master_addr = NULL;
   /* Port number the manager should use. */
   int port;
+  /* IP address and port of state database. */
+  char *db_host = NULL;
   int c;
-  while ((c = getopt(argc, argv, "s:m:p:")) != -1) {
+  while ((c = getopt(argc, argv, "s:m:p:d:")) != -1) {
     switch (c) {
     case 's':
       store_socket_name = optarg;
@@ -368,6 +764,9 @@ int main(int argc, char *argv[]) {
     case 'p':
       port = atoi(optarg);
       break;
+    case 'd':
+      db_host = optarg;
+      break;
     default:
       LOG_ERR("unknown option %c", c);
       exit(-1);
@@ -385,5 +784,12 @@ int main(int argc, char *argv[]) {
         "123.456.789.10 with -m switch");
     exit(-1);
   }
-  start_server(store_socket_name, master_addr, port);
+  char db_addr[16];
+  int db_port;
+  if (db_host) {
+    parse_ip_addr_port(db_host, db_addr, &db_port);
+    start_server(store_socket_name, master_addr, port, db_addr, db_port);
+  } else {
+    start_server(store_socket_name, master_addr, port, NULL, 0);
+  }
 }
diff --git a/src/plasma_manager.h b/src/plasma_manager.h
index 27075632e..368b1314e 100644
--- a/src/plasma_manager.h
+++ b/src/plasma_manager.h
@@ -7,7 +7,7 @@
 typedef struct client_connection client_connection;
 
 /**
- * Start transfering data to another object store manager.
+ * Process a request from another object store manager to transfer an object.
  *
  * @param loop This is the event loop of the plasma manager.
  * @param object_id The object_id of the object we will be sending.
@@ -15,18 +15,20 @@ typedef struct client_connection client_connection;
  * to.
  * @param port The port of the plasma manager we are sending the object to.
  * @param conn The client_connection to the other plasma manager.
+ * @return Void.
  *
- * This establishes a connection to the remote manager and sends the data
- * header to the other object manager.
+ * This establishes a connection to the remote manager if one doesn't already
+ * exist, and queues up the request to transfer the data to the other object
+ * manager.
  */
-void start_writing_data(event_loop *loop,
-                        object_id object_id,
-                        uint8_t addr[4],
-                        int port,
-                        client_connection *conn);
+void process_transfer_request(event_loop *loop,
+                              object_id object_id,
+                              uint8_t addr[4],
+                              int port,
+                              client_connection *conn);
 
 /**
- * Start reading data from another object manager.
+ * Process a request from another object store manager to receive data.
  *
  * @param loop This is the event loop of the plasma manager.
  * @param client_sock The connection to the other plasma manager.
@@ -34,46 +36,84 @@ void start_writing_data(event_loop *loop,
  * @param data_size Size of the object.
  * @param metadata_size Size of the metadata.
  * @param conn The client_connection to the other plasma manager.
+ * @return Void.
  *
- * Initializes the object we are going to write to in the
- * local plasma store and then switches the data socket to reading mode.
+ * Initializes the object we are going to write to in the local plasma store
+ * and then switches the data socket to read the raw object bytes instead of
+ * plasma requests.
  */
-void start_reading_data(event_loop *loop,
-                        int client_sock,
-                        object_id object_id,
-                        int64_t data_size,
-                        int64_t metadata_size,
-                        client_connection *conn);
+void process_data_request(event_loop *loop,
+                          int client_sock,
+                          object_id object_id,
+                          int64_t data_size,
+                          int64_t metadata_size,
+                          client_connection *conn);
 
 /**
  * Read the next chunk of the object in transit from the plasma manager
- * that is connected to the connection with index "conn_index". Once all data
- * has been read, the socket switches to listening for the next request.
+ * connected to the given socket. Once all data for this object has been read,
+ * the socket switches to listening for the next plasma request.
  *
  * @param loop This is the event loop of the plasma manager.
  * @param data_sock The connection to the other plasma manager.
  * @param context The client_connection to the other plasma manager.
- *
+ * @return Void.
  */
-void read_object_chunk(event_loop *loop,
-                       int data_sock,
-                       void *context,
-                       int events);
+void process_data_chunk(event_loop *loop,
+                        int data_sock,
+                        void *context,
+                        int events);
 
 /**
- * Write the next chunk of the object currently transfered to the plasma manager
- * that is connected to the socket "data_sock". If no data has been sent yet,
- * the initial handshake to transfer the object size is performed.
+ * Process a fetch request. The fetch request tries:
+ * 1) If there is no connection to the database, return failure to the client.
+ * 2) If the object is available locally, return success to the client.
+ * 3) Query the database for plasma managers that the object might be on.
+ * 4) Request a transfer from each of the managers that the object might be on
+ *    until we receive the data, or until we timeout.
+ * 5) Returns success or failure to the client depending on whether we received
+ *    the data or not.
+ *
+ * @param client_conn The connection context for the client that made the
+ *        request.
+ * @param object_id The object ID requested.
+ * @return Void.
+ */
+void process_fetch_request(client_connection *client_conn, object_id object_id);
+
+/**
+ * Process a fetch request for multiple objects. The success of each object
+ * will be written back individually to the socket connected to the client that
+ * made the request in a plasma_reply. See documentation for
+ * process_fetch_request for the sequence of operations per object.
+ *
+ * @param client_conn The connection context for the client that made the
+ *        request.
+ * @param object_id_count The number of object IDs requested.
+ * @param object_ids[] The vector of object IDs requested.
+ * @return Void.
+ */
+void process_fetch_requests(client_connection *client_conn,
+                            int object_id_count,
+                            object_id object_ids[]);
+
+/**
+ * Send the next request queued for the other plasma manager connected to the
+ * socket "data_sock". This could be a request to either write object data or
+ * request object data. If the request is to write object data and no data has
+ * been sent yet, the initial handshake to transfer the object size is
+ * performed.
  *
  * @param loop This is the event loop of the plasma manager.
  * @param data_sock This is the socket the other plasma manager is listening on.
  * @param context The client_connection to the other plasma manager, contains a
  *                queue of objects that will be sent.
+ * @return Void.
  */
-void write_object_chunk(event_loop *loop,
-                        int data_sock,
-                        void *context,
-                        int events);
+void send_queued_request(event_loop *loop,
+                         int data_sock,
+                         void *context,
+                         int events);
 
 /**
  * Register a new client connection with the plasma manager. A client can
@@ -82,7 +122,7 @@ void write_object_chunk(event_loop *loop,
  * @param loop This is the event loop of the plasma manager.
  * @param listener_socket The socket the plasma manager is listening on.
  * @param context The plasma manager state.
- *
+ * @return Void.
  */
 void new_client_connection(event_loop *loop,
                            int listener_sock,
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 3bdc1364a..3cb251234 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -113,7 +113,7 @@ plasma_store_state *init_plasma_store(event_loop *loop) {
 }
 
 /* Create a new object buffer in the hash table. */
-void create_object(plasma_store_state *s,
+void create_object(plasma_store_state *plasma_state,
                    object_id object_id,
                    int64_t data_size,
                    int64_t metadata_size,
@@ -121,7 +121,10 @@ void create_object(plasma_store_state *s,
   LOG_DEBUG("creating object"); /* TODO(pcm): add object_id here */
 
   object_table_entry *entry;
-  HASH_FIND(handle, s->open_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, plasma_state->open_objects, &object_id, sizeof(object_id),
+            entry);
+  /* TODO(swang): Return this error to the client instead of
+   * exiting. */
   CHECKM(entry == NULL, "Cannot create object twice.");
 
   uint8_t *pointer = dlmalloc(data_size + metadata_size);
@@ -140,7 +143,8 @@ void create_object(plasma_store_state *s,
   entry->fd = fd;
   entry->map_size = map_size;
   entry->offset = offset;
-  HASH_ADD(handle, s->open_objects, object_id, sizeof(object_id), entry);
+  HASH_ADD(handle, plasma_state->open_objects, object_id, sizeof(object_id),
+           entry);
   result->handle.store_fd = fd;
   result->handle.mmap_size = map_size;
   result->data_offset = offset;
@@ -150,12 +154,13 @@ void create_object(plasma_store_state *s,
 }
 
 /* Get an object from the hash table. */
-int get_object(plasma_store_state *s,
+int get_object(plasma_store_state *plasma_state,
                int conn,
                object_id object_id,
                plasma_object *result) {
   object_table_entry *entry;
-  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
+            entry);
   if (entry) {
     result->handle.store_fd = entry->fd;
     result->handle.mmap_size = entry->map_size;
@@ -167,15 +172,15 @@ int get_object(plasma_store_state *s,
   } else {
     object_notify_entry *notify_entry;
     LOG_DEBUG("object not in hash table of sealed objects");
-    HASH_FIND(handle, s->objects_notify, &object_id, sizeof(object_id),
-              notify_entry);
+    HASH_FIND(handle, plasma_state->objects_notify, &object_id,
+              sizeof(object_id), notify_entry);
     if (!notify_entry) {
       notify_entry = malloc(sizeof(object_notify_entry));
       memset(notify_entry, 0, sizeof(object_notify_entry));
       utarray_new(notify_entry->conns, &ut_int_icd);
       memcpy(&notify_entry->object_id, &object_id, 20);
-      HASH_ADD(handle, s->objects_notify, object_id, sizeof(object_id),
-               notify_entry);
+      HASH_ADD(handle, plasma_state->objects_notify, object_id,
+               sizeof(object_id), notify_entry);
     }
     utarray_push_back(notify_entry->conns, &conn);
   }
@@ -183,36 +188,40 @@ int get_object(plasma_store_state *s,
 }
 
 /* Check if an object is present. */
-int contains_object(plasma_store_state *s, object_id object_id) {
+int contains_object(plasma_store_state *plasma_state, object_id object_id) {
   object_table_entry *entry;
-  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
+            entry);
   return entry ? OBJECT_FOUND : OBJECT_NOT_FOUND;
 }
 
 /* Seal an object that has been created in the hash table. */
-void seal_object(plasma_store_state *s,
+void seal_object(plasma_store_state *plasma_state,
                  object_id object_id,
                  UT_array **conns,
                  plasma_object *result) {
   LOG_DEBUG("sealing object");  // TODO(pcm): add object_id here
   object_table_entry *entry;
-  HASH_FIND(handle, s->open_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, plasma_state->open_objects, &object_id, sizeof(object_id),
+            entry);
   if (!entry) {
     return; /* TODO(pcm): return error */
   }
-  HASH_DELETE(handle, s->open_objects, entry);
-  HASH_ADD(handle, s->sealed_objects, object_id, sizeof(object_id), entry);
+  HASH_DELETE(handle, plasma_state->open_objects, entry);
+  HASH_ADD(handle, plasma_state->sealed_objects, object_id, sizeof(object_id),
+           entry);
 
   /* Inform all subscribers that a new object has been sealed. */
   notification_queue *queue, *temp_queue;
-  HASH_ITER(hh, s->pending_notifications, queue, temp_queue) {
+  HASH_ITER(hh, plasma_state->pending_notifications, queue, temp_queue) {
     utarray_push_back(queue->object_ids, &object_id);
-    send_notifications(s->loop, queue->subscriber_fd, s, 0);
+    send_notifications(plasma_state->loop, queue->subscriber_fd, plasma_state,
+                       0);
   }
 
   /* Inform processes getting this object that the object is ready now. */
   object_notify_entry *notify_entry;
-  HASH_FIND(handle, s->objects_notify, &object_id, sizeof(object_id),
+  HASH_FIND(handle, plasma_state->objects_notify, &object_id, sizeof(object_id),
             notify_entry);
   if (!notify_entry) {
     *conns = NULL;
@@ -224,22 +233,23 @@ void seal_object(plasma_store_state *s,
   result->metadata_offset = entry->offset + entry->info.data_size;
   result->data_size = entry->info.data_size;
   result->metadata_size = entry->info.metadata_size;
-  HASH_DELETE(handle, s->objects_notify, notify_entry);
+  HASH_DELETE(handle, plasma_state->objects_notify, notify_entry);
   *conns = notify_entry->conns;
   free(notify_entry);
 }
 
 /* Delete an object that has been created in the hash table. */
-void delete_object(plasma_store_state *s, object_id object_id) {
+void delete_object(plasma_store_state *plasma_state, object_id object_id) {
   LOG_DEBUG("deleting object");  // TODO(rkn): add object_id here
   object_table_entry *entry;
-  HASH_FIND(handle, s->sealed_objects, &object_id, sizeof(object_id), entry);
+  HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
+            entry);
   /* TODO(rkn): This should probably not fail, but should instead throw an
    * error. Maybe we should also support deleting objects that have been created
    * but not sealed. */
   CHECKM(entry != NULL, "To delete an object it must have been sealed.");
   uint8_t *pointer = entry->pointer;
-  HASH_DELETE(handle, s->sealed_objects, entry);
+  HASH_DELETE(handle, plasma_state->sealed_objects, entry);
   dlfree(pointer);
   free(entry);
 }
@@ -249,10 +259,10 @@ void send_notifications(event_loop *loop,
                         int client_sock,
                         void *context,
                         int events) {
-  plasma_store_state *s = context;
+  plasma_store_state *plasma_state = context;
 
   notification_queue *queue;
-  HASH_FIND_INT(s->pending_notifications, &client_sock, queue);
+  HASH_FIND_INT(plasma_state->pending_notifications, &client_sock, queue);
   CHECK(queue != NULL);
 
   int num_processed = 0;
@@ -280,13 +290,13 @@ void send_notifications(event_loop *loop,
 }
 
 /* Subscribe to notifications about sealed objects. */
-void subscribe_to_updates(plasma_store_state *s, int conn) {
+void subscribe_to_updates(plasma_store_state *plasma_state, int conn) {
   LOG_DEBUG("subscribing to updates");
   char dummy;
   int fd = recv_fd(conn, &dummy, 1);
-  CHECKM(HASH_CNT(handle, s->open_objects) == 0,
+  CHECKM(HASH_CNT(handle, plasma_state->open_objects) == 0,
          "plasma_subscribe should be called before any objects are created.");
-  CHECKM(HASH_CNT(handle, s->sealed_objects) == 0,
+  CHECKM(HASH_CNT(handle, plasma_state->sealed_objects) == 0,
          "plasma_subscribe should be called before any objects are created.");
   /* Create a new array to buffer notifications that can't be sent to the
    * subscriber yet because the socket send buffer is full. TODO(rkn): the queue
@@ -295,47 +305,49 @@ void subscribe_to_updates(plasma_store_state *s, int conn) {
       (notification_queue *) malloc(sizeof(notification_queue));
   queue->subscriber_fd = fd;
   utarray_new(queue->object_ids, &object_id_icd);
-  HASH_ADD_INT(s->pending_notifications, subscriber_fd, queue);
+  HASH_ADD_INT(plasma_state->pending_notifications, subscriber_fd, queue);
   /* Add a callback to the event loop to send queued notifications whenever
    * there is room in the socket's send buffer. */
-  event_loop_add_file(s->loop, fd, EVENT_LOOP_WRITE, send_notifications, s);
+  event_loop_add_file(plasma_state->loop, fd, EVENT_LOOP_WRITE,
+                      send_notifications, plasma_state);
 }
 
 void process_message(event_loop *loop,
                      int client_sock,
                      void *context,
                      int events) {
-  plasma_store_state *s = context;
+  plasma_store_state *plasma_state = context;
   int64_t type;
   int64_t length;
   plasma_request *req;
   read_message(client_sock, &type, &length, (uint8_t **) &req);
+  /* We're only sending a single object ID at a time for now. */
   plasma_reply reply;
   memset(&reply, 0, sizeof(reply));
   UT_array *conns;
 
   switch (type) {
   case PLASMA_CREATE:
-    create_object(s, req->object_id, req->data_size, req->metadata_size,
-                  &reply.object);
+    create_object(plasma_state, req->object_ids[0], req->data_size,
+                  req->metadata_size, &reply.object);
     send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
             sizeof(reply));
     break;
   case PLASMA_GET:
-    if (get_object(s, client_sock, req->object_id, &reply.object) ==
-        OBJECT_FOUND) {
+    if (get_object(plasma_state, client_sock, req->object_ids[0],
+                   &reply.object) == OBJECT_FOUND) {
       send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
               sizeof(reply));
     }
     break;
   case PLASMA_CONTAINS:
-    if (contains_object(s, req->object_id) == OBJECT_FOUND) {
+    if (contains_object(plasma_state, req->object_ids[0]) == OBJECT_FOUND) {
       reply.has_object = 1;
     }
     plasma_send_reply(client_sock, &reply);
     break;
   case PLASMA_SEAL:
-    seal_object(s, req->object_id, &conns, &reply.object);
+    seal_object(plasma_state, req->object_ids[0], &conns, &reply.object);
     if (conns) {
       for (int *c = (int *) utarray_front(conns); c != NULL;
            c = (int *) utarray_next(conns, c)) {
@@ -346,10 +358,10 @@ void process_message(event_loop *loop,
     }
     break;
   case PLASMA_DELETE:
-    delete_object(s, req->object_id);
+    delete_object(plasma_state, req->object_ids[0]);
     break;
   case PLASMA_SUBSCRIBE:
-    subscribe_to_updates(s, client_sock);
+    subscribe_to_updates(plasma_state, client_sock);
     break;
   case DISCONNECT_CLIENT: {
     LOG_DEBUG("Disconnecting client on fd %d", client_sock);
diff --git a/src/plasma_store.h b/src/plasma_store.h
index dd5e963fb..b22302209 100644
--- a/src/plasma_store.h
+++ b/src/plasma_store.h
@@ -21,7 +21,9 @@ void create_object(plasma_store_state *s,
                    plasma_object *result);
 
 /**
- * Get an object:
+ * Get an object. This method assumes that we currently have or will
+ * eventually have this object sealed. If the object has not yet been sealed,
+ * the client that requested the object will be notified when it is sealed.
  *
  * @param s The plasma store state.
  * @param conn The client connection that requests the object.
diff --git a/test/test.py b/test/test.py
index 4e08ed67e..94b2acbab 100644
--- a/test/test.py
+++ b/test/test.py
@@ -43,6 +43,15 @@ def create_object(client, data_size, metadata_size, seal=True):
     client.seal(object_id)
   return object_id, memory_buffer, metadata
 
+def assert_get_object_equal(unit_test, client1, client2, object_id, memory_buffer=None, metadata=None):
+  if memory_buffer is not None:
+    unit_test.assertEqual(memory_buffer[:], client2.get(object_id)[:])
+  if metadata is not None:
+    unit_test.assertEqual(metadata[:], client2.get_metadata(object_id)[:])
+  unit_test.assertEqual(client1.get(object_id)[:], client2.get(object_id)[:])
+  unit_test.assertEqual(client1.get_metadata(object_id)[:],
+                        client2.get_metadata(object_id)[:])
+
 class TestPlasmaClient(unittest.TestCase):
 
   def setUp(self):
@@ -207,22 +216,42 @@ class TestPlasmaManager(unittest.TestCase):
     plasma_store_command2 = [plasma_store_executable, "-s", store_name2]
 
     if USE_VALGRIND:
-      self.p2 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_store_command1)
-      self.p3 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_store_command2)
+      self.p2 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--error-exitcode=1"] + plasma_store_command1)
+      self.p3 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--error-exitcode=1"] + plasma_store_command2)
     else:
       self.p2 = subprocess.Popen(plasma_store_command1)
       self.p3 = subprocess.Popen(plasma_store_command2)
 
+    # Start a Redis server.
+    redis_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../common/thirdparty/redis-3.2.3/src/redis-server")
+    self.redis_process = None
+    manager_redis_args = []
+    if os.path.exists(redis_path):
+      redis_port = 6379
+      with open(os.devnull, 'w') as FNULL:
+        self.redis_process = subprocess.Popen([redis_path,
+                                               "--port", str(redis_port)],
+                                              stdout=FNULL)
+      time.sleep(0.1)
+      manager_redis_args = ["-d", "{addr}:{port}".format(addr="127.0.0.1",
+                                                      port=redis_port)]
+
     # Start two PlasmaManagers.
     self.port1 = random.randint(10000, 50000)
     self.port2 = random.randint(10000, 50000)
     plasma_manager_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/plasma_manager")
-    plasma_manager_command1 = [plasma_manager_executable, "-s", store_name1, "-m", "127.0.0.1", "-p", str(self.port1)]
-    plasma_manager_command2 = [plasma_manager_executable, "-s", store_name2, "-m", "127.0.0.1", "-p", str(self.port2)]
+    plasma_manager_command1 = [plasma_manager_executable,
+                               "-s", store_name1,
+                               "-m", "127.0.0.1",
+                               "-p", str(self.port1)] + manager_redis_args
+    plasma_manager_command2 = [plasma_manager_executable,
+                               "-s", store_name2,
+                               "-m", "127.0.0.1",
+                               "-p", str(self.port2)] + manager_redis_args
 
     if USE_VALGRIND:
-      self.p4 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_manager_command1)
-      self.p5 = subprocess.Popen(["valgrind", "--track-origins=yes", "--error-exitcode=1"] + plasma_manager_command2)
+      self.p4 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--error-exitcode=1"] + plasma_manager_command1)
+      self.p5 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--error-exitcode=1"] + plasma_manager_command2)
       time.sleep(2.0)
     else:
       self.p4 = subprocess.Popen(plasma_manager_command1)
@@ -253,6 +282,63 @@ class TestPlasmaManager(unittest.TestCase):
       self.p3.kill()
       self.p4.kill()
       self.p5.kill()
+    if self.redis_process:
+      self.redis_process.kill()
+
+  def test_fetch(self):
+    if self.redis_process is None:
+      print("Cannot test fetch without a running redis instance.")
+      self.assertTrue(False)
+    for _ in range(100):
+      # Create an object.
+      object_id1, memory_buffer1, metadata1 = create_object(self.client1, 2000, 2000)
+      # Fetch the object from the other plasma store.
+      # TODO(swang): This line is a hack! It makes sure that the entry will be
+      # in the object table once we call the fetch operation. Remove once
+      # retries are implemented by Ray common.
+      time.sleep(0.1)
+      successes = self.client2.fetch([object_id1])
+      self.assertEqual(successes, [True])
+      # Compare the two buffers.
+      assert_get_object_equal(self, self.client1, self.client2, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
+      # Fetch in the other direction. These should return quickly because
+      # client1 already has the object.
+      successes = self.client1.fetch([object_id1])
+      self.assertEqual(successes, [True])
+      assert_get_object_equal(self, self.client2, self.client1, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
+
+  def test_fetch_multiple(self):
+    if self.redis_process is None:
+      print("Cannot test fetch without a running redis instance.")
+      self.assertTrue(False)
+    for _ in range(20):
+      # Create two objects and a third fake one that doesn't exist.
+      object_id1, memory_buffer1, metadata1 = create_object(self.client1, 2000, 2000)
+      missing_object_id = random_object_id()
+      object_id2, memory_buffer2, metadata2 = create_object(self.client1, 2000, 2000)
+      object_ids = [object_id1, missing_object_id, object_id2]
+      # Fetch the objects from the other plasma store. The second object ID
+      # should time out since it does not exist.
+      # TODO(swang): This line is a hack! It makes sure that the entry will be
+      # in the object table once we call the fetch operation. Remove once
+      # retries are implemented by Ray common.
+      time.sleep(0.1)
+      successes = self.client2.fetch(object_ids)
+      self.assertEqual(successes, [True, False, True])
+      # Compare the buffers of the objects that do exist.
+      assert_get_object_equal(self, self.client1, self.client2, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
+      assert_get_object_equal(self, self.client1, self.client2, object_id2,
+                              memory_buffer=memory_buffer2, metadata=metadata2)
+      # Fetch in the other direction. The fake object still does not exist.
+      successes = self.client1.fetch(object_ids)
+      self.assertEqual(successes, [True, False, True])
+      assert_get_object_equal(self, self.client2, self.client1, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
+      assert_get_object_equal(self, self.client2, self.client1, object_id2,
+                              memory_buffer=memory_buffer2, metadata=metadata2)
 
   def test_transfer(self):
     for _ in range(100):
@@ -261,25 +347,21 @@ class TestPlasmaManager(unittest.TestCase):
       # Transfer the buffer to the the other PlasmaStore.
       self.client1.transfer("127.0.0.1", self.port2, object_id1)
       # Compare the two buffers.
-      self.assertEqual(memory_buffer1[:], self.client2.get(object_id1)[:])
-      self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
-      self.assertEqual(metadata1[:], self.client2.get_metadata(object_id1)[:])
-      self.assertEqual(self.client1.get_metadata(object_id1)[:], self.client2.get_metadata(object_id1)[:])
+      assert_get_object_equal(self, self.client1, self.client2, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
       # Transfer the buffer again.
       self.client1.transfer("127.0.0.1", self.port2, object_id1)
-      self.assertEqual(metadata1[:], self.client2.get_metadata(object_id1)[:])
       # Compare the two buffers.
-      self.assertEqual(self.client1.get(object_id1)[:], self.client2.get(object_id1)[:])
+      assert_get_object_equal(self, self.client1, self.client2, object_id1,
+                              memory_buffer=memory_buffer1, metadata=metadata1)
 
       # Create an object.
       object_id2, memory_buffer2, metadata2 = create_object(self.client2, 20000, 20000)
       # Transfer the buffer to the the other PlasmaStore.
       self.client2.transfer("127.0.0.1", self.port1, object_id2)
       # Compare the two buffers.
-      self.assertEqual(memory_buffer2[:], self.client2.get(object_id2)[:])
-      self.assertEqual(self.client1.get(object_id2)[:], self.client2.get(object_id2)[:])
-      self.assertEqual(metadata2[:], self.client2.get_metadata(object_id2)[:])
-      self.assertEqual(self.client1.get_metadata(object_id2)[:], self.client2.get_metadata(object_id2)[:])
+      assert_get_object_equal(self, self.client1, self.client2, object_id2,
+                              memory_buffer=memory_buffer2, metadata=metadata2)
 
   def test_illegal_functionality(self):
     # Create an object id string.
@@ -307,8 +389,8 @@ class TestPlasmaManager(unittest.TestCase):
 if __name__ == "__main__":
   if len(sys.argv) > 1:
     # pop the argument so we don't mess with unittest's own argument parser
-    arg = sys.argv.pop()
-    if arg == "valgrind":
+    if sys.argv[-1] == "valgrind":
+      arg = sys.argv.pop()
       USE_VALGRIND = True
       print("Using valgrind for tests")
   unittest.main(verbosity=2)

From 63ec24478487de76a984c525c0303e3a817e885d Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 18 Oct 2016 18:27:43 -0700
Subject: [PATCH 85/91] Connect local scheduler to Plasma. (#11)

* Receive notifications about sealed objects from Plasma, and schedule tasks only when the dependencies are available locally.

* Fix formatting.

* Use version of Plasma with fix.

* Fix.

* Factor out the scheduling algorithm and use worker_index instead of the client socket to identify workers

* Fixes

* clang-format

* fix remaining linter errors
---
 .clang-format      |   5 ++
 .travis.yml        |  11 +++
 Makefile           |   6 +-
 common             |   2 +-
 photon.h           |  28 ++++++-
 photon_algorithm.c | 184 +++++++++++++++++++++++++++++++++++++++++++
 photon_algorithm.h |  88 +++++++++++++++++++++
 photon_scheduler.c | 189 ++++++++++++++++++++++++---------------------
 photon_scheduler.h |  31 +++++---
 test/test.py       |  53 +++++++++++--
 10 files changed, 485 insertions(+), 112 deletions(-)
 create mode 100644 .clang-format
 create mode 100644 photon_algorithm.c
 create mode 100644 photon_algorithm.h

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..3fcffcbd3
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,5 @@
+BasedOnStyle: Chromium
+DerivePointerAlignment: false
+IndentCaseLabels: false
+PointerAlignment: Right
+SpaceAfterCStyleCast: true
diff --git a/.travis.yml b/.travis.yml
index 360027785..ac6ca4a6a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -45,6 +45,17 @@ matrix:
 
 install:
   - ./install-dependencies.sh
+
+  # Install Plasma side by side.
+  - cd ..
+  - git clone https://github.com/ray-project/plasma.git
+  - cd plasma
+  - git checkout f189ca746b57f22371ef10077aa535492bbd8421
+  - make
+  - source setup-env.sh
+  - cd ../photon
+
+  # Install Photon.
   - make
   - cd common/lib/python
   - python setup.py install --user
diff --git a/Makefile b/Makefile
index 436502d6a..81f38d06c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC = gcc
-CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Icommon/thirdparty -fPIC
+CFLAGS = -g -Wall --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Icommon -Icommon/thirdparty -fPIC
 BUILD = build
 
 all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.a
@@ -7,8 +7,8 @@ all: $(BUILD)/photon_scheduler $(BUILD)/photon_client.a
 $(BUILD)/photon_client.a: photon_client.o
 	ar rcs $(BUILD)/photon_client.a photon_client.o
 
-$(BUILD)/photon_scheduler: photon.h photon_scheduler.c common
-	$(CC) $(CFLAGS) -o $@ photon_scheduler.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty -Icommon/
+$(BUILD)/photon_scheduler: photon.h photon_scheduler.c photon_algorithm.c common
+	$(CC) $(CFLAGS) -o $@ photon_scheduler.c photon_algorithm.c common/build/libcommon.a common/thirdparty/hiredis/libhiredis.a -Icommon/thirdparty/ -Icommon/ ../plasma/build/libplasma_client.a -I../plasma/src/
 
 common: FORCE
 	git submodule update --init --recursive
diff --git a/common b/common
index 7be1a93d6..535bc8f0b 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 7be1a93d64ca36fc639e11f81de1483e0bd17b8c
+Subproject commit 535bc8f0b8dac8f3ef0b66c3fd8b265ab0e6c787
diff --git a/photon.h b/photon.h
index a59e5566f..298a27383 100644
--- a/photon.h
+++ b/photon.h
@@ -1,6 +1,11 @@
 #ifndef PHOTON_H
 #define PHOTON_H
 
+#include "common/task.h"
+#include "common/state/db.h"
+#include "utarray.h"
+#include "uthash.h"
+
 enum photon_message_type {
   /** Notify the local scheduler that a task has finished. */
   TASK_DONE = 64,
@@ -11,4 +16,25 @@ enum photon_message_type {
   EXECUTE_TASK,
 };
 
-#endif
+// clang-format off
+/** Contains all information that is associated to a worker. */
+typedef struct {
+  int sock;
+} worker;
+// clang-format on
+
+/* UT_array element descriptors; defined exactly once in photon_scheduler.c. */
+extern UT_icd task_ptr_icd;
+extern UT_icd worker_icd;
+
+/** Resources that are exposed to the scheduling algorithm. */
+typedef struct {
+  /** List of workers available to this node. The index into this array
+   *  is the worker_index and is used to identify workers throughout
+   *  the program. */
+  UT_array *workers;
+  /* The handle to the database. */
+  db_handle *db;
+} scheduler_info;
+
+#endif /* PHOTON_H */
diff --git a/photon_algorithm.c b/photon_algorithm.c
new file mode 100644
index 000000000..9f3f65d78
--- /dev/null
+++ b/photon_algorithm.c
@@ -0,0 +1,184 @@
+#include "photon_algorithm.h"
+
+#include <stdbool.h>
+#include "utarray.h"
+
+#include "state/task_log.h"
+#include "photon.h"
+#include "photon_scheduler.h"
+
+typedef struct {
+  /* Object id of this object. */
+  object_id object_id;
+  /* Handle for the uthash table. */
+  UT_hash_handle handle;
+} available_object;
+
+/** Part of the photon state that is maintained by the scheduling algorithm. */
+struct scheduler_state {
+  /** An array of pointers to tasks that are waiting to be scheduled. */
+  UT_array *task_queue;
+  /** An array of worker indices corresponding to clients that are
+   *  waiting for tasks. */
+  UT_array *available_workers;
+  /** A hash map of the objects that are available in the local Plasma store.
+   *  This information could be a little stale. */
+  available_object *local_objects;
+};
+
+scheduler_state *make_scheduler_state(void) {
+  scheduler_state *state = malloc(sizeof(scheduler_state));
+  /* Initialize an empty hash map for the cache of local available objects. */
+  state->local_objects = NULL;
+  /* Initialize the local data structures used for queuing tasks and workers. */
+  utarray_new(state->task_queue, &task_ptr_icd);
+  utarray_new(state->available_workers, &ut_int_icd);
+  return state;
+}
+
+void free_scheduler_state(scheduler_state *s) {
+  utarray_free(s->task_queue);
+  utarray_free(s->available_workers);
+  free(s);
+}
+
+/**
+ * Check if all of the remote object arguments for a task are available in the
+ * local object store.
+ *
+ * @param s The scheduler state.
+ * @param task Task specification of the task to check.
+ * @return True if all of the remote object arguments for the task are present
+ *         in the local object store, otherwise false.
+ */
+bool can_run(scheduler_state *s, task_spec *task) {
+  int64_t num_args = task_num_args(task);
+  for (int i = 0; i < num_args; ++i) {
+    if (task_arg_type(task, i) == ARG_BY_REF) {
+      object_id obj_id = *task_arg_id(task, i);
+      available_object *entry;
+      HASH_FIND(handle, s->local_objects, &obj_id, sizeof(object_id), entry);
+      if (entry == NULL) {
+        /* The object is not present locally, so this task cannot be scheduled
+         * right now. */
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+/**
+ * If there is a task whose dependencies are available locally, assign it to the
+ * worker. This does not remove the worker from the available worker queue.
+ *
+ * @param info Resources exposed by photon to the scheduling algorithm.
+ * @param state The scheduler state.
+ * @param worker_index The index of the worker.
+ * @return 1 if a task was assigned to the worker, otherwise 0.
+ */
+int find_and_schedule_task_if_possible(scheduler_info *info,
+                                       scheduler_state *state,
+                                       int worker_index) {
+  int found_task_to_schedule = 0;
+  /* Find the first task whose dependencies are available locally. */
+  task_spec *spec;
+  task_instance **task;
+  int i = 0;
+  for (; i < utarray_len(state->task_queue); ++i) {
+    task = (task_instance **) utarray_eltptr(state->task_queue, i);
+    spec = task_instance_task_spec(*task);
+    if (can_run(state, spec)) {
+      found_task_to_schedule = 1;
+      break;
+    }
+  }
+  if (found_task_to_schedule) {
+    /* This task's dependencies are available locally, so assign the task to the
+     * worker. */
+    assign_task_to_worker(info, spec, worker_index);
+    /* Update the task queue data structure and free the task. */
+    free(*task);
+    utarray_erase(state->task_queue, i, 1);
+  }
+  return found_task_to_schedule;
+}
+
+void handle_task_submitted(scheduler_info *info,
+                           scheduler_state *s,
+                           task_spec *task) {
+  /* Create a unique task instance ID. This is different from the task ID and
+   * is used to distinguish between potentially multiple executions of the
+   * task. */
+  task_iid task_iid = globally_unique_id();
+  task_instance *instance =
+      make_task_instance(task_iid, task, TASK_STATUS_WAITING, NIL_ID);
+  /* If this task's dependencies are available locally, and if there is an
+   * available worker, then assign this task to an available worker. Otherwise,
+   * add this task to the local task queue. */
+  int schedule_locally =
+      (utarray_len(s->available_workers) > 0) && can_run(s, task);
+  if (schedule_locally) {
+    /* Get the last available worker in the available worker queue. */
+    int *worker_index = (int *) utarray_back(s->available_workers);
+    /* Tell the available worker to execute the task. */
+    assign_task_to_worker(info, task, *worker_index);
+    /* Remove the available worker from the queue. */
+    utarray_pop_back(s->available_workers);
+  } else {
+    /* Add the task to the task queue. This passes ownership of the task
+     * instance to the queue; it is freed when it is assigned to a worker. */
+    utarray_push_back(s->task_queue, &instance);
+  }
+  /* Submit the task to redis. */
+  task_log_add_task(info->db, instance);
+  if (schedule_locally) {
+    /* If the task was scheduled locally, we need to free it. Otherwise,
+     * ownership of the task is passed to the task_queue, and it will be freed
+     * when it is assigned to a worker. */
+    free(instance);
+  }
+}
+
+void handle_worker_available(scheduler_info *info,
+                             scheduler_state *state,
+                             int worker_index) {
+  int scheduled_task =
+      find_and_schedule_task_if_possible(info, state, worker_index);
+  /* If we couldn't find a task to schedule, add the worker to the queue of
+   * available workers. */
+  if (!scheduled_task) {
+    for (int *p = (int *) utarray_front(state->available_workers); p != NULL;
+         p = (int *) utarray_next(state->available_workers, p)) {
+      CHECK(*p != worker_index);
+    }
+    /* Add worker_index to the queue of available workers. It is removed from
+     * the queue when a task is assigned to this worker. */
+    utarray_push_back(state->available_workers, &worker_index);
+    LOG_INFO("Adding worker_index %d to available workers.\n", worker_index);
+  }
+}
+
+void handle_object_available(scheduler_info *info,
+                             scheduler_state *state,
+                             object_id object_id) {
+  /* TODO(rkn): When does this get freed? */
+  available_object *entry =
+      (available_object *) malloc(sizeof(available_object));
+  entry->object_id = object_id;
+  HASH_ADD(handle, state->local_objects, object_id, sizeof(object_id), entry);
+
+  /* Check if we can schedule any tasks. */
+  int num_tasks_scheduled = 0;
+  for (int *p = (int *) utarray_front(state->available_workers); p != NULL;
+       p = (int *) utarray_next(state->available_workers, p)) {
+    /* Schedule a task on this worker if possible. */
+    int scheduled_task = find_and_schedule_task_if_possible(info, state, *p);
+    if (!scheduled_task) {
+      /* There are no tasks we can schedule, so exit the loop. */
+      break;
+    }
+    num_tasks_scheduled += 1;
+  }
+  utarray_erase(state->available_workers, 0, num_tasks_scheduled);
+}
diff --git a/photon_algorithm.h b/photon_algorithm.h
new file mode 100644
index 000000000..714de8869
--- /dev/null
+++ b/photon_algorithm.h
@@ -0,0 +1,88 @@
+#ifndef PHOTON_ALGORITHM_H
+#define PHOTON_ALGORITHM_H
+
+#include "photon.h"
+#include "common/task.h"
+
+/* ==== The scheduling algorithm ====
+ *
+ * This file contains declarations for all functions and data structures
+ * that need to be provided if you want to implement a new algorithm
+ * for the local scheduler.
+ *
+ */
+
+/** Internal state of the scheduling algorithm. */
+typedef struct scheduler_state scheduler_state;
+
+/**
+ * Initialize the scheduler state.
+ *
+ * @return Internal state of the scheduling algorithm.
+ */
+scheduler_state *make_scheduler_state(void);
+
+/**
+ * Free the scheduler state.
+ *
+ * @param state Internal state of the scheduling algorithm.
+ * @return Void.
+ */
+void free_scheduler_state(scheduler_state *state);
+
+/**
+ * This function will be called when a new task is submitted by a worker for
+ * execution.
+ *
+ * @param info Info about resources exposed by photon to the scheduling
+ *        algorithm.
+ * @param state State of the scheduling algorithm.
+ * @param task Task that is submitted by the worker.
+ * @return Void.
+ */
+void handle_task_submitted(scheduler_info *info,
+                           scheduler_state *state,
+                           task_spec *task);
+
+/**
+ * This function will be called when a task is assigned by the global scheduler
+ * for execution on this local scheduler.
+ *
+ * @param info Info about resources exposed by photon to the scheduling
+ *        algorithm.
+ * @param state State of the scheduling algorithm.
+ * @param task Task that is assigned by the global scheduler.
+ * @return Void.
+ */
+void handle_task_assigned(scheduler_info *info,
+                          scheduler_state *state,
+                          task_spec *task);
+
+/**
+ * This function is called if a new object becomes available in the local
+ * plasma store.
+ *
+ * @param info Info about resources exposed by photon to the scheduling
+ *        algorithm.
+ * @param state State of the scheduling algorithm.
+ * @param object_id ID of the object that became available.
+ * @return Void.
+ */
+void handle_object_available(scheduler_info *info,
+                             scheduler_state *state,
+                             object_id object_id);
+
+/**
+ * This function is called when a new worker becomes available.
+ *
+ * @param info Info about resources exposed by photon to the scheduling
+ *        algorithm.
+ * @param state State of the scheduling algorithm.
+ * @param worker_index The index of the worker that becomes available.
+ * @return Void.
+ */
+void handle_worker_available(scheduler_info *info,
+                             scheduler_state *state,
+                             int worker_index);
+
+#endif /* PHOTON_ALGORITHM_H */
diff --git a/photon_scheduler.c b/photon_scheduler.c
index 1ce5fb916..bdd18857d 100644
--- a/photon_scheduler.c
+++ b/photon_scheduler.c
@@ -10,111 +10,92 @@
 #include "event_loop.h"
 #include "io.h"
 #include "photon.h"
+#include "photon_algorithm.h"
 #include "photon_scheduler.h"
+#include "plasma_client.h"
 #include "state/db.h"
 #include "state/task_log.h"
 #include "utarray.h"
+#include "uthash.h"
 
-typedef struct {
-  /** The file descriptor used to communicate with the worker. */
-  int client_sock;
-} available_worker;
-
-/* These are needed to define the UT_arrays. */
 UT_icd task_ptr_icd = {sizeof(task_instance *), NULL, NULL, NULL};
-UT_icd worker_icd = {sizeof(available_worker), NULL, NULL, NULL};
+UT_icd worker_icd = {sizeof(worker), NULL, NULL, NULL};
+
+/** Association between the socket fd of a worker and its worker_index. */
+typedef struct {
+  /** The socket fd of a worker. */
+  int sock;
+  /** The index of the worker in scheduler_info->workers. */
+  int64_t worker_index;
+  /** Handle for the hash table. */
+  UT_hash_handle hh;
+} worker_index;
 
 struct local_scheduler_state {
   /* The local scheduler event loop. */
   event_loop *loop;
-  /* The handle to the database. */
-  db_handle *db;
-  /** This is an array of pointers to tasks that are waiting to be scheduled. */
-  UT_array *task_queue;
-  /** This is an array of file descriptors corresponding to clients that are
-   *  waiting for tasks. */
-  UT_array *available_worker_queue;
+  /* The Plasma client. */
+  plasma_store_conn *plasma_conn;
+  /* Association between client socket and worker index. */
+  worker_index *worker_index;
+  /* Info that is exposed to the scheduling algorithm. */
+  scheduler_info *scheduler_info;
+  /* State for the scheduling algorithm. */
+  scheduler_state *scheduler_state;
 };
 
-local_scheduler_state *
-init_local_scheduler(event_loop *loop, const char *redis_addr, int redis_port) {
+local_scheduler_state *init_local_scheduler(event_loop *loop,
+                                            const char *redis_addr,
+                                            int redis_port,
+                                            const char *plasma_socket_name) {
   local_scheduler_state *state = malloc(sizeof(local_scheduler_state));
   state->loop = loop;
-  state->db = db_connect(redis_addr, redis_port, "photon", "", -1);
-  db_attach(state->db, loop);
-  utarray_new(state->task_queue, &task_ptr_icd);
-  utarray_new(state->available_worker_queue, &worker_icd);
+  /* Connect to Plasma. This method will retry if Plasma hasn't started yet. */
+  state->plasma_conn = plasma_store_connect(plasma_socket_name);
+  /* Subscribe to notifications about sealed objects. */
+  int plasma_fd = plasma_subscribe(state->plasma_conn);
+  /* Add the callback that processes the notification to the event loop. */
+  event_loop_add_file(loop, plasma_fd, EVENT_LOOP_READ,
+                      process_plasma_notification, state);
+  state->worker_index = NULL;
+  /* Add scheduler info. */
+  state->scheduler_info = malloc(sizeof(scheduler_info));
+  utarray_new(state->scheduler_info->workers, &worker_icd);
+  /* Connect to Redis. */
+  state->scheduler_info->db =
+      db_connect(redis_addr, redis_port, "photon", "", -1);
+  db_attach(state->scheduler_info->db, loop);
+  /* Add scheduler state. */
+  state->scheduler_state = make_scheduler_state();
   return state;
 };
 
 void free_local_scheduler(local_scheduler_state *s) {
-  db_disconnect(s->db);
-  utarray_free(s->task_queue);
-  utarray_free(s->available_worker_queue);
+  db_disconnect(s->scheduler_info->db);
+  free(s->scheduler_info);
+  free_scheduler_state(s->scheduler_state);
   event_loop_destroy(s->loop);
   free(s);
 }
 
-void handle_submit_task(local_scheduler_state *s, task_spec *task) {
-  /* Create a unique task instance ID. This is different from the task ID and
-   * is used to distinguish between potentially multiple executions of the
-   * task. */
-  task_iid task_iid = globally_unique_id();
-  task_instance *instance =
-      make_task_instance(task_iid, task, TASK_STATUS_WAITING, NIL_ID);
-  /* Assign this task to an available worker. If there are no available workers,
-   * then add this task to the local task queue. */
-  int schedule_locally = utarray_len(s->available_worker_queue) > 0;
-  if (schedule_locally) {
-    /* Get the last available worker in the available worker queue. */
-    available_worker *worker =
-        (available_worker *)utarray_back(s->available_worker_queue);
-    /* Tell the available worker to execute the task. */
-    write_message(worker->client_sock, EXECUTE_TASK, task_size(task),
-                  (uint8_t *)task);
-    /* Remove the available worker from the queue and free the struct. */
-    utarray_pop_back(s->available_worker_queue);
-  } else {
-    /* Add the task to the task queue. This passes ownership of the task queue.
-     * And the task will be freed when it is assigned to a worker. */
-    utarray_push_back(s->task_queue, &instance);
-  }
-  /* Submit the task to redis. */
-  task_log_add_task(s->db, instance);
-  if (schedule_locally) {
-    /* If the task was scheduled locally, we need to free it. Otherwise,
-     * ownership of the task is passed to the task_queue, and it will be freed
-     * when it is assigned to a worker. */
-    free(instance);
-  }
+void assign_task_to_worker(scheduler_info *info,
+                           task_spec *task,
+                           int worker_index) {
+  CHECK(worker_index < utarray_len(info->workers));
+  worker *w = (worker *) utarray_eltptr(info->workers, worker_index);
+  write_message(w->sock, EXECUTE_TASK, task_size(task), (uint8_t *) task);
 }
 
-void handle_get_task(local_scheduler_state *s, int client_sock) {
-  /* If there is an available task, assign that task to this worker. Otherwise
-   * add the worker to the queue of available workers. */
-  if (utarray_len(s->task_queue) > 0) {
-    /* Get the last task in the task queue. */
-    task_instance **back = (task_instance **)utarray_back(s->task_queue);
-    task_spec *task = task_instance_task_spec(*back);
-    /* Send a task to the worker. */
-    write_message(client_sock, EXECUTE_TASK, task_size(task), (uint8_t *)task);
-    /* Update the task queue data structure and free the task. */
-    utarray_pop_back(s->task_queue);
-    free(*back);
-  } else {
-    /* Check that client_sock is not already in the available workers. */
-    for (available_worker *p =
-             (available_worker *)utarray_front(s->available_worker_queue);
-         p != NULL;
-         p = (available_worker *)utarray_next(s->available_worker_queue, p)) {
-      CHECK(p->client_sock != client_sock);
-    }
-    /* Add client_sock to a list of available workers. This struct will be freed
-     * when a task is assigned to this worker. */
-    available_worker worker_info = {.client_sock = client_sock};
-    utarray_push_back(s->available_worker_queue, &worker_info);
-    LOG_INFO("Adding client_sock %d to available workers.\n", client_sock);
-  }
+void process_plasma_notification(event_loop *loop,
+                                 int client_sock,
+                                 void *context,
+                                 int events) {
+  local_scheduler_state *s = context;
+  /* Read the object ID into a stack buffer so no heap memory is leaked. */
+  object_id obj_id;
+  ssize_t nbytes = recv(client_sock, &obj_id, sizeof(obj_id), 0);
+  CHECK(nbytes == (ssize_t) sizeof(obj_id));
+  handle_object_available(s->scheduler_info, s->scheduler_state, obj_id);
 }
 
 void process_message(event_loop *loop, int client_sock, void *context,
@@ -126,16 +107,22 @@ void process_message(event_loop *loop, int client_sock, void *context,
   int64_t length;
   read_message(client_sock, &type, &length, &message);
 
+  LOG_DEBUG("New event of type %" PRId64, type);
+
   switch (type) {
   case SUBMIT_TASK: {
-    task_spec *task = (task_spec *)message;
-    CHECK(task_size(task) == length);
-    handle_submit_task(s, task);
+    task_spec *spec = (task_spec *) message;
+    CHECK(task_size(spec) == length);
+    handle_task_submitted(s->scheduler_info, s->scheduler_state, spec);
   } break;
   case TASK_DONE: {
   } break;
   case GET_TASK: {
-    handle_get_task(s, client_sock);
+    worker_index *wi;
+    HASH_FIND_INT(s->worker_index, &client_sock, wi);
+    printf("worker_index is %" PRId64 "\n", wi->worker_index);
+    handle_worker_available(s->scheduler_info, s->scheduler_state,
+                            wi->worker_index);
   } break;
   case DISCONNECT_CLIENT: {
     LOG_INFO("Disconnecting client on fd %d", client_sock);
@@ -156,6 +143,14 @@ void new_client_connection(event_loop *loop, int listener_sock, void *context,
   int new_socket = accept_client(listener_sock);
   event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message, s);
   LOG_INFO("new connection with fd %d", new_socket);
+  /* Add worker to list of workers. */
+  /* TODO(pcm): Where shall we free this? */
+  worker_index *new_worker_index = malloc(sizeof(worker_index));
+  new_worker_index->sock = new_socket;
+  new_worker_index->worker_index = utarray_len(s->scheduler_info->workers);
+  HASH_ADD_INT(s->worker_index, sock, new_worker_index);
+  worker worker = {.sock = new_socket};
+  utarray_push_back(s->scheduler_info->workers, &worker);
 }
 
 /* We need this code so we can clean up when we get a SIGTERM signal. */
@@ -171,11 +166,14 @@ void signal_handler(int signal) {
 
 /* End of the cleanup code. */
 
-void start_server(const char *socket_name, const char *redis_addr,
-                  int redis_port) {
+void start_server(const char *socket_name,
+                  const char *redis_addr,
+                  int redis_port,
+                  const char *plasma_socket_name) {
   int fd = bind_ipc_sock(socket_name);
   event_loop *loop = event_loop_create();
-  g_state = init_local_scheduler(loop, redis_addr, redis_port);
+  g_state =
+      init_local_scheduler(loop, redis_addr, redis_port, plasma_socket_name);
 
   /* Run event loop. */
   event_loop_add_file(loop, fd, EVENT_LOOP_READ, new_client_connection,
@@ -189,8 +187,10 @@ int main(int argc, char *argv[]) {
   char *scheduler_socket_name = NULL;
   /* IP address and port of redis. */
   char *redis_addr_port = NULL;
+  /* Socket name for the local Plasma store. */
+  char *plasma_socket_name = NULL;
   int c;
-  while ((c = getopt(argc, argv, "s:r:")) != -1) {
+  while ((c = getopt(argc, argv, "s:r:p:")) != -1) {
     switch (c) {
     case 's':
       scheduler_socket_name = optarg;
@@ -198,6 +198,9 @@ int main(int argc, char *argv[]) {
     case 'r':
       redis_addr_port = optarg;
       break;
+    case 'p':
+      plasma_socket_name = optarg;
+      break;
     default:
       LOG_ERR("unknown option %c", c);
       exit(-1);
@@ -207,6 +210,11 @@ int main(int argc, char *argv[]) {
     LOG_ERR("please specify socket for incoming connections with -s switch");
     exit(-1);
   }
+  if (!plasma_socket_name) {
+    LOG_ERR("please specify socket for connecting to Plasma with -p switch");
+    exit(-1);
+  }
+  /* Parse the Redis address into an IP address and a port. */
   char redis_addr[16] = {0};
   char redis_port[6] = {0};
   if (!redis_addr_port ||
@@ -215,5 +223,6 @@ int main(int argc, char *argv[]) {
     LOG_ERR("need to specify redis address like 127.0.0.1:6379 with -r switch");
     exit(-1);
   }
-  start_server(scheduler_socket_name, &redis_addr[0], atoi(redis_port));
+  start_server(scheduler_socket_name, &redis_addr[0], atoi(redis_port),
+               plasma_socket_name);
 }
diff --git a/photon_scheduler.h b/photon_scheduler.h
index 591ffe0f5..3ebb03432 100644
--- a/photon_scheduler.h
+++ b/photon_scheduler.h
@@ -2,6 +2,7 @@
 #define PHOTON_SCHEDULER_H
 
 #include "task.h"
+#include "event_loop.h"
 
 typedef struct local_scheduler_state local_scheduler_state;
 
@@ -15,25 +16,37 @@ typedef struct local_scheduler_state local_scheduler_state;
  * @param events Flag for events that are available on the listener socket.
  * @return Void.
  */
-void new_client_connection(event_loop *loop, int listener_sock, void *context,
+void new_client_connection(event_loop *loop,
+                           int listener_sock,
+                           void *context,
                            int events);
 
 /**
- * Assign a task to a worker.
+ * This function can be called by the scheduling algorithm to assign a task
+ * to a worker.
  *
- * @param s State of the local scheduler.
- * @param client_sock Socket by which the worker is connected.
+ * @param info
+ * @param task The task that is submitted to the worker.
+ * @param worker_index The index of the worker the task is submitted to.
  * @return Void.
  */
-void handle_get_task(local_scheduler_state *s, int client_sock);
+void assign_task_to_worker(scheduler_info *info,
+                           task_spec *task,
+                           int worker_index);
 
 /**
- * Handle incoming submit request by a worker.
+ * This is the callback that is used to process a notification from the Plasma
+ * store that an object has been sealed.
  *
- * @param s State of the local scheduler.
- * @param task Task specification of the task to be submitted.
+ * @param loop The local scheduler's event loop.
+ * @param client_sock The file descriptor to read the notification from.
+ * @param context The local scheduler state.
+ * @param events Event flags passed by the event loop; unused by this callback.
  * @return Void.
  */
-void handle_submit_task(local_scheduler_state *s, task_spec *task);
+void process_plasma_notification(event_loop *loop,
+                                 int client_sock,
+                                 void *context,
+                                 int events);
 
 #endif /* PHOTON_SCHEDULER_H */
diff --git a/test/test.py b/test/test.py
index 2db6df314..fb35702ad 100644
--- a/test/test.py
+++ b/test/test.py
@@ -6,9 +6,11 @@ import subprocess
 import sys
 import unittest
 import random
+import threading
 import time
 
 import photon
+import plasma
 
 USE_VALGRIND = False
 
@@ -18,14 +20,19 @@ class TestPhotonClient(unittest.TestCase):
     # Start Redis.
     redis_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../common/thirdparty/redis-3.2.3/src/redis-server")
     self.p1 = subprocess.Popen([redis_executable, "--loglevel", "warning"])
+    # Start Plasma.
+    plasma_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../plasma/build/plasma_store")
+    plasma_socket = "/tmp/plasma_store{}".format(random.randint(0, 10000))
+    self.p2 = subprocess.Popen([plasma_executable, "-s", plasma_socket])
     time.sleep(0.1)
+    self.plasma_client = plasma.PlasmaClient(plasma_socket)
     scheduler_executable = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../build/photon_scheduler")
     scheduler_name = "/tmp/scheduler{}".format(random.randint(0, 10000))
-    command = [scheduler_executable, "-s", scheduler_name, "-r", "127.0.0.1:6379"]
+    command = [scheduler_executable, "-s", scheduler_name, "-r", "127.0.0.1:6379", "-p", plasma_socket]
     if USE_VALGRIND:
-      self.p2 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--show-leak-kinds=all"] + command)
+      self.p3 = subprocess.Popen(["valgrind", "--track-origins=yes", "--leak-check=full", "--show-leak-kinds=all"] + command)
     else:
-      self.p2 = subprocess.Popen(command)
+      self.p3 = subprocess.Popen(command)
     if USE_VALGRIND:
       time.sleep(1.0)
     else:
@@ -36,21 +43,30 @@ class TestPhotonClient(unittest.TestCase):
   def tearDown(self):
     # Kill the Redis server.
     self.p1.kill()
+    # Kill Plasma.
+    self.p2.kill()
     # Kill the local scheduler.
     if USE_VALGRIND:
-      self.p2.send_signal(signal.SIGTERM)
-      self.p2.wait()
-      os._exit(self.p2.returncode)
+      self.p3.send_signal(signal.SIGTERM)
+      self.p3.wait()
+      os._exit(self.p3.returncode)
     else:
-      self.p2.kill()
-
+      self.p3.kill()
 
   def test_submit_and_get_task(self):
     # TODO(rkn): This should be a FunctionID.
     function_id = photon.ObjectID(20 * "a")
     object_ids = [photon.ObjectID(20 * chr(i)) for i in range(256)]
+    # Create and seal the objects in the object store so that we can schedule
+    # all of the subsequent tasks.
+    for object_id in object_ids:
+      self.plasma_client.create(object_id.id(), 0)
+      self.plasma_client.seal(object_id.id())
+    # Define some arguments to use for the tasks.
     args_list = [
       [],
+      #{},
+      #(),
       1 * [1],
       10 * [1],
       100 * [1],
@@ -104,6 +120,27 @@ class TestPhotonClient(unittest.TestCase):
       for num_return_vals in [0, 1, 2, 3, 5, 10, 100]:
         new_task = self.photon_client.get_task()
 
+  def test_scheduling_when_objects_ready(self):
+    # Create a task and submit it.
+    object_id = photon.ObjectID(20 * chr(0))
+    # TODO(rkn): This should be a FunctionID.
+    function_id = photon.ObjectID(20 * "a")
+    task = photon.Task(function_id, [object_id], 0)
+    self.photon_client.submit(task)
+    # Launch a thread to get the task.
+    def get_task():
+      self.photon_client.get_task()
+    t = threading.Thread(target=get_task)
+    t.start()
+    # Sleep to give the thread time to call get_task.
+    time.sleep(0.1)
+    # Create and seal the object ID in the object store. This should trigger a
+    # scheduling event.
+    self.plasma_client.create(object_id.id(), 0)
+    self.plasma_client.seal(object_id.id())
+    # Wait until the thread finishes so that we know the task was scheduled.
+    t.join()
+
 if __name__ == "__main__":
   if len(sys.argv) > 1:
     # pop the argument so we don't mess with unittest's own argument parser

From d19f7ad853f68e8bf64ed2732b0e1ffd3ac56e86 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 21 Oct 2016 00:47:34 -0700
Subject: [PATCH 86/91] Implement plasma_release. (#42)

* Implement plasma_release, enabling clients to indicate when they are no longer using an object.

* Call plasma_release for each plasma_create call, and call plasma_release from the manager.

* Fixes.

* Create client specific contexts for plasma_store callbacks.

* More changes.

* Fixes.

* Cleanup lists of clients using objects when a client disconnects.

* Make names parallel.
---
 lib/python/plasma.py |  50 +++++++++--
 src/plasma.h         |   2 +
 src/plasma_client.c  | 107 +++++++++++++++++++++-
 src/plasma_client.h  |  26 ++++--
 src/plasma_manager.c |  11 ++-
 src/plasma_store.c   | 208 +++++++++++++++++++++++++++++++------------
 src/plasma_store.h   |  56 +++++++-----
 test/test.py         |  92 +++++++++----------
 8 files changed, 413 insertions(+), 139 deletions(-)

diff --git a/lib/python/plasma.py b/lib/python/plasma.py
index 8200471d5..c59855863 100644
--- a/lib/python/plasma.py
+++ b/lib/python/plasma.py
@@ -17,6 +17,45 @@ def make_plasma_id(string):
   object_id = map(ord, string)
   return PlasmaID(plasma_id=ID(*object_id))
 
+class PlasmaBuffer(object):
+  """This is the type of objects returned by calls to get with a PlasmaClient.
+
+  We define our own class instead of directly returning a buffer object so that
+  we can add a custom destructor which notifies Plasma that the object is no
+  longer being used, so the memory in the Plasma store backing the object can
+  potentially be freed.
+
+  Attributes:
+    buffer (buffer): A buffer containing an object in the Plasma store.
+    plasma_id (PlasmaID): The ID of the object in the buffer.
+    plasma_client (PlasmaClient): The PlasmaClient that we use to communicate
+      with the store and manager.
+  """
+  def __init__(self, buff, plasma_id, plasma_client):
+    """Initialize a PlasmaBuffer."""
+    self.buffer = buff
+    self.plasma_id = plasma_id
+    self.plasma_client = plasma_client
+
+  def __del__(self):
+    """Notify Plasma that the object is no longer needed."""
+    self.plasma_client.client.plasma_release(self.plasma_client.plasma_conn, self.plasma_id)
+
+  def __getitem__(self, index):
+    """Read from the PlasmaBuffer as if it were just a regular buffer."""
+    return self.buffer[index]
+
+  def __setitem__(self, index, value):
+    """Write to the PlasmaBuffer as if it were just a regular buffer.
+
+    This should fail because the buffer should be read only.
+    """
+    self.buffer[index] = value
+
+  def __len__(self):
+    """Return the length of the buffer."""
+    return len(self.buffer)
+
 class PlasmaClient(object):
   """The PlasmaClient is used to interface with a plasma store and a plasma manager.
 
@@ -45,6 +84,7 @@ class PlasmaClient(object):
     self.client.plasma_connect.restype = ctypes.c_void_p
     self.client.plasma_create.restype = None
     self.client.plasma_get.restype = None
+    self.client.plasma_release.restype = None
     self.client.plasma_contains.restype = None
     self.client.plasma_seal.restype = None
     self.client.plasma_delete.restype = None
@@ -82,7 +122,7 @@ class PlasmaClient(object):
     metadata = buffer("") if metadata is None else metadata
     metadata = (ctypes.c_ubyte * len(metadata)).from_buffer_copy(metadata)
     self.client.plasma_create(self.plasma_conn, make_plasma_id(object_id), size, ctypes.cast(metadata, ctypes.POINTER(ctypes.c_ubyte * len(metadata))), len(metadata), ctypes.byref(data))
-    return self.buffer_from_read_write_memory(data, size)
+    return PlasmaBuffer(self.buffer_from_read_write_memory(data, size), make_plasma_id(object_id), self)
 
   def get(self, object_id):
     """Create a buffer from the PlasmaStore based on object ID.
@@ -97,8 +137,8 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
-    return self.buffer_from_memory(data, size)
+    self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    return PlasmaBuffer(self.buffer_from_memory(data, size), make_plasma_id(object_id), self)
 
   def get_metadata(self, object_id):
     """Create a buffer from the PlasmaStore based on object ID.
@@ -113,8 +153,8 @@ class PlasmaClient(object):
     data = ctypes.c_void_p()
     metadata_size = ctypes.c_int64()
     metadata = ctypes.c_void_p()
-    buf = self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
-    return self.buffer_from_memory(metadata, metadata_size)
+    self.client.plasma_get(self.plasma_conn, make_plasma_id(object_id), ctypes.byref(size), ctypes.byref(data), ctypes.byref(metadata_size), ctypes.byref(metadata))
+    return PlasmaBuffer(self.buffer_from_memory(metadata, metadata_size), make_plasma_id(object_id), self)
 
   def contains(self, object_id):
     """Check if the object is present and has been sealed in the PlasmaStore.
diff --git a/src/plasma.h b/src/plasma.h
index 35e4f0db3..3b7948afe 100644
--- a/src/plasma.h
+++ b/src/plasma.h
@@ -46,6 +46,8 @@ enum plasma_message_type {
   PLASMA_CREATE = 128,
   /** Get an object. */
   PLASMA_GET,
+  /** Tell the store that the client no longer needs an object. */
+  PLASMA_RELEASE,
   /** Check if an object is present. */
   PLASMA_CONTAINS,
   /** Seal an object. */
diff --git a/src/plasma_client.c b/src/plasma_client.c
index b01204a1c..68196737f 100644
--- a/src/plasma_client.c
+++ b/src/plasma_client.c
@@ -26,10 +26,29 @@ typedef struct {
   int key;
   /** The result of mmap for this file descriptor. */
   uint8_t *pointer;
+  /** The length of the memory-mapped file. */
+  size_t length;
+  /** The number of objects in this memory-mapped file that are currently being
+   *  used by the client. When this count reaches zero, we unmap the file. */
+  int count;
   /** Handle for the uthash table. */
   UT_hash_handle hh;
 } client_mmap_table_entry;
 
+typedef struct {
+  /** The ID of the object. This is used as the key in the hash table. */
+  object_id object_id;
+  /** The file descriptor of the memory-mapped file that contains the object. */
+  int fd;
+  /** A count of the number of times this client has called plasma_create or
+   *  plasma_get on this object ID minus the number of calls to plasma_release.
+   *  When this count reaches zero, we remove the entry from the objects_in_use
+   *  and decrement a count in the relevant client_mmap_table_entry. */
+  int count;
+  /** Handle for the uthash table. */
+  UT_hash_handle hh;
+} object_in_use_entry;
+
 /** Information about a connection between a Plasma Client and Plasma Store.
  *  This is used to avoid mapping the same files into memory multiple times. */
 struct plasma_connection {
@@ -37,8 +56,13 @@ struct plasma_connection {
   int store_conn;
   /** File descriptor of the Unix domain socket that connects to the manager. */
   int manager_conn;
-  /** Table of dlmalloc buffer files that have been memory mapped so far. */
+  /** Table of dlmalloc buffer files that have been memory mapped so far. This
+   *  is a hash table mapping a file descriptor to a struct containing the
+   *  address of the corresponding memory-mapped file. */
   client_mmap_table_entry *mmap_table;
+  /** A hash table of the object IDs that are currently being used by this
+   * client. */
+  object_in_use_entry *objects_in_use;
 };
 
 int plasma_request_size(int num_object_ids) {
@@ -90,11 +114,47 @@ uint8_t *lookup_or_mmap(plasma_connection *conn,
     entry = malloc(sizeof(client_mmap_table_entry));
     entry->key = store_fd_val;
     entry->pointer = result;
+    entry->length = map_size;
+    entry->count = 0;
     HASH_ADD_INT(conn->mmap_table, key, entry);
     return result;
   }
 }
 
+void increment_object_count(plasma_connection *conn,
+                            object_id object_id,
+                            int fd) {
+  /* Increment the count of the object to track the fact that it is being used.
+   * The corresponding decrement should happen in plasma_release. */
+  object_in_use_entry *object_entry;
+  HASH_FIND(hh, conn->objects_in_use, &object_id, sizeof(object_id),
+            object_entry);
+  if (object_entry == NULL) {
+    /* Add this object ID to the hash table of object IDs in use. The
+     * corresponding call to free happens in plasma_release. */
+    object_entry = malloc(sizeof(object_in_use_entry));
+    object_entry->object_id = object_id;
+    object_entry->fd = fd;
+    object_entry->count = 0;
+    HASH_ADD(hh, conn->objects_in_use, object_id, sizeof(object_id),
+             object_entry);
+    /* Increment the count of the number of objects in the memory-mapped file
+     * that are being used. The corresponding decrement should happen in
+     * plasma_release. */
+    client_mmap_table_entry *entry;
+    HASH_FIND_INT(conn->mmap_table, &object_entry->fd, entry);
+    CHECK(entry != NULL);
+    CHECK(entry->count >= 0);
+    entry->count += 1;
+  } else {
+    CHECK(object_entry->count > 0);
+  }
+  /* Increment the count of the number of instances of this object that are
+   * being used by this client. The corresponding decrement should happen in
+   * plasma_release. */
+  object_entry->count += 1;
+}
+
 void plasma_create(plasma_connection *conn,
                    object_id object_id,
                    int64_t data_size,
@@ -126,6 +186,10 @@ void plasma_create(plasma_connection *conn,
     /* Copy the metadata to the buffer. */
     memcpy(*data + object->data_size, metadata, metadata_size);
   }
+  /* Increment the count of the number of instances of this object that this
+   * client is using. A call to plasma_release is required to decrement this
+   * count. */
+  increment_object_count(conn, object_id, object->handle.store_fd);
 }
 
 /* This method is used to get both the data and the metadata. */
@@ -150,6 +214,46 @@ void plasma_get(plasma_connection *conn,
     *metadata = *data + object->data_size;
     *metadata_size = object->metadata_size;
   }
+  /* Increment the count of the number of instances of this object that this
+   * client is using. A call to plasma_release is required to decrement this
+   * count. */
+  increment_object_count(conn, object_id, object->handle.store_fd);
+}
+
+void plasma_release(plasma_connection *conn, object_id object_id) {
+  /* Decrement the count of the number of instances of this object that are
+   * being used by this client. The corresponding increment should have happened
+   * in plasma_get. */
+  object_in_use_entry *object_entry;
+  HASH_FIND(hh, conn->objects_in_use, &object_id, sizeof(object_id),
+            object_entry);
+  CHECK(object_entry != NULL);
+  object_entry->count -= 1;
+  CHECK(object_entry->count >= 0);
+  /* Check if the client is no longer using this object. */
+  if (object_entry->count == 0) {
+    /* Decrement the count of the number of objects in this memory-mapped file
+     * that the client is using. The corresponding increment should have
+     * happened in plasma_get. */
+    client_mmap_table_entry *entry;
+    HASH_FIND_INT(conn->mmap_table, &object_entry->fd, entry);
+    CHECK(entry != NULL);
+    entry->count -= 1;
+    CHECK(entry->count >= 0);
+    /* If none are being used then unmap the file. */
+    if (entry->count == 0) {
+      munmap(entry->pointer, entry->length);
+      /* Remove the corresponding entry from the hash table. */
+      HASH_DELETE(hh, conn->mmap_table, entry);
+      free(entry);
+    }
+    /* Tell the store that the client no longer needs the object. */
+    plasma_request req = make_plasma_request(object_id);
+    plasma_send_request(conn->store_conn, PLASMA_RELEASE, &req);
+    /* Remove the entry from the hash table of objects currently in use. */
+    HASH_DELETE(hh, conn->objects_in_use, object_entry);
+    free(object_entry);
+  }
 }
 
 /* This method is used to query whether the plasma store contains an object. */
@@ -230,6 +334,7 @@ plasma_connection *plasma_connect(const char *store_socket_name,
     result->manager_conn = -1;
   }
   result->mmap_table = NULL;
+  result->objects_in_use = NULL;
   return result;
 }
 
diff --git a/src/plasma_client.h b/src/plasma_client.h
index 9468397cc..ca64b826f 100644
--- a/src/plasma_client.h
+++ b/src/plasma_client.h
@@ -40,12 +40,12 @@ plasma_request *make_plasma_multiple_request(int num_object_ids,
  * Connect to the local plasma store and plasma manager. Return
  * the resulting connection.
  *
- * @param socket_name The name of the UNIX domain socket to use
- *        to connect to the Plasma Store.
- * @param manager_addr The IP address of the plasma manager to
- *        connect to.
- * @param manager_addr The port of the plasma manager to connect
- *        to.
+ * @param socket_name The name of the UNIX domain socket to use to connect to
+ *        the Plasma Store.
+ * @param manager_addr The IP address of the plasma manager to connect to. If
+ *        this is NULL, then this function will not connect to a manager.
+ * @param manager_port The port of the plasma manager to connect to. If
+ *        manager_addr is NULL, then this argument is unused.
  * @return The object containing the connection state.
  */
 plasma_connection *plasma_connect(const char *store_socket_name,
@@ -114,6 +114,18 @@ void plasma_get(plasma_connection *conn,
                 int64_t *metadata_size,
                 uint8_t **metadata);
 
+/**
+ * Tell Plasma that the client no longer needs the object. This should be called
+ * after plasma_create or plasma_get when the client is done with the object.
+ * After this call, the address they returned is no longer valid. Call this once
+ * for each call to plasma_create or plasma_get (with the same object ID).
+ *
+ * @param conn The object containing the connection state.
+ * @param object_id The ID of the object that is no longer needed.
+ * @return Void.
+ */
+void plasma_release(plasma_connection *conn, object_id object_id);
+
 /**
  * Check if the object store contains a particular object and the object has
  * been sealed. The result will be stored in has_object.
@@ -161,7 +173,7 @@ void plasma_delete(plasma_connection *conn, object_id object_id);
  *        to the local manager.
  * @param object_id_count The number of object IDs requested.
  * @param object_ids[] The vector of object IDs requested. Length must be at
- * least num_object_ids.
+ *        least num_object_ids.
  * @param is_fetched[] The vector in which to return the success
  *        of each object's fetch operation, in the same order as
  *        object_ids. Length must be at least num_object_ids.
diff --git a/src/plasma_manager.c b/src/plasma_manager.c
index 3dfe21735..cefb297fb 100644
--- a/src/plasma_manager.c
+++ b/src/plasma_manager.c
@@ -312,6 +312,9 @@ void write_object_chunk(client_connection *conn, plasma_request_buffer *buf) {
     /* If we've finished writing this buffer, reset the cursor to zero. */
     LOG_DEBUG("writing on channel %d finished", conn->fd);
     conn->cursor = 0;
+    /* We are done sending the object, so release it. The corresponding call to
+     * plasma_get occurred in process_transfer_request. */
+    plasma_release(conn->manager_state->plasma_conn, buf->object_id);
   }
 }
 
@@ -392,9 +395,11 @@ void process_data_chunk(event_loop *loop,
     return;
   }
 
-  /* Seal the object.*/
+  /* Seal the object and release it. The release corresponds to the call to
+   * plasma_create that occurred in process_data_request. */
   LOG_DEBUG("reading on channel %d finished", data_sock);
   plasma_seal(conn->manager_state->plasma_conn, buf->object_id);
+  plasma_release(conn->manager_state->plasma_conn, buf->object_id);
   /* Notify any clients who were waiting on a fetch to this object. */
   client_object_connection *object_conn, *next;
   client_connection *client_conn;
@@ -459,6 +464,8 @@ void process_transfer_request(event_loop *loop,
   int64_t metadata_size;
   /* TODO(swang): A non-blocking plasma_get, or else we could block here
    * forever if we don't end up sealing this object. */
+  /* The corresponding call to plasma_release will happen in
+   * write_object_chunk. */
   plasma_get(conn->manager_state->plasma_conn, object_id, &data_size, &data,
              &metadata_size, &metadata);
   assert(metadata == data + data_size);
@@ -498,6 +505,8 @@ void process_data_request(event_loop *loop,
   buf->data_size = data_size;
   buf->metadata_size = metadata_size;
 
+  /* The corresponding call to plasma_release should happen in
+   * process_data_chunk. */
   plasma_create(conn->manager_state->plasma_conn, object_id, data_size, NULL,
                 metadata_size, &(buf->data));
   LL_APPEND(conn->transfer_queue, buf);
diff --git a/src/plasma_store.c b/src/plasma_store.c
index 3cb251234..1e86c55d7 100644
--- a/src/plasma_store.c
+++ b/src/plasma_store.c
@@ -61,20 +61,34 @@ typedef struct {
   UT_hash_handle handle;
   /* Pointer to the object data. Needed to free the object. */
   uint8_t *pointer;
+  /** An array of the clients that are currently using this object. */
+  UT_array *clients;
 } object_table_entry;
 
 typedef struct {
   /* Object id of this object. */
   object_id object_id;
-  /* Socket connections of waiting clients. */
-  UT_array *conns;
+  /* An array of the clients that are waiting to get this object. */
+  UT_array *waiting_clients;
   /* Handle for the uthash table. */
   UT_hash_handle handle;
 } object_notify_entry;
 
+/** Contains all information that is associated with a client. */
+struct client {
+  /** The socket used to communicate with the client. */
+  int sock;
+  /** A pointer to the global plasma state. */
+  plasma_store_state *plasma_state;
+};
+
+/* This is used to define the array of clients used to define the
+ * object_table_entry type. */
+UT_icd client_icd = {sizeof(client *), NULL, NULL, NULL};
+
 /* This is used to define the array of object IDs used to define the
  * notification_queue type. */
-UT_icd object_id_icd = {sizeof(object_id), NULL, NULL, NULL};
+UT_icd object_table_entry_icd = {sizeof(object_id), NULL, NULL, NULL};
 
 typedef struct {
   /** Client file descriptor. This is used as a key for the hash table. */
@@ -112,19 +126,37 @@ plasma_store_state *init_plasma_store(event_loop *loop) {
   return state;
 }
 
+/* If this client is not already using the object, add the client to the
+ * object's list of clients, otherwise do nothing. */
+void add_client_to_object_clients(object_table_entry *entry,
+                                  client *client_info) {
+  /* Check if this client is already using the object. */
+  for (int i = 0; i < utarray_len(entry->clients); ++i) {
+    client **c = (client **) utarray_eltptr(entry->clients, i);
+    if (*c == client_info) {
+      return;
+    }
+  }
+  /* Add the client pointer to the list of clients using this object. */
+  utarray_push_back(entry->clients, &client_info);
+}
+
 /* Create a new object buffer in the hash table. */
-void create_object(plasma_store_state *plasma_state,
+void create_object(client *client_context,
                    object_id object_id,
                    int64_t data_size,
                    int64_t metadata_size,
                    plasma_object *result) {
   LOG_DEBUG("creating object"); /* TODO(pcm): add object_id here */
+  plasma_store_state *plasma_state = client_context->plasma_state;
 
   object_table_entry *entry;
+  /* TODO(swang): Return these errors to the client instead of exiting. */
   HASH_FIND(handle, plasma_state->open_objects, &object_id, sizeof(object_id),
             entry);
-  /* TODO(swang): Return this error to the client instead of
-   * exiting. */
+  CHECKM(entry == NULL, "Cannot create object twice.");
+  HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
+            entry);
   CHECKM(entry == NULL, "Cannot create object twice.");
 
   uint8_t *pointer = dlmalloc(data_size + metadata_size);
@@ -135,7 +167,7 @@ void create_object(plasma_store_state *plasma_state,
   assert(fd != -1);
 
   entry = malloc(sizeof(object_table_entry));
-  memcpy(&entry->object_id, &object_id, 20);
+  memcpy(&entry->object_id, &object_id, sizeof(object_id));
   entry->info.data_size = data_size;
   entry->info.metadata_size = metadata_size;
   entry->pointer = pointer;
@@ -143,6 +175,7 @@ void create_object(plasma_store_state *plasma_state,
   entry->fd = fd;
   entry->map_size = map_size;
   entry->offset = offset;
+  utarray_new(entry->clients, &client_icd);
   HASH_ADD(handle, plasma_state->open_objects, object_id, sizeof(object_id),
            entry);
   result->handle.store_fd = fd;
@@ -151,13 +184,16 @@ void create_object(plasma_store_state *plasma_state,
   result->metadata_offset = offset + data_size;
   result->data_size = data_size;
   result->metadata_size = metadata_size;
+  /* Record that this client is using this object. */
+  add_client_to_object_clients(entry, client_context);
 }
 
 /* Get an object from the hash table. */
-int get_object(plasma_store_state *plasma_state,
+int get_object(client *client_context,
                int conn,
                object_id object_id,
                plasma_object *result) {
+  plasma_store_state *plasma_state = client_context->plasma_state;
   object_table_entry *entry;
   HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
             entry);
@@ -168,6 +204,9 @@ int get_object(plasma_store_state *plasma_state,
     result->metadata_offset = entry->offset + entry->info.data_size;
     result->data_size = entry->info.data_size;
     result->metadata_size = entry->info.metadata_size;
+    /* If necessary, record that this client is using this object. In the case
+     * where entry == NULL, this will be called from seal_object. */
+    add_client_to_object_clients(entry, client_context);
     return OBJECT_FOUND;
   } else {
     object_notify_entry *notify_entry;
@@ -177,18 +216,54 @@ int get_object(plasma_store_state *plasma_state,
     if (!notify_entry) {
       notify_entry = malloc(sizeof(object_notify_entry));
       memset(notify_entry, 0, sizeof(object_notify_entry));
-      utarray_new(notify_entry->conns, &ut_int_icd);
-      memcpy(&notify_entry->object_id, &object_id, 20);
+      utarray_new(notify_entry->waiting_clients, &client_icd);
+      memcpy(&notify_entry->object_id, &object_id, sizeof(object_id));
       HASH_ADD(handle, plasma_state->objects_notify, object_id,
                sizeof(object_id), notify_entry);
     }
-    utarray_push_back(notify_entry->conns, &conn);
+    utarray_push_back(notify_entry->waiting_clients, &client_context);
   }
   return OBJECT_NOT_FOUND;
 }
 
+int remove_client_from_object_clients(object_table_entry *entry,
+                                      client *client_info) {
+  /* Find the location of the client in the array. */
+  for (int i = 0; i < utarray_len(entry->clients); ++i) {
+    client **c = (client **) utarray_eltptr(entry->clients, i);
+    if (*c == client_info) {
+      /* Remove the client from the array. */
+      utarray_erase(entry->clients, i, 1);
+      /* Return 1 to indicate that the client was removed. */
+      return 1;
+    }
+  }
+  /* Return 0 to indicate that the client was not removed. */
+  return 0;
+}
+
+void release_object(client *client_context, object_id object_id) {
+  plasma_store_state *plasma_state = client_context->plasma_state;
+  object_table_entry *open_entry;
+  object_table_entry *sealed_entry;
+
+  HASH_FIND(handle, plasma_state->open_objects, &object_id, sizeof(object_id),
+            open_entry);
+  HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
+            sealed_entry);
+  /* Exactly one of open_entry and sealed_entry should be NULL. */
+  CHECK((open_entry == NULL) != (sealed_entry == NULL));
+  /* Remove the client from the object's array of clients. */
+  if (open_entry != NULL) {
+    CHECK(remove_client_from_object_clients(open_entry, client_context) == 1);
+  } else {
+    CHECK(remove_client_from_object_clients(sealed_entry, client_context) == 1);
+  }
+}
+
 /* Check if an object is present. */
-int contains_object(plasma_store_state *plasma_state, object_id object_id) {
+int contains_object(client *client_context, object_id object_id) {
+  plasma_store_state *plasma_state = client_context->plasma_state;
   object_table_entry *entry;
   HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
             entry);
@@ -196,17 +271,15 @@ int contains_object(plasma_store_state *plasma_state, object_id object_id) {
 }
 
 /* Seal an object that has been created in the hash table. */
-void seal_object(plasma_store_state *plasma_state,
-                 object_id object_id,
-                 UT_array **conns,
-                 plasma_object *result) {
+void seal_object(client *client_context, object_id object_id) {
   LOG_DEBUG("sealing object");  // TODO(pcm): add object_id here
+  plasma_store_state *plasma_state = client_context->plasma_state;
   object_table_entry *entry;
   HASH_FIND(handle, plasma_state->open_objects, &object_id, sizeof(object_id),
             entry);
-  if (!entry) {
-    return; /* TODO(pcm): return error */
-  }
+  CHECK(entry != NULL);
+  /* Move the object table entry from the table of open objects to the table of
+   * sealed objects. */
   HASH_DELETE(handle, plasma_state->open_objects, entry);
   HASH_ADD(handle, plasma_state->sealed_objects, object_id, sizeof(object_id),
            entry);
@@ -223,24 +296,34 @@ void seal_object(plasma_store_state *plasma_state,
   object_notify_entry *notify_entry;
   HASH_FIND(handle, plasma_state->objects_notify, &object_id, sizeof(object_id),
             notify_entry);
-  if (!notify_entry) {
-    *conns = NULL;
-    return;
+  if (notify_entry) {
+    plasma_reply reply;
+    memset(&reply, 0, sizeof(reply));
+    plasma_object *result = &reply.object;
+    result->handle.store_fd = entry->fd;
+    result->handle.mmap_size = entry->map_size;
+    result->data_offset = entry->offset;
+    result->metadata_offset = entry->offset + entry->info.data_size;
+    result->data_size = entry->info.data_size;
+    result->metadata_size = entry->info.metadata_size;
+    HASH_DELETE(handle, plasma_state->objects_notify, notify_entry);
+    /* Send notifications to the clients that were waiting for this object. */
+    for (int i = 0; i < utarray_len(notify_entry->waiting_clients); ++i) {
+      client **c = (client **) utarray_eltptr(notify_entry->waiting_clients, i);
+      send_fd((*c)->sock, reply.object.handle.store_fd, (char *) &reply,
+              sizeof(reply));
+      /* Record that the client is using this object. */
+      add_client_to_object_clients(entry, *c);
+    }
+    utarray_free(notify_entry->waiting_clients);
+    free(notify_entry);
   }
-  result->handle.store_fd = entry->fd;
-  result->handle.mmap_size = entry->map_size;
-  result->data_offset = entry->offset;
-  result->metadata_offset = entry->offset + entry->info.data_size;
-  result->data_size = entry->info.data_size;
-  result->metadata_size = entry->info.metadata_size;
-  HASH_DELETE(handle, plasma_state->objects_notify, notify_entry);
-  *conns = notify_entry->conns;
-  free(notify_entry);
 }
 
 /* Delete an object that has been created in the hash table. */
-void delete_object(plasma_store_state *plasma_state, object_id object_id) {
+void delete_object(client *client_context, object_id object_id) {
   LOG_DEBUG("deleting object");  // TODO(rkn): add object_id here
+  plasma_store_state *plasma_state = client_context->plasma_state;
   object_table_entry *entry;
   HASH_FIND(handle, plasma_state->sealed_objects, &object_id, sizeof(object_id),
             entry);
@@ -248,9 +331,12 @@ void delete_object(plasma_store_state *plasma_state, object_id object_id) {
    * error. Maybe we should also support deleting objects that have been created
    * but not sealed. */
   CHECKM(entry != NULL, "To delete an object it must have been sealed.");
+  CHECKM(utarray_len(entry->clients) == 0,
+         "To delete an object, there must be no clients currently using it.");
   uint8_t *pointer = entry->pointer;
   HASH_DELETE(handle, plasma_state->sealed_objects, entry);
   dlfree(pointer);
+  utarray_free(entry->clients);
   free(entry);
 }
 
@@ -260,7 +346,6 @@ void send_notifications(event_loop *loop,
                         void *context,
                         int events) {
   plasma_store_state *plasma_state = context;
-
   notification_queue *queue;
   HASH_FIND_INT(plasma_state->pending_notifications, &client_sock, queue);
   CHECK(queue != NULL);
@@ -268,9 +353,8 @@ void send_notifications(event_loop *loop,
   int num_processed = 0;
   /* Loop over the array of pending notifications and send as many of them as
    * possible. */
-  for (object_id *obj_id = (object_id *) utarray_front(queue->object_ids);
-       obj_id != NULL;
-       obj_id = (object_id *) utarray_next(queue->object_ids, obj_id)) {
+  for (int i = 0; i < utarray_len(queue->object_ids); ++i) {
+    object_id *obj_id = (object_id *) utarray_eltptr(queue->object_ids, i);
     /* Attempt to send a notification about this object ID. */
     int nbytes = send(client_sock, obj_id, sizeof(object_id), 0);
     if (nbytes >= 0) {
@@ -290,8 +374,9 @@ void send_notifications(event_loop *loop,
 }
 
 /* Subscribe to notifications about sealed objects. */
-void subscribe_to_updates(plasma_store_state *plasma_state, int conn) {
+void subscribe_to_updates(client *client_context, int conn) {
   LOG_DEBUG("subscribing to updates");
+  plasma_store_state *plasma_state = client_context->plasma_state;
   char dummy;
   int fd = recv_fd(conn, &dummy, 1);
   CHECKM(HASH_CNT(handle, plasma_state->open_objects) == 0,
@@ -304,7 +389,7 @@ void subscribe_to_updates(plasma_store_state *plasma_state, int conn) {
   notification_queue *queue =
       (notification_queue *) malloc(sizeof(notification_queue));
   queue->subscriber_fd = fd;
-  utarray_new(queue->object_ids, &object_id_icd);
+  utarray_new(queue->object_ids, &object_table_entry_icd);
   HASH_ADD_INT(plasma_state->pending_notifications, subscriber_fd, queue);
   /* Add a callback to the event loop to send queued notifications whenever
    * there is room in the socket's send buffer. */
@@ -316,7 +401,7 @@ void process_message(event_loop *loop,
                      int client_sock,
                      void *context,
                      int events) {
-  plasma_store_state *plasma_state = context;
+  client *client_context = context;
   int64_t type;
   int64_t length;
   plasma_request *req;
@@ -324,48 +409,52 @@ void process_message(event_loop *loop,
   /* We're only sending a single object ID at a time for now. */
   plasma_reply reply;
   memset(&reply, 0, sizeof(reply));
-  UT_array *conns;
-
+  /* Process the different types of requests. */
   switch (type) {
   case PLASMA_CREATE:
-    create_object(plasma_state, req->object_ids[0], req->data_size,
+    create_object(client_context, req->object_ids[0], req->data_size,
                   req->metadata_size, &reply.object);
     send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
             sizeof(reply));
     break;
   case PLASMA_GET:
-    if (get_object(plasma_state, client_sock, req->object_ids[0],
+    if (get_object(client_context, client_sock, req->object_ids[0],
                    &reply.object) == OBJECT_FOUND) {
       send_fd(client_sock, reply.object.handle.store_fd, (char *) &reply,
               sizeof(reply));
     }
     break;
+  case PLASMA_RELEASE:
+    release_object(client_context, req->object_ids[0]);
+    break;
   case PLASMA_CONTAINS:
-    if (contains_object(plasma_state, req->object_ids[0]) == OBJECT_FOUND) {
+    if (contains_object(client_context, req->object_ids[0]) == OBJECT_FOUND) {
       reply.has_object = 1;
     }
     plasma_send_reply(client_sock, &reply);
     break;
   case PLASMA_SEAL:
-    seal_object(plasma_state, req->object_ids[0], &conns, &reply.object);
-    if (conns) {
-      for (int *c = (int *) utarray_front(conns); c != NULL;
-           c = (int *) utarray_next(conns, c)) {
-        send_fd(*c, reply.object.handle.store_fd, (char *) &reply,
-                sizeof(reply));
-      }
-      utarray_free(conns);
-    }
+    seal_object(client_context, req->object_ids[0]);
     break;
   case PLASMA_DELETE:
-    delete_object(plasma_state, req->object_ids[0]);
+    delete_object(client_context, req->object_ids[0]);
     break;
   case PLASMA_SUBSCRIBE:
-    subscribe_to_updates(plasma_state, client_sock);
+    subscribe_to_updates(client_context, client_sock);
     break;
   case DISCONNECT_CLIENT: {
     LOG_DEBUG("Disconnecting client on fd %d", client_sock);
     event_loop_remove_file(loop, client_sock);
+    /* If this client was using any objects, remove it from the appropriate
+     * lists. */
+    plasma_store_state *plasma_state = client_context->plasma_state;
+    object_table_entry *entry, *temp_entry;
+    HASH_ITER(handle, plasma_state->open_objects, entry, temp_entry) {
+      remove_client_from_object_clients(entry, client_context);
+    }
+    HASH_ITER(handle, plasma_state->sealed_objects, entry, temp_entry) {
+      remove_client_from_object_clients(entry, client_context);
+    }
   } break;
   default:
     /* This code should be unreachable. */
@@ -379,9 +468,16 @@ void new_client_connection(event_loop *loop,
                            int listener_sock,
                            void *context,
                            int events) {
+  plasma_store_state *plasma_state = context;
   int new_socket = accept_client(listener_sock);
+  /* Create a new client object. This will also be used as the context to use
+   * for events on this client's socket. TODO(rkn): free this somewhere. */
+  client *client_context = (client *) malloc(sizeof(client));
+  client_context->sock = new_socket;
+  client_context->plasma_state = plasma_state;
+  /* Add a callback to handle events on this socket. */
   event_loop_add_file(loop, new_socket, EVENT_LOOP_READ, process_message,
-                      context);
+                      client_context);
   LOG_DEBUG("new connection with fd %d", new_socket);
 }
 
diff --git a/src/plasma_store.h b/src/plasma_store.h
index b22302209..9caa967ab 100644
--- a/src/plasma_store.h
+++ b/src/plasma_store.h
@@ -3,69 +3,79 @@
 
 #include "plasma.h"
 
+typedef struct client client;
+
 typedef struct plasma_store_state plasma_store_state;
 
 /**
- * Create a new object:
+ * Create a new object. The client must do a call to release_object to tell the
+ * store when it is done with the object.
  *
- * @param s The plasma store state.
+ * @param client_context The context of the client making this request.
  * @param object_id Object ID of the object to be created.
  * @param data_size Size in bytes of the object to be created.
  * @param metadata_size Size in bytes of the object metadata.
  * @return Void.
  */
-void create_object(plasma_store_state *s,
+void create_object(client *client_context,
                    object_id object_id,
                    int64_t data_size,
                    int64_t metadata_size,
                    plasma_object *result);
 
 /**
- * Get an object. This method assumes that we currently have or will
- * eventually have this object sealed. If the object has not yet been sealed,
- * the client that requested the object will be notified when it is sealed.
+ * Get an object. This method assumes that we currently have or will eventually
+ * have this object sealed. If the object has not yet been sealed, the client
+ * that requested the object will be notified when it is sealed.
  *
- * @param s The plasma store state.
+ * For each call to get_object, the client must do a call to release_object to
+ * tell the store when it is done with the object.
+ *
+ * @param client_context The context of the client making this request.
  * @param conn The client connection that requests the object.
  * @param object_id Object ID of the object to be gotten.
  * @return The status of the object (object_status in plasma.h).
  */
-int get_object(plasma_store_state *s,
+int get_object(client *client_context,
                int conn,
                object_id object_id,
                plasma_object *result);
 
 /**
- * Seal an object:
+ * Record the fact that a particular client is no longer using an object.
  *
- * @param s The plasma store state.
+ * @param client_context The context of the client making this request.
+ * @param object_id The object ID of the object that is being released.
+ * @return Void.
+ */
+void release_object(client *client_context, object_id object_id);
+
+/**
+ * Seal an object. The object is now immutable and can be accessed with get.
+ *
+ * @param client_context The context of the client making this request.
  * @param object_id Object ID of the object to be sealed.
- * @param conns Returns the connection that are waiting for this object.
-                The caller is responsible for destroying this array.
  * @return Void.
  */
-void seal_object(plasma_store_state *s,
-                 object_id object_id,
-                 UT_array **conns,
-                 plasma_object *result);
+void seal_object(client *client_context, object_id object_id);
 
 /**
  * Check if the plasma store contains an object:
  *
- * @param s The plasma store state.
+ * @param client_context The context of the client making this request.
  * @param object_id Object ID that will be checked.
  * @return OBJECT_FOUND if the object is in the store, OBJECT_NOT_FOUND if not
  */
-int contains_object(plasma_store_state *s, object_id object_id);
+int contains_object(client *client_context, object_id object_id);
 
 /**
  * Delete an object from the plasma store:
  *
- * @param s The plasma store state.
+ * @param client_context The context of the client making this request.
  * @param object_id Object ID of the object to be deleted.
  * @return Void.
  */
-void delete_object(plasma_store_state *s, object_id object_id);
+void delete_object(client *client_context, object_id object_id);
 
 /**
  * Send notifications about sealed objects to the subscribers. This is called
@@ -73,15 +83,15 @@ void delete_object(plasma_store_state *s, object_id object_id);
  * buffered, and this will be called again when the send buffer has room.
  *
  * @param loop The Plasma store event loop.
- * @param client_sock The file descriptor to send the notification to.
- * @param context The plasma store global state.
+ * @param client_sock The socket of the client to send the notification to.
+ * @param plasma_state The plasma store global state.
  * @param events This is needed for this function to have the signature of a
           callback.
  * @return Void.
  */
 void send_notifications(event_loop *loop,
                         int client_sock,
-                        void *context,
+                        void *plasma_state,
                         int events);
 
 #endif /* PLASMA_STORE_H */
diff --git a/test/test.py b/test/test.py
index 94b2acbab..62d904bdb 100644
--- a/test/test.py
+++ b/test/test.py
@@ -127,47 +127,47 @@ class TestPlasmaClient(unittest.TestCase):
     for object_id in real_object_ids:
       self.assertTrue(self.plasma_client.contains(object_id))
 
-  def test_individual_delete(self):
-    length = 100
-    # Create an object id string.
-    object_id = random_object_id()
-    # Create a random metadata string.
-    metadata = generate_metadata(100)
-    # Create a new buffer and write to it.
-    memory_buffer = self.plasma_client.create(object_id, length, metadata)
-    for i in range(length):
-      memory_buffer[i] = chr(i % 256)
-    # Seal the object.
-    self.plasma_client.seal(object_id)
-    # Check that the object is present.
-    self.assertTrue(self.plasma_client.contains(object_id))
-    # Delete the object.
-    self.plasma_client.delete(object_id)
-    # Make sure the object is no longer present.
-    self.assertFalse(self.plasma_client.contains(object_id))
-
-  def test_delete(self):
-    # Create some objects.
-    object_ids = [random_object_id() for _ in range(100)]
-    for object_id in object_ids:
-      length = 100
-      # Create a random metadata string.
-      metadata = generate_metadata(100)
-      # Create a new buffer and write to it.
-      memory_buffer = self.plasma_client.create(object_id, length, metadata)
-      for i in range(length):
-        memory_buffer[i] = chr(i % 256)
-      # Seal the object.
-      self.plasma_client.seal(object_id)
-      # Check that the object is present.
-      self.assertTrue(self.plasma_client.contains(object_id))
-
-    # Delete the objects and make sure they are no longer present.
-    for object_id in object_ids:
-      # Delete the object.
-      self.plasma_client.delete(object_id)
-      # Make sure the object is no longer present.
-      self.assertFalse(self.plasma_client.contains(object_id))
+  # def test_individual_delete(self):
+  #   length = 100
+  #   # Create an object id string.
+  #   object_id = random_object_id()
+  #   # Create a random metadata string.
+  #   metadata = generate_metadata(100)
+  #   # Create a new buffer and write to it.
+  #   memory_buffer = self.plasma_client.create(object_id, length, metadata)
+  #   for i in range(length):
+  #     memory_buffer[i] = chr(i % 256)
+  #   # Seal the object.
+  #   self.plasma_client.seal(object_id)
+  #   # Check that the object is present.
+  #   self.assertTrue(self.plasma_client.contains(object_id))
+  #   # Delete the object.
+  #   self.plasma_client.delete(object_id)
+  #   # Make sure the object is no longer present.
+  #   self.assertFalse(self.plasma_client.contains(object_id))
+  #
+  # def test_delete(self):
+  #   # Create some objects.
+  #   object_ids = [random_object_id() for _ in range(100)]
+  #   for object_id in object_ids:
+  #     length = 100
+  #     # Create a random metadata string.
+  #     metadata = generate_metadata(100)
+  #     # Create a new buffer and write to it.
+  #     memory_buffer = self.plasma_client.create(object_id, length, metadata)
+  #     for i in range(length):
+  #       memory_buffer[i] = chr(i % 256)
+  #     # Seal the object.
+  #     self.plasma_client.seal(object_id)
+  #     # Check that the object is present.
+  #     self.assertTrue(self.plasma_client.contains(object_id))
+  #
+  #   # Delete the objects and make sure they are no longer present.
+  #   for object_id in object_ids:
+  #     # Delete the object.
+  #     self.plasma_client.delete(object_id)
+  #     # Make sure the object is no longer present.
+  #     self.assertFalse(self.plasma_client.contains(object_id))
 
   def test_illegal_functionality(self):
     # Create an object id string.
@@ -349,11 +349,11 @@ class TestPlasmaManager(unittest.TestCase):
       # Compare the two buffers.
       assert_get_object_equal(self, self.client1, self.client2, object_id1,
                               memory_buffer=memory_buffer1, metadata=metadata1)
-      # Transfer the buffer again.
-      self.client1.transfer("127.0.0.1", self.port2, object_id1)
-      # Compare the two buffers.
-      assert_get_object_equal(self, self.client1, self.client2, object_id1,
-                              memory_buffer=memory_buffer1, metadata=metadata1)
+      # # Transfer the buffer again.
+      # self.client1.transfer("127.0.0.1", self.port2, object_id1)
+      # # Compare the two buffers.
+      # assert_get_object_equal(self, self.client1, self.client2, object_id1,
+      #                         memory_buffer=memory_buffer1, metadata=metadata1)
 
       # Create an object.
       object_id2, memory_buffer2, metadata2 = create_object(self.client2, 20000, 20000)

From db6375701efe239ba8124741a8dacaff6344dc89 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Fri, 21 Oct 2016 15:42:29 -0700
Subject: [PATCH 87/91] Prevent ObjectIDs from being pickled. (#42)

---
 lib/python/common_extension.c |  8 ++++++++
 test/test.py                  | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lib/python/common_extension.c b/lib/python/common_extension.c
index efc4b9e01..14f3292d9 100644
--- a/lib/python/common_extension.c
+++ b/lib/python/common_extension.c
@@ -51,9 +51,17 @@ static PyObject *PyObjectID_id(PyObject *self) {
                                     UNIQUE_ID_SIZE);
 }
 
+static PyObject *PyObjectID___reduce__(PyObjectID *self) {
+  PyErr_SetString(CommonError, "ObjectID objects cannot be serialized.");
+  return NULL;
+}
+
 static PyMethodDef PyObjectID_methods[] = {
     {"id", (PyCFunction) PyObjectID_id, METH_NOARGS,
      "Return the hash associated with this ObjectID"},
+    {"__reduce__", (PyCFunction) PyObjectID___reduce__, METH_NOARGS,
+     "Say how to pickle this ObjectID. This raises an exception to prevent "
+     "object IDs from being serialized."},
     {NULL} /* Sentinel */
 };
 
diff --git a/test/test.py b/test/test.py
index a40d2045c..359e8c030 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,5 +1,6 @@
 from __future__ import print_function
 
+import pickle
 import unittest
 
 import common
@@ -52,6 +53,23 @@ class TestObjectID(unittest.TestCase):
   def test_create_object_id(self):
     object_id = common.ObjectID(20 * "a")
 
+  def test_cannot_pickle_object_ids(self):
+    object_ids = [common.ObjectID(20 * chr(i)) for i in range(256)]
+    def f():
+      return object_ids
+    def g(val=object_ids):
+      return 1
+    def h():
+      x = object_ids[0]
+      return 1
+    # Make sure that object IDs cannot be pickled (including functions that
+    # close over object IDs).
+    self.assertRaises(Exception, lambda : pickle.dumps(object_ids[0]))
+    self.assertRaises(Exception, lambda : pickle.dumps(object_ids))
+    self.assertRaises(Exception, lambda : pickle.dumps(f))
+    self.assertRaises(Exception, lambda : pickle.dumps(g))
+    self.assertRaises(Exception, lambda : pickle.dumps(h))
+
 class TestTask(unittest.TestCase):
 
   def test_create_task(self):

From 7c1b2f702fddaf90ca51df6a00903a1dbc414b20 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 25 Oct 2016 12:51:30 -0700
Subject: [PATCH 88/91] Remove git submodules and C++ files.

---
 .gitmodules              |   14 -
 src/computation_graph.cc |   27 -
 src/computation_graph.h  |   35 --
 src/ipc.cc               |  202 -------
 src/ipc.h                |  142 -----
 src/objstore.cc          |  375 ------------
 src/objstore.h           |   81 ---
 src/raylib.cc            |  870 ----------------------------
 src/scheduler.cc         | 1187 --------------------------------------
 src/scheduler.h          |  237 --------
 src/utils.cc             |   69 ---
 src/utils.h              |   97 ----
 src/worker.cc            |  497 ----------------
 src/worker.h             |  145 -----
 thirdparty/grpc          |    1 -
 thirdparty/hiredis       |    1 -
 thirdparty/numbuf        |    1 -
 thirdparty/python        |    1 -
 18 files changed, 3982 deletions(-)
 delete mode 100644 src/computation_graph.cc
 delete mode 100644 src/computation_graph.h
 delete mode 100644 src/ipc.cc
 delete mode 100644 src/ipc.h
 delete mode 100644 src/objstore.cc
 delete mode 100644 src/objstore.h
 delete mode 100644 src/raylib.cc
 delete mode 100644 src/scheduler.cc
 delete mode 100644 src/scheduler.h
 delete mode 100644 src/utils.cc
 delete mode 100644 src/utils.h
 delete mode 100644 src/worker.cc
 delete mode 100644 src/worker.h
 delete mode 160000 thirdparty/grpc
 delete mode 160000 thirdparty/hiredis
 delete mode 160000 thirdparty/numbuf
 delete mode 160000 thirdparty/python

diff --git a/.gitmodules b/.gitmodules
index 9eaa0156b..89d94b72e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,17 +1,3 @@
-[submodule "thirdparty/grpc"]
-	path = thirdparty/grpc
-	url = https://github.com/grpc/grpc
-	ignore = dirty
-[submodule "thirdparty/numbuf"]
-	path = thirdparty/numbuf
-	url = https://github.com/ray-project/numbuf.git
 [submodule "thirdparty/arrow"]
 	path = thirdparty/arrow
 	url = https://github.com/ray-project/arrow.git
-[submodule "thirdparty/python"]
-	path = thirdparty/python
-	url = https://github.com/austinsc/python.git
-	ignore = dirty
-[submodule "thirdparty/hiredis"]
-	path = thirdparty/hiredis
-	url = https://github.com/redis/hiredis.git
diff --git a/src/computation_graph.cc b/src/computation_graph.cc
deleted file mode 100644
index 2aa696286..000000000
--- a/src/computation_graph.cc
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "computation_graph.h"
-
-OperationId ComputationGraph::add_operation(std::unique_ptr<Operation> operation) {
-  OperationId operationid = operations_.size();
-  OperationId creator_operationid = operation->creator_operationid();
-  RAY_CHECK_EQ(spawned_operations_.size(), operationid, "ComputationGraph is attempting to call add_operation, but spawned_operations_.size() != operationid.");
-  operations_.emplace_back(std::move(operation));
-  if (creator_operationid != NO_OPERATION && creator_operationid != ROOT_OPERATION) {
-    spawned_operations_[creator_operationid].push_back(operationid);
-  }
-  spawned_operations_.push_back(std::vector<OperationId>());
-  return operationid;
-}
-
-const Task& ComputationGraph::get_task(OperationId operationid) {
-  RAY_CHECK_NEQ(operationid, ROOT_OPERATION, "ComputationGraph attempting to get_task with operationid == ROOT_OPERATION");
-  RAY_CHECK_NEQ(operationid, NO_OPERATION, "ComputationGraph attempting to get_task with operationid == NO_OPERATION");
-  RAY_CHECK_LT(operationid, operations_.size(), "ComputationGraph attempting to get_task with operationid " << operationid << ", but operationid >= operations_.size().");
-  RAY_CHECK(operations_[operationid]->has_task(), "Calling get_task with operationid " << operationid << ", but this corresponds to a put not a task.");
-  return operations_[operationid]->task();
-}
-
-void ComputationGraph::to_protobuf(CompGraph* computation_graph) {
-  for (OperationId id = 0; id < operations_.size(); ++id) {
-    computation_graph->add_operation()->CopyFrom(*operations_[id]);
-  }
-}
diff --git a/src/computation_graph.h b/src/computation_graph.h
deleted file mode 100644
index 2918569b8..000000000
--- a/src/computation_graph.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef RAY_COMPUTATIONGRAPH_H
-#define RAY_COMPUTATIONGRAPH_H
-
-#include <iostream>
-#include <limits>
-
-#include "ray/ray.h"
-
-#include "graph.pb.h"
-#include "types.pb.h"
-
-// used to represent the root operation (that is, the driver code)
-const OperationId ROOT_OPERATION = std::numeric_limits<OperationId>::max();
-// used to represent the absence of an operation
-const OperationId NO_OPERATION = std::numeric_limits<OperationId>::max() - 1;
-
-class ComputationGraph {
-public:
-  // Add an operation to the computation graph, this returns the OperationId for
-  // the new operation. This method takes ownership over operation.
-  OperationId add_operation(std::unique_ptr<Operation> operation);
-  // Return the task corresponding to a particular OperationId. If operationid
-  // corresponds to a put, then fail.
-  const Task& get_task(OperationId operationid);
-  // Serialize the computation graph to ProtoBuf and store it in computation_graph
-  void to_protobuf(CompGraph* computation_graph);
-private:
-  // maps an OperationId to the corresponding task or put
-  std::vector<std::unique_ptr<Operation> > operations_;
-  // spawned_operations_[operationid] is a vector of the OperationIds of the
-  // operations spawned by the task with OperationId operationid
-  std::vector<std::vector<OperationId> > spawned_operations_;
-};
-
-#endif
diff --git a/src/ipc.cc b/src/ipc.cc
deleted file mode 100644
index 1b492cc07..000000000
--- a/src/ipc.cc
+++ /dev/null
@@ -1,202 +0,0 @@
-#include "ipc.h"
-
-#if defined(__unix__) || defined(__linux__)
-#include <sys/statvfs.h>
-#endif
-
-#include <stdlib.h>
-#include "ray/ray.h"
-#include "utils.h"
-
-ObjHandle::ObjHandle(SegmentId segmentid, size_t size, IpcPointer ipcpointer, size_t metadata_offset)
-  : segmentid_(segmentid), size_(size), ipcpointer_(ipcpointer), metadata_offset_(metadata_offset)
-{}
-
-MessageQueue<>::MessageQueue() : create_(false) { }
-
-MessageQueue<>::~MessageQueue() {
-  if (!name_.empty() && create_) {
-    // Only remove the message queue if we created it.
-    RAY_LOG(RAY_DEBUG, "Removing message queue " << name_.c_str() << ", create = " << create_);
-    bip::message_queue::remove(name_.c_str());
-  }
-}
-
-MessageQueue<>::MessageQueue(MessageQueue&& other) {
-  *this = std::move(other);
-}
-
-MessageQueue<>& MessageQueue<>::operator=(MessageQueue&& other) {
-  name_ = std::move(other.name_);
-  create_ = other.create_;
-  queue_ = std::move(other.queue_);
-  other.name_.clear();  // It is unclear if this is guaranteed, but we need it to hold for the destructor. See: https://stackoverflow.com/a/17735913
-
-  return *this;
-}
-
-
-bool MessageQueue<>::connect(const std::string& name, bool create, size_t message_size, size_t message_capacity) {
-  name_ = name;
-  name_.insert(0, "ray-{BC200A09-2465-431D-AEC7-2F8530B04535}-");
-#if defined(WIN32) || defined(_WIN32)
-  std::replace(name_.begin(), name_.end(), ':', '-');
-#endif
-  try {
-    if (create) {
-      bip::message_queue::remove(name_.c_str()); // remove queue if it has not been properly removed from last run
-      queue_ = std::unique_ptr<bip::message_queue>(new bip::message_queue(bip::create_only, name_.c_str(), message_capacity, message_size));
-      create_ = true; // Only set create_ = true on success.
-    }
-    else {
-      queue_ = std::unique_ptr<bip::message_queue>(new bip::message_queue(bip::open_only, name_.c_str()));
-    }
-  }
-  catch (bip::interprocess_exception &ex) {
-    RAY_CHECK(false, "name = " << name_ << ", create = " << create << ", boost::interprocess exception: " << ex.what());
-  }
-  return true;
-}
-bool MessageQueue<>::connected() {
-  return queue_ != NULL;
-}
-
-bool MessageQueue<>::send(const void * object, size_t size) {
-  bool succeeded;
-  try {
-    // This will return true if the message was successfully sent and false if
-    // the message queue is full.
-    succeeded = queue_->try_send(object, size, 0);
-  }
-  catch (bip::interprocess_exception &ex) {
-    RAY_CHECK(false, "boost::interprocess exception: " << ex.what());
-  }
-  return succeeded;
-}
-
-bool MessageQueue<>::receive(void * object, size_t size) {
-  unsigned int priority;
-  bip::message_queue::size_type recvd_size;
-  try {
-    queue_->receive(object, size, recvd_size, priority);
-  }
-  catch (bip::interprocess_exception &ex) {
-    RAY_CHECK(false, "boost::interprocess exception: " << ex.what());
-  }
-  return true;
-}
-
-MemorySegmentPool::MemorySegmentPool(ObjStoreId objstoreid, std::string& objstore_address, bool create) : objstoreid_(objstoreid), objstore_address_(objstore_address), create_mode_(create) {
-  std::string::iterator split_point = split_ip_address(objstore_address);
-  objstore_port_.assign(split_point, objstore_address.end());
-}
-
-// creates a memory segment if it is not already there; if the pool is in create mode,
-// space is allocated, if it is in open mode, the shared memory is mapped into the process
-void MemorySegmentPool::open_segment(SegmentId segmentid, size_t size) {
-  RAY_LOG(RAY_DEBUG, "Opening segmentid " << segmentid << " on object store " << objstoreid_ << " with port " << objstore_port_ << " with create_mode_ = " << create_mode_);
-  RAY_CHECK(segmentid == segments_.size() || !create_mode_, "Object store " << objstoreid_ << " with port " << objstore_port_ << " is attempting to open segmentid " << segmentid << " on the object store, but segments_.size() = " << segments_.size());
-  if (segmentid >= segments_.size()) { // resize and initialize segments_
-    int current_size = segments_.size();
-    segments_.resize(segmentid + 1);
-    for (int i = current_size; i < segments_.size(); ++i) {
-      segments_[i].first = nullptr;
-      segments_[i].second = SegmentStatusType::UNOPENED;
-    }
-  }
-  if (segments_[segmentid].second == SegmentStatusType::OPENED) {
-    return;
-  }
-  RAY_CHECK_NEQ(segments_[segmentid].second, SegmentStatusType::CLOSED, "Attempting to open segmentid " << segmentid << ", but segments_[segmentid].second == SegmentStatusType::CLOSED.");
-  std::string segment_name = get_segment_name(segmentid);
-  if (create_mode_) {
-    assert(size > 0);
-    bip::shared_memory_object::remove(segment_name.c_str()); // remove segment if it has not been properly removed from last run
-    size_t new_size = (size / page_size_ + 2) * page_size_; // additional room for boost's bookkeeping
-    segments_[segmentid] = std::make_pair(std::unique_ptr<bip::managed_shared_memory>(new bip::managed_shared_memory(bip::create_only, segment_name.c_str(), new_size)), SegmentStatusType::OPENED);
-  } else {
-    segments_[segmentid] = std::make_pair(std::unique_ptr<bip::managed_shared_memory>(new bip::managed_shared_memory(bip::open_only, segment_name.c_str())), SegmentStatusType::OPENED);
-  }
-}
-
-void MemorySegmentPool::unmap_segment(SegmentId segmentid) {
-  segments_[segmentid].first.reset();
-  segments_[segmentid].second = SegmentStatusType::UNOPENED;
-}
-
-void MemorySegmentPool::close_segment(SegmentId segmentid) {
-  RAY_LOG(RAY_DEBUG, "closing segmentid " << segmentid);
-  std::string segment_name = get_segment_name(segmentid);
-  bip::shared_memory_object::remove(segment_name.c_str());
-  segments_[segmentid].first.reset();
-  segments_[segmentid].second = SegmentStatusType::CLOSED;
-}
-
-ObjHandle MemorySegmentPool::allocate(size_t size) {
-  RAY_CHECK(create_mode_, "Attempting to call allocate, but create_mode_ is false");
-  // TODO(pcm): at the moment, this always creates a new segment, this will be changed
-  SegmentId segmentid = segments_.size();
-  open_segment(segmentid, size);
-  objstore_memcheck(size);
-  void* ptr = segments_[segmentid].first->allocate(size);
-  auto handle = segments_[segmentid].first->get_handle_from_address(ptr);
-  return ObjHandle(segmentid, size, handle);
-}
-
-void MemorySegmentPool::deallocate(ObjHandle pointer) {
-  SegmentId segmentid = pointer.segmentid();
-  void* ptr = segments_[segmentid].first->get_address_from_handle(pointer.ipcpointer());
-  segments_[segmentid].first->deallocate(ptr);
-  close_segment(segmentid);
-}
-
-// returns address of the object refered to by the handle, needs to be called on
-// the process that will use the address
-uint8_t* MemorySegmentPool::get_address(ObjHandle pointer) {
-  RAY_CHECK(!create_mode_ || segments_[pointer.segmentid()].second == SegmentStatusType::OPENED, "Object store " << objstoreid_ << " is attempting to call get_address on segmentid " << pointer.segmentid() << ", which has not been opened yet.");
-  if (!create_mode_) {
-    open_segment(pointer.segmentid());
-  }
-  bip::managed_shared_memory* segment = segments_[pointer.segmentid()].first.get();
-  return static_cast<uint8_t*>(segment->get_address_from_handle(pointer.ipcpointer()));
-}
-
-// returns the name of the segment
-std::string MemorySegmentPool::get_segment_name(SegmentId segmentid) {
-  return std::string("ray-{BC200A09-2465-431D-AEC7-2F8530B04535}-objstore-") + std::to_string(objstoreid_) + "-" + objstore_port_ + std::string("-segment-") + std::to_string(segmentid);
-}
-
-MemorySegmentPool::~MemorySegmentPool() {
-  destroy_segments();
-}
-
-void MemorySegmentPool::objstore_memcheck(int64_t size) {
-#if defined(__unix__) || defined(__linux__)
-  struct statvfs buffer;
-  statvfs("/dev/shm/", &buffer);
-  if (size + 100 > buffer.f_bsize * buffer.f_bavail) {
-    MemorySegmentPool::destroy_segments();
-    RAY_LOG(RAY_FATAL, "Not enough memory for allocating object in objectstore.");
-  }
-#endif
-}
-
-void MemorySegmentPool::destroy_segments() {
-  for (size_t segmentid = 0; segmentid < segments_.size(); ++segmentid) {
-    std::string segment_name = get_segment_name(segmentid);
-    segments_[segmentid].first.reset();
-    bip::shared_memory_object::remove(segment_name.c_str());
-  }
-}
-#if defined(WIN32) || defined(_WIN32)
-namespace boost {
-  namespace interprocess {
-    namespace ipcdetail {
-      windows_bootstamp windows_intermodule_singleton<windows_bootstamp>::get() {
-        // HACK: Only do this for Windows as there seems to be no better workaround. Possibly undefined behavior!
-        return reinterpret_cast<windows_bootstamp const &>(std::string("BOOTSTAMP"));
-      }
-    }
-  }
-}
-#endif
diff --git a/src/ipc.h b/src/ipc.h
deleted file mode 100644
index 03300f4ba..000000000
--- a/src/ipc.h
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef RAY_IPC_H
-#define RAY_IPC_H
-
-#include <iostream>
-#include <limits>
-
-#if defined(WIN32) || defined(_WIN32)
-#include <boost/interprocess/detail/windows_intermodule_singleton.hpp>
-namespace boost {
-  namespace interprocess {
-    namespace ipcdetail {
-      struct windows_bootstamp;
-      template<>
-      class windows_intermodule_singleton<windows_bootstamp> {
-      public:
-        static windows_bootstamp get();
-      };
-    }
-  }
-}
-#endif
-
-#include <boost/interprocess/managed_shared_memory.hpp>
-#include <boost/interprocess/ipc/message_queue.hpp>
-
-#include "ray/ray.h"
-
-namespace bip = boost::interprocess;
-
-// Methods for inter process communication (abstracts from the shared memory implementation)
-
-// Message Queues: Exchanging objects of type T between processes on a node
-
-template<typename T = void>
-class MessageQueue;
-
-template<>
-class MessageQueue<> {
-public:
-  ~MessageQueue();
-  MessageQueue();
-  MessageQueue(MessageQueue&& other);
-  MessageQueue& operator=(MessageQueue&& other);
-  bool connected();
-protected:
-  bool connect(const std::string& name, bool create, size_t message_size, size_t message_capacity);
-  bool send(const void* object, size_t size);;
-  bool receive(void* object, size_t size);
-private:
-  std::string name_;
-  bool create_;
-  std::unique_ptr<bip::message_queue> queue_;
-};
-
-template<typename T>
-class MessageQueue : public MessageQueue<> {
-public:
-  bool connect(const std::string& name, bool create, size_t capacity = 1000) { return MessageQueue<>::connect(name, create, sizeof(T), capacity); }
-  bool send(const T* object) { return MessageQueue<>::send(object, sizeof(*object)); };
-  bool receive(T* object) { return MessageQueue<>::receive(object, sizeof(*object)); }
-};
-
-// Object Queues
-
-// For communicating between object store and workers, the following
-// messages can be sent:
-
-// ALLOC: workerid, objectid, size -> objhandle:
-// worker requests an allocation from the object store
-// GET: workerid, objectid -> objhandle:
-// worker requests an object from the object store
-// WORKER_DONE: workerid, objectid -> ():
-// worker tells the object store that an object has been finalized
-// ALIAS_DONE: objectid -> ():
-// objstore tells itself that it has finalized something (perhaps an alias)
-
-enum ObjRequestType {ALLOC = 0, GET = 1, WORKER_DONE = 2, ALIAS_DONE = 3};
-
-struct ObjRequest {
-  WorkerId workerid; // worker that sends the request
-  ObjRequestType type; // do we want to allocate a new object or get a handle?
-  ObjectID objectid; // object ID of the object to be returned/allocated
-  int64_t size; // if allocate, that's the size of the object
-  int64_t metadata_offset; // if sending 'WORKER_DONE', that's the location of the metadata relative to the beginning of the object
-};
-
-typedef size_t SegmentId; // index into a memory segment table
-typedef bip::managed_shared_memory::handle_t IpcPointer;
-
-// Object handle: Handle to object that can be passed around between processes
-// that are connected to the same object store
-
-class ObjHandle {
-public:
-  ObjHandle(SegmentId segmentid = 0, size_t size = 0, IpcPointer ipcpointer = IpcPointer(), size_t metadata_offset = 0);
-  SegmentId segmentid() { return segmentid_; }
-  size_t size() { return size_; }
-  IpcPointer ipcpointer() { return ipcpointer_; }
-  size_t metadata_offset() { return metadata_offset_; }
-  void set_metadata_offset(size_t metadata_offset) {metadata_offset_ = metadata_offset; }
-private:
-  SegmentId segmentid_; // which shared memory file the object is stored in
-  IpcPointer ipcpointer_; // pointer to the beginning of the object, exchangeable between processes
-  size_t size_; // total size of the object
-  size_t metadata_offset_; // offset of the metadata that describes this object
-};
-
-// Memory segment pool: A collection of shared memory segments
-// used in two modes:
-// \item on the object store it is used with create = true, in this case the
-// segments are allocated
-// \item on the worker it is used in open mode, with create = false, in this case
-// the segments, which have been created by the object store, are just mapped
-// into memory
-
-enum SegmentStatusType {UNOPENED = 0, OPENED = 1, CLOSED = 2};
-
-class MemorySegmentPool {
-public:
-  MemorySegmentPool(ObjStoreId objstoreid, std::string& objstore_address, bool create); // can be used in two modes: create mode and open mode (see above)
-  ~MemorySegmentPool();
-  ObjHandle allocate(size_t nbytes); // allocate memory, potentially creating a new segment (only run on object store)
-  void deallocate(ObjHandle pointer); // deallocate object, potentially deallocating a new segment (only run on object store)
-  uint8_t* get_address(ObjHandle pointer); // get address of shared object
-  std::string get_segment_name(SegmentId segmentid); // get the name of a segment
-  void unmap_segment(SegmentId segmentid); // unmap a memory segment from a client (only to be called by clients)
-  void destroy_segments();
-  void objstore_memcheck(int64_t size);
-private:
-  void open_segment(SegmentId segmentid, size_t size = 0); // create a segment or map an existing one into memory
-  void close_segment(SegmentId segmentid); // close a segment
-  bool create_mode_; // true in the object stores, false on the workers
-  ObjStoreId objstoreid_; // the identity of the associated object store
-  // The address of the object store.
-  std::string objstore_address_;
-  // The port of the object store. This is used to help avoid name collisions.
-  std::string objstore_port_;
-  size_t page_size_ = bip::mapped_region::get_page_size();
-  std::vector<std::pair<std::unique_ptr<bip::managed_shared_memory>, SegmentStatusType> > segments_;
-};
-
-#endif
diff --git a/src/objstore.cc b/src/objstore.cc
deleted file mode 100644
index 8f5c82931..000000000
--- a/src/objstore.cc
+++ /dev/null
@@ -1,375 +0,0 @@
-#include "objstore.h"
-
-#include <chrono>
-#include "utils.h"
-
-const size_t ObjStoreService::CHUNK_SIZE = 8 * 1024;
-
-// this method needs to be protected by a objstore_lock_
-// TODO(rkn): Make sure that we do not in fact need the objstore_lock_. We want multiple deliveries to be able to happen simultaneously.
-void ObjStoreService::get_data_from(ObjectID objectid, ObjStore::Stub& stub) {
-  RAY_LOG(RAY_DEBUG, "Objstore " << objstoreid_ << " is beginning to get objectid " << objectid);
-  ObjChunk chunk;
-  ClientContext context;
-  StreamObjToRequest stream_request;
-  stream_request.set_objectid(objectid);
-  std::unique_ptr<ClientReader<ObjChunk> > reader(stub.StreamObjTo(&context, stream_request));
-
-  size_t total_size = 0;
-  ObjHandle handle;
-  if (reader->Read(&chunk)) {
-    total_size = chunk.total_size();
-    handle = alloc(objectid, total_size);
-  }
-  size_t num_bytes = 0;
-  segmentpool_lock_.lock();
-  uint8_t* data = segmentpool_->get_address(handle);
-  segmentpool_lock_.unlock();
-  do {
-    RAY_CHECK_LE(num_bytes + chunk.data().size(), total_size, "The reader attempted to stream too many bytes.");
-    std::memcpy(data, chunk.data().c_str(), chunk.data().size());
-    data += chunk.data().size();
-    num_bytes += chunk.data().size();
-  } while (reader->Read(&chunk));
-  RAY_CHECK_GRPC(reader->Finish());
-
-  // finalize object
-  RAY_CHECK_EQ(num_bytes, total_size, "Streamed objectid " << objectid << ", but num_bytes != total_size");
-  object_ready(objectid, chunk.metadata_offset());
-  RAY_LOG(RAY_DEBUG, "finished streaming data, objectid was " << objectid << " and size was " << num_bytes);
-}
-
-ObjStoreService::ObjStoreService(std::shared_ptr<Channel> scheduler_channel)
-  : scheduler_stub_(Scheduler::NewStub(scheduler_channel)) {
-}
-
-void ObjStoreService::register_objstore(const std::string& objstore_address, const std::string& recv_queue_name) {
-  // Create the queue that will be used by workers to send requests to the
-  // object store.
-  RAY_LOG(RAY_INFO, "Object store is creating queue with name " << recv_queue_name);
-  RAY_CHECK(recv_queue_.connect(recv_queue_name, true), "error connecting recv_queue_");
-  objstore_address_ = objstore_address;
-  // Register the object store with the scheduler.
-  ClientContext context;
-  RegisterObjStoreRequest request;
-  request.set_objstore_address(objstore_address);
-  RegisterObjStoreReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->RegisterObjStore(&context, request, &reply));
-  objstoreid_ = reply.objstoreid();
-  segmentpool_ = std::make_shared<MemorySegmentPool>(objstoreid_, objstore_address_, true);
-}
-
-// this method needs to be protected by a objstores_lock_
-ObjStore::Stub& ObjStoreService::get_objstore_stub(const std::string& objstore_address) {
-  auto iter = objstores_.find(objstore_address);
-  if (iter != objstores_.end())
-    return *(iter->second);
-  auto channel = grpc::CreateChannel(objstore_address, grpc::InsecureChannelCredentials());
-  objstores_.emplace(objstore_address, ObjStore::NewStub(channel));
-  return *objstores_[objstore_address];
-}
-
-Status ObjStoreService::StartDelivery(ServerContext* context, const StartDeliveryRequest* request, AckReply* reply) {
-  // TODO(rkn): We're pushing the delivery task onto a new thread so that this method can return immediately. This matters
-  // because the scheduler holds a lock while DeliverObj is being called. The correct solution is to make DeliverObj
-  // an asynchronous call (and similarly with the rest of the object store service methods).
-  std::string address = request->objstore_address();
-  ObjectID objectid = request->objectid();
-  {
-    std::lock_guard<std::mutex> memory_lock(memory_lock_);
-    if (objectid >= memory_.size()) {
-      memory_.resize(objectid + 1, std::make_pair(ObjHandle(), MemoryStatusType::NOT_PRESENT));
-    }
-    if (memory_[objectid].second == MemoryStatusType::NOT_PRESENT) {
-    }
-    else {
-      RAY_CHECK_NEQ(memory_[objectid].second, MemoryStatusType::DEALLOCATED, "Objstore " << objstoreid_ << " is attempting to get objectid " << objectid << ", but memory_[objectid] == DEALLOCATED.");
-      RAY_LOG(RAY_DEBUG, "Objstore " << objstoreid_ << " already has objectid " << objectid << " or it is already being shipped, so no need to get it again.");
-      return Status::OK;
-    }
-    memory_[objectid].second = MemoryStatusType::PRE_ALLOCED;
-  }
-  delivery_threads_.push_back(std::make_shared<std::thread>([this, address, objectid]() {
-    std::lock_guard<std::mutex> objstores_lock(objstores_lock_);
-    ObjStore::Stub& stub = get_objstore_stub(address);
-    get_data_from(objectid, stub);
-  }));
-  return Status::OK;
-}
-
-Status ObjStoreService::ObjStoreInfo(ServerContext* context, const ObjStoreInfoRequest* request, ObjStoreInfoReply* reply) {
-  std::lock_guard<std::mutex> memory_lock(memory_lock_);
-  for (size_t i = 0; i < memory_.size(); ++i) {
-    if (memory_[i].second == MemoryStatusType::READY) { // is the object available?
-      reply->add_objectid(i);
-    }
-  }
-  /*
-  for (int i = 0; i < request->objectid_size(); ++i) {
-    ObjectID objectid = request->objectid(i);
-    Obj* obj = new Obj();
-    std::string data(memory_[objectid].ptr.data, memory_[objectid].ptr.len); // copies, but for debugging should be ok
-    obj->ParseFromString(data);
-    reply->mutable_obj()->AddAllocated(obj);
-  }
-  */
-  return Status::OK;
-}
-
-Status ObjStoreService::StreamObjTo(ServerContext* context, const StreamObjToRequest* request, ServerWriter<ObjChunk>* writer) {
-  RAY_LOG(RAY_DEBUG, "begin to stream data from object store " << objstoreid_);
-  ObjChunk chunk;
-  ObjectID objectid = request->objectid();
-  memory_lock_.lock();
-  RAY_CHECK_LT(objectid, memory_.size(), "Objstore " << objstoreid_ << " is attempting to use objectid " << objectid << " in StreamObjTo, but this objectid is not present in the object store.");
-  RAY_CHECK_EQ(memory_[objectid].second, MemoryStatusType::READY, "Objstore " << objstoreid_ << " is attempting to stream objectid " << objectid << ", but memory_[objectid].second != MemoryStatusType::READY.");
-  ObjHandle handle = memory_[objectid].first;
-  memory_lock_.unlock(); // TODO(rkn): Make sure we don't still need to hold on to this lock.
-  segmentpool_lock_.lock();
-  const uint8_t* head = segmentpool_->get_address(handle);
-  segmentpool_lock_.unlock();
-  size_t size = handle.size();
-  for (size_t i = 0; i < size; i += CHUNK_SIZE) {
-    chunk.set_metadata_offset(handle.metadata_offset());
-    chunk.set_total_size(size);
-    chunk.set_data(head + i, std::min(CHUNK_SIZE, size - i));
-    RAY_CHECK(writer->Write(chunk), "stream connection prematurely closed")
-  }
-  return Status::OK;
-}
-
-Status ObjStoreService::NotifyAlias(ServerContext* context, const NotifyAliasRequest* request, AckReply* reply) {
-  // NotifyAlias assumes that the objstore already holds canonical_objectid
-  ObjectID alias_objectid = request->alias_objectid();
-  ObjectID canonical_objectid = request->canonical_objectid();
-  RAY_LOG(RAY_DEBUG, "Aliasing objectid " << alias_objectid << " with objectid " << canonical_objectid);
-  {
-    std::lock_guard<std::mutex> memory_lock(memory_lock_);
-    RAY_CHECK_LT(canonical_objectid, memory_.size(), "Attempting to alias objectid " << alias_objectid << " with objectid " << canonical_objectid << ", but objectid " << canonical_objectid << " is not in the objstore.")
-    RAY_CHECK_NEQ(memory_[canonical_objectid].second, MemoryStatusType::NOT_READY, "Attempting to alias objectid " << alias_objectid << " with objectid " << canonical_objectid << ", but objectid " << canonical_objectid << " is not ready yet in the objstore.")
-    RAY_CHECK_NEQ(memory_[canonical_objectid].second, MemoryStatusType::NOT_PRESENT, "Attempting to alias objectid " << alias_objectid << " with objectid " << canonical_objectid << ", but objectid " << canonical_objectid << " is not present in the objstore.")
-    RAY_CHECK_NEQ(memory_[canonical_objectid].second, MemoryStatusType::DEALLOCATED, "Attempting to alias objectid " << alias_objectid << " with objectid " << canonical_objectid << ", but objectid " << canonical_objectid << " has already been deallocated.")
-    if (alias_objectid >= memory_.size()) {
-      memory_.resize(alias_objectid + 1, std::make_pair(ObjHandle(), MemoryStatusType::NOT_PRESENT));
-    }
-    memory_[alias_objectid].first = memory_[canonical_objectid].first;
-    memory_[alias_objectid].second = MemoryStatusType::READY;
-  }
-  ObjRequest done_request;
-  done_request.type = ObjRequestType::ALIAS_DONE;
-  done_request.objectid = alias_objectid;
-  RAY_CHECK(recv_queue_.send(&done_request), "Failed to send message from the object store to itself because the message queue was full.");
-  return Status::OK;
-}
-
-Status ObjStoreService::DeallocateObject(ServerContext* context, const DeallocateObjectRequest* request, AckReply* reply) {
-  ObjectID canonical_objectid = request->canonical_objectid();
-  RAY_LOG(RAY_INFO, "Deallocating canonical_objectid " << canonical_objectid);
-  std::lock_guard<std::mutex> memory_lock(memory_lock_);
-  RAY_CHECK_EQ(memory_[canonical_objectid].second, MemoryStatusType::READY, "Attempting to deallocate canonical_objectid " << canonical_objectid << ", but memory_[canonical_objectid].second = " << memory_[canonical_objectid].second);
-  RAY_CHECK_LT(canonical_objectid, memory_.size(), "Attempting to deallocate canonical_objectid " << canonical_objectid << ", but it is not in the objstore.");
-  segmentpool_lock_.lock();
-  segmentpool_->deallocate(memory_[canonical_objectid].first);
-  segmentpool_lock_.unlock();
-  memory_[canonical_objectid].second = MemoryStatusType::DEALLOCATED;
-  return Status::OK;
-}
-
-// This table describes how the memory status changes in response to requests.
-//
-// MemoryStatus | ObjRequest  | New MemoryStatus | action performed
-// -------------+-------------+------------------+----------------------------
-// NOT_PRESENT  | ALLOC       | NOT_READY        | allocate object
-// NOT_READY    | WORKER_DONE | READY            | send ObjReady to scheduler
-// NOT_READY    | GET         | NOT_READY        | add to get queue
-// READY        | GET         | READY            | return handle
-// READY        | DEALLOC     | DEALLOCATED      | deallocate
-// -------------+-------------+------------------+----------------------------
-void ObjStoreService::process_objstore_request(const ObjRequest request) {
-  switch (request.type) {
-    case ObjRequestType::ALIAS_DONE: {
-        process_gets_for_objectid(request.objectid);
-      }
-      break;
-    default: {
-        RAY_CHECK(false, "Attempting to process request of type " <<  request.type << ". This code should be unreachable.");
-      }
-  }
-}
-
-void ObjStoreService::process_worker_request(const ObjRequest request) {
-  if (request.workerid >= send_queues_.size()) {
-    send_queues_.resize(request.workerid + 1);
-  }
-  if (!send_queues_[request.workerid].connected()) {
-    std::string queue_name = std::string("queue:") + objstore_address_ + std::string(":worker:") + std::to_string(request.workerid) + std::string(":obj");
-    RAY_CHECK(send_queues_[request.workerid].connect(queue_name, false), "error connecting receive_queue_");
-  }
-  {
-    std::lock_guard<std::mutex> memory_lock(memory_lock_);
-    if (request.objectid >= memory_.size()) {
-      memory_.resize(request.objectid + 1, std::make_pair(ObjHandle(), MemoryStatusType::NOT_PRESENT));
-    }
-  }
-  switch (request.type) {
-    case ObjRequestType::ALLOC: {
-        ObjHandle handle = alloc(request.objectid, request.size); // This method acquires memory_lock_
-        RAY_CHECK(send_queues_[request.workerid].send(&handle), "Failed to send message from the object store to the worker with id " << request.workerid << " because the message queue was full.");
-      }
-      break;
-    case ObjRequestType::GET: {
-        std::lock_guard<std::mutex> memory_lock(memory_lock_);
-        std::pair<ObjHandle, MemoryStatusType>& item = memory_[request.objectid];
-        if (item.second == MemoryStatusType::READY) {
-          RAY_LOG(RAY_DEBUG, "Responding to GET request: returning objectid " << request.objectid);
-          RAY_CHECK(send_queues_[request.workerid].send(&item.first), "Failed to send message from the object store to the worker with id " << request.workerid << " because the message queue was full.");
-        } else if (item.second == MemoryStatusType::NOT_READY || item.second == MemoryStatusType::NOT_PRESENT || item.second == MemoryStatusType::PRE_ALLOCED) {
-          std::lock_guard<std::mutex> lock(get_queue_lock_);
-          get_queue_.push_back(std::make_pair(request.workerid, request.objectid));
-        } else {
-          RAY_CHECK(false, "A worker requested objectid " << request.objectid << ", but memory_[objectid].second = " << memory_[request.objectid].second);
-        }
-      }
-      break;
-    case ObjRequestType::WORKER_DONE: {
-        object_ready(request.objectid, request.metadata_offset); // This method acquires memory_lock_
-      }
-      break;
-    default: {
-        RAY_CHECK(false, "Attempting to process request of type " <<  request.type << ". This code should be unreachable.");
-      }
-  }
-}
-
-void ObjStoreService::process_requests() {
-  // TODO(rkn): Should memory_lock_ be used in this method?
-  ObjRequest request;
-  while (true) {
-    RAY_CHECK(recv_queue_.receive(&request), "error receiving over IPC");
-    switch (request.type) {
-      case ObjRequestType::ALLOC: {
-          RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Allocate object with objectid " << request.objectid << " and size " << request.size);
-          process_worker_request(request);
-        }
-        break;
-      case ObjRequestType::GET: {
-          RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Get object with objectid " << request.objectid);
-          process_worker_request(request);
-        }
-        break;
-      case ObjRequestType::WORKER_DONE: {
-          RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Finalize object with objectid " << request.objectid);
-          process_worker_request(request);
-        }
-        break;
-      case ObjRequestType::ALIAS_DONE: {
-          process_objstore_request(request);
-        }
-        break;
-      default: {
-          RAY_CHECK(false, "Attempting to process request of type " <<  request.type << ". This code should be unreachable.");
-        }
-    }
-  }
-}
-
-void ObjStoreService::process_gets_for_objectid(ObjectID objectid) {
-  std::pair<ObjHandle, MemoryStatusType>& item = memory_[objectid];
-  std::lock_guard<std::mutex> get_queue_lock(get_queue_lock_);
-  for (size_t i = 0; i < get_queue_.size(); ++i) {
-    if (get_queue_[i].second == objectid) {
-      ObjHandle& elem = memory_[objectid].first;
-      RAY_CHECK(send_queues_[get_queue_[i].first].send(&item.first), "Failed to send message from the object store to the worker with id " << get_queue_[i].first << " because the message queue was full.");
-      // Remove the get task from the queue
-      std::swap(get_queue_[i], get_queue_[get_queue_.size() - 1]);
-      get_queue_.pop_back();
-      i -= 1;
-    }
-  }
-}
-
-ObjHandle ObjStoreService::alloc(ObjectID objectid, size_t size) {
-  segmentpool_lock_.lock();
-  ObjHandle handle = segmentpool_->allocate(size);
-  segmentpool_lock_.unlock();
-  std::lock_guard<std::mutex> memory_lock(memory_lock_);
-  RAY_LOG(RAY_VERBOSE, "Allocating space for objectid " << objectid << " on object store " << objstoreid_);
-  RAY_CHECK(memory_[objectid].second == MemoryStatusType::NOT_PRESENT || memory_[objectid].second == MemoryStatusType::PRE_ALLOCED, "Attempting to allocate space for objectid " << objectid << ", but memory_[objectid].second = " << memory_[objectid].second);
-  memory_[objectid].first = handle;
-  memory_[objectid].second = MemoryStatusType::NOT_READY;
-  return handle;
-}
-
-void ObjStoreService::object_ready(ObjectID objectid, size_t metadata_offset) {
-  {
-    RAY_LOG(RAY_INFO, "Object with ObjectID " << objectid << " is ready.");
-    std::lock_guard<std::mutex> memory_lock(memory_lock_);
-    std::pair<ObjHandle, MemoryStatusType>& item = memory_[objectid];
-    RAY_CHECK_EQ(item.second, MemoryStatusType::NOT_READY, "A worker notified the object store that objectid " << objectid << " has been written to the object store, but memory_[objectid].second != NOT_READY.");
-    item.first.set_metadata_offset(metadata_offset);
-    item.second = MemoryStatusType::READY;
-  }
-  process_gets_for_objectid(objectid);
-  // Tell the scheduler that the object arrived
-  // TODO(pcm): put this in a separate thread so we don't have to pay the latency here
-  ClientContext objready_context;
-  ObjReadyRequest objready_request;
-  objready_request.set_objectid(objectid);
-  objready_request.set_objstoreid(objstoreid_);
-  AckReply objready_reply;
-  RAY_CHECK_GRPC(scheduler_stub_->ObjReady(&objready_context, objready_request, &objready_reply));
-}
-
-void ObjStoreService::start_objstore_service() {
-  communicator_thread_ = std::thread([this]() {
-    RAY_LOG(RAY_INFO, "started object store communicator server");
-    process_requests();
-  });
-}
-
-void start_objstore(const char* scheduler_addr, const char* node_ip_address) {
-  RAY_LOG(RAY_INFO, "Starting an object store on node " << std::string(node_ip_address));
-  auto scheduler_channel = grpc::CreateChannel(scheduler_addr, grpc::InsecureChannelCredentials());
-  RAY_LOG(RAY_INFO, "Object store connected to scheduler " << scheduler_addr);
-  ObjStoreService service(scheduler_channel);
-  ServerBuilder builder;
-  // Get GRPC to assign an unused port.
-  int port;
-  builder.AddListeningPort(std::string("0.0.0.0:0"), grpc::InsecureServerCredentials(), &port);
-  builder.RegisterService(&service);
-  std::unique_ptr<Server> server(builder.BuildAndStart());
-  if (server == nullptr) {
-    RAY_CHECK(false, "Failed to create the object store service.");
-  }
-  std::string objstore_address = std::string(node_ip_address) + ":" + std::to_string(port);
-  RAY_LOG(RAY_INFO, "This object store has address " << objstore_address);
-  std::string recv_queue_name = std::string("queue:") + objstore_address + std::string(":obj");
-  service.register_objstore(objstore_address, recv_queue_name);
-  service.start_objstore_service();
-  // Process incoming GRPC calls. These may come from the scheduler or from
-  // other object stores. This method does not return.
-  server->Wait();
-}
-
-RayConfig global_ray_config;
-
-int main(int argc, char** argv) {
-  RAY_CHECK_GE(argc, 3, "object store: expected at least two arguments (scheduler ip address and object store ip address)");
-
-  if (argc > 3) {
-    const char* log_file_name = get_cmd_option(argv, argv + argc, "--log-file-name");
-    if (log_file_name) {
-      std::cout << "object store: writing to log file " << log_file_name << std::endl;
-      create_log_dir_or_die(log_file_name);
-      global_ray_config.log_to_file = true;
-      global_ray_config.logfile.open(log_file_name);
-    } else {
-      std::cout << "object store: writing logs to stdout; you can change this by passing --log-file-name <filename> to ./scheduler" << std::endl;
-      global_ray_config.log_to_file = false;
-    }
-  }
-
-  start_objstore(argv[1], argv[2]);
-
-  return 0;
-}
diff --git a/src/objstore.h b/src/objstore.h
deleted file mode 100644
index 351b09068..000000000
--- a/src/objstore.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef RAY_OBJSTORE_H
-#define RAY_OBJSTORE_H
-
-#include <unordered_map>
-#include <memory>
-#include <thread>
-#include <iostream>
-#include <grpc++/grpc++.h>
-
-#include "ray/ray.h"
-#include "ray.grpc.pb.h"
-#include "types.pb.h"
-#include "ipc.h"
-
-using grpc::Server;
-using grpc::ServerBuilder;
-using grpc::ServerReader;
-using grpc::ServerContext;
-using grpc::ClientContext;
-using grpc::ServerWriter;
-using grpc::ClientReader;
-using grpc::Status;
-
-using grpc::Channel;
-
-// READY:       This is used to indicate that the object has been copied from a
-//              worker and is ready to be used.
-// NOT_READY:   This is used to indicate that memory has been allocated for the
-//              object, but the object hasn't been copied from a worker yet.
-// DEALLOCATED: This is used to indicate that the object has been deallocated.
-// NOT_PRESENT: This is used to indicate that space has not been allocated for
-//              this object in this object store.
-// PRE_ALLOCED: This is used to indicate that the memory has not yet been
-//              alloced, but it will be alloced soon. This is set when we call
-//              StartDelivery.
-enum MemoryStatusType {READY = 0, NOT_READY = 1, DEALLOCATED = 2, NOT_PRESENT = 3, PRE_ALLOCED = 4};
-
-class ObjStoreService final : public ObjStore::Service {
-public:
-  ObjStoreService(std::shared_ptr<Channel> scheduler_channel);
-
-  Status StartDelivery(ServerContext* context, const StartDeliveryRequest* request, AckReply* reply) override;
-  Status StreamObjTo(ServerContext* context, const StreamObjToRequest* request, ServerWriter<ObjChunk>* writer) override;
-  Status NotifyAlias(ServerContext* context, const NotifyAliasRequest* request, AckReply* reply) override;
-  Status DeallocateObject(ServerContext* context, const DeallocateObjectRequest* request, AckReply* reply) override;
-  Status ObjStoreInfo(ServerContext* context, const ObjStoreInfoRequest* request, ObjStoreInfoReply* reply) override;
-  void start_objstore_service();
-  void register_objstore(const std::string& objstore_address, const std::string& recv_queue_name);
-private:
-  void get_data_from(ObjectID objectid, ObjStore::Stub& stub);
-  // check if we already connected to the other objstore, if yes, return reference to connection, otherwise connect
-  ObjStore::Stub& get_objstore_stub(const std::string& objstore_address);
-  void process_worker_request(const ObjRequest request);
-  void process_objstore_request(const ObjRequest request);
-  void process_requests();
-  void process_gets_for_objectid(ObjectID objectid);
-  ObjHandle alloc(ObjectID objectid, size_t size);
-  void object_ready(ObjectID objectid, size_t metadata_offset);
-
-  static const size_t CHUNK_SIZE;
-  std::string objstore_address_;
-  ObjStoreId objstoreid_; // id of this objectstore in the scheduler object store table
-  std::shared_ptr<MemorySegmentPool> segmentpool_;
-  std::mutex segmentpool_lock_;
-  std::vector<std::pair<ObjHandle, MemoryStatusType> > memory_; // object ID -> (memory address, memory status)
-  std::mutex memory_lock_;
-  std::unordered_map<std::string, std::unique_ptr<ObjStore::Stub>> objstores_;
-  std::mutex objstores_lock_;
-  std::unique_ptr<Scheduler::Stub> scheduler_stub_;
-  std::vector<std::pair<WorkerId, ObjectID> > get_queue_;
-  std::mutex get_queue_lock_;
-  MessageQueue<ObjRequest> recv_queue_; // This queue is used by workers to send tasks to the object store.
-  std::vector<MessageQueue<ObjHandle> > send_queues_; // This maps workerid -> queue. The object store uses these queues to send replies to the relevant workers.
-  std::thread communicator_thread_;
-
-  std::vector<std::shared_ptr<std::thread> > delivery_threads_; // TODO(rkn): document
-  // TODO(rkn): possibly add lock, and properly remove these threads from the delivery_threads_ when the deliveries are done
-
-};
-
-#endif
diff --git a/src/raylib.cc b/src/raylib.cc
deleted file mode 100644
index 9ef9cf9aa..000000000
--- a/src/raylib.cc
+++ /dev/null
@@ -1,870 +0,0 @@
-// TODO: - Implement other datatypes for ndarray
-
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-
-#include <Python.h>
-#include <structmember.h>
-#define PY_ARRAY_UNIQUE_SYMBOL RAYLIB_ARRAY_API
-#include <numpy/arrayobject.h>
-#include <iostream>
-
-#include "types.pb.h"
-#include "worker.h"
-#include "utils.h"
-
-RayConfig global_ray_config;
-
-extern "C" {
-
-static int PyObjectToWorker(PyObject* object, Worker **worker);
-
-// Object references
-
-typedef struct {
-  PyObject_HEAD
-  ObjectID id;
-  // We give the PyObjectID object a reference to the worker capsule object to
-  // make sure that the worker capsule does not go out of scope until all of the
-  // object references have gone out of scope. The reason for this is that the
-  // worker capsule destructor destroys the worker object. If the worker object
-  // has been destroyed, then when the object reference tries to call
-  // worker->decrement_reference_count, we can get a segfault.
-  PyObject* worker_capsule;
-} PyObjectID;
-
-static void PyObjectID_dealloc(PyObjectID *self) {
-  Worker* worker;
-  PyObjectToWorker(self->worker_capsule, &worker);
-  std::vector<ObjectID> objectids;
-  objectids.push_back(self->id);
-  RAY_LOG(RAY_REFCOUNT, "In PyObjectID_dealloc, calling decrement_reference_count for objectid " << self->id);
-  worker->decrement_reference_count(objectids);
-  Py_DECREF(self->worker_capsule); // The corresponding increment happens in PyObjectID_init.
-  self->ob_type->tp_free((PyObject*) self);
-}
-
-static PyObject* PyObjectID_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
-  PyObjectID* self = (PyObjectID*) type->tp_alloc(type, 0);
-  if (self != NULL) {
-    self->id = 0;
-  }
-  return (PyObject*) self;
-}
-
-static int PyObjectID_init(PyObjectID *self, PyObject *args, PyObject *kwds) {
-  if (!PyArg_ParseTuple(args, "iO", &self->id, &self->worker_capsule)) {
-    return -1;
-  }
-  Worker* worker;
-  PyObjectToWorker(self->worker_capsule, &worker);
-  Py_INCREF(self->worker_capsule); // The corresponding decrement happens in PyObjectID_dealloc.
-  std::vector<ObjectID> objectids;
-  objectids.push_back(self->id);
-  RAY_LOG(RAY_REFCOUNT, "In PyObjectID_init, calling increment_reference_count for objectid " << objectids[0]);
-  worker->increment_reference_count(objectids);
-  return 0;
-};
-
-static int PyObjectID_compare(PyObject* a, PyObject* b) {
-  PyObjectID* A = (PyObjectID*) a;
-  PyObjectID* B = (PyObjectID*) b;
-  if (A->id < B->id) {
-    return -1;
-  }
-  if (A->id > B->id) {
-    return 1;
-  }
-  return 0;
-}
-
-static long PyObjectID_hash(PyObject* a) {
-  PyObjectID* A = (PyObjectID*) a;
-  PyObject* tuple = PyTuple_New(1);
-  PyTuple_SetItem(tuple, 0, PyInt_FromLong(A->id));
-  long hash = PyObject_Hash(tuple);
-  Py_XDECREF(tuple);
-  return hash;
-}
-
-char RAY_ID_LITERAL[] = "id";
-char RAY_OBJECT_ID_LITERAL[] = "object id";
-
-static PyMemberDef PyObjectID_members[] = {
-  {RAY_ID_LITERAL, T_INT, offsetof(PyObjectID, id), 0, RAY_OBJECT_ID_LITERAL},
-  {NULL}
-};
-
-static PyTypeObject PyObjectIDType = {
-  PyObject_HEAD_INIT(NULL)
-  0,                         /* ob_size */
-  "ray.ObjectID",            /* tp_name */
-  sizeof(PyObjectID),        /* tp_basicsize */
-  0,                         /* tp_itemsize */
-  (destructor)PyObjectID_dealloc,          /* tp_dealloc */
-  0,                         /* tp_print */
-  0,                         /* tp_getattr */
-  0,                         /* tp_setattr */
-  PyObjectID_compare,        /* tp_compare */
-  0,                         /* tp_repr */
-  0,                         /* tp_as_number */
-  0,                         /* tp_as_sequence */
-  0,                         /* tp_as_mapping */
-  PyObjectID_hash,           /* tp_hash */
-  0,                         /* tp_call */
-  0,                         /* tp_str */
-  0,                         /* tp_getattro */
-  0,                         /* tp_setattro */
-  0,                         /* tp_as_buffer */
-  Py_TPFLAGS_DEFAULT,        /* tp_flags */
-  "Ray objects",             /* tp_doc */
-  0,                         /* tp_traverse */
-  0,                         /* tp_clear */
-  0,                         /* tp_richcompare */
-  0,                         /* tp_weaklistoffset */
-  0,                         /* tp_iter */
-  0,                         /* tp_iternext */
-  0,                         /* tp_methods */
-  PyObjectID_members,        /* tp_members */
-  0,                         /* tp_getset */
-  0,                         /* tp_base */
-  0,                         /* tp_dict */
-  0,                         /* tp_descr_get */
-  0,                         /* tp_descr_set */
-  0,                         /* tp_dictoffset */
-  (initproc)PyObjectID_init, /* tp_init */
-  0,                         /* tp_alloc */
-  PyObjectID_new,            /* tp_new */
-};
-
-// create PyObjectID from C++ (could be made more efficient if neccessary)
-PyObject* make_pyobjectid(PyObject* worker_capsule, ObjectID objectid) {
-  PyObject* arglist = Py_BuildValue("(iO)", objectid, worker_capsule);
-  PyObject* result = PyObject_CallObject((PyObject*) &PyObjectIDType, arglist);
-  Py_DECREF(arglist);
-  return result;
-}
-
-// Error handling
-
-static PyObject *RayError;
-static PyObject *RaySizeError;
-
-// Pass arguments from Python to C++
-
-static int PyObjectToTask(PyObject* object, Task **task) {
-  if (PyCapsule_IsValid(object, "task")) {
-    *task = static_cast<Task*>(PyCapsule_GetPointer(object, "task"));
-    return 1;
-  } else {
-    PyErr_SetString(PyExc_TypeError, "must be a 'task' capsule");
-    return 0;
-  }
-}
-
-static int PyObjectToObj(PyObject* object, Obj **obj) {
-  if (PyCapsule_IsValid(object, "obj")) {
-    *obj = static_cast<Obj*>(PyCapsule_GetPointer(object, "obj"));
-    return 1;
-  } else {
-    PyErr_SetString(PyExc_TypeError, "must be a 'obj' capsule");
-    return 0;
-  }
-}
-
-static int PyObjectToWorker(PyObject* object, Worker **worker) {
-  if (PyCapsule_IsValid(object, "worker")) {
-    *worker = static_cast<Worker*>(PyCapsule_GetPointer(object, "worker"));
-    return 1;
-  } else {
-    PyErr_SetString(PyExc_TypeError, "must be a 'worker' capsule");
-    return 0;
-  }
-}
-
-static int PyObjectToObjectID(PyObject* object, ObjectID *objectid) {
-  if (PyObject_IsInstance(object, (PyObject*)&PyObjectIDType)) {
-    *objectid = ((PyObjectID*) object)->id;
-    return 1;
-  } else {
-    PyErr_SetString(PyExc_TypeError, "must be an object reference");
-    return 0;
-  }
-}
-
-// Destructors
-
-static void ObjCapsule_Destructor(PyObject* capsule) {
-  Obj* obj = static_cast<Obj*>(PyCapsule_GetPointer(capsule, "obj"));
-  delete obj;
-}
-
-static void WorkerCapsule_Destructor(PyObject* capsule) {
-  Worker* obj = static_cast<Worker*>(PyCapsule_GetPointer(capsule, "worker"));
-  delete obj;
-}
-
-static void TaskCapsule_Destructor(PyObject* capsule) {
-  Task* obj = static_cast<Task*>(PyCapsule_GetPointer(capsule, "task"));
-  delete obj;
-}
-
-// Helper methods
-
-// Pass ownership of both the key and the value to the PyDict.
-// This is only required for PyDicts, not for PyLists or PyTuples, compare
-// https://docs.python.org/2/c-api/dict.html
-// https://docs.python.org/2/c-api/list.html
-// https://docs.python.org/2/c-api/tuple.html
-
-void set_dict_item_and_transfer_ownership(PyObject* dict, PyObject* key, PyObject* val) {
-  PyDict_SetItem(dict, key, val);
-  Py_XDECREF(key);
-  Py_XDECREF(val);
-}
-
-// This converts an Python ObjectID to an Python integer.
-static PyObject* serialize_objectid(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  if (!PyArg_ParseTuple(args, "O&O&", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid)) {
-    return NULL;
-  }
-  return PyInt_FromLong(objectid);
-}
-
-// This converts a Python integer to a Python ObjectID.
-static PyObject* deserialize_objectid(PyObject* self, PyObject* args) {
-  PyObject* worker_capsule;
-  int objectid;
-  if (!PyArg_ParseTuple(args, "Oi", &worker_capsule, &objectid)) {
-    return NULL;
-  }
-  return make_pyobjectid(worker_capsule, static_cast<ObjectID>(objectid));
-}
-
-static PyObject* allocate_buffer(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  SegmentId segmentid;
-  long size;
-  if (!PyArg_ParseTuple(args, "O&O&l", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid, &size)) {
-    return NULL;
-  }
-  void* address = reinterpret_cast<void*>(const_cast<char*>(worker->allocate_buffer(objectid, size, segmentid)));
-  std::vector<npy_intp> dim({size});
-  PyObject* t = PyTuple_New(2);
-  PyTuple_SetItem(t, 0, PyArray_SimpleNewFromData(1, dim.data(), NPY_BYTE, address));
-  PyTuple_SetItem(t, 1, PyInt_FromLong(segmentid));
-  return t;
-}
-
-static PyObject* finish_buffer(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  long segmentid;
-  long metadata_offset;
-  if (!PyArg_ParseTuple(args, "O&O&ll", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid, &segmentid, &metadata_offset)) {
-    return NULL;
-  }
-  return worker->finish_buffer(objectid, segmentid, metadata_offset);
-}
-
-static PyObject* get_buffer(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  int64_t size;
-  SegmentId segmentid;
-  int64_t metadata_offset;
-  if (!PyArg_ParseTuple(args, "O&O&", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid)) {
-    return NULL;
-  }
-  void* address = reinterpret_cast<void*>(const_cast<char*>(worker->get_buffer(objectid, size, segmentid, metadata_offset)));
-  std::vector<npy_intp> dim({static_cast<npy_intp>(size)});
-  PyObject* t = PyTuple_New(3);
-  PyTuple_SetItem(t, 0, PyArray_SimpleNewFromData(1, dim.data(), NPY_BYTE, address));
-  PyTuple_SetItem(t, 1, PyInt_FromLong(segmentid));
-  PyTuple_SetItem(t, 2, PyInt_FromLong(metadata_offset));
-  return t;
-}
-
-static PyObject* is_arrow(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  if (!PyArg_ParseTuple(args, "O&O&", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid)) {
-    return NULL;
-  }
-  if (worker->is_arrow(objectid))
-    Py_RETURN_TRUE;
-  else
-    Py_RETURN_FALSE;
-}
-
-static PyObject* unmap_object(PyObject* self, PyObject* args) {
-  Worker* worker;
-  int segmentid;
-  if (!PyArg_ParseTuple(args, "O&i", &PyObjectToWorker, &worker, &segmentid)) {
-    return NULL;
-  }
-  worker->unmap_object(segmentid);
-  Py_RETURN_NONE;
-}
-
-static PyObject* serialize_task(PyObject* self, PyObject* args) {
-  PyObject* worker_capsule;
-  Task* task = new Task(); // TODO: to be freed in capsule destructor
-  char* name;
-  int len;
-  PyObject* arguments;
-  if (!PyArg_ParseTuple(args, "Os#O", &worker_capsule, &name, &len, &arguments)) {
-    return NULL;
-  }
-  task->set_name(std::string(name, len));
-  std::vector<ObjectID> objectids; // This is a vector of all the objectids that are serialized in this task, including objectids that are contained in Python objects that are passed by value.
-  if (PyList_Check(arguments)) {
-    for (size_t i = 0, size = PyList_Size(arguments); i < size; ++i) {
-      PyObject* element = PyList_GetItem(arguments, i);
-      if (PyObject_IsInstance(element, (PyObject*)&PyObjectIDType)) {
-        // Handle the case where the argument to the task is an ObjectID.
-        ObjectID objectid = ((PyObjectID*) element)->id;
-        task->add_arg()->set_objectid(objectid);
-        objectids.push_back(objectid);
-      } else if (PyString_CheckExact(element)) {
-        // Handle the case where the argument to the task is being passed by
-        // value and we receive an argument serialized as a string here.
-        char* buffer;
-        Py_ssize_t length;
-        PyString_AsStringAndSize(element, &buffer, &length);
-        task->add_arg()->set_serialized_arg(std::string(buffer, length));
-      } else {
-        RAY_CHECK(false, "This code should be unreachable.");
-      }
-    }
-  } else {
-    PyErr_SetString(RayError, "serialize_task: second argument needs to be a list");
-    return NULL;
-  }
-  Worker* worker;
-  PyObjectToWorker(worker_capsule, &worker);
-  if (objectids.size() > 0) {
-    RAY_LOG(RAY_REFCOUNT, "In serialize_task, calling increment_reference_count for contained objectids");
-    worker->increment_reference_count(objectids);
-  }
-  std::string output;
-  task->SerializeToString(&output);
-  int task_size = output.length();
-  return PyCapsule_New(static_cast<void*>(task), "task", &TaskCapsule_Destructor);
-}
-
-static PyObject* deserialize_task(PyObject* worker_capsule, const Task& task) {
-  std::vector<ObjectID> objectids; // This is a vector of all the objectids that were serialized in this task, including objectids that are contained in Python objects that are passed by value.
-  PyObject* string = PyString_FromStringAndSize(task.name().c_str(), task.name().size());
-  int argsize = task.arg_size();
-  PyObject* arglist = PyList_New(argsize);
-  for (int i = 0; i < argsize; ++i) {
-    if (task.arg(i).serialized_arg().empty()) {
-      PyList_SetItem(arglist, i, make_pyobjectid(worker_capsule, task.arg(i).objectid()));
-      objectids.push_back(task.arg(i).objectid());
-    } else {
-      PyObject* serialized_arg = PyString_FromStringAndSize(task.arg(i).serialized_arg().data(), task.arg(i).serialized_arg().size());
-      PyList_SetItem(arglist, i, serialized_arg);
-    }
-  }
-  Worker* worker;
-  PyObjectToWorker(worker_capsule, &worker);
-  worker->decrement_reference_count(objectids);
-  int resultsize = task.result_size();
-  std::vector<ObjectID> result_objectids;
-  PyObject* resultlist = PyList_New(resultsize);
-  for (int i = 0; i < resultsize; ++i) {
-    PyList_SetItem(resultlist, i, make_pyobjectid(worker_capsule, task.result(i)));
-    result_objectids.push_back(task.result(i));
-  }
-  worker->decrement_reference_count(result_objectids); // The corresponding increment is done in SubmitTask in the scheduler.
-  PyObject* t = PyTuple_New(3); // We set the items of the tuple using PyTuple_SetItem, because that transfers ownership to the tuple.
-  PyTuple_SetItem(t, 0, string);
-  PyTuple_SetItem(t, 1, arglist);
-  PyTuple_SetItem(t, 2, resultlist);
-  return t;
-}
-
-// Ray Python API
-
-static PyObject* create_worker(PyObject* self, PyObject* args) {
-  const char* node_ip_address;
-  const char* scheduler_address;
-  // The object store address can be the empty string, in which case the
-  // scheduler will choose the object store address.
-  const char* objstore_address;
-  int mode;
-  const char* log_file_name;
-  if (!PyArg_ParseTuple(args, "sssis", &node_ip_address, &scheduler_address, &objstore_address, &mode, &log_file_name)) {
-    return NULL;
-  }
-  // Set the logging file.
-  create_log_dir_or_die(log_file_name);
-  global_ray_config.log_to_file = true;
-  global_ray_config.logfile.open(log_file_name);
-  // Create the worker.
-  bool is_driver = (mode != Mode::WORKER_MODE);
-  Worker* worker = new Worker(std::string(node_ip_address), std::string(scheduler_address), static_cast<Mode>(mode));
-  // Register the worker.
-  worker->register_worker(std::string(node_ip_address), std::string(objstore_address), is_driver);
-
-  PyObject* t = PyTuple_New(2);
-  PyObject* worker_capsule = PyCapsule_New(static_cast<void*>(worker), "worker", &WorkerCapsule_Destructor);
-  PyTuple_SetItem(t, 0, worker_capsule);
-  PyTuple_SetItem(t, 1, PyString_FromString(worker->get_worker_address()));
-  return t;
-}
-
-static PyObject* disconnect(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  worker->disconnect();
-  Py_RETURN_NONE;
-}
-
-static PyObject* connected(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  if (worker->connected()) {
-    Py_RETURN_TRUE;
-  }
-  Py_RETURN_FALSE;
-}
-
-static PyObject* wait_for_next_message(PyObject* self, PyObject* args) {
-  PyObject* worker_capsule;
-  if (!PyArg_ParseTuple(args, "O", &worker_capsule)) {
-    return NULL;
-  }
-  Worker* worker;
-  PyObjectToWorker(worker_capsule, &worker);
-  if (std::unique_ptr<WorkerMessage> message = worker->receive_next_message()) {
-    bool task_present = !message->task().name().empty();
-    bool function_present = !message->function().implementation().empty();
-    bool reusable_variable_present = !message->reusable_variable().name().empty();
-    bool function_to_run_present = !message->function_to_run().implementation().empty();
-    RAY_CHECK(task_present + function_present + reusable_variable_present + function_to_run_present <= 1, "The worker message should contain at most one item.");
-    PyObject* t = PyTuple_New(2);
-    if (task_present) {
-      PyTuple_SetItem(t, 0, PyString_FromString("task"));
-      PyTuple_SetItem(t, 1, deserialize_task(worker_capsule, message->task()));
-    } else if (function_present) {
-      PyTuple_SetItem(t, 0, PyString_FromString("function"));
-      PyObject* remote_function_data = PyTuple_New(2);
-      PyTuple_SetItem(remote_function_data, 0, PyString_FromStringAndSize(message->function().name().data(), static_cast<ssize_t>(message->function().name().size())));
-      PyTuple_SetItem(remote_function_data, 1, PyString_FromStringAndSize(message->function().implementation().data(), static_cast<ssize_t>(message->function().implementation().size())));
-      PyTuple_SetItem(t, 1, remote_function_data);
-    } else if (reusable_variable_present) {
-      PyTuple_SetItem(t, 0, PyString_FromString("reusable_variable"));
-      PyObject* reusable_variable = PyTuple_New(3);
-      PyTuple_SetItem(reusable_variable, 0, PyString_FromStringAndSize(message->reusable_variable().name().data(), static_cast<ssize_t>(message->reusable_variable().name().size())));
-      PyTuple_SetItem(reusable_variable, 1, PyString_FromStringAndSize(message->reusable_variable().initializer().implementation().data(), static_cast<ssize_t>(message->reusable_variable().initializer().implementation().size())));
-      PyTuple_SetItem(reusable_variable, 2, PyString_FromStringAndSize(message->reusable_variable().reinitializer().implementation().data(), static_cast<ssize_t>(message->reusable_variable().reinitializer().implementation().size())));
-      PyTuple_SetItem(t, 1, reusable_variable);
-    } else if (function_to_run_present) {
-      PyTuple_SetItem(t, 0, PyString_FromString("function_to_run"));
-      PyTuple_SetItem(t, 1, PyString_FromStringAndSize(message->function_to_run().implementation().data(), static_cast<ssize_t>(message->function_to_run().implementation().size())));
-    } else {
-      PyTuple_SetItem(t, 0, PyString_FromString("die"));
-      Py_INCREF(Py_None);
-      PyTuple_SetItem(t, 1, Py_None);
-    }
-    return t;
-  }
-  RAY_CHECK(false, "This code should be unreachable.");
-  Py_RETURN_NONE;
-}
-
-static PyObject* run_function_on_all_workers(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* function;
-  int function_size;
-  if (!PyArg_ParseTuple(args, "O&s#", &PyObjectToWorker, &worker, &function, &function_size)) {
-    return NULL;
-  }
-  worker->run_function_on_all_workers(std::string(function, static_cast<size_t>(function_size)));
-  Py_RETURN_NONE;
-}
-
-static PyObject* export_remote_function(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* function_name;
-  const char* function;
-  int function_size;
-  if (!PyArg_ParseTuple(args, "O&ss#", &PyObjectToWorker, &worker, &function_name, &function, &function_size)) {
-    return NULL;
-  }
-  if (worker->export_remote_function(std::string(function_name), std::string(function, static_cast<size_t>(function_size)))) {
-    Py_RETURN_TRUE;
-  } else {
-    Py_RETURN_FALSE;
-  }
-}
-
-static PyObject* export_reusable_variable(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* name;
-  int name_size;
-  const char* initializer;
-  int initializer_size;
-  const char* reinitializer;
-  int reinitializer_size;
-  if (!PyArg_ParseTuple(args, "O&s#s#s#", &PyObjectToWorker, &worker, &name, &name_size, &initializer, &initializer_size, &reinitializer, &reinitializer_size)) {
-    return NULL;
-  }
-  std::string name_str(name, static_cast<size_t>(name_size));
-  std::string initializer_str(initializer, static_cast<size_t>(initializer_size));
-  std::string reinitializer_str(reinitializer, static_cast<size_t>(reinitializer_size));
-  worker->export_reusable_variable(name_str, initializer_str, reinitializer_str);
-  Py_RETURN_NONE;
-}
-
-static PyObject* submit_task(PyObject* self, PyObject* args) {
-  PyObject* worker_capsule;
-  Task* task;
-  if (!PyArg_ParseTuple(args, "OO&", &worker_capsule, &PyObjectToTask, &task)) {
-    return NULL;
-  }
-  Worker* worker;
-  PyObjectToWorker(worker_capsule, &worker);
-  SubmitTaskRequest request;
-  request.set_allocated_task(task);
-  SubmitTaskReply reply = worker->submit_task(&request);
-  request.release_task(); // TODO: Make sure that task is not moved, otherwise capsule pointer needs to be updated
-  if (reply.no_workers()) {
-    PyErr_SetString(RayError, "No workers have registered with the scheduler, so this function cannot be run.");
-    return NULL;
-  }
-  if (!reply.function_registered()) {
-    PyErr_SetString(RayError, "No worker has registered this function with the scheduler.");
-    return NULL;
-  }
-  int size = reply.result_size();
-  PyObject* list = PyList_New(size);
-  std::vector<ObjectID> result_objectids;
-  for (int i = 0; i < size; ++i) {
-    PyList_SetItem(list, i, make_pyobjectid(worker_capsule, reply.result(i)));
-    result_objectids.push_back(reply.result(i));
-  }
-  worker->decrement_reference_count(result_objectids); // The corresponding increment is done in SubmitTask in the scheduler.
-  return list;
-}
-
-static PyObject* ready_for_new_task(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  worker->ready_for_new_task();
-  Py_RETURN_NONE;
-}
-
-static PyObject* register_remote_function(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* function_name;
-  int num_return_vals;
-  if (!PyArg_ParseTuple(args, "O&si", &PyObjectToWorker, &worker, &function_name, &num_return_vals)) {
-    return NULL;
-  }
-  worker->register_remote_function(std::string(function_name), num_return_vals);
-  Py_RETURN_NONE;
-}
-
-static PyObject* notify_failure(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* name;
-  const char* error_message;
-  int type;
-  if (!PyArg_ParseTuple(args, "O&ssi", &PyObjectToWorker, &worker, &name, &error_message, &type)) {
-    return NULL;
-  }
-  worker->notify_failure(static_cast<FailedType>(type), std::string(name), std::string(error_message));
-  Py_RETURN_NONE;
-}
-
-static PyObject* get_objectid(PyObject* self, PyObject* args) {
-  PyObject* worker_capsule;
-  if (!PyArg_ParseTuple(args, "O", &worker_capsule)) {
-    return NULL;
-  }
-  Worker* worker;
-  PyObjectToWorker(worker_capsule, &worker);
-  ObjectID objectid = worker->get_objectid();
-  return make_pyobjectid(worker_capsule, objectid);
-}
-
-static PyObject* add_contained_objectids(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  PyObject* contained_objectids;
-  if (!PyArg_ParseTuple(args, "O&O&O", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid, &contained_objectids)) {
-    return NULL;
-  }
-  RAY_CHECK(PyList_Check(contained_objectids), "The contained_objectids argument must be a list.")
-  std::vector<ObjectID> vec_contained_objectids;
-  size_t size = PyList_Size(contained_objectids);
-  for (size_t i = 0; i < size; ++i) {
-    ObjectID contained_objectid;
-    PyObjectToObjectID(PyList_GetItem(contained_objectids, i), &contained_objectid);
-    vec_contained_objectids.push_back(contained_objectid);
-  }
-  worker->add_contained_objectids(objectid, vec_contained_objectids);
-  Py_RETURN_NONE;
-}
-
-static PyObject* request_object(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID objectid;
-  if (!PyArg_ParseTuple(args, "O&O&", &PyObjectToWorker, &worker, &PyObjectToObjectID, &objectid)) {
-    return NULL;
-  }
-  worker->request_object(objectid);
-  Py_RETURN_NONE;
-}
-
-static PyObject* wait(PyObject* self, PyObject* args) {
-  Worker* worker;
-  PyObject* objectids;
-  if (!PyArg_ParseTuple(args, "O&O", &PyObjectToWorker, &worker, &objectids)) {
-    return NULL;
-  }
-  std::vector<ObjectID> objectids_vec;
-  for (size_t i = 0; i < PyList_Size(objectids); ++i) {
-    ObjectID objectid;
-    PyObjectToObjectID(PyList_GetItem(objectids, i), &objectid);
-    objectids_vec.push_back(objectid);
-  }
-  std::vector<int> indices = worker->wait(objectids_vec);
-  PyObject* result = PyList_New(indices.size());
-  for (size_t i = 0; i < indices.size(); ++i) {
-    PyList_SetItem(result, i, PyInt_FromLong(indices[i]));
-  }
-  return result;
-}
-
-static PyObject* alias_objectids(PyObject* self, PyObject* args) {
-  Worker* worker;
-  ObjectID alias_objectid;
-  ObjectID target_objectid;
-  if (!PyArg_ParseTuple(args, "O&O&O&", &PyObjectToWorker, &worker, &PyObjectToObjectID, &alias_objectid, &PyObjectToObjectID, &target_objectid)) {
-    return NULL;
-  }
-  worker->alias_objectids(alias_objectid, target_objectid);
-  Py_RETURN_NONE;
-}
-
-static PyObject* scheduler_info(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  ClientContext context;
-  SchedulerInfoRequest request;
-  SchedulerInfoReply reply;
-  worker->scheduler_info(context, request, reply);
-
-  // Unpack the target object reference information.
-  PyObject* target_objectid_list = PyList_New(reply.target_objectid_size());
-  for (size_t i = 0; i < reply.target_objectid_size(); ++i) {
-    PyList_SetItem(target_objectid_list, i, PyInt_FromLong(reply.target_objectid(i)));
-  }
-  // Unpack the reference count information.
-  PyObject* reference_count_list = PyList_New(reply.reference_count_size());
-  for (size_t i = 0; i < reply.reference_count_size(); ++i) {
-    PyList_SetItem(reference_count_list, i, PyInt_FromLong(reply.reference_count(i)));
-  }
-  // Unpack the available worker information.
-  PyObject* available_worker_list = PyList_New(reply.avail_worker_size());
-  for (size_t i = 0; i < reply.avail_worker_size(); ++i) {
-    PyList_SetItem(available_worker_list, i, PyInt_FromLong(reply.avail_worker(i)));
-  }
-  // Unpack the object store information.
-  PyObject* objstore_list = PyList_New(reply.objstore_size());
-  for (size_t i = 0; i < reply.objstore_size(); ++i) {
-    PyObject* objstore_data = PyDict_New();
-    set_dict_item_and_transfer_ownership(objstore_data, PyString_FromString("objstoreid"), PyInt_FromLong(reply.objstore(i).objstoreid()));
-    set_dict_item_and_transfer_ownership(objstore_data, PyString_FromString("address"), PyString_FromStringAndSize(reply.objstore(i).address().data(), reply.objstore(i).address().size()));
-    PyList_SetItem(objstore_list, i, objstore_data);
-  }
-
-  // Store the unpacked values in a dictionary to return.
-  PyObject* dict = PyDict_New();
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("target_objectids"), target_objectid_list);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("reference_counts"), reference_count_list);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("available_workers"), available_worker_list);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("objstores"), objstore_list);
-  return dict;
-}
-
-static PyObject* failure_to_dict(const Failure& failure) {
-  PyObject* failure_dict = PyDict_New();
-  set_dict_item_and_transfer_ownership(failure_dict, PyString_FromString("workerid"), PyInt_FromLong(failure.workerid()));
-  set_dict_item_and_transfer_ownership(failure_dict, PyString_FromString("worker_address"), PyString_FromStringAndSize(failure.worker_address().data(), failure.worker_address().size()));
-  set_dict_item_and_transfer_ownership(failure_dict, PyString_FromString("function_name"), PyString_FromStringAndSize(failure.name().data(), failure.name().size()));
-  set_dict_item_and_transfer_ownership(failure_dict, PyString_FromString("error_message"), PyString_FromStringAndSize(failure.error_message().data(), failure.error_message().size()));
-  return failure_dict;
-}
-
-static PyObject* task_info(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  ClientContext context;
-  TaskInfoRequest request;
-  TaskInfoReply reply;
-  worker->task_info(context, request, reply);
-
-  PyObject* failed_tasks_list = PyList_New(reply.failed_task_size());
-  for (size_t i = 0; i < reply.failed_task_size(); ++i) {
-    const TaskStatus& info = reply.failed_task(i);
-    PyObject* info_dict = PyDict_New();
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("worker_address"), PyString_FromStringAndSize(info.worker_address().data(), info.worker_address().size()));
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("function_name"), PyString_FromStringAndSize(info.function_name().data(), info.function_name().size()));
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("operationid"), PyInt_FromLong(info.operationid()));
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("error_message"), PyString_FromStringAndSize(info.error_message().data(), info.error_message().size()));
-    PyList_SetItem(failed_tasks_list, i, info_dict);
-  }
-
-  PyObject* running_tasks_list = PyList_New(reply.running_task_size());
-  for (size_t i = 0; i < reply.running_task_size(); ++i) {
-    const TaskStatus& info = reply.running_task(i);
-    PyObject* info_dict = PyDict_New();
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("worker_address"), PyString_FromStringAndSize(info.worker_address().data(), info.worker_address().size()));
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("function_name"), PyString_FromStringAndSize(info.function_name().data(), info.function_name().size()));
-    set_dict_item_and_transfer_ownership(info_dict, PyString_FromString("operationid"), PyInt_FromLong(info.operationid()));
-    PyList_SetItem(running_tasks_list, i, info_dict);
-  }
-
-  PyObject* failed_remote_function_imports = PyList_New(reply.failed_remote_function_import_size());
-  for (size_t i = 0; i < reply.failed_remote_function_import_size(); ++i) {
-    PyList_SetItem(failed_remote_function_imports, i, failure_to_dict(reply.failed_remote_function_import(i)));
-  }
-
-  PyObject* failed_reusable_variable_imports = PyList_New(reply.failed_reusable_variable_import_size());
-  for (size_t i = 0; i < reply.failed_reusable_variable_import_size(); ++i) {
-    PyList_SetItem(failed_reusable_variable_imports, i, failure_to_dict(reply.failed_reusable_variable_import(i)));
-  }
-
-  PyObject* failed_reinitialize_reusable_variables = PyList_New(reply.failed_reinitialize_reusable_variable_size());
-  for (size_t i = 0; i < reply.failed_reinitialize_reusable_variable_size(); ++i) {
-    PyList_SetItem(failed_reinitialize_reusable_variables, i, failure_to_dict(reply.failed_reinitialize_reusable_variable(i)));
-  }
-
-  PyObject* failed_function_to_runs = PyList_New(reply.failed_function_to_run_size());
-  for (size_t i = 0; i < reply.failed_function_to_run_size(); ++i) {
-    PyList_SetItem(failed_function_to_runs, i, failure_to_dict(reply.failed_function_to_run(i)));
-  }
-
-  PyObject* dict = PyDict_New();
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("failed_tasks"), failed_tasks_list);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("running_tasks"), running_tasks_list);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("failed_remote_function_imports"), failed_remote_function_imports);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("failed_reusable_variable_imports"), failed_reusable_variable_imports);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("failed_reinitialize_reusable_variables"), failed_reinitialize_reusable_variables);
-  set_dict_item_and_transfer_ownership(dict, PyString_FromString("failed_function_to_runs"), failed_function_to_runs);
-  return dict;
-}
-
-static PyObject* dump_computation_graph(PyObject* self, PyObject* args) {
-  Worker* worker;
-  const char* output_file_name;
-  if (!PyArg_ParseTuple(args, "O&s", &PyObjectToWorker, &worker, &output_file_name)) {
-    return NULL;
-  }
-  ClientContext context;
-  SchedulerInfoRequest request;
-  SchedulerInfoReply reply;
-  worker->scheduler_info(context, request, reply);
-  std::fstream output(output_file_name, std::ios::out | std::ios::trunc | std::ios::binary);
-  RAY_CHECK(reply.computation_graph().SerializeToOstream(&output), "Cannot dump computation graph to file " << output_file_name);
-  Py_RETURN_NONE;
-}
-
-static PyObject* kill_workers(PyObject* self, PyObject* args) {
-  Worker* worker;
-  if (!PyArg_ParseTuple(args, "O&", &PyObjectToWorker, &worker)) {
-    return NULL;
-  }
-  ClientContext context;
-  if (worker->kill_workers(context)) {
-    Py_RETURN_TRUE;
-  } else {
-    Py_RETURN_FALSE;
-  }
-}
-
-static PyMethodDef RayLibMethods[] = {
- { "serialize_objectid", serialize_objectid, METH_VARARGS, "serialize an object id" },
- { "deserialize_objectid", deserialize_objectid, METH_VARARGS, "deserialize an object id" },
- { "allocate_buffer", allocate_buffer, METH_VARARGS, "Allocates and returns buffer for objectid."},
- { "finish_buffer", finish_buffer, METH_VARARGS, "Makes the buffer immutable and closes memory segment of objectid."},
- { "get_buffer", get_buffer, METH_VARARGS, "Gets buffer for objectid"},
- { "is_arrow", is_arrow, METH_VARARGS, "is the object in the local object store an arrow object?"},
- { "unmap_object", unmap_object, METH_VARARGS, "unmap the object from the client's shared memory pool"},
- { "serialize_task", serialize_task, METH_VARARGS, "serialize a task to protocol buffers" },
- { "create_worker", create_worker, METH_VARARGS, "connect to the scheduler and the object store" },
- { "disconnect", disconnect, METH_VARARGS, "disconnect the worker from the scheduler and the object store" },
- { "connected", connected, METH_VARARGS, "check if the worker is connected to the scheduler and the object store" },
- { "register_remote_function", register_remote_function, METH_VARARGS, "register a function with the scheduler" },
- { "notify_failure", notify_failure, METH_VARARGS, "notify the scheduler of a failure" },
- { "add_contained_objectids", add_contained_objectids, METH_VARARGS, "notify the scheduler about the object IDs contained in a remote object" },
- { "get_objectid", get_objectid, METH_VARARGS, "register a new object reference with the scheduler" },
- { "request_object" , request_object, METH_VARARGS, "request an object to be delivered to the local object store" },
- { "wait" , wait, METH_VARARGS, "checks the scheduler to see if a object can be gotten" },
- { "alias_objectids", alias_objectids, METH_VARARGS, "make two objectids refer to the same object" },
- { "wait_for_next_message", wait_for_next_message, METH_VARARGS, "get next message from scheduler (blocking)" },
- { "submit_task", submit_task, METH_VARARGS, "call a remote function" },
- { "ready_for_new_task", ready_for_new_task, METH_VARARGS, "notify the scheduler that the worker is ready for a new task" },
- { "scheduler_info", scheduler_info, METH_VARARGS, "get info about scheduler state" },
- { "task_info", task_info, METH_VARARGS, "get information about task statuses and failures" },
- { "run_function_on_all_workers", run_function_on_all_workers, METH_VARARGS, "run an arbitrary function on all workers" },
- { "export_remote_function", export_remote_function, METH_VARARGS, "export a remote function to workers" },
- { "export_reusable_variable", export_reusable_variable, METH_VARARGS, "export a reusable variable to the workers" },
- { "dump_computation_graph", dump_computation_graph, METH_VARARGS, "dump the current computation graph to a file" },
- { "kill_workers", kill_workers, METH_VARARGS, "kills all of the workers" },
- { NULL, NULL, 0, NULL }
-};
-
-PyMODINIT_FUNC initlibraylib(void) {
-  PyObject* m;
-  PyObjectIDType.tp_new = PyType_GenericNew;
-  if (PyType_Ready(&PyObjectIDType) < 0) {
-    return;
-  }
-  m = Py_InitModule3("libraylib", RayLibMethods, "Python C Extension for Ray");
-  Py_INCREF(&PyObjectIDType);
-  PyModule_AddObject(m, "ObjectID", (PyObject *)&PyObjectIDType);
-  char ray_error[] = "ray.error";
-  char ray_size_error[] = "ray_size.error";
-  RayError = PyErr_NewException(ray_error, NULL, NULL);
-  RaySizeError = PyErr_NewException(ray_size_error, NULL, NULL);
-  Py_INCREF(RayError);
-  Py_INCREF(RaySizeError);
-  PyModule_AddObject(m, "ray_error", RayError);
-  PyModule_AddObject(m, "ray_size_error", RaySizeError);
-  import_array();
-
-  // Export constants used for the worker mode types so they can be accessed
-  // from Python. The Mode enum is defined in worker.h.
-  PyModule_AddIntConstant(m, "SCRIPT_MODE", Mode::SCRIPT_MODE);
-  PyModule_AddIntConstant(m, "WORKER_MODE", Mode::WORKER_MODE);
-  PyModule_AddIntConstant(m, "PYTHON_MODE", Mode::PYTHON_MODE);
-  PyModule_AddIntConstant(m, "SILENT_MODE", Mode::SILENT_MODE);
-
-  // Export constants for the failure types so they can be accessed from Python.
-  // The FailedType enum is defined in types.proto.
-  PyModule_AddIntConstant(m, "FailedTask", FailedType::FailedTask);
-  PyModule_AddIntConstant(m, "FailedRemoteFunctionImport", FailedType::FailedRemoteFunctionImport);
-  PyModule_AddIntConstant(m, "FailedReusableVariableImport", FailedType::FailedReusableVariableImport);
-  PyModule_AddIntConstant(m, "FailedReinitializeReusableVariable", FailedType::FailedReinitializeReusableVariable);
-  PyModule_AddIntConstant(m, "FailedFunctionToRun", FailedType::FailedFunctionToRun);
-}
-
-}
diff --git a/src/scheduler.cc b/src/scheduler.cc
deleted file mode 100644
index 3a9814dc4..000000000
--- a/src/scheduler.cc
+++ /dev/null
@@ -1,1187 +0,0 @@
-#include "scheduler.h"
-
-#include <random>
-#include <thread>
-#include <chrono>
-#include <sstream>
-
-#include "utils.h"
-
-// Macro used for acquiring locks. Required to pass along the field name and the line number without duplicating code.
-#define GET(FieldName) get(FieldName, #FieldName, __LINE__)
-
-#ifndef NDEBUG
-template<>
-class SchedulerService::MySynchronizedPtr<void> {
-  SchedulerService* me_;  // If NULL, then no lock is being checked
-  size_t order_delta_;
-  const char* name_;
-  unsigned int line_number_;
-  // ID returned seems to always be zero on Mac.
-  // I unfortunately can't find a workaround, so if the returned ID is zero, then the caller should not rely on it identifying the thread.
-  static unsigned long long get_thread_id() {
-    unsigned long long id = 0;
-    std::stringstream ss;
-    ss << std::this_thread::get_id();
-    ss >> id;
-    return id;
-  }
-protected:
-  MySynchronizedPtr& operator=(MySynchronizedPtr&& other) {
-    if (this != &other) {
-      me_ = std::move(other.me_);
-      order_delta_ = std::move(other.order_delta_);
-      name_ = std::move(other.name_);
-      line_number_ = std::move(other.line_number_);
-
-      other.me_ = NULL;  // Disable lock checking logic on other now that it has been moved
-    }
-    return *this;
-  }
-  ~MySynchronizedPtr() {
-    unsigned long long thread_id = get_thread_id();
-    if (thread_id != 0 && me_ != NULL) {
-      auto lock_orders = me_->lock_orders_.unchecked_get();
-      // Look for a previous lock on this thread -- it must exist, since this thread supposedly had the lock...
-      auto found = lock_orders->begin();
-      while (found != lock_orders->end() && found->first != thread_id) {
-        ++found;
-      }
-      RAY_CHECK(found != lock_orders->end() && found->second.first >= order_delta_, "Thread " << thread_id << " attempted to unlock a lock it didn't hold on line " << line_number_);
-      // Subtract back the delta
-      found->second.first -= order_delta_;
-      found->second.second = name_;
-      // If it goes to zero, then this thread no longer has locks, so remove it from the list
-      if (found->second.first == 0) {
-        using std::swap; swap(*found, lock_orders->back());
-        lock_orders->pop_back();
-      }
-      me_ = NULL;
-    }
-  }
-  MySynchronizedPtr(MySynchronizedPtr&& other) : me_() {
-    *this = std::move(other);
-  }
-  MySynchronizedPtr(SchedulerService* me, size_t order, const char* name, unsigned int line_number) : me_(me), order_delta_(order), name_(name), line_number_(line_number) {
-    unsigned long long thread_id = get_thread_id();
-    if (thread_id != 0 && me_ != NULL) {
-      auto lock_orders = me_->lock_orders_.unchecked_get();
-      auto found = lock_orders->begin();
-      // Look for a previous lock on this thread -- it shouldn't exist since these are not recursive locks
-      while (found != lock_orders->end() && found->first != thread_id) {
-        ++found;
-      }
-      if (found == lock_orders->end()) {
-        found = lock_orders->insert(found, std::make_pair(thread_id, std::make_pair(0, name_)));
-      } else if (thread_id != 0) {
-        RAY_CHECK_GE(order, found->second.first, "Thread " << thread_id << " attempted to lock " << name_ << " on line " << line_number_ << " after " << found->second.second);
-      }
-      // Store the delta between the last lock and this lock (each identified by the field offset) so we can reverse it
-      order_delta_ = order - found->second.first;
-      // Record the fact that we locked this field in the scheduler
-      found->second.first = order;
-      found->second.second = name_;
-    }
-  }
-};
-template<class T>
-class SchedulerService::MySynchronizedPtr : SynchronizedPtr<T>, MySynchronizedPtr<void> {
-  // TODO(mniknami): release(), etc. are private here -- implementing them is extra work we don't need yet
-public:
-  using SynchronizedPtr<T>::operator*;
-  using SynchronizedPtr<T>::operator->;
-  MySynchronizedPtr(SchedulerService& me, Synchronized<T>& value, const char* name, unsigned int line_number) :
-    SynchronizedPtr<T>(value.unchecked_get()),
-    MySynchronizedPtr<void>(&me, static_cast<size_t>(reinterpret_cast<unsigned char const *>(&value) - reinterpret_cast<unsigned char const *>(&me)), name, line_number) {
-  }
-  MySynchronizedPtr(MySynchronizedPtr&& other) = default;
-  MySynchronizedPtr& operator=(MySynchronizedPtr&& other) = default;
-};
-
-template<class T>
-SchedulerService::MySynchronizedPtr<T> SchedulerService::get(Synchronized<T>& my_field, const char* name, unsigned int line_number) { return MySynchronizedPtr<T>(*this, my_field, name, line_number); }
-
-template<class T>
-SchedulerService::MySynchronizedPtr<const T> SchedulerService::get(const Synchronized<T>& my_field, const char* name, unsigned int line_number) const { return MySynchronizedPtr<const T>(*this, my_field, name, line_number); }
-#else
-template<class T>
-SchedulerService::MySynchronizedPtr<T> SchedulerService::get(Synchronized<T>& my_field, const char* name, unsigned int line_number) { (void) name; (void) line_number; return my_field.unchecked_get(); }
-
-template<class T>
-SchedulerService::MySynchronizedPtr<const T> SchedulerService::get(const Synchronized<T>& my_field, const char* name, unsigned int line_number) const { (void) name; (void) line_number; return my_field.unchecked_get(); }
-#endif
-
-SchedulerService::SchedulerService(SchedulingAlgorithmType scheduling_algorithm) : scheduling_algorithm_(scheduling_algorithm) {}
-
-Status SchedulerService::SubmitTask(ServerContext* context, const SubmitTaskRequest* request, SubmitTaskReply* reply) {
-  std::unique_ptr<Task> task(new Task(request->task())); // need to copy, because request is const
-  size_t num_return_vals;
-  // If there are no workers, then we will set this to true below.
-  reply->set_no_workers(false);
-  {
-    auto fntable = GET(fntable_);
-    FnTable::const_iterator fn = fntable->find(task->name());
-    if (fn == fntable->end()) {
-      num_return_vals = 0;
-      reply->set_function_registered(false);
-      // Check if there are any workers registered with the scheduler, so that
-      // we can tell the worker if there aren't so that it can display a better
-      // error message.
-      int num_live_workers = 0;
-      auto workers = GET(workers_);
-      for (size_t i = 0; i < workers->size(); ++i) {
-        WorkerHandle* worker = &(*workers)[i];
-        // Check if this is a driver and that it is still connected.
-        if (worker->current_task != ROOT_OPERATION && worker->worker_stub) {
-          num_live_workers += 1;
-        }
-      }
-      if (num_live_workers == 0) {
-        reply->set_no_workers(true);
-      }
-    } else {
-      num_return_vals = fn->second.num_return_vals();
-      reply->set_function_registered(true);
-    }
-  }
-  if (reply->function_registered()) {
-    std::vector<ObjectID> result_objectids;
-    for (size_t i = 0; i < num_return_vals; ++i) {
-      ObjectID result = register_new_object();
-      reply->add_result(result);
-      task->add_result(result);
-      result_objectids.push_back(result);
-    }
-    {
-      auto reference_counts = GET(reference_counts_);
-      increment_ref_count(result_objectids, reference_counts); // We increment once so the objectids don't go out of scope before we reply to the worker that called SubmitTask. The corresponding decrement will happen in submit_task in raylib.
-      increment_ref_count(result_objectids, reference_counts); // We increment once so the objectids don't go out of scope before the task is scheduled on the worker. The corresponding decrement will happen in deserialize_task in raylib.
-    }
-
-    auto operation = std::unique_ptr<Operation>(new Operation());
-    operation->set_allocated_task(task.release());
-    operation->set_creator_operationid((*GET(workers_))[request->workerid()].current_task);
-
-    OperationId operationid = GET(computation_graph_)->add_operation(std::move(operation));
-    GET(task_queue_)->push_back(operationid);
-    schedule();
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::PutObj(ServerContext* context, const PutObjRequest* request, PutObjReply* reply) {
-  ObjectID objectid = register_new_object();
-  auto operation = std::unique_ptr<Operation>(new Operation());
-  operation->mutable_put()->set_objectid(objectid);
-  operation->set_creator_operationid((*GET(workers_))[request->workerid()].current_task);
-  GET(computation_graph_)->add_operation(std::move(operation));
-  reply->set_objectid(objectid);
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::RequestObj(ServerContext* context, const RequestObjRequest* request, AckReply* reply) {
-  size_t size = GET(objtable_)->size();
-  ObjectID objectid = request->objectid();
-  RAY_CHECK_LT(objectid, size, "internal error: no object with objectid " << objectid << " exists");
-  auto operation = std::unique_ptr<Operation>(new Operation());
-  operation->mutable_get()->set_objectid(objectid);
-  operation->set_creator_operationid((*GET(workers_))[request->workerid()].current_task);
-  GET(computation_graph_)->add_operation(std::move(operation));
-  GET(get_queue_)->push_back(std::make_pair(request->workerid(), objectid));
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::AliasObjectIDs(ServerContext* context, const AliasObjectIDsRequest* request, AckReply* reply) {
-  ObjectID alias_objectid = request->alias_objectid();
-  ObjectID target_objectid = request->target_objectid();
-  RAY_LOG(RAY_ALIAS, "Aliasing objectid " << alias_objectid << " with objectid " << target_objectid);
-  RAY_CHECK_NEQ(alias_objectid, target_objectid, "internal error: attempting to alias objectid " << alias_objectid << " with itself.");
-  size_t size = GET(objtable_)->size();
-  RAY_CHECK_LT(alias_objectid, size, "internal error: no object with objectid " << alias_objectid << " exists");
-  RAY_CHECK_LT(target_objectid, size, "internal error: no object with objectid " << target_objectid << " exists");
-  {
-    auto target_objectids = GET(target_objectids_);
-    RAY_CHECK_EQ((*target_objectids)[alias_objectid], UNITIALIZED_ALIAS, "internal error: attempting to alias objectid " << alias_objectid << " with objectid " << target_objectid << ", but objectid " << alias_objectid << " has already been aliased with objectid " << (*target_objectids)[alias_objectid]);
-    (*target_objectids)[alias_objectid] = target_objectid;
-  }
-  (*GET(reverse_target_objectids_))[target_objectid].push_back(alias_objectid);
-  {
-    // The corresponding increment was done in register_new_object.
-    auto reference_counts = GET(reference_counts_); // we grab this lock because decrement_ref_count assumes it has been acquired
-    auto contained_objectids = GET(contained_objectids_); // we grab this lock because decrement_ref_count assumes it has been acquired
-    decrement_ref_count(std::vector<ObjectID>({alias_objectid}), reference_counts, contained_objectids);
-  }
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::RegisterObjStore(ServerContext* context, const RegisterObjStoreRequest* request, RegisterObjStoreReply* reply) {
-  auto objtable = GET(objtable_); // to protect objects_in_transit_
-  auto objstores = GET(objstores_);
-  ObjStoreId objstoreid = objstores->size();
-  auto channel = grpc::CreateChannel(request->objstore_address(), grpc::InsecureChannelCredentials());
-  objstores->push_back(ObjStoreHandle());
-  (*objstores)[objstoreid].address = request->objstore_address();
-  (*objstores)[objstoreid].channel = channel;
-  (*objstores)[objstoreid].objstore_stub = ObjStore::NewStub(channel);
-  reply->set_objstoreid(objstoreid);
-  objects_in_transit_.push_back(std::vector<ObjectID>());
-  return Status::OK;
-}
-
-Status SchedulerService::RegisterWorker(ServerContext* context, const RegisterWorkerRequest* request, RegisterWorkerReply* reply) {
-  std::string worker_address = request->worker_address();
-  std::string objstore_address = request->objstore_address();
-  std::string node_ip_address = request->node_ip_address();
-  bool is_driver = request->is_driver();
-  RAY_LOG(RAY_INFO, "Registering a worker from node with IP address " << node_ip_address);
-  // Find the object store to connect to. We use the max size to indicate that
-  // the object store for this worker has not been found.
-  ObjStoreId objstoreid = std::numeric_limits<size_t>::max();
-  // TODO: HACK: num_attempts is a hack
-  for (int num_attempts = 0; num_attempts < 30; ++num_attempts) {
-    auto objstores = GET(objstores_);
-    for (size_t i = 0; i < objstores->size(); ++i) {
-      if (objstore_address != "" && (*objstores)[i].address == objstore_address) {
-        // This object store address is the same as the provided object store
-        // address.
-        objstoreid = i;
-      }
-      if ((*objstores)[i].address.compare(0, node_ip_address.size(), node_ip_address) == 0) {
-        // The object store address was not provided and this object store
-        // address has node_ip_address as a prefix, so it is on the same machine
-        // as the worker that is registering.
-        objstoreid = i;
-        objstore_address = (*objstores)[i].address;
-      }
-    }
-    if (objstoreid == std::numeric_limits<size_t>::max()) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    } else {
-      break;
-    }
-  }
-  if (objstore_address.empty()) {
-    RAY_CHECK_NEQ(objstoreid, std::numeric_limits<size_t>::max(), "No object store with IP address " << node_ip_address << " has registered.");
-  } else {
-    RAY_CHECK_NEQ(objstoreid, std::numeric_limits<size_t>::max(), "Object store with address " << objstore_address << " not yet registered.");
-  }
-  // Populate the worker information.
-  WorkerId workerid;
-  {
-    auto workers = GET(workers_);
-    workerid = workers->size();
-    workers->push_back(WorkerHandle());
-    auto channel = grpc::CreateChannel(worker_address, grpc::InsecureChannelCredentials());
-    (*workers)[workerid].channel = channel;
-    (*workers)[workerid].objstoreid = objstoreid;
-    (*workers)[workerid].worker_stub = WorkerService::NewStub(channel);
-    (*workers)[workerid].worker_address = worker_address;
-    (*workers)[workerid].initial_exports_done = false;
-    if (is_driver) {
-      (*workers)[workerid].current_task = ROOT_OPERATION; // We use this field to identify which workers are drivers.
-    } else {
-      (*workers)[workerid].current_task = NO_OPERATION;
-    }
-  }
-  RAY_LOG(RAY_INFO, "Finished registering worker with workerid " << workerid << ", worker address " << worker_address << " on node with IP address " << node_ip_address << ", is_driver = " << is_driver << ", assigned to object store with id " << objstoreid << " and address " << objstore_address);
-  reply->set_workerid(workerid);
-  reply->set_objstoreid(objstoreid);
-  reply->set_objstore_address(objstore_address);
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::RegisterRemoteFunction(ServerContext* context, const RegisterRemoteFunctionRequest* request, AckReply* reply) {
-  RAY_LOG(RAY_INFO, "register function " << request->function_name() <<  " from workerid " << request->workerid());
-  register_function(request->function_name(), request->workerid(), request->num_return_vals());
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::NotifyFailure(ServerContext* context, const NotifyFailureRequest* request, AckReply* reply) {
-  const Failure failure = request->failure();
-  WorkerId workerid = failure.workerid();
-  if (failure.type() == FailedType::FailedTask) {
-    // A task threw an exception while executing.
-    TaskStatus failed_task_info;
-    {
-      auto workers = GET(workers_);
-      failed_task_info.set_operationid((*workers)[workerid].current_task);
-      failed_task_info.set_function_name(failure.name());
-      failed_task_info.set_worker_address((*workers)[workerid].worker_address);
-      failed_task_info.set_error_message(failure.error_message());
-    }
-    GET(failed_tasks_)->push_back(failed_task_info);
-    RAY_LOG(RAY_INFO, "Error: Task " << failed_task_info.operationid() << " executing function " << failed_task_info.function_name() << " on worker " << workerid << " failed with error message:\n" << failed_task_info.error_message());
-  } else if (failure.type() == FailedType::FailedRemoteFunctionImport) {
-    // An exception was thrown while a remote function was being imported.
-    GET(failed_remote_function_imports_)->push_back(failure);
-    RAY_LOG(RAY_INFO, "Error: Worker " << workerid << " failed to import remote function " << failure.name() << ", failed with error message:\n" << failure.error_message());
-  } else if (failure.type() == FailedType::FailedReusableVariableImport) {
-    // An exception was thrown while a reusable variable was being imported.
-    GET(failed_reusable_variable_imports_)->push_back(failure);
-    RAY_LOG(RAY_INFO, "Error: Worker " << workerid << " failed to import reusable variable " << failure.name() << ", failed with error message:\n" << failure.error_message());
-  } else if (failure.type() == FailedType::FailedReinitializeReusableVariable) {
-    // An exception was thrown while a reusable variable was being imported.
-    GET(failed_reinitialize_reusable_variables_)->push_back(failure);
-    RAY_LOG(RAY_INFO, "Error: Worker " << workerid << " failed to reinitialize a reusable variable after running remote function " << failure.name() << ", failed with error message:\n" << failure.error_message());
-  } else if (failure.type() == FailedType::FailedFunctionToRun) {
-    // An exception was thrown while a function was being run on all workers.
-    GET(failed_function_to_runs_)->push_back(failure);
-    RAY_LOG(RAY_INFO, "Error: Worker " << workerid << " failed to run function " << failure.name() << " on all workers, failed with error message:\n" << failure.error_message());
-  } else {
-    RAY_CHECK(false, "This code should be unreachable.")
-  }
-  // Print the failure on the relevant driver. TODO(rkn): At the moment, this
-  // prints the failure on all of the drivers. It should probably only print it
-  // on the driver that caused the problem.
-  auto workers = GET(workers_);
-  for (size_t i = 0; i < workers->size(); ++i) {
-    WorkerHandle* worker = &(*workers)[i];
-    // Check if the worker is still connected.
-    if (worker->worker_stub) {
-      // Check if this is a driver.
-      if (worker->current_task == ROOT_OPERATION) {
-        ClientContext client_context;
-        PrintErrorMessageRequest print_request;
-        print_request.mutable_failure()->CopyFrom(request->failure());
-        AckReply print_reply;
-        // RAY_CHECK_GRPC(worker->worker_stub->PrintErrorMessage(&client_context, print_request, &print_reply));
-      }
-    }
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::ObjReady(ServerContext* context, const ObjReadyRequest* request, AckReply* reply) {
-  ObjectID objectid = request->objectid();
-  RAY_LOG(RAY_DEBUG, "object " << objectid << " ready on store " << request->objstoreid());
-  add_canonical_objectid(objectid);
-  add_location(objectid, request->objstoreid());
-  {
-    // If this is the first time that ObjReady has been called for this objectid,
-    // the corresponding increment was done in register_new_object in the
-    // scheduler. For all subsequent calls to ObjReady, the corresponding
-    // increment was done in deliver_object_if_necessary in the scheduler.
-    auto reference_counts = GET(reference_counts_); // we grab this lock because decrement_ref_count assumes it has been acquired
-    auto contained_objectids = GET(contained_objectids_); // we grab this lock because decrement_ref_count assumes it has been acquired
-    decrement_ref_count(std::vector<ObjectID>({objectid}), reference_counts, contained_objectids);
-  }
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::ReadyForNewTask(ServerContext* context, const ReadyForNewTaskRequest* request, AckReply* reply) {
-  WorkerId workerid = request->workerid();
-  {
-    auto workers = GET(workers_);
-    OperationId operationid = (*workers)[workerid].current_task;
-    RAY_LOG(RAY_INFO, "worker " << workerid << " is ready for a new task");
-    RAY_CHECK(operationid != ROOT_OPERATION, "A driver appears to have called ReadyForNewTask.");
-    {
-      // Check if the worker has been initialized yet, and if not, then give it
-      // all of the exported functions and all of the exported reusable variables.
-      if (!(*workers)[workerid].initial_exports_done) {
-        // This only needs to happen for this specific worker and not for all
-        // workers.
-        export_everything_to_all_workers_if_necessary(workers);
-      }
-    }
-    (*workers)[workerid].current_task = NO_OPERATION; // clear operation ID
-  }
-  GET(avail_workers_)->push_back(workerid);
-  schedule();
-  return Status::OK;
-}
-
-Status SchedulerService::IncrementRefCount(ServerContext* context, const IncrementRefCountRequest* request, AckReply* reply) {
-  int num_objectids = request->objectid_size();
-  RAY_CHECK_NEQ(num_objectids, 0, "Scheduler received IncrementRefCountRequest with 0 objectids.");
-  std::vector<ObjectID> objectids;
-  for (int i = 0; i < num_objectids; ++i) {
-    objectids.push_back(request->objectid(i));
-  }
-  auto reference_counts = GET(reference_counts_);
-  increment_ref_count(objectids, reference_counts);
-  return Status::OK;
-}
-
-Status SchedulerService::DecrementRefCount(ServerContext* context, const DecrementRefCountRequest* request, AckReply* reply) {
-  int num_objectids = request->objectid_size();
-  RAY_CHECK_NEQ(num_objectids, 0, "Scheduler received DecrementRefCountRequest with 0 objectids.");
-  std::vector<ObjectID> objectids;
-  for (int i = 0; i < num_objectids; ++i) {
-    objectids.push_back(request->objectid(i));
-  }
-  auto reference_counts = GET(reference_counts_); // we grab this lock, because decrement_ref_count assumes it has been acquired
-  auto contained_objectids = GET(contained_objectids_); // we grab this lock because decrement_ref_count assumes it has been acquired
-  decrement_ref_count(objectids, reference_counts, contained_objectids);
-  return Status::OK;
-}
-
-Status SchedulerService::AddContainedObjectIDs(ServerContext* context, const AddContainedObjectIDsRequest* request, AckReply* reply) {
-  ObjectID objectid = request->objectid();
-  // if (!is_canonical(objectid)) {
-    // TODO(rkn): Perhaps we don't need this check. It won't work because the objstore may not have called ObjReady yet.
-    // RAY_LOG(RAY_FATAL, "Attempting to add contained objectids for non-canonical objectid " << objectid);
-  // }
-  auto contained_objectids = GET(contained_objectids_);
-  RAY_CHECK_EQ((*contained_objectids)[objectid].size(), 0, "Attempting to add contained objectids for objectid " << objectid << ", but contained_objectids_[objectid].size() != 0.");
-  for (int i = 0; i < request->contained_objectid_size(); ++i) {
-    (*contained_objectids)[objectid].push_back(request->contained_objectid(i));
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::SchedulerInfo(ServerContext* context, const SchedulerInfoRequest* request, SchedulerInfoReply* reply) {
-  get_info(*request, reply);
-  return Status::OK;
-}
-
-Status SchedulerService::TaskInfo(ServerContext* context, const TaskInfoRequest* request, TaskInfoReply* reply) {
-  auto failed_tasks = GET(failed_tasks_);
-  auto failed_remote_function_imports = GET(failed_remote_function_imports_);
-  auto failed_reusable_variable_imports = GET(failed_reusable_variable_imports_);
-  auto failed_reinitialize_reusable_variables = GET(failed_reinitialize_reusable_variables_);
-  auto failed_function_to_runs = GET(failed_function_to_runs_);
-  auto computation_graph = GET(computation_graph_);
-  auto workers = GET(workers_);
-  // Return information about the failed tasks.
-  for (int i = 0; i < failed_tasks->size(); ++i) {
-    TaskStatus* info = reply->add_failed_task();
-    *info = (*failed_tasks)[i];
-  }
-  // Return information about currently running tasks.
-  for (size_t i = 0; i < workers->size(); ++i) {
-    OperationId operationid = (*workers)[i].current_task;
-    if (operationid != NO_OPERATION && operationid != ROOT_OPERATION) {
-      const Task& task = computation_graph->get_task(operationid);
-      TaskStatus* info = reply->add_running_task();
-      info->set_operationid(operationid);
-      info->set_function_name(task.name());
-      info->set_worker_address((*workers)[i].worker_address);
-    }
-  }
-  // Return information about failed remote function imports.
-  for (size_t i = 0; i < failed_remote_function_imports->size(); ++i) {
-    Failure* failure = reply->add_failed_remote_function_import();
-    *failure = (*failed_remote_function_imports)[i];
-  }
-  // Return information about failed reusable variable imports.
-  for (size_t i = 0; i < failed_reusable_variable_imports->size(); ++i) {
-    Failure* failure = reply->add_failed_reusable_variable_import();
-    *failure = (*failed_reusable_variable_imports)[i];
-  }
-  // Return information about failed reusable variable reinitializations.
-  for (size_t i = 0; i < failed_reinitialize_reusable_variables->size(); ++i) {
-    Failure* failure = reply->add_failed_reinitialize_reusable_variable();
-    *failure = (*failed_reinitialize_reusable_variables)[i];
-  }
-  // Return information about functions that failed to run on all workers.
-  for (size_t i = 0; i < failed_function_to_runs->size(); ++i) {
-    Failure* failure = reply->add_failed_function_to_run();
-    *failure = (*failed_function_to_runs)[i];
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::KillWorkers(ServerContext* context, const KillWorkersRequest* request, KillWorkersReply* reply) {
-  // TODO: Update reference counts
-  auto failed_tasks = GET(failed_tasks_);
-  auto get_queue = GET(get_queue_);
-  auto computation_graph = GET(computation_graph_);
-  auto fntable = GET(fntable_);
-  auto avail_workers = GET(avail_workers_);
-  auto task_queue = GET(task_queue_);
-  auto workers = GET(workers_);
-  size_t busy_workers = 0;
-  std::vector<WorkerHandle*> idle_workers;
-  RAY_LOG(RAY_INFO, "Attempting to kill workers.");
-  for (size_t i = 0; i < workers->size(); ++i) {
-    WorkerHandle* worker = &(*workers)[i];
-    if (worker->worker_stub) {
-      if (worker->current_task == NO_OPERATION) {
-        idle_workers.push_back(worker);
-        RAY_CHECK(std::find(avail_workers->begin(), avail_workers->end(), i) != avail_workers->end(), "Worker with workerid " << i << " is idle, but is not in avail_workers_");
-        RAY_LOG(RAY_INFO, "Worker with workerid " << i << " is idle.");
-      } else if (worker->current_task == ROOT_OPERATION) {
-        // Skip the driver
-        RAY_LOG(RAY_INFO, "Worker with workerid " << i << " is a driver.");
-      } else {
-        ++busy_workers;
-        RAY_LOG(RAY_INFO, "Worker with workerid " << i << " is running a task.");
-      }
-    }
-  }
-  if (task_queue->empty() && busy_workers == 0) {
-    RAY_LOG(RAY_INFO, "Killing " << idle_workers.size() << " idle workers.");
-    for (WorkerHandle* idle_worker : idle_workers) {
-      ClientContext client_context;
-      DieRequest die_request;
-      AckReply die_reply;
-      // TODO: Fault handling... what if a worker refuses to die? We just assume it dies here.
-      RAY_CHECK_GRPC(idle_worker->worker_stub->Die(&client_context, die_request, &die_reply));
-      idle_worker->worker_stub.reset();
-    }
-    avail_workers->clear();
-    fntable->clear();
-    reply->set_success(true);
-  } else {
-    RAY_LOG(RAY_INFO, "Either the task queue is not empty or there are still busy workers, so we are not killing any workers.");
-    reply->set_success(false);
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::RunFunctionOnAllWorkers(ServerContext* context, const RunFunctionOnAllWorkersRequest* request, AckReply* reply) {
-  auto workers = GET(workers_);
-  export_everything_to_all_workers_if_necessary(workers);
-  auto exported_functions_to_run = GET(exported_functions_to_run_);
-  // TODO(rkn): Does this do a deep copy?
-  exported_functions_to_run->push_back(std::unique_ptr<Function>(new Function(request->function())));
-  for (size_t i = 0; i < workers->size(); ++i) {
-    if ((*workers)[i].current_task != ROOT_OPERATION) {
-      export_function_to_run_to_worker(i, exported_functions_to_run->size() - 1, workers, exported_functions_to_run);
-    }
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::ExportRemoteFunction(ServerContext* context, const ExportRemoteFunctionRequest* request, AckReply* reply) {
-  auto workers = GET(workers_);
-  export_everything_to_all_workers_if_necessary(workers);
-  auto exported_remote_functions = GET(exported_remote_functions_);
-  // TODO(rkn): Does this do a deep copy?
-  exported_remote_functions->push_back(std::unique_ptr<Function>(new Function(request->function())));
-  for (size_t i = 0; i < workers->size(); ++i) {
-    if ((*workers)[i].current_task != ROOT_OPERATION) {
-      export_remote_function_to_worker(i, exported_remote_functions->size() - 1, workers, exported_remote_functions);
-    }
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::ExportReusableVariable(ServerContext* context, const ExportReusableVariableRequest* request, AckReply* reply) {
-  auto workers = GET(workers_);
-  export_everything_to_all_workers_if_necessary(workers);
-  auto exported_reusable_variables = GET(exported_reusable_variables_);
-  // TODO(rkn): Does this do a deep copy?
-  exported_reusable_variables->push_back(std::unique_ptr<ReusableVar>(new ReusableVar(request->reusable_variable())));
-  for (size_t i = 0; i < workers->size(); ++i) {
-    if ((*workers)[i].current_task != ROOT_OPERATION) {
-      export_reusable_variable_to_worker(i, exported_reusable_variables->size() - 1, workers, exported_reusable_variables);
-    }
-  }
-  return Status::OK;
-}
-
-Status SchedulerService::Wait(ServerContext* context, const WaitRequest* request, WaitReply* reply) {
-  auto objtable = GET(objtable_);
-  for (int i = 0; i < request->objectids_size(); ++i) {
-    ObjectID objectid = request->objectids(i);
-    if (has_canonical_objectid(objectid)) {
-      ObjectID canonical_objectid = get_canonical_objectid(objectid);
-      RAY_CHECK_LT(canonical_objectid, objtable->size(), "Canonical_objectid is outside object table.");
-      if ((*objtable)[canonical_objectid].size() != 0) {
-        reply->add_indices(i);
-      }
-    }
-  }
-  return Status::OK;
-}
-
-void SchedulerService::deliver_object_async_if_necessary(ObjectID canonical_objectid, ObjStoreId from, ObjStoreId to) {
-  bool object_present_or_in_transit;
-  {
-    auto objtable = GET(objtable_);
-    auto &locations = (*objtable)[canonical_objectid];
-    bool object_present = std::binary_search(locations.begin(), locations.end(), to);
-    auto &objects_in_flight = objects_in_transit_[to];
-    bool object_in_transit = (std::find(objects_in_flight.begin(), objects_in_flight.end(), canonical_objectid) != objects_in_flight.end());
-    object_present_or_in_transit = object_present || object_in_transit;
-    if (!object_present_or_in_transit) {
-      objects_in_flight.push_back(canonical_objectid);
-    }
-  }
-  if (!object_present_or_in_transit) {
-    deliver_object_async(canonical_objectid, from, to);
-  }
-}
-
-// TODO(rkn): This could execute multiple times with the same arguments before
-// the delivery finishes, but we only want it to happen once. Currently, the
-// redundancy is handled by the object store, which will only execute the
-// delivery once. However, we may want to handle it in the scheduler in the
-// future.
-//
-// deliver_object_async assumes that the aliasing for objectid has already been completed. That is, has_canonical_objectid(objectid) == true
-void SchedulerService::deliver_object_async(ObjectID canonical_objectid, ObjStoreId from, ObjStoreId to) {
-  RAY_CHECK_NEQ(from, to, "attempting to deliver canonical_objectid " << canonical_objectid << " from objstore " << from << " to itself.");
-  RAY_CHECK(is_canonical(canonical_objectid), "attempting to deliver objectid " << canonical_objectid << ", but this objectid is not a canonical objectid.");
-  {
-    // We increment once so the objectid doesn't go out of scope before the ObjReady
-    // method is called. The corresponding decrement will happen in ObjReady in
-    // the scheduler.
-    auto reference_counts = GET(reference_counts_); // we grab this lock because increment_ref_count assumes it has been acquired
-    increment_ref_count(std::vector<ObjectID>({canonical_objectid}), reference_counts);
-  }
-  ClientContext context;
-  AckReply reply;
-  StartDeliveryRequest request;
-  request.set_objectid(canonical_objectid);
-  auto objstores = GET(objstores_);
-  request.set_objstore_address((*objstores)[from].address);
-  RAY_CHECK_GRPC((*objstores)[to].objstore_stub->StartDelivery(&context, request, &reply));
-}
-
-void SchedulerService::schedule() {
-  // See what we can do in get_queue_
-  perform_gets();
-  if (scheduling_algorithm_ == SCHEDULING_ALGORITHM_NAIVE) {
-    schedule_tasks_naively(); // See what we can do in task_queue_
-  } else if (scheduling_algorithm_ == SCHEDULING_ALGORITHM_LOCALITY_AWARE) {
-    schedule_tasks_location_aware(); // See what we can do in task_queue_
-  } else {
-    RAY_CHECK(false, "scheduling algorithm not known");
-  }
-  perform_notify_aliases(); // See what we can do in alias_notification_queue_
-}
-
-// assign_task assumes that the canonical objectids for its arguments are all ready, that is has_canonical_objectid() is true for all of the call's arguments
-void SchedulerService::assign_task(OperationId operationid, WorkerId workerid, const MySynchronizedPtr<ComputationGraph> &computation_graph) {
-  // assign_task takes computation_graph as an argument, which is obtained by
-  // GET(computation_graph_), so we know that the data structure has been
-  // locked.
-  ObjStoreId objstoreid = get_store(workerid);
-  const Task& task = computation_graph->get_task(operationid);
-  ClientContext context;
-  ExecuteTaskRequest request;
-  AckReply reply;
-  RAY_LOG(RAY_INFO, "starting to send arguments");
-  for (size_t i = 0; i < task.arg_size(); ++i) {
-    if (task.arg(i).serialized_arg().empty()) {
-      ObjectID objectid = task.arg(i).objectid();
-      ObjectID canonical_objectid = get_canonical_objectid(objectid);
-      // Notify the relevant objstore about potential aliasing when it's ready
-      GET(alias_notification_queue_)->push_back(std::make_pair(objstoreid, std::make_pair(objectid, canonical_objectid)));
-      attempt_notify_alias(objstoreid, objectid, canonical_objectid);
-      RAY_LOG(RAY_DEBUG, "task contains object ref " << canonical_objectid);
-      deliver_object_async_if_necessary(canonical_objectid, pick_objstore(canonical_objectid), objstoreid);
-    }
-  }
-  {
-    auto workers = GET(workers_);
-    (*workers)[workerid].current_task = operationid;
-    request.mutable_task()->CopyFrom(task); // TODO(rkn): Is ownership handled properly here?
-    RAY_CHECK_GRPC((*workers)[workerid].worker_stub->ExecuteTask(&context, request, &reply));
-  }
-}
-
-bool SchedulerService::can_run(const Task& task) {
-  auto objtable = GET(objtable_);
-  for (int i = 0; i < task.arg_size(); ++i) {
-    if (task.arg(i).serialized_arg().empty()) {
-      ObjectID objectid = task.arg(i).objectid();
-      if (!has_canonical_objectid(objectid)) {
-        return false;
-      }
-      ObjectID canonical_objectid = get_canonical_objectid(objectid);
-      if (canonical_objectid >= objtable->size() || (*objtable)[canonical_objectid].size() == 0) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-ObjectID SchedulerService::register_new_object() {
-  // If we don't simultaneously lock objtable_ and target_objectids_, we will probably get errors.
-  // TODO(rkn): increment/decrement_reference_count also acquire reference_counts_lock_ and target_objectids_lock_ (through has_canonical_objectid()), which caused deadlock in the past
-  auto reference_counts = GET(reference_counts_);
-  auto contained_objectids = GET(contained_objectids_);
-  auto objtable = GET(objtable_);
-  auto target_objectids = GET(target_objectids_);
-  auto reverse_target_objectids = GET(reverse_target_objectids_);
-  ObjectID objtable_size = objtable->size();
-  ObjectID target_objectids_size = target_objectids->size();
-  ObjectID reverse_target_objectids_size = reverse_target_objectids->size();
-  ObjectID reference_counts_size = reference_counts->size();
-  ObjectID contained_objectids_size = contained_objectids->size();
-  RAY_CHECK_EQ(objtable_size, target_objectids_size, "objtable_ and target_objectids_ should have the same size, but objtable_.size() = " << objtable_size << " and target_objectids_.size() = " << target_objectids_size);
-  RAY_CHECK_EQ(objtable_size, reverse_target_objectids_size, "objtable_ and reverse_target_objectids_ should have the same size, but objtable_.size() = " << objtable_size << " and reverse_target_objectids_.size() = " << reverse_target_objectids_size);
-  RAY_CHECK_EQ(objtable_size, reference_counts_size, "objtable_ and reference_counts_ should have the same size, but objtable_.size() = " << objtable_size << " and reference_counts_.size() = " << reference_counts_size);
-  RAY_CHECK_EQ(objtable_size, contained_objectids_size, "objtable_ and contained_objectids_ should have the same size, but objtable_.size() = " << objtable_size << " and contained_objectids_.size() = " << contained_objectids_size);
-  objtable->push_back(std::vector<ObjStoreId>());
-  target_objectids->push_back(UNITIALIZED_ALIAS);
-  reverse_target_objectids->push_back(std::vector<ObjectID>());
-  reference_counts->push_back(0);
-  contained_objectids->push_back(std::vector<ObjectID>());
-  {
-    // We increment once so the objectid doesn't go out of scope before the ObjReady
-    // method is called. The corresponding decrement will happen either in
-    // ObjReady in the scheduler or in AliasObjectIDs in the scheduler.
-    increment_ref_count(std::vector<ObjectID>({objtable_size}), reference_counts); // Note that reference_counts_lock_ is acquired above, as assumed by increment_ref_count
-  }
-  return objtable_size;
-}
-
-void SchedulerService::add_location(ObjectID canonical_objectid, ObjStoreId objstoreid) {
-  // add_location must be called with a canonical objectid
-  RAY_CHECK_NEQ((*GET(reference_counts_))[canonical_objectid], DEALLOCATED, "Calling ObjReady with canonical_objectid " << canonical_objectid << ", but this objectid has already been deallocated");
-  RAY_CHECK(is_canonical(canonical_objectid), "Attempting to call add_location with a non-canonical objectid (objectid " << canonical_objectid << ")");
-  auto objtable = GET(objtable_);
-  RAY_CHECK_LT(canonical_objectid, objtable->size(), "trying to put an object in the object store that was not registered with the scheduler (objectid " << canonical_objectid << ")");
-  // do a binary search
-  auto &locations = (*objtable)[canonical_objectid];
-  auto pos = std::lower_bound(locations.begin(), locations.end(), objstoreid);
-  if (pos == locations.end() || objstoreid < *pos) {
-    locations.insert(pos, objstoreid);
-  }
-  auto &objects_in_flight = objects_in_transit_[objstoreid];
-  objects_in_flight.erase(std::remove(objects_in_flight.begin(), objects_in_flight.end(), canonical_objectid), objects_in_flight.end());
-}
-
-void SchedulerService::add_canonical_objectid(ObjectID objectid) {
-  auto target_objectids = GET(target_objectids_);
-  RAY_CHECK_LT(objectid, target_objectids->size(), "internal error: attempting to insert objectid " << objectid << " in target_objectids_, but target_objectids_.size() is " << target_objectids->size());
-  RAY_CHECK((*target_objectids)[objectid] == UNITIALIZED_ALIAS || (*target_objectids)[objectid] == objectid, "internal error: attempting to declare objectid " << objectid << " as a canonical objectid, but target_objectids_[objectid] is already aliased with objectid " << (*target_objectids)[objectid]);
-  (*target_objectids)[objectid] = objectid;
-}
-
-ObjStoreId SchedulerService::get_store(WorkerId workerid) {
-  auto workers = GET(workers_);
-  ObjStoreId result = (*workers)[workerid].objstoreid;
-  return result;
-}
-
-void SchedulerService::register_function(const std::string& name, WorkerId workerid, size_t num_return_vals) {
-  auto fntable = GET(fntable_);
-  FnInfo& info = (*fntable)[name];
-  info.set_num_return_vals(num_return_vals);
-  info.add_worker(workerid);
-}
-
-void SchedulerService::get_info(const SchedulerInfoRequest& request, SchedulerInfoReply* reply) {
-  auto computation_graph = GET(computation_graph_);
-  auto fntable = GET(fntable_);
-  auto avail_workers = GET(avail_workers_);
-  auto task_queue = GET(task_queue_);
-  auto reference_counts = GET(reference_counts_);
-  auto objstores = GET(objstores_);
-  auto target_objectids = GET(target_objectids_);
-  auto function_table = reply->mutable_function_table();
-  // Return info about the reference counts.
-  for (int i = 0; i < reference_counts->size(); ++i) {
-    reply->add_reference_count((*reference_counts)[i]);
-  }
-  // Return info about the target objectids.
-  for (int i = 0; i < target_objectids->size(); ++i) {
-    reply->add_target_objectid((*target_objectids)[i]);
-  }
-  // Return info about the function table.
-  for (const auto& entry : *fntable) {
-    (*function_table)[entry.first].set_num_return_vals(entry.second.num_return_vals());
-    for (const WorkerId& worker : entry.second.workers()) {
-      (*function_table)[entry.first].add_workerid(worker);
-    }
-  }
-  // Return info about the task queue.
-  for (const auto& entry : *task_queue) {
-    reply->add_operationid(entry);
-  }
-  // Return info about the available workers.
-  for (const WorkerId& entry : *avail_workers) {
-    reply->add_avail_worker(entry);
-  }
-  // Return info about the computation graph.
-  computation_graph->to_protobuf(reply->mutable_computation_graph());
-  // Return info about the object stores.
-  for (int i = 0; i < objstores->size(); ++i) {
-    ObjstoreData* objstore_data = reply->add_objstore();
-    objstore_data->set_objstoreid(i);
-    objstore_data->set_address((*objstores)[i].address);
-  }
-}
-
-// pick_objstore must be called with a canonical_objectid
-ObjStoreId SchedulerService::pick_objstore(ObjectID canonical_objectid) {
-  std::mt19937 rng;
-  RAY_CHECK(is_canonical(canonical_objectid), "Attempting to call pick_objstore with a non-canonical objectid, (objectid " << canonical_objectid << ")");
-  auto objtable = GET(objtable_);
-  std::uniform_int_distribution<int> uni(0, (*objtable)[canonical_objectid].size() - 1);
-  ObjStoreId objstoreid = (*objtable)[canonical_objectid][uni(rng)];
-  return objstoreid;
-}
-
-bool SchedulerService::is_canonical(ObjectID objectid) {
-  auto target_objectids = GET(target_objectids_);
-  RAY_CHECK_NEQ((*target_objectids)[objectid], UNITIALIZED_ALIAS, "Attempting to call is_canonical on an objectid for which aliasing is not complete or the object is not ready, target_objectids_[objectid] == UNITIALIZED_ALIAS for objectid " << objectid << ".");
-  return objectid == (*target_objectids)[objectid];
-}
-
-void SchedulerService::perform_gets() {
-  auto get_queue = GET(get_queue_);
-  // Complete all get tasks that can be completed.
-  for (int i = 0; i < get_queue->size(); ++i) {
-    const std::pair<WorkerId, ObjectID>& get_request = (*get_queue)[i];
-    ObjectID objectid = get_request.second;
-    WorkerId workerid = get_request.first;
-    ObjStoreId objstoreid = get_store(workerid);
-    if (!has_canonical_objectid(objectid)) {
-      RAY_LOG(RAY_ALIAS, "objectid " << objectid << " does not have a canonical_objectid, so continuing");
-      continue;
-    }
-    ObjectID canonical_objectid = get_canonical_objectid(objectid);
-    RAY_LOG(RAY_DEBUG, "attempting to get objectid " << get_request.second << " with canonical objectid " << canonical_objectid << " to objstore " << objstoreid);
-    int num_stores = (*GET(objtable_))[canonical_objectid].size();
-    if (num_stores > 0) {
-      deliver_object_async_if_necessary(canonical_objectid, pick_objstore(canonical_objectid), objstoreid);
-      // Notify the relevant objstore about potential aliasing when it's ready
-      GET(alias_notification_queue_)->push_back(std::make_pair(objstoreid, std::make_pair(objectid, canonical_objectid)));
-      // Remove the get task from the queue
-      std::swap((*get_queue)[i], (*get_queue)[get_queue->size() - 1]);
-      get_queue->pop_back();
-      i -= 1;
-    }
-  }
-}
-
-void SchedulerService::schedule_tasks_naively() {
-  auto computation_graph = GET(computation_graph_);
-  auto fntable = GET(fntable_);
-  auto avail_workers = GET(avail_workers_);
-  auto task_queue = GET(task_queue_);
-  for (int i = 0; i < avail_workers->size(); ++i) {
-    // Submit all tasks whose arguments are ready.
-    WorkerId workerid = (*avail_workers)[i];
-    for (auto it = task_queue->begin(); it != task_queue->end(); ++it) {
-      // The use of erase(it) below invalidates the iterator, but we
-      // immediately break out of the inner loop, so the iterator is not used
-      // after the erase
-      const OperationId operationid = *it;
-      const Task& task = computation_graph->get_task(operationid);
-      auto& workers = (*fntable)[task.name()].workers();
-      if (std::binary_search(workers.begin(), workers.end(), workerid) && can_run(task)) {
-        assign_task(operationid, workerid, computation_graph);
-        task_queue->erase(it);
-        std::swap((*avail_workers)[i], (*avail_workers)[avail_workers->size() - 1]);
-        avail_workers->pop_back();
-        i -= 1;
-        break;
-      }
-    }
-  }
-}
-
-void SchedulerService::schedule_tasks_location_aware() {
-  auto computation_graph = GET(computation_graph_);
-  auto fntable = GET(fntable_);
-  auto avail_workers = GET(avail_workers_);
-  auto task_queue = GET(task_queue_);
-  for (int i = 0; i < avail_workers->size(); ++i) {
-    // Submit all tasks whose arguments are ready.
-    WorkerId workerid = (*avail_workers)[i];
-    ObjStoreId objstoreid = get_store(workerid);
-    auto bestit = task_queue->end(); // keep track of the task that fits the worker best so far
-    size_t min_num_shipped_objects = std::numeric_limits<size_t>::max(); // number of objects that need to be transfered for this worker
-    for (auto it = task_queue->begin(); it != task_queue->end(); ++it) {
-      OperationId operationid = *it;
-      const Task& task = computation_graph->get_task(operationid);
-      auto& workers = (*fntable)[task.name()].workers();
-      if (std::binary_search(workers.begin(), workers.end(), workerid) && can_run(task)) {
-        // determine how many objects would need to be shipped
-        size_t num_shipped_objects = 0;
-        for (int j = 0; j < task.arg_size(); ++j) {
-          if (task.arg(j).serialized_arg().empty()) {
-            ObjectID objectid = task.arg(j).objectid();
-            RAY_CHECK(has_canonical_objectid(objectid), "no canonical object ref found even though task is ready; that should not be possible!");
-            ObjectID canonical_objectid = get_canonical_objectid(objectid);
-            {
-              // check if the object is already in the local object store
-              auto objtable = GET(objtable_);
-              if (!std::binary_search((*objtable)[canonical_objectid].begin(), (*objtable)[canonical_objectid].end(), objstoreid)) {
-                num_shipped_objects += 1;
-              }
-            }
-          }
-        }
-        if (num_shipped_objects < min_num_shipped_objects) {
-          min_num_shipped_objects = num_shipped_objects;
-          bestit = it;
-        }
-      }
-    }
-    // if we found a suitable task
-    if (bestit != task_queue->end()) {
-      assign_task(*bestit, workerid, computation_graph);
-      task_queue->erase(bestit);
-      std::swap((*avail_workers)[i], (*avail_workers)[avail_workers->size() - 1]);
-      avail_workers->pop_back();
-      i -= 1;
-    }
-  }
-}
-
-void SchedulerService::perform_notify_aliases() {
-  auto alias_notification_queue = GET(alias_notification_queue_);
-  for (int i = 0; i < alias_notification_queue->size(); ++i) {
-    const std::pair<WorkerId, std::pair<ObjectID, ObjectID> > alias_notification = (*alias_notification_queue)[i];
-    ObjStoreId objstoreid = alias_notification.first;
-    ObjectID alias_objectid = alias_notification.second.first;
-    ObjectID canonical_objectid = alias_notification.second.second;
-    if (attempt_notify_alias(objstoreid, alias_objectid, canonical_objectid)) { // this locks both the objstore_ and objtable_
-      // the attempt to notify the objstore of the objectid aliasing succeeded, so remove the notification task from the queue
-      std::swap((*alias_notification_queue)[i], (*alias_notification_queue)[alias_notification_queue->size() - 1]);
-      alias_notification_queue->pop_back();
-      i -= 1;
-    }
-  }
-}
-
-bool SchedulerService::has_canonical_objectid(ObjectID objectid) {
-  auto target_objectids = GET(target_objectids_);
-  ObjectID objectid_temp = objectid;
-  while (true) {
-    RAY_CHECK_LT(objectid_temp, target_objectids->size(), "Attempting to index target_objectids_ with objectid " << objectid_temp << ", but target_objectids_.size() = " << target_objectids->size());
-    if ((*target_objectids)[objectid_temp] == UNITIALIZED_ALIAS) {
-      return false;
-    }
-    if ((*target_objectids)[objectid_temp] == objectid_temp) {
-      return true;
-    }
-    objectid_temp = (*target_objectids)[objectid_temp];
-  }
-}
-
-ObjectID SchedulerService::get_canonical_objectid(ObjectID objectid) {
-  // get_canonical_objectid assumes that has_canonical_objectid(objectid) is true
-  auto target_objectids = GET(target_objectids_);
-  ObjectID objectid_temp = objectid;
-  while (true) {
-    RAY_CHECK_LT(objectid_temp, target_objectids->size(), "Attempting to index target_objectids_ with objectid " << objectid_temp << ", but target_objectids_.size() = " << target_objectids->size());
-    RAY_CHECK_NEQ((*target_objectids)[objectid_temp], UNITIALIZED_ALIAS, "Attempting to get canonical objectid for objectid " << objectid << ", which aliases, objectid " << objectid_temp << ", but target_objectids_[objectid_temp] == UNITIALIZED_ALIAS for objectid_temp = " << objectid_temp << ".");
-    if ((*target_objectids)[objectid_temp] == objectid_temp) {
-      return objectid_temp;
-    }
-    objectid_temp = (*target_objectids)[objectid_temp];
-    RAY_LOG(RAY_ALIAS, "Looping in get_canonical_objectid.");
-  }
-}
-
-bool SchedulerService::attempt_notify_alias(ObjStoreId objstoreid, ObjectID alias_objectid, ObjectID canonical_objectid) {
-  // return true if successful and false otherwise
-  if (alias_objectid == canonical_objectid) {
-    // no need to do anything
-    return true;
-  }
-  {
-    auto objtable = GET(objtable_);
-    if (!std::binary_search((*objtable)[canonical_objectid].begin(), (*objtable)[canonical_objectid].end(), objstoreid)) {
-      // the objstore doesn't have the object for canonical_objectid yet, so it's too early to notify the objstore about the alias
-      return false;
-    }
-  }
-  ClientContext context;
-  AckReply reply;
-  NotifyAliasRequest request;
-  request.set_alias_objectid(alias_objectid);
-  request.set_canonical_objectid(canonical_objectid);
-  RAY_CHECK_GRPC((*GET(objstores_))[objstoreid].objstore_stub->NotifyAlias(&context, request, &reply));
-  return true;
-}
-
-void SchedulerService::deallocate_object(ObjectID canonical_objectid, const MySynchronizedPtr<std::vector<RefCount> > &reference_counts, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &contained_objectids) {
-  // deallocate_object should only be called from decrement_ref_count (note that
-  // deallocate_object also recursively calls decrement_ref_count). Both of
-  // these methods take reference_counts and contained_objectids as argumens,
-  // which are obtained by GET(reference_counts) and GET(contained_objectids_),
-  // so we know that those data structures have been locked
-  RAY_LOG(RAY_REFCOUNT, "Deallocating canonical_objectid " << canonical_objectid << ".");
-  {
-    auto objtable = GET(objtable_);
-    auto &locations = (*objtable)[canonical_objectid];
-    auto objstores = GET(objstores_); // TODO(rkn): Should this be inside the for loop instead?
-    for (int i = 0; i < locations.size(); ++i) {
-      ClientContext context;
-      AckReply reply;
-      DeallocateObjectRequest request;
-      request.set_canonical_objectid(canonical_objectid);
-      ObjStoreId objstoreid = locations[i];
-      RAY_LOG(RAY_REFCOUNT, "Attempting to deallocate canonical_objectid " << canonical_objectid << " from objstore " << objstoreid);
-      RAY_CHECK_GRPC((*objstores)[objstoreid].objstore_stub->DeallocateObject(&context, request, &reply));
-    }
-    locations.clear();
-  }
-  // Decrement the reference count for all of the object IDs contained in this
-  // object. The corresponding increments happen in add_contained_objectids in
-  // worker.cc.
-  decrement_ref_count((*contained_objectids)[canonical_objectid], reference_counts, contained_objectids);
-}
-
-void SchedulerService::increment_ref_count(const std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<RefCount> > &reference_counts) {
-  // increment_ref_count takes reference_counts as an argument, which is
-  // obtained by GET(reference_counts_), so we know that the data structure has
-  // been locked
-  for (int i = 0; i < objectids.size(); ++i) {
-    ObjectID objectid = objectids[i];
-    RAY_CHECK_NEQ((*reference_counts)[objectid], DEALLOCATED, "Attempting to increment the reference count for objectid " << objectid << ", but this object appears to have been deallocated already.");
-    (*reference_counts)[objectid] += 1;
-    RAY_LOG(RAY_REFCOUNT, "Incremented ref count for objectid " << objectid <<". New reference count is " << (*reference_counts)[objectid]);
-  }
-}
-
-void SchedulerService::decrement_ref_count(const std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<RefCount> > &reference_counts, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &contained_objectids) {
-  // decrement_ref_count takes reference_counts and contained_objectids as
-  // arguments, which are obtained by GET(reference_counts_) and
-  // GET(contained_objectids_), so we know that those data structures have been
-  // locked
-  for (int i = 0; i < objectids.size(); ++i) {
-    ObjectID objectid = objectids[i];
-    RAY_CHECK_NEQ((*reference_counts)[objectid], DEALLOCATED, "Attempting to decrement the reference count for objectid " << objectid << ", but this object appears to have been deallocated already.");
-    RAY_CHECK_NEQ((*reference_counts)[objectid], 0, "Attempting to decrement the reference count for objectid " << objectid << ", but the reference count for this object is already 0.");
-    (*reference_counts)[objectid] -= 1;
-    RAY_LOG(RAY_REFCOUNT, "Decremented ref count for objectid " << objectid << ". New reference count is " << (*reference_counts)[objectid]);
-    // See if we can deallocate the object
-    std::vector<ObjectID> equivalent_objectids;
-    get_equivalent_objectids(objectid, equivalent_objectids);
-    bool can_deallocate = true;
-    for (int j = 0; j < equivalent_objectids.size(); ++j) {
-      if ((*reference_counts)[equivalent_objectids[j]] != 0) {
-        can_deallocate = false;
-        break;
-      }
-    }
-    if (can_deallocate) {
-      ObjectID canonical_objectid = equivalent_objectids[0];
-      RAY_CHECK(is_canonical(canonical_objectid), "canonical_objectid is not canonical.");
-      deallocate_object(canonical_objectid, reference_counts, contained_objectids);
-      for (int j = 0; j < equivalent_objectids.size(); ++j) {
-        (*reference_counts)[equivalent_objectids[j]] = DEALLOCATED;
-      }
-    }
-  }
-}
-
-void SchedulerService::upstream_objectids(ObjectID objectid, std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &reverse_target_objectids) {
-  // upstream_objectids takes reverse_target_objectids as an argument, which is
-  // obtained by GET(reverse_target_objectids_), so we know the data structure
-  // has been locked.
-  objectids.push_back(objectid);
-  for (int i = 0; i < (*reverse_target_objectids)[objectid].size(); ++i) {
-    upstream_objectids((*reverse_target_objectids)[objectid][i], objectids, reverse_target_objectids);
-  }
-}
-
-void SchedulerService::get_equivalent_objectids(ObjectID objectid, std::vector<ObjectID> &equivalent_objectids) {
-  auto target_objectids = GET(target_objectids_);
-  ObjectID downstream_objectid = objectid;
-  while ((*target_objectids)[downstream_objectid] != downstream_objectid && (*target_objectids)[downstream_objectid] != UNITIALIZED_ALIAS) {
-    RAY_LOG(RAY_ALIAS, "Looping in get_equivalent_objectids");
-    downstream_objectid = (*target_objectids)[downstream_objectid];
-  }
-  upstream_objectids(downstream_objectid, equivalent_objectids, GET(reverse_target_objectids_));
-}
-
-
-void SchedulerService::export_function_to_run_to_worker(WorkerId workerid, int function_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<Function> > > &exported_functions_to_run) {
-  RAY_LOG(RAY_INFO, "exporting function to run with index " << function_index << " to worker " << workerid);
-  ClientContext context;
-  RunFunctionOnWorkerRequest request;
-  request.mutable_function()->CopyFrom(*(*exported_functions_to_run)[function_index].get());
-  AckReply reply;
-  RAY_CHECK_GRPC((*workers)[workerid].worker_stub->RunFunctionOnWorker(&context, request, &reply));
-}
-
-void SchedulerService::export_remote_function_to_worker(WorkerId workerid, int function_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<Function> > > &exported_remote_functions) {
-  RAY_LOG(RAY_INFO, "exporting remote function with index " << function_index << " to worker " << workerid);
-  ClientContext context;
-  ImportRemoteFunctionRequest request;
-  request.mutable_function()->CopyFrom(*(*exported_remote_functions)[function_index].get());
-  AckReply reply;
-  RAY_CHECK_GRPC((*workers)[workerid].worker_stub->ImportRemoteFunction(&context, request, &reply));
-}
-
-void SchedulerService::export_reusable_variable_to_worker(WorkerId workerid, int reusable_variable_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<ReusableVar> > > &exported_reusable_variables) {
-  RAY_LOG(RAY_INFO, "exporting reusable variable with index " << reusable_variable_index << " to worker " << workerid);
-  ClientContext context;
-  ImportReusableVariableRequest request;
-  request.mutable_reusable_variable()->CopyFrom(*(*exported_reusable_variables)[reusable_variable_index].get());
-  AckReply reply;
-  RAY_CHECK_GRPC((*workers)[workerid].worker_stub->ImportReusableVariable(&context, request, &reply));
-}
-
-void SchedulerService::export_everything_to_all_workers_if_necessary(MySynchronizedPtr<std::vector<WorkerHandle> > &workers) {
-  auto exported_functions_to_run = GET(exported_functions_to_run_);
-  auto exported_remote_functions = GET(exported_remote_functions_);
-  auto exported_reusable_variables = GET(exported_reusable_variables_);
-  for (size_t workerid = 0; workerid < workers->size(); ++workerid) {
-    if ((*workers)[workerid].current_task != ROOT_OPERATION && !(*workers)[workerid].initial_exports_done) {
-      // Export the functions to run to the worker.
-      for (int i = 0; i < exported_functions_to_run->size(); ++i) {
-        export_function_to_run_to_worker(workerid, i, workers, exported_functions_to_run);
-      }
-      // Export the remote functions to the worker.
-      for (int i = 0; i < exported_remote_functions->size(); ++i) {
-        export_remote_function_to_worker(workerid, i, workers, exported_remote_functions);
-      }
-      // Export the reusable variables to the worker.
-      for (int i = 0; i < exported_reusable_variables->size(); ++i) {
-        export_reusable_variable_to_worker(workerid, i, workers, exported_reusable_variables);
-      }
-      // Record that we have done this so we do not need to do it again for this
-      // worker.
-      (*workers)[workerid].initial_exports_done = true;
-    }
-  }
-}
-
-void start_scheduler_service(const char* service_addr, SchedulingAlgorithmType scheduling_algorithm) {
-  std::string service_address(service_addr);
-  std::string::iterator split_point = split_ip_address(service_address);
-  std::string port;
-  port.assign(split_point, service_address.end());
-  SchedulerService service(scheduling_algorithm);
-  ServerBuilder builder;
-  builder.AddListeningPort(std::string("0.0.0.0:") + port, grpc::InsecureServerCredentials());
-  builder.RegisterService(&service);
-  std::unique_ptr<Server> server(builder.BuildAndStart());
-  if (server == nullptr) {
-    RAY_CHECK(false, "Failed to create the scheduler service.");
-  }
-  server->Wait();
-}
-
-RayConfig global_ray_config;
-
-int main(int argc, char** argv) {
-  SchedulingAlgorithmType scheduling_algorithm = SCHEDULING_ALGORITHM_LOCALITY_AWARE;
-  RAY_CHECK_GE(argc, 2, "scheduler: expected at least one argument (scheduler ip address)");
-  if (argc > 2) {
-    const char* log_file_name = get_cmd_option(argv, argv + argc, "--log-file-name");
-    if (log_file_name) {
-      std::cout << "scheduler: writing to log file " << log_file_name << std::endl;
-      create_log_dir_or_die(log_file_name);
-      global_ray_config.log_to_file = true;
-      global_ray_config.logfile.open(log_file_name);
-    } else {
-      std::cout << "scheduler: writing logs to stdout; you can change this by passing --log-file-name <filename> to ./scheduler" << std::endl;
-      global_ray_config.log_to_file = false;
-    }
-    const char* scheduling_algorithm_name = get_cmd_option(argv, argv + argc, "--scheduler-algorithm");
-    if (scheduling_algorithm_name) {
-      if (std::string(scheduling_algorithm_name) == "naive") {
-        RAY_LOG(RAY_INFO, "scheduler: using 'naive' scheduler");
-        scheduling_algorithm = SCHEDULING_ALGORITHM_NAIVE;
-      }
-      if (std::string(scheduling_algorithm_name) == "locality_aware") {
-        RAY_LOG(RAY_INFO, "scheduler: using 'locality aware' scheduler");
-        scheduling_algorithm = SCHEDULING_ALGORITHM_LOCALITY_AWARE;
-      }
-    }
-  }
-  start_scheduler_service(argv[1], scheduling_algorithm);
-  return 0;
-}
diff --git a/src/scheduler.h b/src/scheduler.h
deleted file mode 100644
index ec56cf6d8..000000000
--- a/src/scheduler.h
+++ /dev/null
@@ -1,237 +0,0 @@
-#ifndef RAY_SCHEDULER_H
-#define RAY_SCHEDULER_H
-
-
-#include <deque>
-#include <memory>
-#include <algorithm>
-#include <iostream>
-#include <limits>
-
-#include <grpc++/grpc++.h>
-
-#include "ray/ray.h"
-#include "ray.grpc.pb.h"
-#include "types.pb.h"
-
-#include "utils.h"
-#include "computation_graph.h"
-
-using grpc::Server;
-using grpc::ServerBuilder;
-using grpc::ServerReader;
-using grpc::ServerContext;
-using grpc::Status;
-
-using grpc::ClientContext;
-
-using grpc::Channel;
-
-typedef size_t RefCount;
-
-const ObjectID UNITIALIZED_ALIAS = std::numeric_limits<ObjectID>::max();
-const RefCount DEALLOCATED = std::numeric_limits<RefCount>::max();
-
-struct WorkerHandle {
-  std::shared_ptr<Channel> channel;
-  std::unique_ptr<WorkerService::Stub> worker_stub; // If null, the worker has died
-  ObjStoreId objstoreid;
-  std::string worker_address;
-  // This field is initialized to false, and it is set to true after all of the
-  // initial exports have been shipped to this worker.
-  bool initial_exports_done;
-  OperationId current_task;
-};
-
-struct ObjStoreHandle {
-  std::shared_ptr<Channel> channel;
-  std::unique_ptr<ObjStore::Stub> objstore_stub;
-  std::string address;
-};
-
-enum SchedulingAlgorithmType {
-  SCHEDULING_ALGORITHM_NAIVE = 0,
-  SCHEDULING_ALGORITHM_LOCALITY_AWARE = 1
-};
-
-class SchedulerService : public Scheduler::Service {
-public:
-  SchedulerService(SchedulingAlgorithmType scheduling_algorithm);
-
-  Status SubmitTask(ServerContext* context, const SubmitTaskRequest* request, SubmitTaskReply* reply) override;
-  Status PutObj(ServerContext* context, const PutObjRequest* request, PutObjReply* reply) override;
-  Status RequestObj(ServerContext* context, const RequestObjRequest* request, AckReply* reply) override;
-  Status AliasObjectIDs(ServerContext* context, const AliasObjectIDsRequest* request, AckReply* reply) override;
-  Status RegisterObjStore(ServerContext* context, const RegisterObjStoreRequest* request, RegisterObjStoreReply* reply) override;
-  Status RegisterWorker(ServerContext* context, const RegisterWorkerRequest* request, RegisterWorkerReply* reply) override;
-  Status RegisterRemoteFunction(ServerContext* context, const RegisterRemoteFunctionRequest* request, AckReply* reply) override;
-  Status ObjReady(ServerContext* context, const ObjReadyRequest* request, AckReply* reply) override;
-  Status ReadyForNewTask(ServerContext* context, const ReadyForNewTaskRequest* request, AckReply* reply) override;
-  Status IncrementRefCount(ServerContext* context, const IncrementRefCountRequest* request, AckReply* reply) override;
-  Status DecrementRefCount(ServerContext* context, const DecrementRefCountRequest* request, AckReply* reply) override;
-  Status AddContainedObjectIDs(ServerContext* context, const AddContainedObjectIDsRequest* request, AckReply* reply) override;
-  Status SchedulerInfo(ServerContext* context, const SchedulerInfoRequest* request, SchedulerInfoReply* reply) override;
-  Status TaskInfo(ServerContext* context, const TaskInfoRequest* request, TaskInfoReply* reply) override;
-  Status KillWorkers(ServerContext* context, const KillWorkersRequest* request, KillWorkersReply* reply) override;
-  Status RunFunctionOnAllWorkers(ServerContext* context, const RunFunctionOnAllWorkersRequest* request, AckReply* reply) override;
-  Status ExportRemoteFunction(ServerContext* context, const ExportRemoteFunctionRequest* request, AckReply* reply) override;
-  Status ExportReusableVariable(ServerContext* context, const ExportReusableVariableRequest* request, AckReply* reply) override;
-  Status NotifyFailure(ServerContext*, const NotifyFailureRequest* request, AckReply* reply) override;
-  Status Wait(ServerContext*, const WaitRequest* request, WaitReply* reply) override;
-
-#ifdef NDEBUG
-  // If we've disabled assertions, then just use regular SynchronizedPtr to skip lock checking.
-  template<class T>
-  using MySynchronizedPtr = SynchronizedPtr<T>;
-#else
-  // A SynchronizedPtr specialized for this class to dynamically check that locks are obtained in the correct order (in the order of field declarations).
-  template<class T>
-  class MySynchronizedPtr;
-#endif
-
-  // This will ask an object store to send an object to another object store if
-  // the object is not already present in that object store and is not already
-  // being transmitted.
-  void deliver_object_async_if_necessary(ObjectID objectid, ObjStoreId from, ObjStoreId to);
-  // ask an object store to send object to another object store
-  void deliver_object_async(ObjectID objectid, ObjStoreId from, ObjStoreId to);
-  // assign a task to a worker
-  void schedule();
-  // execute a task on a worker and ship required object IDs
-  void assign_task(OperationId operationid, WorkerId workerid, const MySynchronizedPtr<ComputationGraph> &computation_graph);
-  // checks if the dependencies of the task are met
-  bool can_run(const Task& task);
-  // register a new object with the scheduler and return its object ID
-  ObjectID register_new_object();
-  // register the location of the object ID in the object table
-  void add_location(ObjectID objectid, ObjStoreId objstoreid);
-  // indicate that objectid is a canonical objectid
-  void add_canonical_objectid(ObjectID objectid);
-  // get object store associated with a workerid
-  ObjStoreId get_store(WorkerId workerid);
-  // register a function with the scheduler
-  void register_function(const std::string& name, WorkerId workerid, size_t num_return_vals);
-  // get information about the scheduler state
-  void get_info(const SchedulerInfoRequest& request, SchedulerInfoReply* reply);
-private:
-  // pick an objectstore that holds a given object (needs protection by objects_lock_)
-  ObjStoreId pick_objstore(ObjectID objectid);
-  // checks if objectid is a canonical objectid
-  bool is_canonical(ObjectID objectid);
-  // Perform all queued up gets that can be performed.
-  void perform_gets();
-  // schedule tasks using the naive algorithm
-  void schedule_tasks_naively();
-  // schedule tasks using a scheduling algorithm that takes into account data locality
-  void schedule_tasks_location_aware();
-  void perform_notify_aliases();
-  // checks if aliasing for objectid has been completed
-  bool has_canonical_objectid(ObjectID objectid);
-  // get the canonical objectid for an objectid
-  ObjectID get_canonical_objectid(ObjectID objectid);
-  // attempt to notify the objstore about potential objectid aliasing, returns true if successful, if false then retry later
-  bool attempt_notify_alias(ObjStoreId objstoreid, ObjectID alias_objectid, ObjectID canonical_objectid);
-  // tell all of the objstores holding canonical_objectid to deallocate it, the
-  // data structures are passed into ensure that the appropriate locks are held.
-  void deallocate_object(ObjectID canonical_objectid, const MySynchronizedPtr<std::vector<RefCount> > &reference_counts, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &contained_objectids);
-  // increment the ref counts for the object IDs in objectids, the data
-  // structures are passed into ensure that the appropriate locks are held.
-  void increment_ref_count(const std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<RefCount> > &reference_count);
-  // decrement the ref counts for the object IDs in objectids, the data
-  // structures are passed into ensure that the appropriate locks are held.
-  void decrement_ref_count(const std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<RefCount> > &reference_count, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &contained_objectids);
-  // Find all of the object IDs which are upstream of objectid (including objectid itself). That is, you can get from everything in objectids to objectid by repeatedly indexing in target_objectids_.
-  void upstream_objectids(ObjectID objectid, std::vector<ObjectID> &objectids, const MySynchronizedPtr<std::vector<std::vector<ObjectID> > > &reverse_target_objectids);
-  // Find all of the object IDs that refer to the same object as objectid (as best as we can determine at the moment). The information may be incomplete because not all of the aliases may be known.
-  void get_equivalent_objectids(ObjectID objectid, std::vector<ObjectID> &equivalent_objectids);
-  // Export a function to run to a worker.
-  void export_function_to_run_to_worker(WorkerId workerid, int function_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<Function> > > &exported_functions_to_run);
-  // Export a remote function to a worker.
-  void export_remote_function_to_worker(WorkerId workerid, int function_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<Function> > > &exported_remote_functions);
-  // Export a reusable variable to a worker
-  void export_reusable_variable_to_worker(WorkerId workerid, int reusable_variable_index, MySynchronizedPtr<std::vector<WorkerHandle> > &workers, const MySynchronizedPtr<std::vector<std::unique_ptr<ReusableVar> > > &exported_reusable_variables);
-  // Export all exports to all workers that need them. This happens the first
-  // time any export would be exported to a worker or when a worker first calls
-  // ReadyForNewTask.
-  void export_everything_to_all_workers_if_necessary(MySynchronizedPtr<std::vector<WorkerHandle> > &workers);
-
-  template<class T>
-  MySynchronizedPtr<T> get(Synchronized<T>& my_field, const char* name,unsigned int line_number);
-  template<class T>
-  MySynchronizedPtr<const T> get(const Synchronized<T>& my_field, const char* name,unsigned int line_number) const;
-
-  // Preferably keep this as the first field to distinguish it from the rest
-  // Maps every thread to an identifier of a lock it is holding, as well the name of the lock.
-  // Internally, the identifier for each lock is the offset of the field being locked.
-  // When we lock, we set the field offset and store the difference; the difference should always be positive. If not, we throw.
-  // When we unlock, we subtract back the field offset to restore it to the previous field that was locked.
-  mutable Synchronized<std::vector<std::pair<unsigned long long, std::pair<size_t, const char*> > > > lock_orders_;
-
-  // List of failed tasks
-  Synchronized<std::vector<TaskStatus> > failed_tasks_;
-  // A list of remote functions import failures.
-  Synchronized<std::vector<Failure> > failed_remote_function_imports_;
-  // A list of reusable variables import failures.
-  Synchronized<std::vector<Failure> > failed_reusable_variable_imports_;
-  // A list of reusable variables reinitialization failures.
-  Synchronized<std::vector<Failure> > failed_reinitialize_reusable_variables_;
-  // A list of function to run failures.
-  Synchronized<std::vector<Failure> > failed_function_to_runs_;
-  // List of pending get calls.
-  Synchronized<std::vector<std::pair<WorkerId, ObjectID> > > get_queue_;
-  // The computation graph tracks the operations that have been submitted to the
-  // scheduler and is mostly used for fault tolerance.
-  Synchronized<ComputationGraph> computation_graph_;
-  // Hash map from function names to workers where the function is registered.
-  Synchronized<FnTable> fntable_;
-  // Vector of all workers that are currently idle.
-  Synchronized<std::vector<WorkerId> > avail_workers_;
-  // List of pending tasks.
-  Synchronized<std::deque<OperationId> > task_queue_;
-  // Reference counts. Currently, reference_counts_[objectid] is the number of
-  // existing references held to objectid. This is done for all objectids, not just
-  // canonical_objectids. This data structure completely ignores aliasing. If the
-  // object corresponding to objectid has been deallocated, then
-  // reference_counts[objectid] will equal DEALLOCATED.
-  Synchronized<std::vector<RefCount> > reference_counts_;
-  // contained_objectids_[objectid] is a vector of all of the objectids contained inside the object referred to by objectid
-  Synchronized<std::vector<std::vector<ObjectID> > > contained_objectids_;
-  // Vector of all workers registered in the system. Their index in this vector
-  // is the workerid.
-  Synchronized<std::vector<WorkerHandle> > workers_;
-  // List of pending alias notifications. Each element consists of (objstoreid, (alias_objectid, canonical_objectid)).
-  Synchronized<std::vector<std::pair<ObjStoreId, std::pair<ObjectID, ObjectID> > > > alias_notification_queue_;
-  // Mapping from canonical objectid to list of object stores where the object is stored. Non-canonical (aliased) objectids should not be used to index objtable_.
-  Synchronized<ObjTable> objtable_; // This lock protects objtable_ and objects_in_transit_
-  // Vector of all object stores registered in the system. Their index in this
-  // vector is the objstoreid.
-  Synchronized<std::vector<ObjStoreHandle> > objstores_;
-  // Mapping from an aliased objectid to the objectid it is aliased with. If an
-  // objectid is a canonical objectid (meaning it is not aliased), then
-  // target_objectids_[objectid] == objectid. For each objectid, target_objectids_[objectid]
-  // is initialized to UNITIALIZED_ALIAS and the correct value is filled later
-  // when it is known.
-  Synchronized<std::vector<ObjectID> > target_objectids_;
-  // This data structure maps an objectid to all of the objectids that alias it (there could be multiple such objectids).
-  Synchronized<std::vector<std::vector<ObjectID> > > reverse_target_objectids_;
-  // For each object store objstoreid, objects_in_transit_[objstoreid] is a
-  // vector of the canonical object IDs that are being streamed to that
-  // object store but are not yet present. object IDs are added to this
-  // in deliver_object_async_if_necessary (to ensure that we do not attempt to deliver
-  // the same object to a given object store twice), and object IDs are
-  // removed when add_location is called (from ObjReady), and they are moved to
-  // the objtable_. Note that objects_in_transit_ and objtable_ share the same
-  // lock (objects_lock_). // TODO(rkn): Consider making this part of the
-  // objtable data structure.
-  std::vector<std::vector<ObjectID> > objects_in_transit_;
-  // All of the functions that have been exported to the workers to run.
-  Synchronized<std::vector<std::unique_ptr<Function> > > exported_functions_to_run_;
-  // All of the remote functions that have been exported to the workers.
-  Synchronized<std::vector<std::unique_ptr<Function> > > exported_remote_functions_;
-  // All of the reusable variables that have been exported to the workers.
-  Synchronized<std::vector<std::unique_ptr<ReusableVar> > > exported_reusable_variables_;
-  // the scheduling algorithm that will be used
-  SchedulingAlgorithmType scheduling_algorithm_;
-};
-
-#endif
diff --git a/src/utils.cc b/src/utils.cc
deleted file mode 100644
index 14f26bf83..000000000
--- a/src/utils.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "utils.h"
-
-#include "ray/ray.h"
-
-#include <sys/stat.h>
-#ifdef _S_IREAD  // Visual C++ runtime?
-#include <direct.h>  // _mkdir
-#else
-namespace {
-  int _mkdir(char const* path) {
-    return mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
-  }
-}
-#endif
-
-std::string::iterator split_ip_address(std::string& ip_address) {
-  if (ip_address[0] == '[') { // IPv6
-    auto split_end = std::find(ip_address.begin() + 1, ip_address.end(), ']');
-    if(split_end != ip_address.end()) {
-      split_end++;
-    }
-    if(split_end != ip_address.end() && *split_end == ':') {
-      return split_end;
-    }
-    RAY_CHECK(false, "ip address should contain a port number");
-  } else { // IPv4
-    auto split_point = std::find(ip_address.rbegin(), ip_address.rend(), ':').base();
-    RAY_CHECK_NEQ(split_point, ip_address.begin(), "ip address should contain a port number");
-    return split_point;
-  }
-}
-
-const char* get_cmd_option(char** begin, char** end, const std::string& option) {
-  char** it = std::find(begin, end, option);
-  if (it != end && ++it != end) {
-    return *it;
-  }
-  return 0;
-}
-
-void create_directories(const char* log_file_name) {
-  bool success = _mkdir(log_file_name) != -1 || errno == EEXIST;
-  if (!success) {
-    // If we couldn't create it directly and it didn't already exist, then try to create it from the root...
-    // Note that we keep going until the end even if creating the root fails, because we don't necessarily have access to the root
-    bool stop = false;
-    size_t i = 0;
-    do {
-      stop = log_file_name[i] == '\0';
-      bool delimiter = stop || log_file_name[i] == '/' || log_file_name[i] == '\\';
-      if (!stop) {
-        ++i;
-      }
-      if (delimiter) {
-        std::string ancestor(log_file_name, i);
-        success = _mkdir(ancestor.c_str()) != -1 || errno == EEXIST;
-      }
-    } while (!stop);
-  }
-  RAY_CHECK(success, "Failed to create directory for " << log_file_name);
-}
-
-void create_log_dir_or_die(const char* log_file_name) {
-  std::string dirname = log_file_name;
-  while (!dirname.empty() && dirname.back() != '/' && dirname.back() != '\\') {
-    dirname.pop_back();
-  }
-  return create_directories(dirname.c_str());
-}
diff --git a/src/utils.h b/src/utils.h
deleted file mode 100644
index 3f1801929..000000000
--- a/src/utils.h
+++ /dev/null
@@ -1,97 +0,0 @@
-#ifndef RAY_UTILS_H
-#define RAY_UTILS_H
-
-#include <mutex>
-#include <string>
-
-template<class T = void, class Mutex = std::mutex>
-class Synchronized;
-
-template<class T, class Mutex>
-class Synchronized<const T, Mutex>;  // Prevent use of const T; it doesn't make sense
-
-template<class T, class Mutex> struct SynchronizedSource { typedef Synchronized<T, Mutex> type; };
-template<class T, class Mutex> struct SynchronizedSource<const T, Mutex> { typedef const Synchronized<T, Mutex> type; };
-template<class T, class Mutex> struct SynchronizedSource<volatile T, Mutex> { typedef volatile Synchronized<T, Mutex> type; };
-template<class T, class Mutex> struct SynchronizedSource<const volatile T, Mutex> { typedef const Synchronized<T, Mutex> type; };
-
-template<class T>
-class SynchronizedPtr : public std::unique_lock<typename SynchronizedSource<T, void>::type> {
-protected:
-  typedef std::unique_lock<typename SynchronizedSource<T, void>::type> base_type;
-  // Make these private; they don't make much sense externally...
-  using base_type::mutex;
-public:
-  typedef T value_type;
-  SynchronizedPtr(typename base_type::mutex_type& value) : base_type(value) { }
-  value_type& operator*() const { return *mutex()->unsafe_get(); }
-  value_type* operator->() const { return mutex() ? mutex()->unsafe_get() : NULL; }
-};
-
-template<class T>
-class Synchronized<T, void> {
-  T value_;
-public:
-  typedef T element_type;
-  template<class... U>
-  Synchronized(U&&... args) : value_(std::forward<U>(args)...) { }
-  Synchronized(const Synchronized& other) : value_((std::lock_guard<Synchronized>(other), other.value_)) { }
-  Synchronized(Synchronized&& other) : value_((std::lock_guard<Synchronized>(other), std::move(other.value_))) { }
-  Synchronized& operator =(const Synchronized& other)
-  {
-    if (this != &other)
-    {
-      std::lock_guard<Synchronized> guard_this(*this);
-      std::lock_guard<Synchronized> guard_other(other);
-      value_ = other.value_;
-    }
-    return *this;
-  }
-  Synchronized& operator =(Synchronized&& other)
-  {
-    if (this != &other)
-    {
-      std::lock_guard<Synchronized> guard_this(*this);
-      std::lock_guard<Synchronized> guard_other(other);
-      value_ = std::move(other.value_);
-    }
-    return *this;
-  }
-  virtual void lock() const = 0;
-  virtual void unlock() const = 0;
-  virtual bool try_lock() const = 0;
-  element_type* unsafe_get() { return &value_; }
-  const element_type* unsafe_get() const { return &value_; }
-};
-
-template<class Mutex>
-class Synchronized<void, Mutex> {
-  mutable Mutex mutex_;
-public:
-  typedef Mutex mutex_type;
-  void lock() const { return mutex_.lock(); }
-  void unlock() const { return mutex_.unlock(); }
-  bool try_lock() const { return mutex_.try_lock(); }
-};
-
-template<class T, class Mutex>
-class Synchronized : public Synchronized<T, void>, public Synchronized<void, Mutex> {
-  typedef Synchronized<T, void> base1_type;
-  typedef Synchronized<void, Mutex> base2_type;
-public:
-  template<class... U>
-  Synchronized(U&&... args) : base1_type(std::forward<U>(args)...), base2_type() { }
-  SynchronizedPtr<T> unchecked_get() { return *this; }
-  SynchronizedPtr<const T> unchecked_get() const { return *this; }
-  void lock() const override { return base2_type::lock(); }
-  void unlock() const override { return base2_type::unlock(); }
-  bool try_lock() const override { return base2_type::try_lock(); }
-};
-
-std::string::iterator split_ip_address(std::string& ip_address);
-
-const char* get_cmd_option(char** begin, char** end, const std::string& option);
-
-void create_log_dir_or_die(const char* log_file_name);
-
-#endif
diff --git a/src/worker.cc b/src/worker.cc
deleted file mode 100644
index 8d024392f..000000000
--- a/src/worker.cc
+++ /dev/null
@@ -1,497 +0,0 @@
-#include "worker.h"
-
-#include <atomic>
-#include <random>
-#include <chrono>
-#include <thread>
-
-#include "utils.h"
-
-extern "C" {
-  static PyObject *RayError;
-}
-
-inline WorkerServiceImpl::WorkerServiceImpl(const std::string& send_queue_name, Mode mode)
-  : mode_(mode) {
-  RAY_LOG(RAY_INFO, "Worker service connecting to queue " << send_queue_name);
-  RAY_CHECK(send_queue_.connect(send_queue_name, false), "error connecting send_queue_");
-}
-
-Status WorkerServiceImpl::ExecuteTask(ServerContext* context, const ExecuteTaskRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ == Mode::WORKER_MODE, "ExecuteTask can only be called on workers.");
-  RAY_LOG(RAY_INFO, "invoked task " << request->task().name());
-  std::unique_ptr<WorkerMessage> message(new WorkerMessage());
-  message->mutable_task()->CopyFrom(request->task());
-  {
-    WorkerMessage* message_ptr = message.get();
-    RAY_CHECK(send_queue_.send(&message_ptr), "Failed to send message from the worker service to the worker because the message queue was full.");
-  }
-  // The message will get deleted in receive_next_message().
-  message.release();
-  return Status::OK;
-}
-
-Status WorkerServiceImpl::RunFunctionOnWorker(ServerContext* context, const RunFunctionOnWorkerRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ == Mode::WORKER_MODE, "RunFunctionOnWorker can only be called on workers.");
-  std::unique_ptr<WorkerMessage> message(new WorkerMessage());
-  message->mutable_function_to_run()->CopyFrom(request->function());
-  RAY_LOG(RAY_INFO, "Running function on worker.");
-  {
-    WorkerMessage* message_ptr = message.get();
-    RAY_CHECK(send_queue_.send(&message_ptr), "Failed to send message from the worker service to the worker because the message queue was full.");
-  }
-  // The message will get deleted in receive_next_message().
-  message.release();
-  return Status::OK;
-}
-
-Status WorkerServiceImpl::ImportRemoteFunction(ServerContext* context, const ImportRemoteFunctionRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ == Mode::WORKER_MODE, "ImportRemoteFunction can only be called on workers.");
-  std::unique_ptr<WorkerMessage> message(new WorkerMessage());
-  message->mutable_function()->CopyFrom(request->function());
-  RAY_LOG(RAY_INFO, "importing function");
-  {
-    WorkerMessage* message_ptr = message.get();
-    RAY_CHECK(send_queue_.send(&message_ptr), "Failed to send message from the worker service to the worker because the message queue was full.");
-  }
-  // The message will get deleted in receive_next_message().
-  message.release();
-  return Status::OK;
-}
-
-Status WorkerServiceImpl::ImportReusableVariable(ServerContext* context, const ImportReusableVariableRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ == Mode::WORKER_MODE, "ImportReusableVariable can only be called on workers.");
-  std::unique_ptr<WorkerMessage> message(new WorkerMessage());
-  message->mutable_reusable_variable()->CopyFrom(request->reusable_variable());
-  RAY_LOG(RAY_INFO, "importing reusable variable");
-  {
-    WorkerMessage* message_ptr = message.get();
-    RAY_CHECK(send_queue_.send(&message_ptr), "Failed to send message from the worker service to the worker because the message queue was full.");
-  }
-  // The message will get deleted in receive_next_message().
-  message.release();
-  return Status::OK;
-}
-
-Status WorkerServiceImpl::Die(ServerContext* context, const DieRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ == Mode::WORKER_MODE, "Die can only be called on workers.");
-  WorkerMessage* message_ptr = NULL;
-  RAY_CHECK(send_queue_.send(&message_ptr), "Failed to send message from the worker service to the worker because the message queue was full.");
-  return Status::OK;
-}
-
-Status WorkerServiceImpl::PrintErrorMessage(ServerContext* context, const PrintErrorMessageRequest* request, AckReply* reply) {
-  RAY_CHECK(mode_ != Mode::WORKER_MODE, "PrintErrorMessage can only be called on drivers.");
-  if (mode_ == Mode::SILENT_MODE) {
-    // Do not log error messages in this case. This is just used for the tests.
-    return Status::OK;
-  }
-  const Failure failure = request->failure();
-  WorkerId workerid = failure.workerid();
-  if (failure.type() == FailedType::FailedTask) {
-    // A task threw an exception while executing.
-    std::cout << "Error: Worker " << workerid << " failed to execute function " << failure.name() << ". Failed with error message:\n" << failure.error_message() << std::endl;
-  } else if (failure.type() == FailedType::FailedRemoteFunctionImport) {
-    // An exception was thrown while a remote function was being imported.
-    std::cout << "Error: Worker " << workerid << " failed to import remote function " << failure.name() << ", failed with error message:\n" << failure.error_message() << std::endl;
-  } else if (failure.type() == FailedType::FailedReusableVariableImport) {
-    // An exception was thrown while a reusable variable was being imported.
-    std::cout << "Error: Worker " << workerid << " failed to import reusable variable " << failure.name() << ", failed with error message:\n" << failure.error_message() << std::endl;
-  } else if (failure.type() == FailedType::FailedReinitializeReusableVariable) {
-    // An exception was thrown while a reusable variable was being reinitialized.
-    std::cout << "Error: Worker " << workerid << " failed to reinitialize a reusable variable after running remote function " << failure.name() << ", failed with error message:\n" << failure.error_message() << std::endl;
-  } else if (failure.type() == FailedType::FailedFunctionToRun) {
-    // An exception was thrown while a function was being run on all workers.
-    std::cout << "Error: Worker " << workerid << " failed to run function " << failure.name() << " on all workers, failed with error message:\n" << failure.error_message() << std::endl;
-  } else {
-    RAY_CHECK(false, "This code should be unreachable.")
-  }
-  return Status::OK;
-}
-
-Worker::Worker(const std::string& node_ip_address, const std::string& scheduler_address, Mode mode)
-    : scheduler_address_(scheduler_address),
-      node_ip_address_(node_ip_address),
-      mode_(mode) {
-  auto scheduler_channel = grpc::CreateChannel(scheduler_address, grpc::InsecureChannelCredentials());
-  scheduler_stub_ = Scheduler::NewStub(scheduler_channel);
-  // Generate a random string to use for naming the message queue to avoid
-  // collisions with message queues created by other workers.
-  std::random_device rd;
-  std::mt19937 rng(rd());
-  std::uniform_int_distribution<int> queue_name_generator(0, 10000000);
-  receive_queue_name_ = "worker_receive_queue:" + std::to_string(queue_name_generator(rng));
-  RAY_LOG(RAY_INFO, "Worker creating queue " << receive_queue_name_);
-  RAY_CHECK(receive_queue_.connect(receive_queue_name_, true), "error connecting receive_queue_");
-}
-
-
-SubmitTaskReply Worker::submit_task(SubmitTaskRequest* request, int max_retries, int retry_wait_milliseconds) {
-  RAY_CHECK(connected_, "Attempted to perform submit_task but failed.");
-  SubmitTaskReply reply;
-  request->set_workerid(workerid_);
-  for (int i = 0; i < 1 + max_retries; ++i) {
-    ClientContext context;
-    RAY_CHECK_GRPC(scheduler_stub_->SubmitTask(&context, *request, &reply));
-    if (reply.function_registered()) {
-      break;
-    }
-    RAY_LOG(RAY_INFO, "The function " << request->task().name() << " was not registered, so attempting to resubmit the task.");
-    std::this_thread::sleep_for(std::chrono::milliseconds(retry_wait_milliseconds));
-  }
-  return reply;
-}
-
-bool Worker::kill_workers(ClientContext &context) {
-  KillWorkersRequest request;
-  KillWorkersReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->KillWorkers(&context, request, &reply));
-  return reply.success();
-}
-
-void Worker::register_worker(const std::string& node_ip_address, const std::string& objstore_address, bool is_driver) {
-  if (mode_ == Mode::WORKER_MODE) {
-    start_worker_service(mode_);
-    RAY_CHECK(!worker_address_.empty(), "The worker address is empty. This should be initialized by start_worker_service, so it is possible that the thread synchronization failed.")
-  }
-  unsigned int retry_wait_milliseconds = 20;
-  RegisterWorkerRequest request;
-  request.set_node_ip_address(node_ip_address);
-  request.set_worker_address(worker_address_);
-  // The object store address can be the empty string, in which case the
-  // scheduler will assign an object store address.
-  request.set_objstore_address(objstore_address);
-  request.set_is_driver(is_driver);
-  RegisterWorkerReply reply;
-  Status status;
-  // TODO: HACK: retrying is a hack
-  for (int i = 0; i < 5; ++i) {
-    ClientContext context;
-    status = scheduler_stub_->RegisterWorker(&context, request, &reply);
-    if (status.error_code() != grpc::UNAVAILABLE) {
-      break;
-    }
-    // Note that each pass through the loop may take substantially longer than
-    // retry_wait_milliseconds because grpc may do its own retrying.
-    std::this_thread::sleep_for(std::chrono::milliseconds(retry_wait_milliseconds));
-  }
-  RAY_CHECK_GRPC(status);
-  workerid_ = reply.workerid();
-  objstoreid_ = reply.objstoreid();
-  objstore_address_ = reply.objstore_address();
-  segmentpool_ = std::make_shared<MemorySegmentPool>(objstoreid_, objstore_address_, false);
-  // Connect to the queue for sending requests to the object store.
-  std::string request_obj_queue_name = std::string("queue:") + objstore_address_ + std::string(":obj");
-  RAY_LOG(RAY_INFO, "Worker connecting to queue with name " << request_obj_queue_name << " to send requests to the object store.");
-  RAY_CHECK(request_obj_queue_.connect(request_obj_queue_name, false), "error connecting request_obj_queue_");
-  // Create a queue for receiving messages from the object store.
-  std::string receive_obj_queue_name = std::string("queue:") + objstore_address_ + std::string(":worker:") + std::to_string(workerid_) + std::string(":obj");
-  RAY_LOG(RAY_INFO, "Worker creating queue with name " << receive_obj_queue_name << " to receive messages from the object store.");
-  RAY_CHECK(receive_obj_queue_.connect(receive_obj_queue_name, true), "error connecting receive_obj_queue_");
-  connected_ = true;
-  return;
-}
-
-void Worker::request_object(ObjectID objectid) {
-  RAY_CHECK(connected_, "Attempted to perform request_object but failed.");
-  RequestObjRequest request;
-  request.set_workerid(workerid_);
-  request.set_objectid(objectid);
-  AckReply reply;
-  ClientContext context;
-  RAY_CHECK_GRPC(scheduler_stub_->RequestObj(&context, request, &reply));
-  return;
-}
-
-ObjectID Worker::get_objectid() {
-  // first get objectid for the new object
-  RAY_CHECK(connected_, "Attempted to perform get_objectid but failed.");
-  PutObjRequest request;
-  request.set_workerid(workerid_);
-  PutObjReply reply;
-  ClientContext context;
-  RAY_CHECK_GRPC(scheduler_stub_->PutObj(&context, request, &reply));
-  return reply.objectid();
-}
-
-void Worker::add_contained_objectids(ObjectID objectid, std::vector<ObjectID> &contained_objectids) {
-  RAY_CHECK(connected_, "Attempted to perform add_contained_objectids but failed.");
-  if (contained_objectids.size() > 0) {
-    RAY_LOG(RAY_REFCOUNT, "In add_contained_objectids, calling increment_reference_count for contained objectids");
-    // Notify the scheduler that some object references are serialized in the
-    // objstore. The corresponding decrement happens when the object
-    // corresponding to objectid is deallocated.
-    increment_reference_count(contained_objectids);
-    // Notify the scheduler about the objectids that we are serializing in the objstore.
-    AddContainedObjectIDsRequest contained_objectids_request;
-    contained_objectids_request.set_objectid(objectid);
-    for (int i = 0; i < contained_objectids.size(); ++i) {
-      contained_objectids_request.add_contained_objectid(contained_objectids[i]); // TODO(rkn): The naming here is bad
-    }
-    AckReply reply;
-    ClientContext context;
-     RAY_CHECK_GRPC(scheduler_stub_->AddContainedObjectIDs(&context, contained_objectids_request, &reply));
-  }
-}
-
-#define CHECK_ARROW_STATUS(s, msg)                              \
-  do {                                                          \
-    arrow::Status _s = (s);                                     \
-    if (!_s.ok()) {                                             \
-      std::string _errmsg = std::string(msg) + _s.ToString();   \
-      PyErr_SetString(RayError, _errmsg.c_str());            \
-      return NULL;                                              \
-    }                                                           \
-  } while (0);
-
-const char* Worker::allocate_buffer(ObjectID objectid, int64_t size, SegmentId& segmentid) {
-  RAY_CHECK(connected_, "Attempted to perform put_arrow but failed.");
-  ObjRequest request;
-  request.workerid = workerid_;
-  request.type = ObjRequestType::ALLOC;
-  request.objectid = objectid;
-  request.size = size;
-  RAY_CHECK(request_obj_queue_.send(&request), "Failed to send request from the worker to the object store because the message queue was full.");
-  ObjHandle result;
-  RAY_CHECK(receive_obj_queue_.receive(&result), "error receiving over IPC");
-  const char* address = reinterpret_cast<const char*>(segmentpool_->get_address(result));
-  segmentid = result.segmentid();
-  return address;
-}
-
-PyObject* Worker::finish_buffer(ObjectID objectid, SegmentId segmentid, int64_t metadata_offset) {
-  segmentpool_->unmap_segment(segmentid);
-  ObjRequest request;
-  request.workerid = workerid_;
-  request.objectid = objectid;
-  request.type = ObjRequestType::WORKER_DONE;
-  request.metadata_offset = metadata_offset;
-  RAY_CHECK(request_obj_queue_.send(&request), "Failed to send request from the worker to the object store because the message queue was full.");
-  Py_RETURN_NONE;
-}
-
-const char* Worker::get_buffer(ObjectID objectid, int64_t &size, SegmentId& segmentid, int64_t& metadata_offset) {
-  RAY_CHECK(connected_, "Attempted to perform get_arrow but failed.");
-  ObjRequest request;
-  request.workerid = workerid_;
-  request.type = ObjRequestType::GET;
-  request.objectid = objectid;
-  RAY_CHECK(request_obj_queue_.send(&request), "Failed to send request from the worker to the object store because the message queue was full.");
-  ObjHandle result;
-  RAY_CHECK(receive_obj_queue_.receive(&result), "error receiving over IPC");
-  const char* address = reinterpret_cast<const char*>(segmentpool_->get_address(result));
-  size = result.size();
-  segmentid = result.segmentid();
-  metadata_offset = result.metadata_offset();
-  return address;
-}
-
-bool Worker::is_arrow(ObjectID objectid) {
-  RAY_CHECK(connected_, "Attempted to perform is_arrow but failed.");
-  ObjRequest request;
-  request.workerid = workerid_;
-  request.type = ObjRequestType::GET;
-  request.objectid = objectid;
-  RAY_CHECK(request_obj_queue_.send(&request), "Failed to send request from the worker to the object store because the message queue was full.");
-  ObjHandle result;
-  RAY_CHECK(receive_obj_queue_.receive(&result), "error receiving over IPC");
-  return result.metadata_offset() != 0;
-}
-
-void Worker::unmap_object(ObjectID objectid) {
-  if (!connected_) {
-    RAY_LOG(RAY_DEBUG, "Attempted to perform unmap_object but failed.");
-    return;
-  }
-  segmentpool_->unmap_segment(objectid);
-}
-
-void Worker::alias_objectids(ObjectID alias_objectid, ObjectID target_objectid) {
-  RAY_CHECK(connected_, "Attempted to perform alias_objectids but failed.");
-  ClientContext context;
-  AliasObjectIDsRequest request;
-  request.set_alias_objectid(alias_objectid);
-  request.set_target_objectid(target_objectid);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->AliasObjectIDs(&context, request, &reply));
-}
-
-void Worker::increment_reference_count(std::vector<ObjectID> &objectids) {
-  if (!connected_) {
-    RAY_LOG(RAY_DEBUG, "Attempting to increment_reference_count for objectids, but connected_ = " << connected_ << " so returning instead.");
-    return;
-  }
-  if (objectids.size() > 0) {
-    ClientContext context;
-    IncrementRefCountRequest request;
-    for (int i = 0; i < objectids.size(); ++i) {
-      RAY_LOG(RAY_REFCOUNT, "Incrementing reference count for objectid " << objectids[i]);
-      request.add_objectid(objectids[i]);
-    }
-    AckReply reply;
-    RAY_CHECK_GRPC(scheduler_stub_->IncrementRefCount(&context, request, &reply));
-  }
-}
-
-void Worker::decrement_reference_count(std::vector<ObjectID> &objectids) {
-  if (!connected_) {
-    RAY_LOG(RAY_DEBUG, "Attempting to decrement_reference_count, but connected_ = " << connected_ << " so returning instead.");
-    return;
-  }
-  if (objectids.size() > 0) {
-    ClientContext context;
-    DecrementRefCountRequest request;
-    for (int i = 0; i < objectids.size(); ++i) {
-      RAY_LOG(RAY_REFCOUNT, "Decrementing reference count for objectid " << objectids[i]);
-      request.add_objectid(objectids[i]);
-    }
-    AckReply reply;
-    RAY_CHECK_GRPC(scheduler_stub_->DecrementRefCount(&context, request, &reply));
-  }
-}
-
-void Worker::register_remote_function(const std::string& name, size_t num_return_vals) {
-  RAY_CHECK(connected_, "Attempted to perform register_function but failed.");
-  ClientContext context;
-  RegisterRemoteFunctionRequest request;
-  request.set_workerid(workerid_);
-  request.set_function_name(name);
-  request.set_num_return_vals(num_return_vals);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->RegisterRemoteFunction(&context, request, &reply));
-}
-
-void Worker::notify_failure(FailedType type, const std::string& name, const std::string& error_message) {
-  RAY_CHECK(connected_, "Attempted to perform notify_failure but failed.");
-  ClientContext context;
-  NotifyFailureRequest request;
-  request.mutable_failure()->set_type(type);
-  request.mutable_failure()->set_workerid(workerid_);
-  request.mutable_failure()->set_worker_address(worker_address_);
-  request.mutable_failure()->set_name(name);
-  request.mutable_failure()->set_error_message(error_message);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->NotifyFailure(&context, request, &reply));
-}
-
-std::unique_ptr<WorkerMessage> Worker::receive_next_message() {
-  WorkerMessage* message_ptr;
-  RAY_CHECK(receive_queue_.receive(&message_ptr), "error receiving over IPC");
-  return std::unique_ptr<WorkerMessage>(message_ptr);
-}
-
-void Worker::ready_for_new_task() {
-  RAY_CHECK(connected_, "Attempted to perform ready_for_new_task but failed.");
-  ClientContext context;
-  ReadyForNewTaskRequest request;
-  request.set_workerid(workerid_);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->ReadyForNewTask(&context, request, &reply));
-}
-
-void Worker::disconnect() {
-  connected_ = false;
-  // Shut down the worker service. This will cause the call to server->Wait() to
-  // return.
-  // server_ptr_->Shutdown();
-  // Wait for the thread that launched the worker service to return.
-  // worker_server_thread_.join();
-}
-
-// TODO(rkn): Should we be using pointers or references? And should they be const?
-void Worker::scheduler_info(ClientContext &context, SchedulerInfoRequest &request, SchedulerInfoReply &reply) {
-  RAY_CHECK(connected_, "Attempted to get scheduler info but failed.");
-  RAY_CHECK_GRPC(scheduler_stub_->SchedulerInfo(&context, request, &reply));
-}
-
-void Worker::task_info(ClientContext &context, TaskInfoRequest &request, TaskInfoReply &reply) {
-  RAY_CHECK(connected_, "Attempted to get worker info but failed.");
-  RAY_CHECK_GRPC(scheduler_stub_->TaskInfo(&context, request, &reply));
-}
-
-std::vector<int> Worker::wait(std::vector<ObjectID>& objectids) {
-  RAY_CHECK(connected_, "Attempted to test if object was ready but failed.");
-  ClientContext context;
-  WaitRequest request;
-  WaitReply reply;
-  for (int i = 0; i < objectids.size(); ++i) {
-    request.add_objectids(objectids[i]);
-  }
-  RAY_CHECK_GRPC(scheduler_stub_->Wait(&context, request, &reply));
-  std::vector<int> result;
-  for (int i = 0; i < reply.indices_size(); ++i) {
-    result.push_back(reply.indices(i));
-  }
-  return result;
-}
-
-void Worker::run_function_on_all_workers(const std::string& function) {
-  RAY_CHECK(connected_, "Attempted to run function on all workers but failed.");
-  ClientContext context;
-  RunFunctionOnAllWorkersRequest request;
-  request.mutable_function()->set_implementation(function);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->RunFunctionOnAllWorkers(&context, request, &reply));
-}
-
-bool Worker::export_remote_function(const std::string& function_name, const std::string& function) {
-  RAY_CHECK(connected_, "Attempted to export function but failed.");
-  ClientContext context;
-  ExportRemoteFunctionRequest request;
-  request.mutable_function()->set_name(function_name);
-  request.mutable_function()->set_implementation(function);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->ExportRemoteFunction(&context, request, &reply));
-  return true;
-}
-
-void Worker::export_reusable_variable(const std::string& name, const std::string& initializer, const std::string& reinitializer) {
-  RAY_CHECK(connected_, "Attempted to export reusable variable but failed.");
-  ClientContext context;
-  ExportReusableVariableRequest request;
-  request.mutable_reusable_variable()->set_name(name);
-  request.mutable_reusable_variable()->mutable_initializer()->set_implementation(initializer);
-  request.mutable_reusable_variable()->mutable_reinitializer()->set_implementation(reinitializer);
-  AckReply reply;
-  RAY_CHECK_GRPC(scheduler_stub_->ExportReusableVariable(&context, request, &reply));
-}
-
-// Communication between the WorkerServer and the Worker happens via a message
-// queue. This is because the Python interpreter needs to be single threaded
-// (in our case running in the main thread), whereas the WorkerService will
-// run in a separate thread and potentially utilize multiple threads.
-void Worker::start_worker_service(Mode mode) {
-  // Use atomics so the worker service thread can signal the outside thread that
-  // the worker service has been started.
-  std::atomic_bool worker_service_started;
-  worker_service_started.store(false);
-  // Launch a new thread for running the worker service. We store this as a
-  // field so that we can clean it up when we disconnect the worker.
-  worker_server_thread_ = std::thread([this, mode, &worker_service_started]() {
-    // Create the worker service.
-    WorkerServiceImpl service(receive_queue_name_, mode);
-    ServerBuilder builder;
-    // Let GRPC choose an unused port.
-    int port;
-    builder.AddListeningPort(std::string("0.0.0.0:0"), grpc::InsecureServerCredentials(), &port);
-    builder.RegisterService(&service);
-    std::unique_ptr<Server> server(builder.BuildAndStart());
-    if (server == nullptr) {
-      RAY_CHECK(false, "Failed to create the worker service.");
-    }
-    worker_address_ = node_ip_address_ + ":" + std::to_string(port);
-    server_ptr_ = server.get();
-    RAY_LOG(RAY_INFO, "worker server listening at " << worker_address_);
-    worker_service_started.store(true);
-    // Wait for work and process work. This method does not return until
-    // Shutdown is called from a different thread.
-    server->Wait();
-    RAY_LOG(RAY_INFO, "Worker service thread returning.")
-  });
-  // Wait for the worker service to start. This essentially implements a
-  // condition variable using atomics, but that failed on Mac OS X on Travis.
-  while (!worker_service_started.load()) {
-    RAY_LOG(RAY_INFO, "Looping while waiting for the worker service to start.");
-    std::this_thread::sleep_for(std::chrono::milliseconds(100));
-  }
-}
diff --git a/src/worker.h b/src/worker.h
deleted file mode 100644
index 18149972c..000000000
--- a/src/worker.h
+++ /dev/null
@@ -1,145 +0,0 @@
-#ifndef RAY_WORKER_H
-#define RAY_WORKER_H
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <thread>
-
-#include <grpc++/grpc++.h>
-
-#include <Python.h>
-
-using grpc::Server;
-using grpc::ServerBuilder;
-using grpc::ServerContext;
-using grpc::Status;
-
-#include "ray.grpc.pb.h"
-#include "ray/ray.h"
-#include "ipc.h"
-
-using grpc::Channel;
-using grpc::ClientContext;
-using grpc::ClientWriter;
-
-// These three constants are used to define the mode that a worker is running
-// in. Right now, this is mostly used for determining how to print information
-// about task failures.
-enum Mode {SCRIPT_MODE, WORKER_MODE, PYTHON_MODE, SILENT_MODE};
-
-class WorkerServiceImpl final : public WorkerService::Service {
-public:
-  WorkerServiceImpl(const std::string& worker_address, Mode mode);
-  Status ExecuteTask(ServerContext* context, const ExecuteTaskRequest* request, AckReply* reply) override;
-  Status RunFunctionOnWorker(ServerContext* context, const RunFunctionOnWorkerRequest* request, AckReply* reply) override;
-  Status ImportRemoteFunction(ServerContext* context, const ImportRemoteFunctionRequest* request, AckReply* reply) override;
-  Status Die(ServerContext* context, const DieRequest* request, AckReply* reply) override;
-  Status ImportReusableVariable(ServerContext* context, const ImportReusableVariableRequest* request, AckReply* reply) override;
-  Status PrintErrorMessage(ServerContext* context, const PrintErrorMessageRequest* request, AckReply* reply) override;
-private:
-  // The queue used to send commands from the worker service to the worker. This
-  // corresponds to the receive_queue_ in the worker.
-  MessageQueue<WorkerMessage*> send_queue_;
-  // This is true if the worker service is part of a driver process and false
-  // if it is part of a worker process.
-  Mode mode_;
-};
-
-class Worker {
- public:
-  Worker(const std::string& node_ip_address, const std::string& scheduler_address, Mode mode);
-
-  // Submit a remote task to the scheduler. If the function in the task is not
-  // registered with the scheduler, we will sleep for retry_wait_milliseconds
-  // and try to resubmit the task to the scheduler up to max_retries more times.
-  SubmitTaskReply submit_task(SubmitTaskRequest* request, int max_retries = 10, int retry_wait_milliseconds = 500);
-  // Requests the scheduler to kill workers
-  bool kill_workers(ClientContext &context);
-  // send request to the scheduler to register this worker
-  void register_worker(const std::string& ip_address, const std::string& objstore_address, bool is_driver);
-  // get a new object ID that is registered with the scheduler
-  ObjectID get_objectid();
-  // request an object to be delivered to the local object store
-  void request_object(ObjectID objectid);
-  // Notify the scheduler about the object IDs contained within a remote object.
-  void add_contained_objectids(ObjectID objectid, std::vector<ObjectID> &contained_objectids);
-  // Allocates buffer for objectid with size of size
-  const char* allocate_buffer(ObjectID objectid, int64_t size, SegmentId& segmentid);
-  // Finishes buffer with segmentid and an offset of metadata_ofset
-  PyObject* finish_buffer(ObjectID objectid, SegmentId segmentid, int64_t metadata_offset);
-  // Gets the buffer for objectid
-  const char* get_buffer(ObjectID objectid, int64_t& size, SegmentId& segmentid, int64_t& metadata_offset);
-  // determine if the object stored in objectid is an arrow object // TODO(pcm): more general mechanism for this?
-  bool is_arrow(ObjectID objectid);
-  // unmap the segment containing an object from the local address space
-  void unmap_object(ObjectID objectid);
-  // make `alias_objectid` refer to the same object that `target_objectid` refers to
-  void alias_objectids(ObjectID alias_objectid, ObjectID target_objectid);
-  // increment the reference count for objectid
-  void increment_reference_count(std::vector<ObjectID> &objectid);
-  // decrement the reference count for objectid
-  void decrement_reference_count(std::vector<ObjectID> &objectid);
-  // Notify the scheduler that a remote function has been imported successfully.
-  void register_remote_function(const std::string& name, size_t num_return_vals);
-  // Notify the scheduler that a failure has occurred.
-  void notify_failure(FailedType type, const std::string& name, const std::string& error_message);
-  // Start the worker server which accepts commands from the scheduler. For
-  // workers, these commands are stored in the message queue, which is read by
-  // the Python interpreter. For drivers, these commands are only for printing
-  // error messages.
-  void start_worker_service(Mode mode);
-  // wait for next task from the RPC system. If null, it means there are no more tasks and the worker should shut down.
-  std::unique_ptr<WorkerMessage> receive_next_message();
-  // Tell the scheduler that the worker is ready for a new task.
-  void ready_for_new_task();
-  // disconnect the worker
-  void disconnect();
-  // return connected_
-  bool connected() { return connected_; }
-  // get info about scheduler state
-  void scheduler_info(ClientContext &context, SchedulerInfoRequest &request, SchedulerInfoReply &reply);
-  // get task statuses from scheduler
-  void task_info(ClientContext &context, TaskInfoRequest &request, TaskInfoReply &reply);
-  // gets indices of available objects
-  std::vector<int> wait(std::vector<ObjectID>& objectids);
-  // Export a function to be run on all workers.
-  void run_function_on_all_workers(const std::string& function);
-  // export function to workers
-  bool export_remote_function(const std::string& function_name, const std::string& function);
-  // export reusable variable to workers
-  void export_reusable_variable(const std::string& name, const std::string& initializer, const std::string& reinitializer);
-  // return the worker address
-  const char* get_worker_address() { return worker_address_.c_str(); }
-
- private:
-  Mode mode_;
-  bool connected_;
-  const size_t CHUNK_SIZE = 8 * 1024;
-  std::unique_ptr<Scheduler::Stub> scheduler_stub_;
-  Server* server_ptr_;
-  std::thread worker_server_thread_;
-  bip::managed_shared_memory segment_;
-  WorkerId workerid_;
-  ObjStoreId objstoreid_;
-  std::string scheduler_address_;
-  std::string objstore_address_;
-  std::string worker_address_;
-  std::string node_ip_address_;
-  // The queue used to send commands from the worker service to the worker.
-  // This queue is created by the worker. This corresponds to the send_queue_ in
-  // the worker service.
-  MessageQueue<WorkerMessage*> receive_queue_;
-  // The name of the receive queue.
-  std::string receive_queue_name_;
-  // The queue used to send requests to the object store. There is a single
-  // queue shared by all workers sending requests to the object store, and this
-  // queue is created by the object store.
-  MessageQueue<ObjRequest> request_obj_queue_;
-  // The queue used to receive object addresses from the object store. This
-  // queue is created by this worker.
-  MessageQueue<ObjHandle> receive_obj_queue_;
-  std::shared_ptr<MemorySegmentPool> segmentpool_;
-};
-
-#endif
diff --git a/thirdparty/grpc b/thirdparty/grpc
deleted file mode 160000
index 2a69139aa..000000000
--- a/thirdparty/grpc
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 2a69139aa7f609e439c24a46754252a5f9d37500
diff --git a/thirdparty/hiredis b/thirdparty/hiredis
deleted file mode 160000
index 5f98e1d35..000000000
--- a/thirdparty/hiredis
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 5f98e1d35dcf00a026793ada2662f6e1ba77eb17
diff --git a/thirdparty/numbuf b/thirdparty/numbuf
deleted file mode 160000
index 7055c6f79..000000000
--- a/thirdparty/numbuf
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 7055c6f793f8b0aadb71cef9c81dce615e0cc77f
diff --git a/thirdparty/python b/thirdparty/python
deleted file mode 160000
index 3f8fa0052..000000000
--- a/thirdparty/python
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 3f8fa00528daa3e3849be251f05227842905c7a9

From 1915539c5f1da95c975256dbb3ef38137acec2db Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 25 Oct 2016 13:57:23 -0700
Subject: [PATCH 89/91] Rearrange files to prepare to merge into Ray.

---
 .clang-format                                 |   9 -
 .gitignore                                    |  38 --
 .gitmodules                                   |   3 -
 .travis.yml                                   |  50 --
 .travis/check-git-clang-format-output.sh      |  18 -
 .travis/git-clang-format                      | 476 ------------------
 LICENSE                                       | 201 --------
 install-dependencies.sh                       |  21 -
 Makefile => src/common/Makefile               |   0
 {build => src/common/build}/.gitkeep          |   0
 common.c => src/common/common.c               |   0
 common.h => src/common/common.h               |   0
 {doc => src/common/doc}/tasks.md              |   0
 event_loop.c => src/common/event_loop.c       |   0
 event_loop.h => src/common/event_loop.h       |   0
 io.c => src/common/io.c                       |   0
 io.h => src/common/io.h                       |   0
 .../common/lib}/python/common_extension.c     |   0
 .../common/lib}/python/common_extension.h     |   0
 .../common/lib}/python/common_module.c        |   0
 {lib => src/common/lib}/python/setup.py       |   0
 logging.c => src/common/logging.c             |   0
 logging.h => src/common/logging.h             |   0
 {state => src/common/state}/db.h              |   0
 {state => src/common/state}/object_table.h    |   0
 {state => src/common/state}/redis.c           |   0
 {state => src/common/state}/redis.h           |   0
 {state => src/common/state}/task_log.h        |   0
 {state => src/common/state}/task_table.h      |   0
 task.c => src/common/task.c                   |   0
 task.h => src/common/task.h                   |   0
 {test => src/common/test}/common_tests.c      |   0
 {test => src/common/test}/db_tests.c          |   0
 {test => src/common/test}/example_task.h      |   0
 {test => src/common/test}/io_tests.c          |   0
 {test => src/common/test}/redis_tests.c       |   0
 {test => src/common/test}/task_tests.c        |   0
 {test => src/common/test}/test.py             |   0
 {thirdparty => src/common/thirdparty}/ae/ae.c |   0
 {thirdparty => src/common/thirdparty}/ae/ae.h |   0
 .../common/thirdparty}/ae/ae_epoll.c          |   0
 .../common/thirdparty}/ae/ae_evport.c         |   0
 .../common/thirdparty}/ae/ae_kqueue.c         |   0
 .../common/thirdparty}/ae/ae_select.c         |   0
 .../common/thirdparty}/ae/config.h            |   0
 .../common/thirdparty}/ae/zmalloc.h           |   0
 .../common/thirdparty}/build-redis.sh         |   0
 .../common/thirdparty}/greatest.h             |   0
 {thirdparty => src/common/thirdparty}/hiredis |   0
 .../common/thirdparty}/utarray.h              |   0
 .../common/thirdparty}/uthash.h               |   0
 .../common/thirdparty}/utlist.h               |   0
 .../common/thirdparty}/utstring.h             |   0
 53 files changed, 816 deletions(-)
 delete mode 100644 .clang-format
 delete mode 100644 .gitignore
 delete mode 100644 .gitmodules
 delete mode 100644 .travis.yml
 delete mode 100755 .travis/check-git-clang-format-output.sh
 delete mode 100755 .travis/git-clang-format
 delete mode 100644 LICENSE
 delete mode 100755 install-dependencies.sh
 rename Makefile => src/common/Makefile (100%)
 rename {build => src/common/build}/.gitkeep (100%)
 rename common.c => src/common/common.c (100%)
 rename common.h => src/common/common.h (100%)
 rename {doc => src/common/doc}/tasks.md (100%)
 rename event_loop.c => src/common/event_loop.c (100%)
 rename event_loop.h => src/common/event_loop.h (100%)
 rename io.c => src/common/io.c (100%)
 rename io.h => src/common/io.h (100%)
 rename {lib => src/common/lib}/python/common_extension.c (100%)
 rename {lib => src/common/lib}/python/common_extension.h (100%)
 rename {lib => src/common/lib}/python/common_module.c (100%)
 rename {lib => src/common/lib}/python/setup.py (100%)
 rename logging.c => src/common/logging.c (100%)
 rename logging.h => src/common/logging.h (100%)
 rename {state => src/common/state}/db.h (100%)
 rename {state => src/common/state}/object_table.h (100%)
 rename {state => src/common/state}/redis.c (100%)
 rename {state => src/common/state}/redis.h (100%)
 rename {state => src/common/state}/task_log.h (100%)
 rename {state => src/common/state}/task_table.h (100%)
 rename task.c => src/common/task.c (100%)
 rename task.h => src/common/task.h (100%)
 rename {test => src/common/test}/common_tests.c (100%)
 rename {test => src/common/test}/db_tests.c (100%)
 rename {test => src/common/test}/example_task.h (100%)
 rename {test => src/common/test}/io_tests.c (100%)
 rename {test => src/common/test}/redis_tests.c (100%)
 rename {test => src/common/test}/task_tests.c (100%)
 rename {test => src/common/test}/test.py (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae.c (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae.h (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae_epoll.c (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae_evport.c (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae_kqueue.c (100%)
 rename {thirdparty => src/common/thirdparty}/ae/ae_select.c (100%)
 rename {thirdparty => src/common/thirdparty}/ae/config.h (100%)
 rename {thirdparty => src/common/thirdparty}/ae/zmalloc.h (100%)
 rename {thirdparty => src/common/thirdparty}/build-redis.sh (100%)
 rename {thirdparty => src/common/thirdparty}/greatest.h (100%)
 rename {thirdparty => src/common/thirdparty}/hiredis (100%)
 rename {thirdparty => src/common/thirdparty}/utarray.h (100%)
 rename {thirdparty => src/common/thirdparty}/uthash.h (100%)
 rename {thirdparty => src/common/thirdparty}/utlist.h (100%)
 rename {thirdparty => src/common/thirdparty}/utstring.h (100%)

diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index 89b87e25d..000000000
--- a/.clang-format
+++ /dev/null
@@ -1,9 +0,0 @@
-BasedOnStyle: Chromium
-DerivePointerAlignment: true
-IndentCaseLabels: false
-PointerAlignment: Right
-SpaceAfterCStyleCast: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: false
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index fff8ef269..000000000
--- a/.gitignore
+++ /dev/null
@@ -1,38 +0,0 @@
-*~
-
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
-*.su
-
-# Build files
-build/*
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 4026f8268..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "thirdparty/hiredis"]
-	path = thirdparty/hiredis
-	url = https://github.com/redis/hiredis
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d0e14edf5..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-sudo: required
-
-language: generic
-
-matrix:
-  include:
-    - os: linux
-      dist: trusty
-      python: "2.7"
-    - os: linux
-      dist: trusty
-      python: "3.5"
-    - os: osx
-      osx_image: xcode7
-      python: "2.7"
-    - os: osx
-      osx_image: xcode7
-      python: "3.5"
-    - os: linux
-      dist: trusty
-      env: LINT=1
-      before_install:
-        # In case we ever want to use a different version of clang-format:
-        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq clang-format-3.8
-      install: []
-      script:
-        - .travis/check-git-clang-format-output.sh
-    - os: linux
-      dist: trusty
-      python: "2.7"
-      env: VALGRIND=1
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq valgrind
-      script:
-        - make valgrind
-
-install:
-  - ./install-dependencies.sh
-  - make
-  - make test
-  - cd lib/python
-  - python setup.py install --user
-  - cd ../..
-
-script:
-  - python test/test.py
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
deleted file mode 100755
index d71f78357..000000000
--- a/.travis/check-git-clang-format-output.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
-  # Not in a pull request, so compare against parent commit
-  base_commit="HEAD^"
-  echo "Running clang-format against parent commit $(git rev-parse $base_commit)"
-else
-  base_commit="$TRAVIS_BRANCH"
-  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
-fi
-output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^thirdparty/)"
-if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
-  echo "clang-format passed."
-  exit 0
-else
-  echo "clang-format failed:"
-  echo "$output"
-  exit 1
-fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
deleted file mode 100755
index 37b352835..000000000
--- a/.travis/git-clang-format
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/usr/bin/env python
-#
-#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-r"""                                                                             
-clang-format git integration                                                     
-============================                                                     
-                                                                                 
-This file provides a clang-format integration for git. Put it somewhere in your  
-path and ensure that it is executable. Then, "git clang-format" will invoke      
-clang-format on the changes in current files or a specific commit.               
-                                                                                 
-For further details, run:                                                        
-git clang-format -h                                                              
-                                                                                 
-Requires Python 2.7                                                              
-"""               
-
-import argparse
-import collections
-import contextlib
-import errno
-import os
-import re
-import subprocess
-import sys
-
-usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
-
-desc = '''
-Run clang-format on all lines that differ between the working directory
-and <commit>, which defaults to HEAD.  Changes are only applied to the working
-directory.
-The following git-config settings set the default of the corresponding option:
-  clangFormat.binary
-  clangFormat.commit
-  clangFormat.extension
-  clangFormat.style
-'''
-
-# Name of the temporary index file in which save the output of clang-format.
-# This file is created within the .git directory.
-temp_index_basename = 'clang-format-index'
-
-
-Range = collections.namedtuple('Range', 'start, count')
-
-
-def main():
-  config = load_git_config()
-
-  # In order to keep '--' yet allow options after positionals, we need to
-  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
-  # nargs=argparse.REMAINDER disallows options after positionals.)
-  argv = sys.argv[1:]
-  try:
-    idx = argv.index('--')
-  except ValueError:
-    dash_dash = []
-  else:
-    dash_dash = argv[idx:]
-    argv = argv[:idx]
-
-  default_extensions = ','.join([
-      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
-      'c', 'h',  # C
-      'm',  # ObjC
-      'mm',  # ObjC++
-      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
-      # Other languages that clang-format supports
-      'proto', 'protodevel',  # Protocol Buffers
-      'js',  # JavaScript
-      'ts',  # TypeScript
-      ])
-
-  p = argparse.ArgumentParser(
-    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
-    description=desc)
-  p.add_argument('--binary',
-                 default=config.get('clangformat.binary', 'clang-format'),
-                 help='path to clang-format'),
-  p.add_argument('--commit',
-                 default=config.get('clangformat.commit', 'HEAD'),
-                 help='default commit to use if none is specified'),
-  p.add_argument('--diff', action='store_true',
-                 help='print a diff instead of applying the changes')
-  p.add_argument('--extensions',
-                 default=config.get('clangformat.extensions',
-                                    default_extensions),
-                 help=('comma-separated list of file extensions to format, '
-                       'excluding the period and case-insensitive')),
-  p.add_argument('--exclude', help='Exclude files matching this regex.')
-  p.add_argument('-f', '--force', action='store_true',
-                 help='allow changes to unstaged files')
-  p.add_argument('-p', '--patch', action='store_true',
-                 help='select hunks interactively')
-  p.add_argument('-q', '--quiet', action='count', default=0,
-                 help='print less information')
-  p.add_argument('--style',
-                 default=config.get('clangformat.style', None),
-                 help='passed to clang-format'),
-  p.add_argument('-v', '--verbose', action='count', default=0,
-                 help='print extra information')
-  # We gather all the remaining positional arguments into 'args' since we need
-  # to use some heuristics to determine whether or not <commit> was present.
-  # However, to print pretty messages, we make use of metavar and help.
-  p.add_argument('args', nargs='*', metavar='<commit>',
-                 help='revision from which to compute the diff')
-  p.add_argument('ignored', nargs='*', metavar='<file>...',
-                 help='if specified, only consider differences in these files')
-  opts = p.parse_args(argv)
-
-  opts.verbose -= opts.quiet
-  del opts.quiet
-
-  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
-  changed_lines = compute_diff_and_extract_lines(commit, files)
-  if opts.verbose >= 1:
-    ignored_files = set(changed_lines)
-  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
-  if opts.exclude:
-    for filename in changed_lines.keys():
-      if re.match(opts.exclude, filename):
-        del changed_lines[filename]
-  if opts.verbose >= 1:
-    ignored_files.difference_update(changed_lines)
-    if ignored_files:
-      print 'Ignoring changes in the following files:'
-      for filename in ignored_files:
-        print '   ', filename
-    if changed_lines:
-      print 'Running clang-format on the following files:'
-      for filename in changed_lines:
-        print '   ', filename
-  if not changed_lines:
-    print 'no modified files to format'
-    return
-  # The computed diff outputs absolute paths, so we must cd before accessing
-  # those files.
-  cd_to_toplevel()
-  old_tree = create_tree_from_workdir(changed_lines)
-  new_tree = run_clang_format_and_save_to_tree(changed_lines,
-                                               binary=opts.binary,
-                                               style=opts.style)
-  if opts.verbose >= 1:
-    print 'old tree:', old_tree
-    print 'new tree:', new_tree
-  if old_tree == new_tree:
-    if opts.verbose >= 0:
-      print 'clang-format did not modify any files'
-  elif opts.diff:
-    print_diff(old_tree, new_tree)
-  else:
-    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
-                                  patch_mode=opts.patch)
-    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
-      print 'changed files:'
-      for filename in changed_files:
-        print '   ', filename
-
-
-def load_git_config(non_string_options=None):
-  """Return the git configuration as a dictionary.
-  All options are assumed to be strings unless in `non_string_options`, in which
-  is a dictionary mapping option name (in lower case) to either "--bool" or
-  "--int"."""
-  if non_string_options is None:
-    non_string_options = {}
-  out = {}
-  for entry in run('git', 'config', '--list', '--null').split('\0'):
-    if entry:
-      name, value = entry.split('\n', 1)
-      if name in non_string_options:
-        value = run('git', 'config', non_string_options[name], name)
-      out[name] = value
-  return out
-
-
-def interpret_args(args, dash_dash, default_commit):
-  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
-  It is assumed that "--" and everything that follows has been removed from
-  args and placed in `dash_dash`.
-  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
-  left (if present) is taken as commit.  Otherwise, the first argument is
-  checked if it is a commit or a file.  If commit is not given,
-  `default_commit` is used."""
-  if dash_dash:
-    if len(args) == 0:
-      commit = default_commit
-    elif len(args) > 1:
-      die('at most one commit allowed; %d given' % len(args))
-    else:
-      commit = args[0]
-    object_type = get_object_type(commit)
-    if object_type not in ('commit', 'tag'):
-      if object_type is None:
-        die("'%s' is not a commit" % commit)
-      else:
-        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
-    files = dash_dash[1:]
-  elif args:
-    if disambiguate_revision(args[0]):
-      commit = args[0]
-      files = args[1:]
-    else:
-      commit = default_commit
-      files = args
-  else:
-    commit = default_commit
-    files = []
-  return commit, files
-
-
-def disambiguate_revision(value):
-  """Returns True if `value` is a revision, False if it is a file, or dies."""
-  # If `value` is ambiguous (neither a commit nor a file), the following
-  # command will die with an appropriate error message.
-  run('git', 'rev-parse', value, verbose=False)
-  object_type = get_object_type(value)
-  if object_type is None:
-    return False
-  if object_type in ('commit', 'tag'):
-    return True
-  die('`%s` is a %s, but a commit or filename was expected' %
-      (value, object_type))
-
-
-def get_object_type(value):
-  """Returns a string description of an object's type, or None if it is not
-  a valid git object."""
-  cmd = ['git', 'cat-file', '-t', value]
-  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-  stdout, stderr = p.communicate()
-  if p.returncode != 0:
-    return None
-  return stdout.strip()
-
-
-def compute_diff_and_extract_lines(commit, files):
-  """Calls compute_diff() followed by extract_lines()."""
-  diff_process = compute_diff(commit, files)
-  changed_lines = extract_lines(diff_process.stdout)
-  diff_process.stdout.close()
-  diff_process.wait()
-  if diff_process.returncode != 0:
-    # Assume error was already printed to stderr.
-    sys.exit(2)
-  return changed_lines
-
-
-def compute_diff(commit, files):
-  """Return a subprocess object producing the diff from `commit`.
-  The return value's `stdin` file object will produce a patch with the
-  differences between the working directory and `commit`, filtered on `files`
-  (if non-empty).  Zero context lines are used in the patch."""
-  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
-  cmd.extend(files)
-  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-  p.stdin.close()
-  return p
-
-
-def extract_lines(patch_file):
-  """Extract the changed lines in `patch_file`.
-  The return value is a dictionary mapping filename to a list of (start_line,
-  line_count) pairs.
-  The input must have been produced with ``-U0``, meaning unidiff format with
-  zero lines of context.  The return value is a dict mapping filename to a
-  list of line `Range`s."""
-  matches = {}
-  for line in patch_file:
-    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
-    if match:
-      filename = match.group(1).rstrip('\r\n')
-    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
-    if match:
-      start_line = int(match.group(1))
-      line_count = 1
-      if match.group(3):
-        line_count = int(match.group(3))
-      if line_count > 0:
-        matches.setdefault(filename, []).append(Range(start_line, line_count))
-  return matches
-
-
-def filter_by_extension(dictionary, allowed_extensions):
-  """Delete every key in `dictionary` that doesn't have an allowed extension.
-  `allowed_extensions` must be a collection of lowercase file extensions,
-  excluding the period."""
-  allowed_extensions = frozenset(allowed_extensions)
-  for filename in dictionary.keys():
-    base_ext = filename.rsplit('.', 1)
-    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
-      del dictionary[filename]
-
-
-def cd_to_toplevel():
-  """Change to the top level of the git repository."""
-  toplevel = run('git', 'rev-parse', '--show-toplevel')
-  os.chdir(toplevel)
-
-
-def create_tree_from_workdir(filenames):
-  """Create a new git tree with the given files from the working directory.
-  Returns the object ID (SHA-1) of the created tree."""
-  return create_tree(filenames, '--stdin')
-
-
-def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
-                                      style=None):
-  """Run clang-format on each file and save the result to a git tree.
-  Returns the object ID (SHA-1) of the created tree."""
-  def index_info_generator():
-    for filename, line_ranges in changed_lines.iteritems():
-      mode = oct(os.stat(filename).st_mode)
-      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
-                                     style=style)
-      yield '%s %s\t%s' % (mode, blob_id, filename)
-  return create_tree(index_info_generator(), '--index-info')
-
-
-def create_tree(input_lines, mode):
-  """Create a tree object from the given input.
-  If mode is '--stdin', it must be a list of filenames.  If mode is
-  '--index-info' is must be a list of values suitable for "git update-index
-  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
-  is invalid."""
-  assert mode in ('--stdin', '--index-info')
-  cmd = ['git', 'update-index', '--add', '-z', mode]
-  with temporary_index_file():
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
-    for line in input_lines:
-      p.stdin.write('%s\0' % line)
-    p.stdin.close()
-    if p.wait() != 0:
-      die('`%s` failed' % ' '.join(cmd))
-    tree_id = run('git', 'write-tree')
-    return tree_id
-
-
-def clang_format_to_blob(filename, line_ranges, binary='clang-format',
-                         style=None):
-  """Run clang-format on the given file and save the result to a git blob.
-  Returns the object ID (SHA-1) of the created blob."""
-  clang_format_cmd = [binary, filename]
-  if style:
-    clang_format_cmd.extend(['-style='+style])
-  clang_format_cmd.extend([
-      '-lines=%s:%s' % (start_line, start_line+line_count-1)
-      for start_line, line_count in line_ranges])
-  try:
-    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
-                                    stdout=subprocess.PIPE)
-  except OSError as e:
-    if e.errno == errno.ENOENT:
-      die('cannot find executable "%s"' % binary)
-    else:
-      raise
-  clang_format.stdin.close()
-  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
-  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
-                                 stdout=subprocess.PIPE)
-  clang_format.stdout.close()
-  stdout = hash_object.communicate()[0]
-  if hash_object.returncode != 0:
-    die('`%s` failed' % ' '.join(hash_object_cmd))
-  if clang_format.wait() != 0:
-    die('`%s` failed' % ' '.join(clang_format_cmd))
-  return stdout.rstrip('\r\n')
-
-
-@contextlib.contextmanager
-def temporary_index_file(tree=None):
-  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
-  the file afterward."""
-  index_path = create_temporary_index(tree)
-  old_index_path = os.environ.get('GIT_INDEX_FILE')
-  os.environ['GIT_INDEX_FILE'] = index_path
-  try:
-    yield
-  finally:
-    if old_index_path is None:
-      del os.environ['GIT_INDEX_FILE']
-    else:
-      os.environ['GIT_INDEX_FILE'] = old_index_path
-    os.remove(index_path)
-
-
-def create_temporary_index(tree=None):
-  """Create a temporary index file and return the created file's path.
-  If `tree` is not None, use that as the tree to read in.  Otherwise, an
-  empty index is created."""
-  gitdir = run('git', 'rev-parse', '--git-dir')
-  path = os.path.join(gitdir, temp_index_basename)
-  if tree is None:
-    tree = '--empty'
-  run('git', 'read-tree', '--index-output='+path, tree)
-  return path
-
-
-def print_diff(old_tree, new_tree):
-  """Print the diff between the two trees to stdout."""
-  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
-  # is expected to be viewed by the user, and only the former does nice things
-  # like color and pagination.
-  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
-
-
-def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
-  """Apply the changes in `new_tree` to the working directory.
-  Bails if there are local changes in those files and not `force`.  If
-  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
-  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
-                      new_tree).rstrip('\0').split('\0')
-  if not force:
-    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
-    if unstaged_files:
-      print >>sys.stderr, ('The following files would be modified but '
-                           'have unstaged changes:')
-      print >>sys.stderr, unstaged_files
-      print >>sys.stderr, 'Please commit, stage, or stash them first.'
-      sys.exit(2)
-  if patch_mode:
-    # In patch mode, we could just as well create an index from the new tree
-    # and checkout from that, but then the user will be presented with a
-    # message saying "Discard ... from worktree".  Instead, we use the old
-    # tree as the index and checkout from new_tree, which gives the slightly
-    # better message, "Apply ... to index and worktree".  This is not quite
-    # right, since it won't be applied to the user's index, but oh well.
-    with temporary_index_file(old_tree):
-      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
-    index_tree = old_tree
-  else:
-    with temporary_index_file(new_tree):
-      run('git', 'checkout-index', '-a', '-f')
-  return changed_files
-
-
-def run(*args, **kwargs):
-  stdin = kwargs.pop('stdin', '')
-  verbose = kwargs.pop('verbose', True)
-  strip = kwargs.pop('strip', True)
-  for name in kwargs:
-    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
-  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                       stdin=subprocess.PIPE)
-  stdout, stderr = p.communicate(input=stdin)
-  if p.returncode == 0:
-    if stderr:
-      if verbose:
-        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
-      print >>sys.stderr, stderr.rstrip()
-    if strip:
-      stdout = stdout.rstrip('\r\n')
-    return stdout
-  if verbose:
-    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
-  if stderr:
-    print >>sys.stderr, stderr.rstrip()
-  sys.exit(2)
-
-
-def die(message):
-  print >>sys.stderr, 'error:', message
-  sys.exit(2)
-
-
-if __name__ == '__main__':
-  main()
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 8dada3eda..000000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/install-dependencies.sh b/install-dependencies.sh
deleted file mode 100755
index f84da1684..000000000
--- a/install-dependencies.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-
-platform="unknown"
-unamestr="$(uname)"
-if [[ "$unamestr" == "Linux" ]]; then
-  echo "Platform is linux."
-  platform="linux"
-elif [[ "$unamestr" == "Darwin" ]]; then
-  echo "Platform is macosx."
-  platform="macosx"
-else
-  echo "Unrecognized platform."
-  exit 1
-fi
-
-if [[ $platform == "linux" ]]; then
-  sudo apt-get update
-  sudo apt-get install -y git python-dev
-fi
diff --git a/Makefile b/src/common/Makefile
similarity index 100%
rename from Makefile
rename to src/common/Makefile
diff --git a/build/.gitkeep b/src/common/build/.gitkeep
similarity index 100%
rename from build/.gitkeep
rename to src/common/build/.gitkeep
diff --git a/common.c b/src/common/common.c
similarity index 100%
rename from common.c
rename to src/common/common.c
diff --git a/common.h b/src/common/common.h
similarity index 100%
rename from common.h
rename to src/common/common.h
diff --git a/doc/tasks.md b/src/common/doc/tasks.md
similarity index 100%
rename from doc/tasks.md
rename to src/common/doc/tasks.md
diff --git a/event_loop.c b/src/common/event_loop.c
similarity index 100%
rename from event_loop.c
rename to src/common/event_loop.c
diff --git a/event_loop.h b/src/common/event_loop.h
similarity index 100%
rename from event_loop.h
rename to src/common/event_loop.h
diff --git a/io.c b/src/common/io.c
similarity index 100%
rename from io.c
rename to src/common/io.c
diff --git a/io.h b/src/common/io.h
similarity index 100%
rename from io.h
rename to src/common/io.h
diff --git a/lib/python/common_extension.c b/src/common/lib/python/common_extension.c
similarity index 100%
rename from lib/python/common_extension.c
rename to src/common/lib/python/common_extension.c
diff --git a/lib/python/common_extension.h b/src/common/lib/python/common_extension.h
similarity index 100%
rename from lib/python/common_extension.h
rename to src/common/lib/python/common_extension.h
diff --git a/lib/python/common_module.c b/src/common/lib/python/common_module.c
similarity index 100%
rename from lib/python/common_module.c
rename to src/common/lib/python/common_module.c
diff --git a/lib/python/setup.py b/src/common/lib/python/setup.py
similarity index 100%
rename from lib/python/setup.py
rename to src/common/lib/python/setup.py
diff --git a/logging.c b/src/common/logging.c
similarity index 100%
rename from logging.c
rename to src/common/logging.c
diff --git a/logging.h b/src/common/logging.h
similarity index 100%
rename from logging.h
rename to src/common/logging.h
diff --git a/state/db.h b/src/common/state/db.h
similarity index 100%
rename from state/db.h
rename to src/common/state/db.h
diff --git a/state/object_table.h b/src/common/state/object_table.h
similarity index 100%
rename from state/object_table.h
rename to src/common/state/object_table.h
diff --git a/state/redis.c b/src/common/state/redis.c
similarity index 100%
rename from state/redis.c
rename to src/common/state/redis.c
diff --git a/state/redis.h b/src/common/state/redis.h
similarity index 100%
rename from state/redis.h
rename to src/common/state/redis.h
diff --git a/state/task_log.h b/src/common/state/task_log.h
similarity index 100%
rename from state/task_log.h
rename to src/common/state/task_log.h
diff --git a/state/task_table.h b/src/common/state/task_table.h
similarity index 100%
rename from state/task_table.h
rename to src/common/state/task_table.h
diff --git a/task.c b/src/common/task.c
similarity index 100%
rename from task.c
rename to src/common/task.c
diff --git a/task.h b/src/common/task.h
similarity index 100%
rename from task.h
rename to src/common/task.h
diff --git a/test/common_tests.c b/src/common/test/common_tests.c
similarity index 100%
rename from test/common_tests.c
rename to src/common/test/common_tests.c
diff --git a/test/db_tests.c b/src/common/test/db_tests.c
similarity index 100%
rename from test/db_tests.c
rename to src/common/test/db_tests.c
diff --git a/test/example_task.h b/src/common/test/example_task.h
similarity index 100%
rename from test/example_task.h
rename to src/common/test/example_task.h
diff --git a/test/io_tests.c b/src/common/test/io_tests.c
similarity index 100%
rename from test/io_tests.c
rename to src/common/test/io_tests.c
diff --git a/test/redis_tests.c b/src/common/test/redis_tests.c
similarity index 100%
rename from test/redis_tests.c
rename to src/common/test/redis_tests.c
diff --git a/test/task_tests.c b/src/common/test/task_tests.c
similarity index 100%
rename from test/task_tests.c
rename to src/common/test/task_tests.c
diff --git a/test/test.py b/src/common/test/test.py
similarity index 100%
rename from test/test.py
rename to src/common/test/test.py
diff --git a/thirdparty/ae/ae.c b/src/common/thirdparty/ae/ae.c
similarity index 100%
rename from thirdparty/ae/ae.c
rename to src/common/thirdparty/ae/ae.c
diff --git a/thirdparty/ae/ae.h b/src/common/thirdparty/ae/ae.h
similarity index 100%
rename from thirdparty/ae/ae.h
rename to src/common/thirdparty/ae/ae.h
diff --git a/thirdparty/ae/ae_epoll.c b/src/common/thirdparty/ae/ae_epoll.c
similarity index 100%
rename from thirdparty/ae/ae_epoll.c
rename to src/common/thirdparty/ae/ae_epoll.c
diff --git a/thirdparty/ae/ae_evport.c b/src/common/thirdparty/ae/ae_evport.c
similarity index 100%
rename from thirdparty/ae/ae_evport.c
rename to src/common/thirdparty/ae/ae_evport.c
diff --git a/thirdparty/ae/ae_kqueue.c b/src/common/thirdparty/ae/ae_kqueue.c
similarity index 100%
rename from thirdparty/ae/ae_kqueue.c
rename to src/common/thirdparty/ae/ae_kqueue.c
diff --git a/thirdparty/ae/ae_select.c b/src/common/thirdparty/ae/ae_select.c
similarity index 100%
rename from thirdparty/ae/ae_select.c
rename to src/common/thirdparty/ae/ae_select.c
diff --git a/thirdparty/ae/config.h b/src/common/thirdparty/ae/config.h
similarity index 100%
rename from thirdparty/ae/config.h
rename to src/common/thirdparty/ae/config.h
diff --git a/thirdparty/ae/zmalloc.h b/src/common/thirdparty/ae/zmalloc.h
similarity index 100%
rename from thirdparty/ae/zmalloc.h
rename to src/common/thirdparty/ae/zmalloc.h
diff --git a/thirdparty/build-redis.sh b/src/common/thirdparty/build-redis.sh
similarity index 100%
rename from thirdparty/build-redis.sh
rename to src/common/thirdparty/build-redis.sh
diff --git a/thirdparty/greatest.h b/src/common/thirdparty/greatest.h
similarity index 100%
rename from thirdparty/greatest.h
rename to src/common/thirdparty/greatest.h
diff --git a/thirdparty/hiredis b/src/common/thirdparty/hiredis
similarity index 100%
rename from thirdparty/hiredis
rename to src/common/thirdparty/hiredis
diff --git a/thirdparty/utarray.h b/src/common/thirdparty/utarray.h
similarity index 100%
rename from thirdparty/utarray.h
rename to src/common/thirdparty/utarray.h
diff --git a/thirdparty/uthash.h b/src/common/thirdparty/uthash.h
similarity index 100%
rename from thirdparty/uthash.h
rename to src/common/thirdparty/uthash.h
diff --git a/thirdparty/utlist.h b/src/common/thirdparty/utlist.h
similarity index 100%
rename from thirdparty/utlist.h
rename to src/common/thirdparty/utlist.h
diff --git a/thirdparty/utstring.h b/src/common/thirdparty/utstring.h
similarity index 100%
rename from thirdparty/utstring.h
rename to src/common/thirdparty/utstring.h

From ad55166472f46e85fe8aae273763df45eb5fa4a4 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 25 Oct 2016 14:16:23 -0700
Subject: [PATCH 90/91] Rearrange local scheduler files to prepare to merge
 into Ray.

---
 .clang-format                                 |   5 -
 .gitignore                                    |  33 --
 .gitmodules                                   |   3 -
 .travis.yml                                   |  71 ---
 .travis/check-git-clang-format-output.sh      |  18 -
 .travis/git-clang-format                      | 476 ------------------
 LICENSE                                       | 201 --------
 README.md                                     |   2 -
 common                                        |   1 -
 install-dependencies.sh                       |  21 -
 setup-env.sh                                  |   5 -
 Makefile => src/photon/Makefile               |   0
 {build => src/photon/build}/.gitkeep          |   0
 .../photon/lib}/python/photon_extension.c     |   0
 {lib => src/photon/lib}/python/setup.py       |   0
 photon.h => src/photon/photon.h               |   0
 .../photon/photon_algorithm.c                 |   0
 .../photon/photon_algorithm.h                 |   0
 photon_client.c => src/photon/photon_client.c |   0
 photon_client.h => src/photon/photon_client.h |   0
 .../photon/photon_scheduler.c                 |   0
 .../photon/photon_scheduler.h                 |   0
 {test => src/photon/test}/test.py             |   0
 23 files changed, 836 deletions(-)
 delete mode 100644 .clang-format
 delete mode 100644 .gitignore
 delete mode 100644 .gitmodules
 delete mode 100644 .travis.yml
 delete mode 100755 .travis/check-git-clang-format-output.sh
 delete mode 100755 .travis/git-clang-format
 delete mode 100644 LICENSE
 delete mode 100644 README.md
 delete mode 160000 common
 delete mode 100755 install-dependencies.sh
 delete mode 100755 setup-env.sh
 rename Makefile => src/photon/Makefile (100%)
 rename {build => src/photon/build}/.gitkeep (100%)
 rename {lib => src/photon/lib}/python/photon_extension.c (100%)
 rename {lib => src/photon/lib}/python/setup.py (100%)
 rename photon.h => src/photon/photon.h (100%)
 rename photon_algorithm.c => src/photon/photon_algorithm.c (100%)
 rename photon_algorithm.h => src/photon/photon_algorithm.h (100%)
 rename photon_client.c => src/photon/photon_client.c (100%)
 rename photon_client.h => src/photon/photon_client.h (100%)
 rename photon_scheduler.c => src/photon/photon_scheduler.c (100%)
 rename photon_scheduler.h => src/photon/photon_scheduler.h (100%)
 rename {test => src/photon/test}/test.py (100%)

diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index 3fcffcbd3..000000000
--- a/.clang-format
+++ /dev/null
@@ -1,5 +0,0 @@
-BasedOnStyle: Chromium
-DerivePointerAlignment: false
-IndentCaseLabels: false
-PointerAlignment: Right
-SpaceAfterCStyleCast: true
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index f805e810e..000000000
--- a/.gitignore
+++ /dev/null
@@ -1,33 +0,0 @@
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
-*.su
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 3f2c8add4..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "common"]
-	path = common
-	url = https://github.com/ray-project/common
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index ac6ca4a6a..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-sudo: required
-
-language: generic
-
-matrix:
-  include:
-    - os: linux
-      dist: trusty
-      python: "2.7"
-    - os: linux
-      dist: trusty
-      python: "3.5"
-    - os: osx
-      osx_image: xcode7
-      python: "2.7"
-    - os: osx
-      osx_image: xcode7
-      python: "3.5"
-    - os: linux
-      dist: trusty
-      python: "2.7"
-      env: LINT=1
-      before_install:
-        # In case we ever want to use a different version of clang-format:
-        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq clang-format-3.8
-      install: []
-      script:
-        - .travis/check-git-clang-format-output.sh
-    - os: linux
-      dist: trusty
-      python: "2.7"
-      env: VALGRIND=1
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq valgrind
-      script:
-        - cd common
-        - make test
-        - cd ..
-        - source setup-env.sh
-        - python test/test.py valgrind
-
-install:
-  - ./install-dependencies.sh
-
-  # Install Plasma side by side.
-  - cd ..
-  - git clone https://github.com/ray-project/plasma.git
-  - cd plasma
-  - git checkout f189ca746b57f22371ef10077aa535492bbd8421
-  - make
-  - source setup-env.sh
-  - cd ../photon
-
-  # Install Photon.
-  - make
-  - cd common/lib/python
-  - python setup.py install --user
-  - cd ../../..
-  - cd lib/python
-  - python setup.py install --user
-  - cd ../..
-  - cd common
-  - make test
-  - cd ..
-
-script:
-  - python test/test.py
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
deleted file mode 100755
index d71f78357..000000000
--- a/.travis/check-git-clang-format-output.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
-  # Not in a pull request, so compare against parent commit
-  base_commit="HEAD^"
-  echo "Running clang-format against parent commit $(git rev-parse $base_commit)"
-else
-  base_commit="$TRAVIS_BRANCH"
-  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
-fi
-output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^thirdparty/)"
-if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
-  echo "clang-format passed."
-  exit 0
-else
-  echo "clang-format failed:"
-  echo "$output"
-  exit 1
-fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
deleted file mode 100755
index b0e458303..000000000
--- a/.travis/git-clang-format
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/usr/bin/env python
-#
-#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-r"""
-clang-format git integration
-============================
-
-This file provides a clang-format integration for git. Put it somewhere in your
-path and ensure that it is executable. Then, "git clang-format" will invoke
-clang-format on the changes in current files or a specific commit.
-
-For further details, run:
-git clang-format -h
-
-Requires Python 2.7
-"""
-
-import argparse
-import collections
-import contextlib
-import errno
-import os
-import re
-import subprocess
-import sys
-
-usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
-
-desc = '''
-Run clang-format on all lines that differ between the working directory
-and <commit>, which defaults to HEAD.  Changes are only applied to the working
-directory.
-The following git-config settings set the default of the corresponding option:
-  clangFormat.binary
-  clangFormat.commit
-  clangFormat.extension
-  clangFormat.style
-'''
-
-# Name of the temporary index file in which save the output of clang-format.
-# This file is created within the .git directory.
-temp_index_basename = 'clang-format-index'
-
-
-Range = collections.namedtuple('Range', 'start, count')
-
-
-def main():
-  config = load_git_config()
-
-  # In order to keep '--' yet allow options after positionals, we need to
-  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
-  # nargs=argparse.REMAINDER disallows options after positionals.)
-  argv = sys.argv[1:]
-  try:
-    idx = argv.index('--')
-  except ValueError:
-    dash_dash = []
-  else:
-    dash_dash = argv[idx:]
-    argv = argv[:idx]
-
-  default_extensions = ','.join([
-      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
-      'c', 'h',  # C
-      'm',  # ObjC
-      'mm',  # ObjC++
-      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
-      # Other languages that clang-format supports
-      'proto', 'protodevel',  # Protocol Buffers
-      'js',  # JavaScript
-      'ts',  # TypeScript
-      ])
-
-  p = argparse.ArgumentParser(
-    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
-    description=desc)
-  p.add_argument('--binary',
-                 default=config.get('clangformat.binary', 'clang-format'),
-                 help='path to clang-format'),
-  p.add_argument('--commit',
-                 default=config.get('clangformat.commit', 'HEAD'),
-                 help='default commit to use if none is specified'),
-  p.add_argument('--diff', action='store_true',
-                 help='print a diff instead of applying the changes')
-  p.add_argument('--extensions',
-                 default=config.get('clangformat.extensions',
-                                    default_extensions),
-                 help=('comma-separated list of file extensions to format, '
-                       'excluding the period and case-insensitive')),
-  p.add_argument('--exclude', help='Exclude files matching this regex.')
-  p.add_argument('-f', '--force', action='store_true',
-                 help='allow changes to unstaged files')
-  p.add_argument('-p', '--patch', action='store_true',
-                 help='select hunks interactively')
-  p.add_argument('-q', '--quiet', action='count', default=0,
-                 help='print less information')
-  p.add_argument('--style',
-                 default=config.get('clangformat.style', None),
-                 help='passed to clang-format'),
-  p.add_argument('-v', '--verbose', action='count', default=0,
-                 help='print extra information')
-  # We gather all the remaining positional arguments into 'args' since we need
-  # to use some heuristics to determine whether or not <commit> was present.
-  # However, to print pretty messages, we make use of metavar and help.
-  p.add_argument('args', nargs='*', metavar='<commit>',
-                 help='revision from which to compute the diff')
-  p.add_argument('ignored', nargs='*', metavar='<file>...',
-                 help='if specified, only consider differences in these files')
-  opts = p.parse_args(argv)
-
-  opts.verbose -= opts.quiet
-  del opts.quiet
-
-  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
-  changed_lines = compute_diff_and_extract_lines(commit, files)
-  if opts.verbose >= 1:
-    ignored_files = set(changed_lines)
-  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
-  if opts.exclude:
-    for filename in changed_lines.keys():
-      if re.match(opts.exclude, filename):
-        del changed_lines[filename]
-  if opts.verbose >= 1:
-    ignored_files.difference_update(changed_lines)
-    if ignored_files:
-      print 'Ignoring changes in the following files:'
-      for filename in ignored_files:
-        print '   ', filename
-    if changed_lines:
-      print 'Running clang-format on the following files:'
-      for filename in changed_lines:
-        print '   ', filename
-  if not changed_lines:
-    print 'no modified files to format'
-    return
-  # The computed diff outputs absolute paths, so we must cd before accessing
-  # those files.
-  cd_to_toplevel()
-  old_tree = create_tree_from_workdir(changed_lines)
-  new_tree = run_clang_format_and_save_to_tree(changed_lines,
-                                               binary=opts.binary,
-                                               style=opts.style)
-  if opts.verbose >= 1:
-    print 'old tree:', old_tree
-    print 'new tree:', new_tree
-  if old_tree == new_tree:
-    if opts.verbose >= 0:
-      print 'clang-format did not modify any files'
-  elif opts.diff:
-    print_diff(old_tree, new_tree)
-  else:
-    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
-                                  patch_mode=opts.patch)
-    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
-      print 'changed files:'
-      for filename in changed_files:
-        print '   ', filename
-
-
-def load_git_config(non_string_options=None):
-  """Return the git configuration as a dictionary.
-  All options are assumed to be strings unless in `non_string_options`, in which
-  is a dictionary mapping option name (in lower case) to either "--bool" or
-  "--int"."""
-  if non_string_options is None:
-    non_string_options = {}
-  out = {}
-  for entry in run('git', 'config', '--list', '--null').split('\0'):
-    if entry:
-      name, value = entry.split('\n', 1)
-      if name in non_string_options:
-        value = run('git', 'config', non_string_options[name], name)
-      out[name] = value
-  return out
-
-
-def interpret_args(args, dash_dash, default_commit):
-  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
-  It is assumed that "--" and everything that follows has been removed from
-  args and placed in `dash_dash`.
-  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
-  left (if present) is taken as commit.  Otherwise, the first argument is
-  checked if it is a commit or a file.  If commit is not given,
-  `default_commit` is used."""
-  if dash_dash:
-    if len(args) == 0:
-      commit = default_commit
-    elif len(args) > 1:
-      die('at most one commit allowed; %d given' % len(args))
-    else:
-      commit = args[0]
-    object_type = get_object_type(commit)
-    if object_type not in ('commit', 'tag'):
-      if object_type is None:
-        die("'%s' is not a commit" % commit)
-      else:
-        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
-    files = dash_dash[1:]
-  elif args:
-    if disambiguate_revision(args[0]):
-      commit = args[0]
-      files = args[1:]
-    else:
-      commit = default_commit
-      files = args
-  else:
-    commit = default_commit
-    files = []
-  return commit, files
-
-
-def disambiguate_revision(value):
-  """Returns True if `value` is a revision, False if it is a file, or dies."""
-  # If `value` is ambiguous (neither a commit nor a file), the following
-  # command will die with an appropriate error message.
-  run('git', 'rev-parse', value, verbose=False)
-  object_type = get_object_type(value)
-  if object_type is None:
-    return False
-  if object_type in ('commit', 'tag'):
-    return True
-  die('`%s` is a %s, but a commit or filename was expected' %
-      (value, object_type))
-
-
-def get_object_type(value):
-  """Returns a string description of an object's type, or None if it is not
-  a valid git object."""
-  cmd = ['git', 'cat-file', '-t', value]
-  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-  stdout, stderr = p.communicate()
-  if p.returncode != 0:
-    return None
-  return stdout.strip()
-
-
-def compute_diff_and_extract_lines(commit, files):
-  """Calls compute_diff() followed by extract_lines()."""
-  diff_process = compute_diff(commit, files)
-  changed_lines = extract_lines(diff_process.stdout)
-  diff_process.stdout.close()
-  diff_process.wait()
-  if diff_process.returncode != 0:
-    # Assume error was already printed to stderr.
-    sys.exit(2)
-  return changed_lines
-
-
-def compute_diff(commit, files):
-  """Return a subprocess object producing the diff from `commit`.
-  The return value's `stdin` file object will produce a patch with the
-  differences between the working directory and `commit`, filtered on `files`
-  (if non-empty).  Zero context lines are used in the patch."""
-  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
-  cmd.extend(files)
-  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-  p.stdin.close()
-  return p
-
-
-def extract_lines(patch_file):
-  """Extract the changed lines in `patch_file`.
-  The return value is a dictionary mapping filename to a list of (start_line,
-  line_count) pairs.
-  The input must have been produced with ``-U0``, meaning unidiff format with
-  zero lines of context.  The return value is a dict mapping filename to a
-  list of line `Range`s."""
-  matches = {}
-  for line in patch_file:
-    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
-    if match:
-      filename = match.group(1).rstrip('\r\n')
-    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
-    if match:
-      start_line = int(match.group(1))
-      line_count = 1
-      if match.group(3):
-        line_count = int(match.group(3))
-      if line_count > 0:
-        matches.setdefault(filename, []).append(Range(start_line, line_count))
-  return matches
-
-
-def filter_by_extension(dictionary, allowed_extensions):
-  """Delete every key in `dictionary` that doesn't have an allowed extension.
-  `allowed_extensions` must be a collection of lowercase file extensions,
-  excluding the period."""
-  allowed_extensions = frozenset(allowed_extensions)
-  for filename in dictionary.keys():
-    base_ext = filename.rsplit('.', 1)
-    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
-      del dictionary[filename]
-
-
-def cd_to_toplevel():
-  """Change to the top level of the git repository."""
-  toplevel = run('git', 'rev-parse', '--show-toplevel')
-  os.chdir(toplevel)
-
-
-def create_tree_from_workdir(filenames):
-  """Create a new git tree with the given files from the working directory.
-  Returns the object ID (SHA-1) of the created tree."""
-  return create_tree(filenames, '--stdin')
-
-
-def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
-                                      style=None):
-  """Run clang-format on each file and save the result to a git tree.
-  Returns the object ID (SHA-1) of the created tree."""
-  def index_info_generator():
-    for filename, line_ranges in changed_lines.iteritems():
-      mode = oct(os.stat(filename).st_mode)
-      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
-                                     style=style)
-      yield '%s %s\t%s' % (mode, blob_id, filename)
-  return create_tree(index_info_generator(), '--index-info')
-
-
-def create_tree(input_lines, mode):
-  """Create a tree object from the given input.
-  If mode is '--stdin', it must be a list of filenames.  If mode is
-  '--index-info' is must be a list of values suitable for "git update-index
-  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
-  is invalid."""
-  assert mode in ('--stdin', '--index-info')
-  cmd = ['git', 'update-index', '--add', '-z', mode]
-  with temporary_index_file():
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
-    for line in input_lines:
-      p.stdin.write('%s\0' % line)
-    p.stdin.close()
-    if p.wait() != 0:
-      die('`%s` failed' % ' '.join(cmd))
-    tree_id = run('git', 'write-tree')
-    return tree_id
-
-
-def clang_format_to_blob(filename, line_ranges, binary='clang-format',
-                         style=None):
-  """Run clang-format on the given file and save the result to a git blob.
-  Returns the object ID (SHA-1) of the created blob."""
-  clang_format_cmd = [binary, filename]
-  if style:
-    clang_format_cmd.extend(['-style='+style])
-  clang_format_cmd.extend([
-      '-lines=%s:%s' % (start_line, start_line+line_count-1)
-      for start_line, line_count in line_ranges])
-  try:
-    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
-                                    stdout=subprocess.PIPE)
-  except OSError as e:
-    if e.errno == errno.ENOENT:
-      die('cannot find executable "%s"' % binary)
-    else:
-      raise
-  clang_format.stdin.close()
-  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
-  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
-                                 stdout=subprocess.PIPE)
-  clang_format.stdout.close()
-  stdout = hash_object.communicate()[0]
-  if hash_object.returncode != 0:
-    die('`%s` failed' % ' '.join(hash_object_cmd))
-  if clang_format.wait() != 0:
-    die('`%s` failed' % ' '.join(clang_format_cmd))
-  return stdout.rstrip('\r\n')
-
-
-@contextlib.contextmanager
-def temporary_index_file(tree=None):
-  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
-  the file afterward."""
-  index_path = create_temporary_index(tree)
-  old_index_path = os.environ.get('GIT_INDEX_FILE')
-  os.environ['GIT_INDEX_FILE'] = index_path
-  try:
-    yield
-  finally:
-    if old_index_path is None:
-      del os.environ['GIT_INDEX_FILE']
-    else:
-      os.environ['GIT_INDEX_FILE'] = old_index_path
-    os.remove(index_path)
-
-
-def create_temporary_index(tree=None):
-  """Create a temporary index file and return the created file's path.
-  If `tree` is not None, use that as the tree to read in.  Otherwise, an
-  empty index is created."""
-  gitdir = run('git', 'rev-parse', '--git-dir')
-  path = os.path.join(gitdir, temp_index_basename)
-  if tree is None:
-    tree = '--empty'
-  run('git', 'read-tree', '--index-output='+path, tree)
-  return path
-
-
-def print_diff(old_tree, new_tree):
-  """Print the diff between the two trees to stdout."""
-  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
-  # is expected to be viewed by the user, and only the former does nice things
-  # like color and pagination.
-  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
-
-
-def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
-  """Apply the changes in `new_tree` to the working directory.
-  Bails if there are local changes in those files and not `force`.  If
-  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
-  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
-                      new_tree).rstrip('\0').split('\0')
-  if not force:
-    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
-    if unstaged_files:
-      print >>sys.stderr, ('The following files would be modified but '
-                           'have unstaged changes:')
-      print >>sys.stderr, unstaged_files
-      print >>sys.stderr, 'Please commit, stage, or stash them first.'
-      sys.exit(2)
-  if patch_mode:
-    # In patch mode, we could just as well create an index from the new tree
-    # and checkout from that, but then the user will be presented with a
-    # message saying "Discard ... from worktree".  Instead, we use the old
-    # tree as the index and checkout from new_tree, which gives the slightly
-    # better message, "Apply ... to index and worktree".  This is not quite
-    # right, since it won't be applied to the user's index, but oh well.
-    with temporary_index_file(old_tree):
-      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
-    index_tree = old_tree
-  else:
-    with temporary_index_file(new_tree):
-      run('git', 'checkout-index', '-a', '-f')
-  return changed_files
-
-
-def run(*args, **kwargs):
-  stdin = kwargs.pop('stdin', '')
-  verbose = kwargs.pop('verbose', True)
-  strip = kwargs.pop('strip', True)
-  for name in kwargs:
-    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
-  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                       stdin=subprocess.PIPE)
-  stdout, stderr = p.communicate(input=stdin)
-  if p.returncode == 0:
-    if stderr:
-      if verbose:
-        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
-      print >>sys.stderr, stderr.rstrip()
-    if strip:
-      stdout = stdout.rstrip('\r\n')
-    return stdout
-  if verbose:
-    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
-  if stderr:
-    print >>sys.stderr, stderr.rstrip()
-  sys.exit(2)
-
-
-def die(message):
-  print >>sys.stderr, 'error:', message
-  sys.exit(2)
-
-
-if __name__ == '__main__':
-  main()
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 8dada3eda..000000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/README.md b/README.md
deleted file mode 100644
index 92e08a503..000000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Photon
-A local scheduler and node manager for Ray.
diff --git a/common b/common
deleted file mode 160000
index 535bc8f0b..000000000
--- a/common
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 535bc8f0b8dac8f3ef0b66c3fd8b265ab0e6c787
diff --git a/install-dependencies.sh b/install-dependencies.sh
deleted file mode 100755
index f84da1684..000000000
--- a/install-dependencies.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-
-platform="unknown"
-unamestr="$(uname)"
-if [[ "$unamestr" == "Linux" ]]; then
-  echo "Platform is linux."
-  platform="linux"
-elif [[ "$unamestr" == "Darwin" ]]; then
-  echo "Platform is macosx."
-  platform="macosx"
-else
-  echo "Unrecognized platform."
-  exit 1
-fi
-
-if [[ $platform == "linux" ]]; then
-  sudo apt-get update
-  sudo apt-get install -y git python-dev
-fi
diff --git a/setup-env.sh b/setup-env.sh
deleted file mode 100755
index 7c4350150..000000000
--- a/setup-env.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-echo "Adding Photon to PYTHONPATH" 1>&2
-
-ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-
-export PYTHONPATH="$ROOT_DIR/lib/python/:$PYTHONPATH"
diff --git a/Makefile b/src/photon/Makefile
similarity index 100%
rename from Makefile
rename to src/photon/Makefile
diff --git a/build/.gitkeep b/src/photon/build/.gitkeep
similarity index 100%
rename from build/.gitkeep
rename to src/photon/build/.gitkeep
diff --git a/lib/python/photon_extension.c b/src/photon/lib/python/photon_extension.c
similarity index 100%
rename from lib/python/photon_extension.c
rename to src/photon/lib/python/photon_extension.c
diff --git a/lib/python/setup.py b/src/photon/lib/python/setup.py
similarity index 100%
rename from lib/python/setup.py
rename to src/photon/lib/python/setup.py
diff --git a/photon.h b/src/photon/photon.h
similarity index 100%
rename from photon.h
rename to src/photon/photon.h
diff --git a/photon_algorithm.c b/src/photon/photon_algorithm.c
similarity index 100%
rename from photon_algorithm.c
rename to src/photon/photon_algorithm.c
diff --git a/photon_algorithm.h b/src/photon/photon_algorithm.h
similarity index 100%
rename from photon_algorithm.h
rename to src/photon/photon_algorithm.h
diff --git a/photon_client.c b/src/photon/photon_client.c
similarity index 100%
rename from photon_client.c
rename to src/photon/photon_client.c
diff --git a/photon_client.h b/src/photon/photon_client.h
similarity index 100%
rename from photon_client.h
rename to src/photon/photon_client.h
diff --git a/photon_scheduler.c b/src/photon/photon_scheduler.c
similarity index 100%
rename from photon_scheduler.c
rename to src/photon/photon_scheduler.c
diff --git a/photon_scheduler.h b/src/photon/photon_scheduler.h
similarity index 100%
rename from photon_scheduler.h
rename to src/photon/photon_scheduler.h
diff --git a/test/test.py b/src/photon/test/test.py
similarity index 100%
rename from test/test.py
rename to src/photon/test/test.py

From 02d4050499272eeb3ffe1f7a2ef3f5746af3c4a1 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Tue, 25 Oct 2016 14:27:45 -0700
Subject: [PATCH 91/91] Rearrange plasma files to prepare for merging into Ray.

---
 .clang-format                                 |   5 -
 .gitignore                                    |   3 -
 .gitmodules                                   |   3 -
 .travis.yml                                   |  50 --
 .travis/check-git-clang-format-output.sh      |  18 -
 .travis/git-clang-format                      | 490 ------------------
 LICENSE                                       | 201 -------
 README.md                                     |   7 -
 common                                        |   1 -
 setup-env.sh                                  |   5 -
 Makefile => src/plasma/Makefile               |   0
 {build => src/plasma/build}/.gitkeep          |   0
 {doc => src/plasma/doc}/plasma-doxy-config    |   0
 src/{ => plasma}/example.c                    |   0
 src/{ => plasma}/fling.c                      |   0
 src/{ => plasma}/fling.h                      |   0
 {lib => src/plasma/lib}/python/plasma.py      |   0
 src/{ => plasma}/malloc.c                     |   0
 src/{ => plasma}/malloc.h                     |   0
 src/{ => plasma}/plasma.h                     |   0
 src/{ => plasma}/plasma_client.c              |   0
 src/{ => plasma}/plasma_client.h              |   0
 src/{ => plasma}/plasma_manager.c             |   0
 src/{ => plasma}/plasma_manager.h             |   0
 src/{ => plasma}/plasma_store.c               |   0
 src/{ => plasma}/plasma_store.h               |   0
 {test => src/plasma/test}/test.py             |   0
 .../plasma/thirdparty}/dlmalloc.c             |   0
 28 files changed, 783 deletions(-)
 delete mode 100644 .clang-format
 delete mode 100644 .gitignore
 delete mode 100644 .gitmodules
 delete mode 100644 .travis.yml
 delete mode 100755 .travis/check-git-clang-format-output.sh
 delete mode 100755 .travis/git-clang-format
 delete mode 100644 LICENSE
 delete mode 100644 README.md
 delete mode 160000 common
 delete mode 100644 setup-env.sh
 rename Makefile => src/plasma/Makefile (100%)
 rename {build => src/plasma/build}/.gitkeep (100%)
 rename {doc => src/plasma/doc}/plasma-doxy-config (100%)
 rename src/{ => plasma}/example.c (100%)
 rename src/{ => plasma}/fling.c (100%)
 rename src/{ => plasma}/fling.h (100%)
 rename {lib => src/plasma/lib}/python/plasma.py (100%)
 rename src/{ => plasma}/malloc.c (100%)
 rename src/{ => plasma}/malloc.h (100%)
 rename src/{ => plasma}/plasma.h (100%)
 rename src/{ => plasma}/plasma_client.c (100%)
 rename src/{ => plasma}/plasma_client.h (100%)
 rename src/{ => plasma}/plasma_manager.c (100%)
 rename src/{ => plasma}/plasma_manager.h (100%)
 rename src/{ => plasma}/plasma_store.c (100%)
 rename src/{ => plasma}/plasma_store.h (100%)
 rename {test => src/plasma/test}/test.py (100%)
 rename {thirdparty => src/plasma/thirdparty}/dlmalloc.c (100%)

diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index 8957bdc17..000000000
--- a/.clang-format
+++ /dev/null
@@ -1,5 +0,0 @@
-BasedOnStyle: Chromium
-DerivePointerAlignment: true
-IndentCaseLabels: false
-PointerAlignment: Right
-SpaceAfterCStyleCast: true
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index ea25290f1..000000000
--- a/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-build/*
-*~
-*.pyc
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 9d57a168a..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "common"]
-	path = common
-	url = https://github.com/ray-project/common.git
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d592044fe..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-sudo: required
-
-language: generic
-
-matrix:
-  include:
-    - os: linux
-      dist: trusty
-      python: "2.7"
-    - os: linux
-      dist: trusty
-      python: "3.5"
-    - os: osx
-      osx_image: xcode7
-      python: "2.7"
-    - os: osx
-      osx_image: xcode7
-      python: "3.5"
-    - os: linux
-      dist: trusty
-      python: "2.7"
-      env: LINT=1
-      before_install:
-        # In case we ever want to use a different version of clang-format:
-        #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-        #- echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty main" | sudo tee -a /etc/apt/sources.list > /dev/null
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq clang-format-3.8
-      install: []
-      script:
-        - .travis/check-git-clang-format-output.sh
-    - os: linux
-      dist: trusty
-      python: "2.7"
-      env: VALGRIND=1
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq valgrind
-      script:
-        - make
-        - source setup-env.sh
-        - python test/test.py valgrind
-
-install:
-  - make
-  - make test
-
-script:
-  - source setup-env.sh
-  - python test/test.py
diff --git a/.travis/check-git-clang-format-output.sh b/.travis/check-git-clang-format-output.sh
deleted file mode 100755
index d71f78357..000000000
--- a/.travis/check-git-clang-format-output.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-if [ "$TRAVIS_PULL_REQUEST" == "false" ] ; then
-  # Not in a pull request, so compare against parent commit
-  base_commit="HEAD^"
-  echo "Running clang-format against parent commit $(git rev-parse $base_commit)"
-else
-  base_commit="$TRAVIS_BRANCH"
-  echo "Running clang-format against branch $base_commit, with hash $(git rev-parse $base_commit)"
-fi
-output="$(.travis/git-clang-format --binary clang-format-3.8 --commit $base_commit --diff --exclude ^thirdparty/)"
-if [ "$output" == "no modified files to format" ] || [ "$output" == "clang-format did not modify any files" ] ; then
-  echo "clang-format passed."
-  exit 0
-else
-  echo "clang-format failed:"
-  echo "$output"
-  exit 1
-fi
diff --git a/.travis/git-clang-format b/.travis/git-clang-format
deleted file mode 100755
index 116635ab7..000000000
--- a/.travis/git-clang-format
+++ /dev/null
@@ -1,490 +0,0 @@
-#!/usr/bin/env python
-#
-#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-r"""                                                                             
-clang-format git integration                                                     
-============================                                                     
-                                                                                 
-This file provides a clang-format integration for git. Put it somewhere in your  
-path and ensure that it is executable. Then, "git clang-format" will invoke      
-clang-format on the changes in current files or a specific commit.               
-                                                                                 
-For further details, run:                                                        
-git clang-format -h                                                              
-                                                                                 
-Requires Python 2.7                                                              
-"""               
-
-import argparse
-import collections
-import contextlib
-import errno
-import os
-import re
-import subprocess
-import sys
-
-usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
-
-desc = '''
-Run clang-format on all lines that differ between the working directory
-and <commit>, which defaults to HEAD.  Changes are only applied to the working
-directory.
-
-The following git-config settings set the default of the corresponding option:
-  clangFormat.binary
-  clangFormat.commit
-  clangFormat.extension
-  clangFormat.style
-'''
-
-# Name of the temporary index file in which save the output of clang-format.
-# This file is created within the .git directory.
-temp_index_basename = 'clang-format-index'
-
-
-Range = collections.namedtuple('Range', 'start, count')
-
-
-def main():
-  config = load_git_config()
-
-  # In order to keep '--' yet allow options after positionals, we need to
-  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
-  # nargs=argparse.REMAINDER disallows options after positionals.)
-  argv = sys.argv[1:]
-  try:
-    idx = argv.index('--')
-  except ValueError:
-    dash_dash = []
-  else:
-    dash_dash = argv[idx:]
-    argv = argv[:idx]
-
-  default_extensions = ','.join([
-      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
-      'c', 'h',  # C
-      'm',  # ObjC
-      'mm',  # ObjC++
-      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
-      # Other languages that clang-format supports
-      'proto', 'protodevel',  # Protocol Buffers
-      'js',  # JavaScript
-      'ts',  # TypeScript
-      ])
-
-  p = argparse.ArgumentParser(
-    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
-    description=desc)
-  p.add_argument('--binary',
-                 default=config.get('clangformat.binary', 'clang-format'),
-                 help='path to clang-format'),
-  p.add_argument('--commit',
-                 default=config.get('clangformat.commit', 'HEAD'),
-                 help='default commit to use if none is specified'),
-  p.add_argument('--diff', action='store_true',
-                 help='print a diff instead of applying the changes')
-  p.add_argument('--extensions',
-                 default=config.get('clangformat.extensions',
-                                    default_extensions),
-                 help=('comma-separated list of file extensions to format, '
-                       'excluding the period and case-insensitive')),
-  p.add_argument('--exclude', help='Exclude files matching this regex.')
-  p.add_argument('-f', '--force', action='store_true',
-                 help='allow changes to unstaged files')
-  p.add_argument('-p', '--patch', action='store_true',
-                 help='select hunks interactively')
-  p.add_argument('-q', '--quiet', action='count', default=0,
-                 help='print less information')
-  p.add_argument('--style',
-                 default=config.get('clangformat.style', None),
-                 help='passed to clang-format'),
-  p.add_argument('-v', '--verbose', action='count', default=0,
-                 help='print extra information')
-  # We gather all the remaining positional arguments into 'args' since we need
-  # to use some heuristics to determine whether or not <commit> was present.
-  # However, to print pretty messages, we make use of metavar and help.
-  p.add_argument('args', nargs='*', metavar='<commit>',
-                 help='revision from which to compute the diff')
-  p.add_argument('ignored', nargs='*', metavar='<file>...',
-                 help='if specified, only consider differences in these files')
-  opts = p.parse_args(argv)
-
-  opts.verbose -= opts.quiet
-  del opts.quiet
-
-  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
-  changed_lines = compute_diff_and_extract_lines(commit, files)
-  if opts.verbose >= 1:
-    ignored_files = set(changed_lines)
-  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
-  if opts.exclude:
-    for filename in changed_lines.keys():
-      if re.match(opts.exclude, filename):
-        del changed_lines[filename]
-  if opts.verbose >= 1:
-    ignored_files.difference_update(changed_lines)
-    if ignored_files:
-      print 'Ignoring changes in the following files:'
-      for filename in ignored_files:
-        print '   ', filename
-    if changed_lines:
-      print 'Running clang-format on the following files:'
-      for filename in changed_lines:
-        print '   ', filename
-  if not changed_lines:
-    print 'no modified files to format'
-    return
-  # The computed diff outputs absolute paths, so we must cd before accessing
-  # those files.
-  cd_to_toplevel()
-  old_tree = create_tree_from_workdir(changed_lines)
-  new_tree = run_clang_format_and_save_to_tree(changed_lines,
-                                               binary=opts.binary,
-                                               style=opts.style)
-  if opts.verbose >= 1:
-    print 'old tree:', old_tree
-    print 'new tree:', new_tree
-  if old_tree == new_tree:
-    if opts.verbose >= 0:
-      print 'clang-format did not modify any files'
-  elif opts.diff:
-    print_diff(old_tree, new_tree)
-  else:
-    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
-                                  patch_mode=opts.patch)
-    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
-      print 'changed files:'
-      for filename in changed_files:
-        print '   ', filename
-
-
-def load_git_config(non_string_options=None):
-  """Return the git configuration as a dictionary.
-
-  All options are assumed to be strings unless in `non_string_options`, in which
-  is a dictionary mapping option name (in lower case) to either "--bool" or
-  "--int"."""
-  if non_string_options is None:
-    non_string_options = {}
-  out = {}
-  for entry in run('git', 'config', '--list', '--null').split('\0'):
-    if entry:
-      name, value = entry.split('\n', 1)
-      if name in non_string_options:
-        value = run('git', 'config', non_string_options[name], name)
-      out[name] = value
-  return out
-
-
-def interpret_args(args, dash_dash, default_commit):
-  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
-
-  It is assumed that "--" and everything that follows has been removed from
-  args and placed in `dash_dash`.
-
-  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
-  left (if present) is taken as commit.  Otherwise, the first argument is
-  checked if it is a commit or a file.  If commit is not given,
-  `default_commit` is used."""
-  if dash_dash:
-    if len(args) == 0:
-      commit = default_commit
-    elif len(args) > 1:
-      die('at most one commit allowed; %d given' % len(args))
-    else:
-      commit = args[0]
-    object_type = get_object_type(commit)
-    if object_type not in ('commit', 'tag'):
-      if object_type is None:
-        die("'%s' is not a commit" % commit)
-      else:
-        die("'%s' is a %s, but a commit was expected" % (commit, object_type))
-    files = dash_dash[1:]
-  elif args:
-    if disambiguate_revision(args[0]):
-      commit = args[0]
-      files = args[1:]
-    else:
-      commit = default_commit
-      files = args
-  else:
-    commit = default_commit
-    files = []
-  return commit, files
-
-
-def disambiguate_revision(value):
-  """Returns True if `value` is a revision, False if it is a file, or dies."""
-  # If `value` is ambiguous (neither a commit nor a file), the following
-  # command will die with an appropriate error message.
-  run('git', 'rev-parse', value, verbose=False)
-  object_type = get_object_type(value)
-  if object_type is None:
-    return False
-  if object_type in ('commit', 'tag'):
-    return True
-  die('`%s` is a %s, but a commit or filename was expected' %
-      (value, object_type))
-
-
-def get_object_type(value):
-  """Returns a string description of an object's type, or None if it is not
-  a valid git object."""
-  cmd = ['git', 'cat-file', '-t', value]
-  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-  stdout, stderr = p.communicate()
-  if p.returncode != 0:
-    return None
-  return stdout.strip()
-
-
-def compute_diff_and_extract_lines(commit, files):
-  """Calls compute_diff() followed by extract_lines()."""
-  diff_process = compute_diff(commit, files)
-  changed_lines = extract_lines(diff_process.stdout)
-  diff_process.stdout.close()
-  diff_process.wait()
-  if diff_process.returncode != 0:
-    # Assume error was already printed to stderr.
-    sys.exit(2)
-  return changed_lines
-
-
-def compute_diff(commit, files):
-  """Return a subprocess object producing the diff from `commit`.
-
-  The return value's `stdin` file object will produce a patch with the
-  differences between the working directory and `commit`, filtered on `files`
-  (if non-empty).  Zero context lines are used in the patch."""
-  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
-  cmd.extend(files)
-  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-  p.stdin.close()
-  return p
-
-
-def extract_lines(patch_file):
-  """Extract the changed lines in `patch_file`.
-
-  The return value is a dictionary mapping filename to a list of (start_line,
-  line_count) pairs.
-
-  The input must have been produced with ``-U0``, meaning unidiff format with
-  zero lines of context.  The return value is a dict mapping filename to a
-  list of line `Range`s."""
-  matches = {}
-  for line in patch_file:
-    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
-    if match:
-      filename = match.group(1).rstrip('\r\n')
-    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
-    if match:
-      start_line = int(match.group(1))
-      line_count = 1
-      if match.group(3):
-        line_count = int(match.group(3))
-      if line_count > 0:
-        matches.setdefault(filename, []).append(Range(start_line, line_count))
-  return matches
-
-
-def filter_by_extension(dictionary, allowed_extensions):
-  """Delete every key in `dictionary` that doesn't have an allowed extension.
-
-  `allowed_extensions` must be a collection of lowercase file extensions,
-  excluding the period."""
-  allowed_extensions = frozenset(allowed_extensions)
-  for filename in dictionary.keys():
-    base_ext = filename.rsplit('.', 1)
-    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
-      del dictionary[filename]
-
-
-def cd_to_toplevel():
-  """Change to the top level of the git repository."""
-  toplevel = run('git', 'rev-parse', '--show-toplevel')
-  os.chdir(toplevel)
-
-
-def create_tree_from_workdir(filenames):
-  """Create a new git tree with the given files from the working directory.
-
-  Returns the object ID (SHA-1) of the created tree."""
-  return create_tree(filenames, '--stdin')
-
-
-def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
-                                      style=None):
-  """Run clang-format on each file and save the result to a git tree.
-
-  Returns the object ID (SHA-1) of the created tree."""
-  def index_info_generator():
-    for filename, line_ranges in changed_lines.iteritems():
-      mode = oct(os.stat(filename).st_mode)
-      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
-                                     style=style)
-      yield '%s %s\t%s' % (mode, blob_id, filename)
-  return create_tree(index_info_generator(), '--index-info')
-
-
-def create_tree(input_lines, mode):
-  """Create a tree object from the given input.
-
-  If mode is '--stdin', it must be a list of filenames.  If mode is
-  '--index-info' is must be a list of values suitable for "git update-index
-  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
-  is invalid."""
-  assert mode in ('--stdin', '--index-info')
-  cmd = ['git', 'update-index', '--add', '-z', mode]
-  with temporary_index_file():
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
-    for line in input_lines:
-      p.stdin.write('%s\0' % line)
-    p.stdin.close()
-    if p.wait() != 0:
-      die('`%s` failed' % ' '.join(cmd))
-    tree_id = run('git', 'write-tree')
-    return tree_id
-
-
-def clang_format_to_blob(filename, line_ranges, binary='clang-format',
-                         style=None):
-  """Run clang-format on the given file and save the result to a git blob.
-
-  Returns the object ID (SHA-1) of the created blob."""
-  clang_format_cmd = [binary, filename]
-  if style:
-    clang_format_cmd.extend(['-style='+style])
-  clang_format_cmd.extend([
-      '-lines=%s:%s' % (start_line, start_line+line_count-1)
-      for start_line, line_count in line_ranges])
-  try:
-    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
-                                    stdout=subprocess.PIPE)
-  except OSError as e:
-    if e.errno == errno.ENOENT:
-      die('cannot find executable "%s"' % binary)
-    else:
-      raise
-  clang_format.stdin.close()
-  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
-  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
-                                 stdout=subprocess.PIPE)
-  clang_format.stdout.close()
-  stdout = hash_object.communicate()[0]
-  if hash_object.returncode != 0:
-    die('`%s` failed' % ' '.join(hash_object_cmd))
-  if clang_format.wait() != 0:
-    die('`%s` failed' % ' '.join(clang_format_cmd))
-  return stdout.rstrip('\r\n')
-
-
-@contextlib.contextmanager
-def temporary_index_file(tree=None):
-  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
-  the file afterward."""
-  index_path = create_temporary_index(tree)
-  old_index_path = os.environ.get('GIT_INDEX_FILE')
-  os.environ['GIT_INDEX_FILE'] = index_path
-  try:
-    yield
-  finally:
-    if old_index_path is None:
-      del os.environ['GIT_INDEX_FILE']
-    else:
-      os.environ['GIT_INDEX_FILE'] = old_index_path
-    os.remove(index_path)
-
-
-def create_temporary_index(tree=None):
-  """Create a temporary index file and return the created file's path.
-
-  If `tree` is not None, use that as the tree to read in.  Otherwise, an
-  empty index is created."""
-  gitdir = run('git', 'rev-parse', '--git-dir')
-  path = os.path.join(gitdir, temp_index_basename)
-  if tree is None:
-    tree = '--empty'
-  run('git', 'read-tree', '--index-output='+path, tree)
-  return path
-
-
-def print_diff(old_tree, new_tree):
-  """Print the diff between the two trees to stdout."""
-  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
-  # is expected to be viewed by the user, and only the former does nice things
-  # like color and pagination.
-  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
-
-
-def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
-  """Apply the changes in `new_tree` to the working directory.
-
-  Bails if there are local changes in those files and not `force`.  If
-  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
-  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
-                      new_tree).rstrip('\0').split('\0')
-  if not force:
-    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
-    if unstaged_files:
-      print >>sys.stderr, ('The following files would be modified but '
-                           'have unstaged changes:')
-      print >>sys.stderr, unstaged_files
-      print >>sys.stderr, 'Please commit, stage, or stash them first.'
-      sys.exit(2)
-  if patch_mode:
-    # In patch mode, we could just as well create an index from the new tree
-    # and checkout from that, but then the user will be presented with a
-    # message saying "Discard ... from worktree".  Instead, we use the old
-    # tree as the index and checkout from new_tree, which gives the slightly
-    # better message, "Apply ... to index and worktree".  This is not quite
-    # right, since it won't be applied to the user's index, but oh well.
-    with temporary_index_file(old_tree):
-      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
-    index_tree = old_tree
-  else:
-    with temporary_index_file(new_tree):
-      run('git', 'checkout-index', '-a', '-f')
-  return changed_files
-
-
-def run(*args, **kwargs):
-  stdin = kwargs.pop('stdin', '')
-  verbose = kwargs.pop('verbose', True)
-  strip = kwargs.pop('strip', True)
-  for name in kwargs:
-    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
-  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                       stdin=subprocess.PIPE)
-  stdout, stderr = p.communicate(input=stdin)
-  if p.returncode == 0:
-    if stderr:
-      if verbose:
-        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
-      print >>sys.stderr, stderr.rstrip()
-    if strip:
-      stdout = stdout.rstrip('\r\n')
-    return stdout
-  if verbose:
-    print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
-  if stderr:
-    print >>sys.stderr, stderr.rstrip()
-  sys.exit(2)
-
-
-def die(message):
-  print >>sys.stderr, 'error:', message
-  sys.exit(2)
-
-
-if __name__ == '__main__':
-  main()
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 8dada3eda..000000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/README.md b/README.md
deleted file mode 100644
index 563088235..000000000
--- a/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Plasma
-
-Plasma is an experimental in-memory object manager. It is under development and
-not ready for general use.
-
-## clang-format
-Run .travis/git-clang-format to automatically format changes in the checkout.
diff --git a/common b/common
deleted file mode 160000
index da3a3127e..000000000
--- a/common
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit da3a3127e095f679651119f0debfafcade1b0b94
diff --git a/setup-env.sh b/setup-env.sh
deleted file mode 100644
index a1b2cb4b1..000000000
--- a/setup-env.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-echo "Adding Plasma to PYTHONPATH" 1>&2
-
-ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-
-export PYTHONPATH="$ROOT_DIR/lib/python/:$PYTHONPATH"
diff --git a/Makefile b/src/plasma/Makefile
similarity index 100%
rename from Makefile
rename to src/plasma/Makefile
diff --git a/build/.gitkeep b/src/plasma/build/.gitkeep
similarity index 100%
rename from build/.gitkeep
rename to src/plasma/build/.gitkeep
diff --git a/doc/plasma-doxy-config b/src/plasma/doc/plasma-doxy-config
similarity index 100%
rename from doc/plasma-doxy-config
rename to src/plasma/doc/plasma-doxy-config
diff --git a/src/example.c b/src/plasma/example.c
similarity index 100%
rename from src/example.c
rename to src/plasma/example.c
diff --git a/src/fling.c b/src/plasma/fling.c
similarity index 100%
rename from src/fling.c
rename to src/plasma/fling.c
diff --git a/src/fling.h b/src/plasma/fling.h
similarity index 100%
rename from src/fling.h
rename to src/plasma/fling.h
diff --git a/lib/python/plasma.py b/src/plasma/lib/python/plasma.py
similarity index 100%
rename from lib/python/plasma.py
rename to src/plasma/lib/python/plasma.py
diff --git a/src/malloc.c b/src/plasma/malloc.c
similarity index 100%
rename from src/malloc.c
rename to src/plasma/malloc.c
diff --git a/src/malloc.h b/src/plasma/malloc.h
similarity index 100%
rename from src/malloc.h
rename to src/plasma/malloc.h
diff --git a/src/plasma.h b/src/plasma/plasma.h
similarity index 100%
rename from src/plasma.h
rename to src/plasma/plasma.h
diff --git a/src/plasma_client.c b/src/plasma/plasma_client.c
similarity index 100%
rename from src/plasma_client.c
rename to src/plasma/plasma_client.c
diff --git a/src/plasma_client.h b/src/plasma/plasma_client.h
similarity index 100%
rename from src/plasma_client.h
rename to src/plasma/plasma_client.h
diff --git a/src/plasma_manager.c b/src/plasma/plasma_manager.c
similarity index 100%
rename from src/plasma_manager.c
rename to src/plasma/plasma_manager.c
diff --git a/src/plasma_manager.h b/src/plasma/plasma_manager.h
similarity index 100%
rename from src/plasma_manager.h
rename to src/plasma/plasma_manager.h
diff --git a/src/plasma_store.c b/src/plasma/plasma_store.c
similarity index 100%
rename from src/plasma_store.c
rename to src/plasma/plasma_store.c
diff --git a/src/plasma_store.h b/src/plasma/plasma_store.h
similarity index 100%
rename from src/plasma_store.h
rename to src/plasma/plasma_store.h
diff --git a/test/test.py b/src/plasma/test/test.py
similarity index 100%
rename from test/test.py
rename to src/plasma/test/test.py
diff --git a/thirdparty/dlmalloc.c b/src/plasma/thirdparty/dlmalloc.c
similarity index 100%
rename from thirdparty/dlmalloc.c
rename to src/plasma/thirdparty/dlmalloc.c