change filenames and directory structure to use halo (#81)

2026-06-28 03:34:48 +08:00 · 2016-06-03 18:32:57 -07:00
parent b58eaf84ee
commit 67086f663e
41 changed files with 446 additions and 446 deletions
@@ -0,0 +1,2 @@
+import random, linalg
+from core import *
@@ -0,0 +1,234 @@
+from typing import List
+import numpy as np
+import arrays.single as single
+import halo
+
+__all__ = ["BLOCK_SIZE", "DistArray", "assemble", "zeros", "ones", "copy",
+           "eye", "triu", "tril", "blockwise_dot", "dot", "transpose", "add", "subtract", "numpy_to_dist", "subblocks"]
+
+BLOCK_SIZE = 10
+
+class DistArray(object):
+  def construct(self, shape, objrefs=None):
+    self.shape = shape
+    self.ndim = len(shape)
+    self.num_blocks = [int(np.ceil(1.0 * a / BLOCK_SIZE)) for a in self.shape]
+    self.objrefs = objrefs if objrefs is not None else np.empty(self.num_blocks, dtype=object)
+    if self.num_blocks != list(self.objrefs.shape):
+      raise Exception("The fields `num_blocks` and `objrefs` are inconsistent, `num_blocks` is {} and `objrefs` has shape {}".format(self.num_blocks, list(self.objrefs.shape)))
+
+  def deserialize(self, primitives):
+    (shape, objrefs) = primitives
+    self.construct(shape, objrefs)
+
+  def serialize(self):
+    return (self.shape, self.objrefs)
+
+  def __init__(self, shape=None):
+    if shape is not None:
+      self.construct(shape)
+
+  @staticmethod
+  def compute_block_lower(index, shape):
+    if len(index) != len(shape):
+      raise Exception("The fields `index` and `shape` must have the same length, but `index` is {} and `shape` is {}.".format(index, shape))
+    return [elem * BLOCK_SIZE for elem in index]
+
+  @staticmethod
+  def compute_block_upper(index, shape):
+    if len(index) != len(shape):
+      raise Exception("The fields `index` and `shape` must have the same length, but `index` is {} and `shape` is {}.".format(index, shape))
+    upper = []
+    for i in range(len(shape)):
+      upper.append(min((index[i] + 1) * BLOCK_SIZE, shape[i]))
+    return upper
+
+  @staticmethod
+  def compute_block_shape(index, shape):
+    lower = DistArray.compute_block_lower(index, shape)
+    upper = DistArray.compute_block_upper(index, shape)
+    return [u - l for (l, u) in zip(lower, upper)]
+
+  @staticmethod
+  def compute_num_blocks(shape):
+    return [int(np.ceil(1.0 * a / BLOCK_SIZE)) for a in shape]
+
+  def assemble(self):
+    """Assemble an array on this node from a distributed array object reference."""
+    first_block = halo.pull(self.objrefs[(0,) * self.ndim])
+    dtype = first_block.dtype
+    result = np.zeros(self.shape, dtype=dtype)
+    for index in np.ndindex(*self.num_blocks):
+      lower = DistArray.compute_block_lower(index, self.shape)
+      upper = DistArray.compute_block_upper(index, self.shape)
+      result[[slice(l, u) for (l, u) in zip(lower, upper)]] = halo.pull(self.objrefs[index])
+    return result
+
+  def __getitem__(self, sliced):
+    # TODO(rkn): fix this, this is just a placeholder that should work but is inefficient
+    a = self.assemble()
+    return a[sliced]
+
+@halo.distributed([DistArray], [np.ndarray])
+def assemble(a):
+  return a.assemble()
+
+# TODO(rkn): what should we call this method
+@halo.distributed([np.ndarray], [DistArray])
+def numpy_to_dist(a):
+  result = DistArray(a.shape)
+  for index in np.ndindex(*result.num_blocks):
+    lower = DistArray.compute_block_lower(index, a.shape)
+    upper = DistArray.compute_block_upper(index, a.shape)
+    result.objrefs[index] = halo.push(a[[slice(l, u) for (l, u) in zip(lower, upper)]])
+  return result
+
+@halo.distributed([List[int], str], [DistArray])
+def zeros(shape, dtype_name="float"):
+  result = DistArray(shape)
+  for index in np.ndindex(*result.num_blocks):
+    result.objrefs[index] = single.zeros(DistArray.compute_block_shape(index, shape), dtype_name=dtype_name)
+  return result
+
+@halo.distributed([List[int], str], [DistArray])
+def ones(shape, dtype_name="float"):
+  result = DistArray(shape)
+  for index in np.ndindex(*result.num_blocks):
+    result.objrefs[index] = single.ones(DistArray.compute_block_shape(index, shape), dtype_name=dtype_name)
+  return result
+
+@halo.distributed([DistArray], [DistArray])
+def copy(a):
+  result = DistArray(a.shape)
+  for index in np.ndindex(*result.num_blocks):
+    result.objrefs[index] = a.objrefs[index] # We don't need to actually copy the objects because cluster-level objects are assumed to be immutable.
+  return result
+
+@halo.distributed([int, int, str], [DistArray])
+def eye(dim1, dim2=-1, dtype_name="float"):
+  dim2 = dim1 if dim2 == -1 else dim2
+  shape = [dim1, dim2]
+  result = DistArray(shape)
+  for (i, j) in np.ndindex(*result.num_blocks):
+    block_shape = DistArray.compute_block_shape([i, j], shape)
+    if i == j:
+      result.objrefs[i, j] = single.eye(block_shape[0], block_shape[1], dtype_name=dtype_name)
+    else:
+      result.objrefs[i, j] = single.zeros(block_shape, dtype_name=dtype_name)
+  return result
+
+@halo.distributed([DistArray], [DistArray])
+def triu(a):
+  if a.ndim != 2:
+    raise Exception("Input must have 2 dimensions, but a.ndim is " + str(a.ndim))
+  result = DistArray(a.shape)
+  for (i, j) in np.ndindex(*result.num_blocks):
+    if i < j:
+      result.objrefs[i, j] = single.copy(a.objrefs[i, j])
+    elif i == j:
+      result.objrefs[i, j] = single.triu(a.objrefs[i, j])
+    else:
+      result.objrefs[i, j] = single.zeros_like(a.objrefs[i, j])
+  return result
+
+@halo.distributed([DistArray], [DistArray])
+def tril(a):
+  if a.ndim != 2:
+    raise Exception("Input must have 2 dimensions, but a.ndim is " + str(a.ndim))
+  result = DistArray(a.shape)
+  for (i, j) in np.ndindex(*result.num_blocks):
+    if i > j:
+      result.objrefs[i, j] = single.copy(a.objrefs[i, j])
+    elif i == j:
+      result.objrefs[i, j] = single.tril(a.objrefs[i, j])
+    else:
+      result.objrefs[i, j] = single.zeros_like(a.objrefs[i, j])
+  return result
+
+@halo.distributed([np.ndarray, None], [np.ndarray])
+def blockwise_dot(*matrices):
+  n = len(matrices)
+  if n % 2 != 0:
+    raise Exception("blockwise_dot expects an even number of arguments, but len(matrices) is {}.".format(n))
+  shape = (matrices[0].shape[0], matrices[n / 2].shape[1])
+  result = np.zeros(shape)
+  for i in range(n / 2):
+    result += np.dot(matrices[i], matrices[n / 2 + i])
+  return result
+
+@halo.distributed([DistArray, DistArray], [DistArray])
+def dot(a, b):
+  if a.ndim != 2:
+    raise Exception("dot expects its arguments to be 2-dimensional, but a.ndim = {}.".format(a.ndim))
+  if b.ndim != 2:
+    raise Exception("dot expects its arguments to be 2-dimensional, but b.ndim = {}.".format(b.ndim))
+  if a.shape[1] != b.shape[0]:
+    raise Exception("dot expects a.shape[1] to equal b.shape[0], but a.shape = {} and b.shape = {}.".format(a.shape, b.shape))
+  shape = [a.shape[0], b.shape[1]]
+  result = DistArray(shape)
+  for (i, j) in np.ndindex(*result.num_blocks):
+    args = list(a.objrefs[i, :]) + list(b.objrefs[:, j])
+    result.objrefs[i, j] = blockwise_dot(*args)
+  return result
+
+# This is not in numpy, should we expose this?
+@halo.distributed([DistArray, List[int], None], [DistArray])
+def subblocks(a, *ranges):
+  """
+  This function produces a distributed array from a subset of the blocks in the `a`. The result and `a` will have the same number of dimensions.For example,
+      subblocks(a, [0, 1], [2, 4])
+  will produce a DistArray whose objrefs are
+      [[a.objrefs[0, 2], a.objrefs[0, 4]],
+       [a.objrefs[1, 2], a.objrefs[1, 4]]]
+  We allow the user to pass in an empty list [] to indicate the full range.
+  """
+  ranges = list(ranges)
+  if len(ranges) != a.ndim:
+    raise Exception("sub_blocks expects to receive a number of ranges equal to a.ndim, but it received {} ranges and a.ndim = {}.".format(len(ranges), a.ndim))
+  for i in range(len(ranges)):
+    if ranges[i] == []: # We allow the user to pass in an empty list to indicate the full range
+      ranges[i] = range(a.num_blocks[i])
+    if not np.alltrue(ranges[i] == np.sort(ranges[i])):
+      raise Exception("Ranges passed to sub_blocks must be sorted, but the {}th range is {}.".format(i, ranges[i]))
+    if ranges[i][0] < 0:
+      raise Exception("Values in the ranges passed to sub_blocks must be at least 0, but the {}th range is {}.".format(i, ranges[i]))
+    if ranges[i][-1] >= a.num_blocks[i]:
+        raise Exception("Values in the ranges passed to sub_blocks must be less than the relevant number of blocks, but the {}th range is {}, and a.num_blocks = {}.".format(i, ranges[i], a.num_blocks))
+  last_index = [r[-1] for r in ranges]
+  last_block_shape = DistArray.compute_block_shape(last_index, a.shape)
+  shape = [(len(ranges[i]) - 1) * BLOCK_SIZE + last_block_shape[i] for i in range(a.ndim)]
+  result = DistArray(shape)
+  for index in np.ndindex(*result.num_blocks):
+    print tuple([ranges[i][index[i]] for i in range(a.ndim)])
+    result.objrefs[index] = a.objrefs[tuple([ranges[i][index[i]] for i in range(a.ndim)])]
+  return result
+
+@halo.distributed([DistArray], [DistArray])
+def transpose(a):
+  if a.ndim != 2:
+    raise Exception("transpose expects its argument to be 2-dimensional, but a.ndim = {}, a.shape = {}.".format(a.ndim, a.shape))
+  result = DistArray([a.shape[1], a.shape[0]])
+  for i in range(result.num_blocks[0]):
+    for j in range(result.num_blocks[1]):
+      result.objrefs[i, j] = single.transpose(a.objrefs[j, i])
+  return result
+
+# TODO(rkn): support broadcasting?
+@halo.distributed([DistArray, DistArray], [DistArray])
+def add(x1, x2):
+  if x1.shape != x2.shape:
+    raise Exception("add expects arguments `x1` and `x2` to have the same shape, but x1.shape = {}, and x2.shape = {}.".format(x1.shape, x2.shape))
+  result = DistArray(x1.shape)
+  for index in np.ndindex(*result.num_blocks):
+    result.objrefs[index] = single.add(x1.objrefs[index], x2.objrefs[index])
+  return result
+
+# TODO(rkn): support broadcasting?
+@halo.distributed([DistArray, DistArray], [DistArray])
+def subtract(x1, x2):
+  if x1.shape != x2.shape:
+    raise Exception("subtract expects arguments `x1` and `x2` to have the same shape, but x1.shape = {}, and x2.shape = {}.".format(x1.shape, x2.shape))
+  result = DistArray(x1.shape)
+  for index in np.ndindex(*result.num_blocks):
+    result.objrefs[index] = single.subtract(x1.objrefs[index], x2.objrefs[index])
+  return result
@@ -0,0 +1,192 @@
+from typing import List
+
+import numpy as np
+import arrays.single as single
+import halo
+
+from core import *
+
+__all__ = ["tsqr", "modified_lu", "tsqr_hr", "qr"]
+
+@halo.distributed([DistArray], [DistArray, np.ndarray])
+def tsqr(a):
+  """
+  arguments:
+    a: a distributed matrix
+  Suppose that
+    a.shape == (M, N)
+    K == min(M, N)
+  return values:
+    q: DistArray, if q_full = halo.context.pull(DistArray, q).assemble(), then
+      q_full.shape == (M, K)
+      np.allclose(np.dot(q_full.T, q_full), np.eye(K)) == True
+    r: np.ndarray, if r_val = halo.context.pull(np.ndarray, r), then
+      r_val.shape == (K, N)
+      np.allclose(r, np.triu(r)) == True
+  """
+  if len(a.shape) != 2:
+    raise Exception("tsqr requires len(a.shape) == 2, but a.shape is {}".format(a.shape))
+  if a.num_blocks[1] != 1:
+    raise Exception("tsqr requires a.num_blocks[1] == 1, but a.num_blocks is {}".format(a.num_blocks))
+
+  num_blocks = a.num_blocks[0]
+  K = int(np.ceil(np.log2(num_blocks))) + 1
+  q_tree = np.empty((num_blocks, K), dtype=object)
+  current_rs = []
+  for i in range(num_blocks):
+    block = a.objrefs[i, 0]
+    q, r = single.linalg.qr(block)
+    q_tree[i, 0] = q
+    current_rs.append(r)
+  for j in range(1, K):
+    new_rs = []
+    for i in range(int(np.ceil(1.0 * len(current_rs) / 2))):
+      stacked_rs = single.vstack(*current_rs[(2 * i):(2 * i + 2)])
+      q, r = single.linalg.qr(stacked_rs)
+      q_tree[i, j] = q
+      new_rs.append(r)
+    current_rs = new_rs
+  assert len(current_rs) == 1, "len(current_rs) = " + str(len(current_rs))
+
+  q_result = DistArray()
+
+  # handle the special case in which the whole DistArray "a" fits in one block
+  # and has fewer rows than columns, this is a bit ugly so think about how to
+  # remove it
+  if a.shape[0] >= a.shape[1]:
+    q_shape = a.shape
+  else:
+    q_shape = [a.shape[0], a.shape[0]]
+  q_num_blocks = DistArray.compute_num_blocks(q_shape)
+  q_result = DistArray()
+  q_objrefs = np.empty(q_num_blocks, dtype=object)
+  q_result.construct(q_shape, q_objrefs)
+
+  # reconstruct output
+  for i in range(num_blocks):
+    q_block_current = q_tree[i, 0]
+    ith_index = i
+    for j in range(1, K):
+      if np.mod(ith_index, 2) == 0:
+        lower = [0, 0]
+        upper = [a.shape[1], BLOCK_SIZE]
+      else:
+        lower = [a.shape[1], 0]
+        upper = [2 * a.shape[1], BLOCK_SIZE]
+      ith_index /= 2
+      q_block_current = single.dot(q_block_current, single.subarray(q_tree[ith_index, j], lower, upper))
+    q_result.objrefs[i] = q_block_current
+  r = current_rs[0]
+  return q_result, r
+
+# TODO(rkn): This is unoptimized, we really want a block version of this.
+@halo.distributed([DistArray], [DistArray, np.ndarray, np.ndarray])
+def modified_lu(q):
+  """
+  Algorithm 5 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf
+  takes a matrix q with orthonormal columns, returns l, u, s such that q - s = l * u
+  arguments:
+    q: a two dimensional orthonormal q, given as a DistArray (it is assembled
+      into a single numpy array before any work is done)
+  return values:
+    l: lower triangular with a unit diagonal, returned as a DistArray
+    u: upper triangular, the first q.shape[1] rows of the work matrix
+    s: a diagonal matrix represented by its diagonal
+  """
+  q = q.assemble()
+  m, b = q.shape[0], q.shape[1]
+  S = np.zeros(b)
+
+  # All elimination happens in place on a copy, so the assembled q is left untouched.
+  q_work = np.copy(q)
+
+  for i in range(b):
+    # S[i] is the negated sign of the current pivot; subtracting it moves the
+    # pivot away from zero before the division below.
+    # NOTE(review): np.sign returns 0 for a pivot that is exactly 0, which
+    # would leave the pivot at 0 and make the division below divide by zero.
+    S[i] = -1 * np.sign(q_work[i, i])
+    q_work[i, i] -= S[i]
+    q_work[(i + 1):m, i] /= q_work[i, i] # scale ith column of L by diagonal element
+    q_work[(i + 1):m, (i + 1):b] -= np.outer(q_work[(i + 1):m, i], q_work[i, (i + 1):b]) # perform Schur complement update
+
+  # The strictly lower part of q_work holds L, the upper part (with diagonal) holds U.
+  L = np.tril(q_work)
+  for i in range(b):
+    L[i, i] = 1
+  U = np.triu(q_work)[:b, :]
+  return numpy_to_dist(halo.push(L)), U, S # TODO(rkn): get rid of push and pull
+
+@halo.distributed([np.ndarray, np.ndarray, np.ndarray, int], [np.ndarray, np.ndarray])
+def tsqr_hr_helper1(u, s, y_top_block, b):
+  y_top = y_top_block[:b, :b]
+  s_full = np.diag(s)
+  t = -1 * np.dot(u, np.dot(s_full, np.linalg.inv(y_top).T))
+  return t, y_top
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def tsqr_hr_helper2(s, r_temp):
+  s_full = np.diag(s)
+  return np.dot(s_full, r_temp)
+
+@halo.distributed([DistArray], [DistArray, np.ndarray, np.ndarray, np.ndarray])
+def tsqr_hr(a):
+  """Algorithm 6 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf"""
+  q, r_temp = tsqr(a)
+  y, u, s = modified_lu(q)
+  y_blocked = halo.pull(y)
+  t, y_top = tsqr_hr_helper1(u, s, y_blocked.objrefs[0, 0], a.shape[1])
+  r = tsqr_hr_helper2(s, r_temp)
+  return y, t, y_top, r
+
+@halo.distributed([np.ndarray, np.ndarray, np.ndarray, np.ndarray], [np.ndarray])
+def qr_helper1(a_rc, y_ri, t, W_c):
+  return a_rc - np.dot(y_ri, np.dot(t.T, W_c))
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def qr_helper2(y_ri, a_rc):
+  return np.dot(y_ri.T, a_rc)
+
+@halo.distributed([DistArray], [DistArray, DistArray])
+def qr(a):
+  """Algorithm 7 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf
+
+  arguments:
+    a: a two dimensional DistArray of shape (m, n)
+  return values:
+    q: DistArray built from an (m, k) identity, with k = min(m, n)
+    r_res: DistArray of shape (k, n)
+  """
+  m, n = a.shape[0], a.shape[1]
+  k = min(m, n)
+
+  # we will store our scratch work in a_work
+  # (the objref grid is copied, so entries can be replaced without touching a.objrefs)
+  a_work = DistArray()
+  a_work.construct(a.shape, np.copy(a.objrefs))
+
+  # The dtype of the outputs is whatever numpy's QR produces for the first block of a.
+  result_dtype = np.linalg.qr(halo.pull(a.objrefs[0, 0]))[0].dtype.name
+  r_res = halo.pull(zeros([k, n], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
+  y_res = halo.pull(zeros([m, k], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
+  Ts = []
+
+  for i in range(min(a.num_blocks[0], a.num_blocks[1])): # this differs from the paper, which says "for i in range(a.num_blocks[1])", but that doesn't seem to make any sense when a.num_blocks[1] > a.num_blocks[0]
+    # Factor block column i, restricted to block rows i and below.
+    sub_dist_array = subblocks(a_work, range(i, a_work.num_blocks[0]), [i])
+    y, t, _, R = tsqr_hr(sub_dist_array)
+    y_val = halo.pull(y)
+
+    # Store the y blocks in block column i of y_res, starting at block row i.
+    for j in range(i, a.num_blocks[0]):
+      y_res.objrefs[j, i] = y_val.objrefs[j - i, 0]
+    if a.shape[0] > a.shape[1]:
+      # in this case, R needs to be square
+      R_shape = halo.pull(single.shape(R))
+      eye_temp = single.eye(R_shape[1], R_shape[0], dtype_name=result_dtype)
+      r_res.objrefs[i, i] = single.dot(eye_temp, R)
+    else:
+      r_res.objrefs[i, i] = R
+    Ts.append(numpy_to_dist(t))
+
+    # Update every block column to the right of column i.
+    for c in range(i + 1, a.num_blocks[1]):
+      # W_c is the sum over block rows r of y_ri.T * a_rc.
+      W_rcs = []
+      for r in range(i, a.num_blocks[0]):
+        y_ri = y_val.objrefs[r - i, 0]
+        W_rcs.append(qr_helper2(y_ri, a_work.objrefs[r, c]))
+      W_c = single.sum(0, *W_rcs)
+      # Replace each block of column c with a_rc - y_ri * t.T * W_c.
+      for r in range(i, a.num_blocks[0]):
+        y_ri = y_val.objrefs[r - i, 0]
+        A_rc = qr_helper1(a_work.objrefs[r, c], y_ri, t, W_c)
+        a_work.objrefs[r, c] = A_rc
+      # Block row i of the updated column becomes block (i, c) of r_res.
+      r_res.objrefs[i, c] = a_work.objrefs[i, c]
+
+  # construct q_res from Ys and Ts
+  # (the factors are applied in reverse order, last block column first)
+  q = eye(m, k, dtype_name=result_dtype)
+  for i in range(len(Ts))[::-1]:
+    y_col_block = subblocks(y_res, [], [i])
+    q = subtract(q, dot(y_col_block, dot(Ts[i], dot(transpose(y_col_block), q))))
+
+  return q, r_res
@@ -0,0 +1,17 @@
+from typing import List
+
+import numpy as np
+import arrays.single as single
+import halo
+
+from core import *
+
+@halo.distributed([List[int]], [DistArray])
+def normal(shape):
+  num_blocks = DistArray.compute_num_blocks(shape)
+  objrefs = np.empty(num_blocks, dtype=object)
+  for index in np.ndindex(*num_blocks):
+    objrefs[index] = single.random.normal(DistArray.compute_block_shape(index, shape))
+  result = DistArray()
+  result.construct(shape, objrefs)
+  return result
@@ -0,0 +1,2 @@
+import random, linalg
+from core import *
@@ -0,0 +1,80 @@
+from typing import List
+import numpy as np
+import halo
+
+__all__ = ["zeros", "zeros_like", "ones", "eye", "dot", "vstack", "hstack", "subarray", "copy", "tril", "triu", "diag", "transpose", "add", "subtract", "sum", "shape"]
+
+@halo.distributed([List[int], str, str], [np.ndarray])
+def zeros(shape, dtype_name="float", order="C"):
+  return np.zeros(shape, dtype=np.dtype(dtype_name), order=order)
+
+@halo.distributed([np.ndarray, str, str, bool], [np.ndarray])
+def zeros_like(a, dtype_name="None", order="K", subok=True):
+  dtype_val = None if dtype_name == "None" else np.dtype(dtype_name)
+  return np.zeros_like(a, dtype=dtype_val, order=order, subok=subok)
+
+@halo.distributed([List[int], str, str], [np.ndarray])
+def ones(shape, dtype_name="float", order="C"):
+  return np.ones(shape, dtype=np.dtype(dtype_name), order=order)
+
+@halo.distributed([int, int, int, str], [np.ndarray])
+def eye(N, M=-1, k=0, dtype_name="float"):
+  M = N if M == -1 else M
+  return np.eye(N, M=M, k=k, dtype=np.dtype(dtype_name))
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def dot(a, b):
+  return np.dot(a, b)
+
+# TODO(rkn): My preferred signature would have been
+# @halo.distributed([List[np.ndarray]], [np.ndarray]) but that currently doesn't
+# work because that would expect a list of ndarrays not a list of ObjRefs
+@halo.distributed([np.ndarray, None], [np.ndarray])
+def vstack(*xs):
+  return np.vstack(xs)
+
+@halo.distributed([np.ndarray, None], [np.ndarray])
+def hstack(*xs):
+  return np.hstack(xs)
+
+# TODO(rkn): this doesn't parallel the numpy API, but we can't really slice an ObjRef, think about this
+@halo.distributed([np.ndarray, List[int], List[int]], [np.ndarray])
+def subarray(a, lower_indices, upper_indices): # TODO(rkn): be consistent about using "index" versus "indices"
+  return a[[slice(l, u) for (l, u) in zip(lower_indices, upper_indices)]]
+
+@halo.distributed([np.ndarray, str], [np.ndarray])
+def copy(a, order="K"):
+  return np.copy(a, order=order)
+
+@halo.distributed([np.ndarray, int], [np.ndarray])
+def tril(m, k=0):
+  return np.tril(m, k=k)
+
+@halo.distributed([np.ndarray, int], [np.ndarray])
+def triu(m, k=0):
+  return np.triu(m, k=k)
+
+@halo.distributed([np.ndarray, int], [np.ndarray])
+def diag(v, k=0):
+  return np.diag(v, k=k)
+
+@halo.distributed([np.ndarray, List[int]], [np.ndarray])
+def transpose(a, axes=[]):
+  axes = None if axes == [] else axes
+  return np.transpose(a, axes=axes)
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def add(x1, x2):
+  return np.add(x1, x2)
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def subtract(x1, x2):
+  return np.subtract(x1, x2)
+
+@halo.distributed([int, np.ndarray, None], [np.ndarray])
+def sum(axis, *xs):
+  return np.sum(xs, axis=axis)
+
+@halo.distributed([np.ndarray], [tuple])
+def shape(a):
+  return np.shape(a)
@@ -0,0 +1,88 @@
+from typing import List
+import numpy as np
+import halo
+
+__all__ = ["matrix_power", "solve", "tensorsolve", "tensorinv", "inv",
+           "cholesky", "eigvals", "eigvalsh", "pinv", "slogdet", "det",
+           "svd", "eig", "eigh", "lstsq", "norm", "qr", "cond", "matrix_rank",
+           "LinAlgError", "multi_dot"]
+
+@halo.distributed([np.ndarray, int], [np.ndarray])
+def matrix_power(M, n):
+  return np.linalg.matrix_power(M, n)
+
+@halo.distributed([np.ndarray, np.ndarray], [np.ndarray])
+def solve(a, b):
+  return np.linalg.solve(a, b)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray])
+def tensorsolve(a):
+  raise NotImplementedError
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray])
+def tensorinv(a):
+  raise NotImplementedError
+
+@halo.distributed([np.ndarray], [np.ndarray])
+def inv(a):
+  return np.linalg.inv(a)
+
+@halo.distributed([np.ndarray], [np.ndarray])
+def cholesky(a):
+  return np.linalg.cholesky(a)
+
+@halo.distributed([np.ndarray], [np.ndarray])
+def eigvals(a):
+  return np.linalg.eigvals(a)
+
+@halo.distributed([np.ndarray], [np.ndarray])
+def eigvalsh(a):
+  raise NotImplementedError
+
+@halo.distributed([np.ndarray], [np.ndarray])
+def pinv(a):
+  return np.linalg.pinv(a)
+
+@halo.distributed([np.ndarray], [int])
+def slogdet(a):
+  raise NotImplementedError
+
+@halo.distributed([np.ndarray], [float])
+def det(a):
+  return np.linalg.det(a)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray, np.ndarray])
+def svd(a):
+  return np.linalg.svd(a)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray])
+def eig(a):
+  return np.linalg.eig(a)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray])
+def eigh(a):
+  return np.linalg.eigh(a)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray, int, np.ndarray])
+def lstsq(a, b):
+  return np.linalg.lstsq(a)
+
+@halo.distributed([np.ndarray], [float])
+def norm(x):
+  return np.linalg.norm(x)
+
+@halo.distributed([np.ndarray], [np.ndarray, np.ndarray])
+def qr(a):
+  return np.linalg.qr(a)
+
+@halo.distributed([np.ndarray], [float])
+def cond(x):
+  return np.linalg.cond(x)
+
+@halo.distributed([np.ndarray], [int])
+def matrix_rank(M):
+  return np.linalg.matrix_rank(M)
+
+@halo.distributed([np.ndarray, None], [np.ndarray])
+def multi_dot(a):
+  raise NotImplementedError
@@ -0,0 +1,7 @@
+from typing import List
+import numpy as np
+import halo
+
+@halo.distributed([List[int]], [np.ndarray])
+def normal(shape):
+  return np.random.normal(size=shape)
@@ -0,0 +1,3 @@
+import libhalolib as lib
+import serialization
+from worker import scheduler_info, register_module, connect, disconnect, pull, push, distributed
@@ -0,0 +1,39 @@
+import importlib
+
+import halo
+
+def to_primitive(obj):
+  if hasattr(obj, "serialize"):
+    primitive_obj = ((type(obj).__module__, type(obj).__name__), obj.serialize())
+  else:
+    primitive_obj = ("primitive", obj)
+  return primitive_obj
+
+def from_primitive(primitive_obj):
+  if primitive_obj[0] == "primitive":
+    obj = primitive_obj[1]
+  else:
+    # This code assumes that the type module.__dict__[type_name] knows how to deserialize itself
+    type_module, type_name = primitive_obj[0]
+    module = importlib.import_module(type_module)
+    obj = module.__dict__[type_name]()
+    obj.deserialize(primitive_obj[1])
+  return obj
+
+def serialize(worker_capsule, obj):
+  primitive_obj = to_primitive(obj)
+  obj_capsule, contained_objrefs = halo.lib.serialize_object(worker_capsule, primitive_obj) # contained_objrefs is a list of the objrefs contained in obj
+  return obj_capsule, contained_objrefs
+
+def deserialize(worker_capsule, capsule):
+  primitive_obj = halo.lib.deserialize_object(worker_capsule, capsule)
+  return from_primitive(primitive_obj)
+
+def serialize_task(worker_capsule, func_name, args):
+  primitive_args = [(arg if isinstance(arg, halo.lib.ObjRef) else to_primitive(arg)) for arg in args]
+  return halo.lib.serialize_task(worker_capsule, func_name, primitive_args)
+
+def deserialize_task(worker_capsule, task):
+  func_name, primitive_args, return_objrefs = halo.lib.deserialize_task(worker_capsule, task)
+  args = [(arg if isinstance(arg, halo.lib.ObjRef) else from_primitive(arg)) for arg in primitive_args]
+  return func_name, args, return_objrefs
@@ -0,0 +1,134 @@
+import subprocess32 as subprocess
+import os
+import atexit
+import time
+
+import halo
+import halo.worker as worker
+
+_services_path = os.path.dirname(os.path.abspath(__file__))
+
+all_processes = []
+drivers = []
+
+IP_ADDRESS = "127.0.0.1"
+TIMEOUT_SECONDS = 5
+
+def address(host, port):
+  return host + ":" + str(port)
+
+scheduler_port_counter = 0
+def new_scheduler_port():
+  global scheduler_port_counter
+  scheduler_port_counter += 1
+  return 10000 + scheduler_port_counter
+
+worker_port_counter = 0
+def new_worker_port():
+  global worker_port_counter
+  worker_port_counter += 1
+  return 40000 + worker_port_counter
+
+objstore_port_counter = 0
+def new_objstore_port():
+  global objstore_port_counter
+  objstore_port_counter += 1
+  return 20000 + objstore_port_counter
+
+def cleanup():
+  global all_processes
+  for p, address in all_processes:
+    if p.poll() is not None: # process has already terminated
+      print "Process at address " + address + " has already terminated."
+      continue
+    print "Attempting to kill process at address " + address + "."
+    p.kill()
+    time.sleep(0.05) # is this necessary?
+    if p.poll() is not None:
+      print "Successfully killed process at address " + address + "."
+      continue
+    print "Kill attempt failed, attempting to terminate process at address " + address + "."
+    p.terminate()
+    time.sleep(0.05) # is this necessary?
+    if p.poll is not None:
+      print "Successfully terminated process at address " + address + "."
+      continue
+    print "Termination attempt failed, giving up."
+  all_processes = []
+
+  global drivers
+  for driver in drivers:
+    halo.disconnect(driver)
+  if len(drivers) == 0:
+    halo.disconnect()
+  drivers = []
+
+# atexit.register(cleanup)
+
+def start_scheduler(scheduler_address):
+  p = subprocess.Popen([os.path.join(_services_path, "scheduler"), scheduler_address])
+  all_processes.append((p, scheduler_address))
+
+def start_objstore(scheduler_address, objstore_address):
+  p = subprocess.Popen([os.path.join(_services_path, "objstore"), scheduler_address, objstore_address])
+  all_processes.append((p, objstore_address))
+
+def start_worker(test_path, scheduler_address, objstore_address, worker_address):
+  p = subprocess.Popen(["python",
+                        test_path,
+                        "--scheduler-address=" + scheduler_address,
+                        "--objstore-address=" + objstore_address,
+                        "--worker-address=" + worker_address])
+  all_processes.append((p, worker_address))
+
+def start_node(scheduler_address, node_ip_address, num_workers, worker_path=None):
+  """
+  Start an object store and associated workers that will be part of a larger cluster.
+    Assumes the scheduler has already been started.
+
+  :param scheduler_address: ip address and port of the scheduler (which may run on a different node)
+  :param node_ip_address: ip address (without port) of the node this function is run on
+  :param num_workers: the number of workers to be started on this node
+  :worker_path: path of the source code that will be run on the worker
+  """
+  objstore_address = address(node_ip_address, new_objstore_port())
+  start_objstore(scheduler_address, objstore_address)
+  time.sleep(0.2)
+  for _ in range(num_workers):
+    start_worker(worker_path, scheduler_address, objstore_address, address(node_ip_address, new_worker_port()))
+  time.sleep(0.3)
+  halo.connect(scheduler_address, objstore_address, address(node_ip_address, new_worker_port()))
+  time.sleep(0.5)
+
+def start_singlenode_cluster(return_drivers=False, num_objstores=1, num_workers_per_objstore=0, worker_path=None):
+  global drivers
+  if num_workers_per_objstore > 0 and worker_path is None:
+    raise Exception("Attempting to start a cluster with {} workers per object store, but `worker_path` is None.".format(num_workers_per_objstore))
+  if num_workers_per_objstore > 0 and num_objstores < 1:
+    raise Exception("Attempting to start a cluster with {} workers per object store, but `num_objstores` is {}.".format(num_objstores))
+  scheduler_address = address(IP_ADDRESS, new_scheduler_port())
+  start_scheduler(scheduler_address)
+  time.sleep(0.1)
+  objstore_addresses = []
+  # create objstores
+  for i in range(num_objstores):
+    objstore_address = address(IP_ADDRESS, new_objstore_port())
+    objstore_addresses.append(objstore_address)
+    start_objstore(scheduler_address, objstore_address)
+    time.sleep(0.2)
+    for _ in range(num_workers_per_objstore):
+      start_worker(worker_path, scheduler_address, objstore_address, address(IP_ADDRESS, new_worker_port()))
+    time.sleep(0.3)
+  # create drivers
+  if return_drivers:
+    driver_workers = []
+    for i in range(num_objstores):
+      driver_worker = worker.Worker()
+      halo.connect(scheduler_address, objstore_address, address(IP_ADDRESS, new_worker_port()), driver_worker)
+      driver_workers.append(driver_worker)
+      drivers.append(driver_worker)
+    time.sleep(0.5)
+    return driver_workers
+  else:
+    halo.connect(scheduler_address, objstore_addresses[0], address(IP_ADDRESS, new_worker_port()))
+    time.sleep(0.5)
@@ -0,0 +1,211 @@
+from types import ModuleType
+import typing
+import funcsigs
+import numpy as np
+import pynumbuf
+
+import halo
+import serialization
+
+class Worker(object):
+  """Holds the locally registered functions and the handle passed to `halo.lib` calls.
+
+  The methods in this class are considered unexposed to the user. The functions outside of this class are considered exposed.
+  """
+
+  def __init__(self):
+    # `handle` stays None until one is assigned from outside the class
+    self.handle = None
+    # maps a function's `func_name` to the function object
+    self.functions = {}
+
+  def put_object(self, objref, value):
+    """Store `value` in the local object store under `objref`.
+
+    This assumes that the value for `objref` has not yet been placed in the local object store.
+    """
+    if not pynumbuf.serializable(value):
+      # contained_objrefs is a list of the objrefs contained in the capsule
+      capsule, contained_objrefs = serialization.serialize(self.handle, value)
+      halo.lib.put_object(self.handle, objref, capsule, contained_objrefs)
+      return
+    halo.lib.put_arrow(self.handle, objref, value)
+
+  def get_object(self, objref):
+    """
+    Fetch the value stored for `objref` in the local object store. The call
+    blocks until that value has been written to the local object store.
+
+    WARNING: get_object can only be called on a canonical objref.
+    """
+    if not halo.lib.is_arrow(self.handle, objref):
+      capsule = halo.lib.get_object(self.handle, objref)
+      return serialization.deserialize(self.handle, capsule)
+    return halo.lib.get_arrow(self.handle, objref)
+
+  def alias_objrefs(self, alias_objref, target_objref):
+    """Make `alias_objref` point at the object that `target_objref` refers to."""
+    halo.lib.alias_objrefs(self.handle, alias_objref, target_objref)
+
+  def register_function(self, function):
+    """Tell the scheduler that this worker can execute `function` (identified by `function.func_name`) and keep `function` in `self.functions`."""
+    name = function.func_name
+    num_return_values = len(function.return_types)
+    halo.lib.register_function(self.handle, name, num_return_values)
+    self.functions[name] = function
+
+  def submit_task(self, func_name, args):
+    """Ask the scheduler to run the function named `func_name` on `args` and return right away with the object references for its outputs."""
+    capsule = serialization.serialize_task(self.handle, func_name, args)
+    return halo.lib.submit_task(self.handle, capsule)
+
+# We make `global_worker` a global variable so that there is one worker per worker process.
+# The module-level functions below use it as the default value of their `worker` parameter.
+global_worker = Worker()
+
+def scheduler_info(worker=global_worker):
+  """Return whatever `halo.lib.scheduler_info` reports for `worker`'s handle."""
+  return halo.lib.scheduler_info(worker.handle)
+
+def register_module(module, recursive=False, worker=global_worker):
+  """Register every distributed function found in `module` with `worker`.
+
+  An attribute of `module` is registered when it has a truthy `is_distributed`
+  attribute. `recursive` is accepted but currently has no effect, because the
+  code that would descend into submodules is commented out.
+  """
+  print "registering functions in module {}.".format(module.__name__)
+  for attr_name in dir(module):
+    candidate = getattr(module, attr_name)
+    if not (hasattr(candidate, "is_distributed") and candidate.is_distributed):
+      continue
+    print "registering {}.".format(candidate.func_name)
+    worker.register_function(candidate)
+    # disabled recursion into submodules:
+    # elif recursive and isinstance(val, ModuleType):
+    #   register_module(val, recursive, worker)
+
+def connect(scheduler_addr, objstore_addr, worker_addr, worker=global_worker):
+  """Create a `halo.lib` worker for the given addresses and store the result in `worker.handle`."""
+  # Remove any existing `handle` attribute before the new one is created.
+  # NOTE(review): presumably this lets the previous handle be released first -- confirm against halo.lib.
+  if hasattr(worker, "handle"):
+    del worker.handle
+  worker.handle = halo.lib.create_worker(scheduler_addr, objstore_addr, worker_addr)
+
+def disconnect(worker=global_worker):
+  """Call `halo.lib.disconnect` on `worker`'s handle."""
+  halo.lib.disconnect(worker.handle)
+
+def pull(objref, worker=global_worker):
+  """Request `objref` through `halo.lib.request_object`, then return its value from `worker.get_object`."""
+  # the request is issued before the (blocking) read of the local object store
+  halo.lib.request_object(worker.handle, objref)
+  return worker.get_object(objref)
+
+def push(value, worker=global_worker):
+  """Obtain an objref from `halo.lib.get_objref`, store `value` under it with `worker.put_object`, and return the objref."""
+  objref = halo.lib.get_objref(worker.handle)
+  worker.put_object(objref, value)
+  return objref
+
+def main_loop(worker=global_worker):
+  """Start the worker service for `worker` and process tasks forever.
+
+  Each task is deserialized, its arguments are resolved, the registered
+  function's executor is run, the outputs are stored, and the scheduler is
+  notified. This function never returns normally.
+
+  Raises:
+    Exception: if `halo.lib.connected` reports that `worker` is not connected.
+  """
+  is_connected = halo.lib.connected(worker.handle)
+  if not is_connected:
+    raise Exception("Worker is attempting to enter main_loop but has not been connected yet.")
+  halo.lib.start_worker_service(worker.handle)
+
+  def run_task(task_capsule):
+    # Kept as a nested function so that its locals go out of scope as soon as the
+    # task finishes, which is useful for inspecting reference counts.
+    func_name, args, return_objrefs = serialization.deserialize_task(worker.handle, task_capsule)
+    # get args from objstore
+    arguments = get_arguments_for_execution(worker.functions[func_name], args, worker)
+    # execute the function
+    outputs = worker.functions[func_name].executor(arguments)
+    # store output in local object store
+    store_outputs_in_objstore(return_objrefs, outputs, worker)
+    # notify the scheduler that the task has completed
+    halo.lib.notify_task_completed(worker.handle)
+
+  while True:
+    task = halo.lib.wait_for_next_task(worker.handle)
+    run_task(task)
+
+def distributed(arg_types, return_types, worker=global_worker):
+  """Build a decorator that turns a function into a remotely schedulable one.
+
+  The decorated name becomes a stub: calling it validates the arguments and
+  submits a task through `worker`, returning the resulting object reference(s)
+  (a single objref when there is exactly one, otherwise the whole list). The
+  stub carries `func_name`, `executor`, `arg_types`, `return_types`,
+  `is_distributed` and `keyword_defaults` attributes.
+  """
+  def distributed_decorator(func):
+    def func_executor(arguments):
+      """Runs remotely on a worker once the scheduler has scheduled the distributed function."""
+      print "Calling function {}".format(func.__name__)
+      outcome = func(*arguments)
+      # throws an exception if the result is invalid
+      check_return_values(func_call, outcome)
+      print "Finished executing function {}".format(func.__name__)
+      return outcome
+
+    def func_call(*args, **kwargs):
+      """Runs immediately in the caller when a worker calls the distributed function."""
+      call_args = list(args)
+      # fill in the remaining arguments from kwargs, falling back to the declared defaults
+      missing = func_call.keyword_defaults[len(call_args):]
+      for keyword, default in missing:
+        call_args.append(kwargs.get(keyword, default))
+      # throws an exception if the arguments are invalid
+      check_arguments(func_call, call_args)
+      objrefs = worker.submit_task(func_call.func_name, call_args)
+      if len(objrefs) == 1:
+        return objrefs[0]
+      return objrefs
+
+    func_call.func_name = "{}.{}".format(func.__module__, func.__name__)
+    func_call.executor = func_executor
+    func_call.arg_types = arg_types
+    func_call.return_types = return_types
+    func_call.is_distributed = True
+    # (parameter name, default) pairs in the order reported by the signature
+    parameters = funcsigs.signature(func).parameters
+    func_call.keyword_defaults = [(name, parameter.default) for name, parameter in parameters.iteritems()]
+    return func_call
+  return distributed_decorator
+
+# helper method, this should not be called by the user
+def check_return_values(function, result):
+  """Validate `result` against `function.return_types`.
+
+  With exactly one declared return type nothing is checked. Otherwise `result`
+  must have one entry per declared type, and each entry must be an instance of
+  its declared type or a `halo.lib.ObjRef`.
+
+  Raises:
+    Exception: if the number of values or the type of any value does not match.
+  """
+  declared_types = function.return_types
+  if len(declared_types) == 1:
+    # The type check for a single return value is currently disabled.
+    return
+  if len(result) != len(declared_types):
+    raise Exception("The @distributed decorator for function {} has {} return values with types {}, but {} returned {} values.".format(function.__name__, len(function.return_types), function.return_types, function.__name__, len(result)))
+  for i, declared in enumerate(declared_types):
+    value = result[i]
+    if not (isinstance(value, declared) or isinstance(value, halo.lib.ObjRef)):
+      raise Exception("The {}th return value for function {} has type {}, but the @distributed decorator expected a return value of type {} or an ObjRef.".format(i, function.__name__, type(value), declared))
+
+# helper method, this should not be called by the user
+def check_arguments(function, args):
+  """Validate the number and the types of `args` against `function.arg_types`.
+
+  When the last entry of `function.arg_types` is None the function is treated
+  as variadic: at least `len(arg_types) - 1` arguments are required, and any
+  argument beyond the explicitly typed positions is checked against the entry
+  just before the None. Arguments that are `halo.lib.ObjRef`s are not type
+  checked.
+
+  Raises:
+    Exception: if the argument count is wrong or a by-value argument has the
+      wrong type.
+  """
+  arg_types = function.arg_types
+  num_types = len(arg_types)
+  # Guard the `[-1]` lookup so that an empty `arg_types` yields the count error
+  # below instead of an IndexError.
+  is_variadic = num_types > 0 and arg_types[-1] is None
+
+  # check the number of args
+  if not is_variadic:
+    if len(args) != num_types:
+      raise Exception("Function {} expects {} arguments, but received {}.".format(function.__name__, num_types, len(args)))
+  elif len(args) < num_types - 1:
+    raise Exception("Function {} expects at least {} arguments, but received {}.".format(function.__name__, num_types - 1, len(args)))
+
+  for (i, arg) in enumerate(args):
+    if i < num_types - 1:
+      expected_type = arg_types[i]
+    elif i == num_types - 1 and not is_variadic:
+      expected_type = arg_types[-1]
+    elif is_variadic and num_types > 1:
+      expected_type = arg_types[-2]
+    else:
+      assert False, "This code should be unreachable."
+
+    if isinstance(arg, halo.lib.ObjRef):
+      # TODO(rkn): When we have type information in the ObjRef, do type checking here.
+      continue
+    if not isinstance(arg, expected_type): # TODO(rkn): This check doesn't really work, e.g., isinstance([1,2,3], typing.List[str]) == True
+      raise Exception("Argument {} for function {} has type {} but an argument of type {} was expected.".format(i, function.__name__, type(arg), expected_type))
+
+# helper method, this should not be called by the user
+def get_arguments_for_execution(function, args, worker=global_worker):
+  """Turn the task arguments `args` into the concrete values to call `function` with.
+
+  `halo.lib.ObjRef` arguments are replaced by the object read through
+  `worker.get_object`; every other argument is passed by value. Each resolved
+  value is type checked against `function.arg_types`.
+
+  Returns:
+    The list of resolved arguments, in the order of `args`.
+
+  Raises:
+    Exception: if a resolved argument is not an instance of its expected type.
+  """
+  # TODO(rkn): Eventually, all of the type checking can be put in `check_arguments` above so that the error will happen immediately when calling a remote function.
+  # The number of arguments is not checked here; see `check_arguments`.
+  resolved = []
+  for (i, arg) in enumerate(args):
+    declared = function.arg_types
+    last_index = len(declared) - 1
+    if i < last_index:
+      expected_type = declared[i]
+    elif i == last_index and declared[-1] is not None:
+      expected_type = declared[-1]
+    elif declared[-1] is None and len(declared) > 1:
+      expected_type = declared[-2]
+    else:
+      assert False, "This code should be unreachable."
+
+    if not isinstance(arg, halo.lib.ObjRef):
+      # pass the argument by value
+      argument = arg
+    else:
+      # get the object from the local object store
+      print "Getting argument {} for function {}.".format(i, function.__name__)
+      argument = worker.get_object(arg)
+      print "Successfully retrieved argument {} for function {}.".format(i, function.__name__)
+
+    if not isinstance(argument, expected_type):
+      raise Exception("Argument {} for function {} has type {} but an argument of type {} was expected.".format(i, function.__name__, type(argument), expected_type))
+    resolved.append(argument)
+  return resolved
+
+# helper method, this should not be called by the user
+def store_outputs_in_objstore(objrefs, outputs, worker=global_worker):
+  """Store each entry of `outputs` under the matching entry of `objrefs`.
+
+  When there is exactly one objref, `outputs` is taken to be the single return
+  value itself. An output that is already a `halo.lib.ObjRef` is aliased
+  instead of being stored.
+  """
+  if len(objrefs) == 1:
+    outputs = (outputs,)
+
+  for i, return_objref in enumerate(objrefs):
+    output = outputs[i]
+    if not isinstance(output, halo.lib.ObjRef):
+      worker.put_object(return_objref, output)
+      continue
+    # An ObjRef is being returned, so return_objref must be aliased to refer to the same object that the output refers to
+    print "Aliasing objrefs {} and {}".format(return_objref.val, output.val)
+    worker.alias_objrefs(return_objref, output)
@@ -0,0 +1,21 @@
+import sys
+
+from setuptools import setup, Extension, find_packages
+import setuptools
+
+# because of relative paths, this must be run from inside halo/lib/python/
+
+MACOSX = sys.platform == "darwin"
+
+# the shared library shipped in the package has a platform-specific file name
+_halo_library = "libhalolib.dylib" if MACOSX else "libhalolib.so"
+
+setup(
+  name="halo",
+  version="0.1.dev0",
+  packages=find_packages(),
+  # non-Python files bundled inside the `halo` package
+  package_data={"halo": [_halo_library, "scheduler", "objstore"]},
+  use_2to3=True,
+  zip_safe=False
+)