mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:38:18 +08:00
renaming project, halo -> ray (#95)
This commit is contained in:
committed by
Philipp Moritz
parent
44ae1788ee
commit
4cc024ae36
+17
-17
@@ -1,6 +1,6 @@
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(halo)
|
||||
project(ray)
|
||||
|
||||
set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
|
||||
|
||||
@@ -23,15 +23,15 @@ include_directories("${NUMPY_INCLUDE_DIR}")
|
||||
|
||||
set(PROTO_PATH "${CMAKE_SOURCE_DIR}/protos")
|
||||
|
||||
set(HALO_PROTO "${PROTO_PATH}/halo.proto")
|
||||
set(RAY_PROTO "${PROTO_PATH}/ray.proto")
|
||||
set(TYPES_PROTO "${PROTO_PATH}/types.proto")
|
||||
set(GENERATED_PROTOBUF_PATH "${CMAKE_BINARY_DIR}/generated")
|
||||
file(MAKE_DIRECTORY ${GENERATED_PROTOBUF_PATH})
|
||||
|
||||
set(HALO_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/halo.pb.cc")
|
||||
set(HALO_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/halo.pb.h")
|
||||
set(HALO_GRPC_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/halo.grpc.pb.cc")
|
||||
set(HALO_GRPC_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/halo.grpc.pb.h")
|
||||
set(RAY_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/ray.pb.cc")
|
||||
set(RAY_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/ray.pb.h")
|
||||
set(RAY_GRPC_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/ray.grpc.pb.cc")
|
||||
set(RAY_GRPC_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/ray.grpc.pb.h")
|
||||
|
||||
set(TYPES_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/types.pb.cc")
|
||||
set(TYPES_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/types.pb.h")
|
||||
@@ -39,19 +39,19 @@ set(TYPES_GRPC_PB_CPP_FILE "${GENERATED_PROTOBUF_PATH}/types.grpc.pb.cc")
|
||||
set(TYPES_GRPC_PB_H_FILE "${GENERATED_PROTOBUF_PATH}/types.grpc.pb.h")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${HALO_PB_H_FILE}"
|
||||
"${HALO_PB_CPP_FILE}"
|
||||
"${HALO_GRPC_PB_H_FILE}"
|
||||
"${HALO_GRPC_PB_CPP_FILE}"
|
||||
OUTPUT "${RAY_PB_H_FILE}"
|
||||
"${RAY_PB_CPP_FILE}"
|
||||
"${RAY_GRPC_PB_H_FILE}"
|
||||
"${RAY_GRPC_PB_CPP_FILE}"
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/thirdparty/grpc/bins/opt/protobuf/protoc
|
||||
ARGS "--proto_path=${PROTO_PATH}"
|
||||
"--cpp_out=${GENERATED_PROTOBUF_PATH}"
|
||||
"${HALO_PROTO}"
|
||||
"${RAY_PROTO}"
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/thirdparty/grpc/bins/opt/protobuf/protoc
|
||||
ARGS "--proto_path=${PROTO_PATH}"
|
||||
"--grpc_out=${GENERATED_PROTOBUF_PATH}"
|
||||
"--plugin=protoc-gen-grpc=${CMAKE_SOURCE_DIR}/thirdparty/grpc/bins/opt/grpc_cpp_plugin"
|
||||
"${HALO_PROTO}"
|
||||
"${RAY_PROTO}"
|
||||
)
|
||||
|
||||
add_custom_command(
|
||||
@@ -70,8 +70,8 @@ add_custom_command(
|
||||
"${TYPES_PROTO}"
|
||||
)
|
||||
|
||||
set(GENERATED_PROTOBUF_FILES ${HALO_PB_H_FILE} ${HALO_PB_CPP_FILE}
|
||||
${HALO_GRPC_PB_H_FILE} ${HALO_GRPC_PB_CPP_FILE}
|
||||
set(GENERATED_PROTOBUF_FILES ${RAY_PB_H_FILE} ${RAY_PB_CPP_FILE}
|
||||
${RAY_GRPC_PB_H_FILE} ${RAY_GRPC_PB_CPP_FILE}
|
||||
${TYPES_PB_H_FILE} ${TYPES_PB_CPP_FILE}
|
||||
${TYPES_GRPC_PB_H_FILE} ${TYPES_GRPC_PB_CPP_FILE})
|
||||
|
||||
@@ -110,7 +110,7 @@ target_link_libraries(pynumbuf ${ARROW_LIB} ${PYTHON_LIBRARIES})
|
||||
add_executable(objstore src/objstore.cc src/ipc.cc ${GENERATED_PROTOBUF_FILES})
|
||||
target_link_libraries(objstore ${ARROW_LIB} pynumbuf)
|
||||
add_executable(scheduler src/scheduler.cc src/computation_graph.cc ${GENERATED_PROTOBUF_FILES})
|
||||
add_library(halolib SHARED src/halolib.cc src/worker.cc src/ipc.cc ${GENERATED_PROTOBUF_FILES})
|
||||
target_link_libraries(halolib ${ARROW_LIB} pynumbuf)
|
||||
add_library(raylib SHARED src/raylib.cc src/worker.cc src/ipc.cc ${GENERATED_PROTOBUF_FILES})
|
||||
target_link_libraries(raylib ${ARROW_LIB} pynumbuf)
|
||||
|
||||
install(TARGETS objstore scheduler halolib DESTINATION ${CMAKE_SOURCE_DIR}/lib/python/halo)
|
||||
install(TARGETS objstore scheduler raylib DESTINATION ${CMAKE_SOURCE_DIR}/lib/python/ray)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Halo
|
||||
# Ray
|
||||
|
||||
Halo is a distributed execution framework with a Python-like programming model.
|
||||
Ray is a distributed execution framework with a Python-like programming model.
|
||||
|
||||
## Design Decisions
|
||||
|
||||
@@ -14,6 +14,6 @@ For a description of our design decisions, see
|
||||
|
||||
1. sudo apt-get update
|
||||
2. sudo apt-get install git
|
||||
3. git clone https://github.com/amplab/halo.git
|
||||
4. cd halo
|
||||
3. git clone https://github.com/amplab/ray.git
|
||||
4. cd ray
|
||||
5. bash setup.sh
|
||||
|
||||
+4
-4
@@ -1,6 +1,6 @@
|
||||
# Aliasing
|
||||
|
||||
An important feature of Halo is that a remote call sent to the scheduler
|
||||
An important feature of Ray is that a remote call sent to the scheduler
|
||||
immediately returns object references to the outputs of the task, and the actual
|
||||
outputs of the task are only associated with the relevant object references
|
||||
after the task has been executed and the outputs have been computed. This allows
|
||||
@@ -10,15 +10,15 @@ However, to provide a more flexible API, we allow tasks to not only return
|
||||
values, but to also return object references to values. As an examples, consider
|
||||
the following code.
|
||||
```python
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def f()
|
||||
return np.zeros(5)
|
||||
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def g()
|
||||
return f()
|
||||
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def h()
|
||||
return g()
|
||||
```
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Reference Counting
|
||||
|
||||
In Halo, each object is assigned a globally unique object reference by the
|
||||
In Ray, each object is assigned a globally unique object reference by the
|
||||
scheduler (starting with 0 and incrementing upward). The objects are stored in
|
||||
object stores. In order to avoid running out of memory, the object stores must
|
||||
know when it is ok to deallocate an object. Since a worker on one node may have
|
||||
@@ -11,7 +11,7 @@ information.
|
||||
## Reference Counting
|
||||
|
||||
Two approaches to reclaiming memory are garbage collection and reference
|
||||
counting. We choose to use a reference counting approach in Halo. There are a
|
||||
counting. We choose to use a reference counting approach in Ray. There are a
|
||||
couple of reasons for this. Reference counting allows us to reclaim memory as
|
||||
early as possible. It also avoids pausing the system for garbage collection. We
|
||||
also note that implementing reference counting at the cluster level plays nicely
|
||||
@@ -77,13 +77,13 @@ because they must be passed into `AliasObjRefs` at some point).
|
||||
The following problem has not yet been resolved. In the following code, the
|
||||
result `x` will be garbage.
|
||||
```python
|
||||
x = halo.pull(ra.zeros([10, 10], "float"))
|
||||
x = ray.pull(ra.zeros([10, 10], "float"))
|
||||
```
|
||||
When `ra.zeros` is called, a worker will create an array of zeros and store
|
||||
it in an object store. An object reference to the output is returned. The call
|
||||
to `halo.pull` will not copy data from the object store process to the worker
|
||||
to `ray.pull` will not copy data from the object store process to the worker
|
||||
process, but will instead give the worker process a pointer to shared memory.
|
||||
After the `halo.pull` call completes, the object reference returned by
|
||||
After the `ray.pull` call completes, the object reference returned by
|
||||
`ra.zeros` will go out of scope, and the object it refers to will be
|
||||
deallocated from the object store. This will cause the memory that `x` points to
|
||||
to be garbage.
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
# Scheduler
|
||||
|
||||
The scheduling strategies currently implemented in Halo are fairly basic and
|
||||
The scheduling strategies currently implemented in Ray are fairly basic and
|
||||
all use a central scheduler.
|
||||
|
||||
* The naive scheduler assigns tasks to workers just taking into account
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_INCLUDE_HALO_H
|
||||
#define HALO_INCLUDE_HALO_H
|
||||
#ifndef RAY_INCLUDE_RAY_H
|
||||
#define RAY_INCLUDE_RAY_H
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
@@ -34,20 +34,20 @@ public:
|
||||
typedef std::vector<std::vector<ObjStoreId> > ObjTable;
|
||||
typedef std::unordered_map<std::string, FnInfo> FnTable;
|
||||
|
||||
#define HALO_VERBOSE -1
|
||||
#define HALO_INFO 0
|
||||
#define HALO_DEBUG 1
|
||||
#define HALO_FATAL 2
|
||||
#define HALO_REFCOUNT HALO_VERBOSE
|
||||
#define HALO_ALIAS HALO_VERBOSE
|
||||
#define RAY_VERBOSE -1
|
||||
#define RAY_INFO 0
|
||||
#define RAY_DEBUG 1
|
||||
#define RAY_FATAL 2
|
||||
#define RAY_REFCOUNT RAY_VERBOSE
|
||||
#define RAY_ALIAS RAY_VERBOSE
|
||||
|
||||
#define HALO_LOG(LEVEL, MESSAGE) \
|
||||
if (LEVEL == HALO_VERBOSE) { \
|
||||
#define RAY_LOG(LEVEL, MESSAGE) \
|
||||
if (LEVEL == RAY_VERBOSE) { \
|
||||
\
|
||||
} else if (LEVEL == HALO_FATAL) { \
|
||||
} else if (LEVEL == RAY_FATAL) { \
|
||||
std::cerr << "fatal error occured: " << MESSAGE << std::endl; \
|
||||
std::exit(1); \
|
||||
} else if (LEVEL == HALO_DEBUG) { \
|
||||
} else if (LEVEL == RAY_DEBUG) { \
|
||||
\
|
||||
} else { \
|
||||
std::cout << MESSAGE << std::endl; \
|
||||
@@ -1,3 +1,3 @@
|
||||
import libhalolib as lib
|
||||
import libraylib as lib
|
||||
import serialization
|
||||
from worker import scheduler_info, register_module, connect, disconnect, pull, push, remote
|
||||
+19
-19
@@ -1,7 +1,7 @@
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import halo.arrays.remote as ra
|
||||
import halo
|
||||
import ray.arrays.remote as ra
|
||||
import ray
|
||||
|
||||
__all__ = ["BLOCK_SIZE", "DistArray", "assemble", "zeros", "ones", "copy",
|
||||
"eye", "triu", "tril", "blockwise_dot", "dot", "transpose", "add", "subtract", "numpy_to_dist", "subblocks"]
|
||||
@@ -55,13 +55,13 @@ class DistArray(object):
|
||||
|
||||
def assemble(self):
|
||||
"""Assemble an array on this node from a distributed array object reference."""
|
||||
first_block = halo.pull(self.objrefs[(0,) * self.ndim])
|
||||
first_block = ray.pull(self.objrefs[(0,) * self.ndim])
|
||||
dtype = first_block.dtype
|
||||
result = np.zeros(self.shape, dtype=dtype)
|
||||
for index in np.ndindex(*self.num_blocks):
|
||||
lower = DistArray.compute_block_lower(index, self.shape)
|
||||
upper = DistArray.compute_block_upper(index, self.shape)
|
||||
result[[slice(l, u) for (l, u) in zip(lower, upper)]] = halo.pull(self.objrefs[index])
|
||||
result[[slice(l, u) for (l, u) in zip(lower, upper)]] = ray.pull(self.objrefs[index])
|
||||
return result
|
||||
|
||||
def __getitem__(self, sliced):
|
||||
@@ -69,42 +69,42 @@ class DistArray(object):
|
||||
a = self.assemble()
|
||||
return a[sliced]
|
||||
|
||||
@halo.remote([DistArray], [np.ndarray])
|
||||
@ray.remote([DistArray], [np.ndarray])
|
||||
def assemble(a):
|
||||
return a.assemble()
|
||||
|
||||
# TODO(rkn): what should we call this method
|
||||
@halo.remote([np.ndarray], [DistArray])
|
||||
@ray.remote([np.ndarray], [DistArray])
|
||||
def numpy_to_dist(a):
|
||||
result = DistArray(a.shape)
|
||||
for index in np.ndindex(*result.num_blocks):
|
||||
lower = DistArray.compute_block_lower(index, a.shape)
|
||||
upper = DistArray.compute_block_upper(index, a.shape)
|
||||
result.objrefs[index] = halo.push(a[[slice(l, u) for (l, u) in zip(lower, upper)]])
|
||||
result.objrefs[index] = ray.push(a[[slice(l, u) for (l, u) in zip(lower, upper)]])
|
||||
return result
|
||||
|
||||
@halo.remote([List[int], str], [DistArray])
|
||||
@ray.remote([List[int], str], [DistArray])
|
||||
def zeros(shape, dtype_name="float"):
|
||||
result = DistArray(shape)
|
||||
for index in np.ndindex(*result.num_blocks):
|
||||
result.objrefs[index] = ra.zeros(DistArray.compute_block_shape(index, shape), dtype_name=dtype_name)
|
||||
return result
|
||||
|
||||
@halo.remote([List[int], str], [DistArray])
|
||||
@ray.remote([List[int], str], [DistArray])
|
||||
def ones(shape, dtype_name="float"):
|
||||
result = DistArray(shape)
|
||||
for index in np.ndindex(*result.num_blocks):
|
||||
result.objrefs[index] = ra.ones(DistArray.compute_block_shape(index, shape), dtype_name=dtype_name)
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray], [DistArray])
|
||||
@ray.remote([DistArray], [DistArray])
|
||||
def copy(a):
|
||||
result = DistArray(a.shape)
|
||||
for index in np.ndindex(*result.num_blocks):
|
||||
result.objrefs[index] = a.objrefs[index] # We don't need to actually copy the objects because cluster-level objects are assumed to be immutable.
|
||||
return result
|
||||
|
||||
@halo.remote([int, int, str], [DistArray])
|
||||
@ray.remote([int, int, str], [DistArray])
|
||||
def eye(dim1, dim2=-1, dtype_name="float"):
|
||||
dim2 = dim1 if dim2 == -1 else dim2
|
||||
shape = [dim1, dim2]
|
||||
@@ -117,7 +117,7 @@ def eye(dim1, dim2=-1, dtype_name="float"):
|
||||
result.objrefs[i, j] = ra.zeros(block_shape, dtype_name=dtype_name)
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray], [DistArray])
|
||||
@ray.remote([DistArray], [DistArray])
|
||||
def triu(a):
|
||||
if a.ndim != 2:
|
||||
raise Exception("Input must have 2 dimensions, but a.ndim is " + str(a.ndim))
|
||||
@@ -131,7 +131,7 @@ def triu(a):
|
||||
result.objrefs[i, j] = ra.zeros_like(a.objrefs[i, j])
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray], [DistArray])
|
||||
@ray.remote([DistArray], [DistArray])
|
||||
def tril(a):
|
||||
if a.ndim != 2:
|
||||
raise Exception("Input must have 2 dimensions, but a.ndim is " + str(a.ndim))
|
||||
@@ -145,7 +145,7 @@ def tril(a):
|
||||
result.objrefs[i, j] = ra.zeros_like(a.objrefs[i, j])
|
||||
return result
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def blockwise_dot(*matrices):
|
||||
n = len(matrices)
|
||||
if n % 2 != 0:
|
||||
@@ -156,7 +156,7 @@ def blockwise_dot(*matrices):
|
||||
result += np.dot(matrices[i], matrices[n / 2 + i])
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray, DistArray], [DistArray])
|
||||
@ray.remote([DistArray, DistArray], [DistArray])
|
||||
def dot(a, b):
|
||||
if a.ndim != 2:
|
||||
raise Exception("dot expects its arguments to be 2-dimensional, but a.ndim = {}.".format(a.ndim))
|
||||
@@ -171,7 +171,7 @@ def dot(a, b):
|
||||
result.objrefs[i, j] = blockwise_dot(*args)
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray, List[int]], [DistArray])
|
||||
@ray.remote([DistArray, List[int]], [DistArray])
|
||||
def subblocks(a, *ranges):
|
||||
"""
|
||||
This function produces a distributed array from a subset of the blocks in the `a`. The result and `a` will have the same number of dimensions.For example,
|
||||
@@ -202,7 +202,7 @@ def subblocks(a, *ranges):
|
||||
result.objrefs[index] = a.objrefs[tuple([ranges[i][index[i]] for i in range(a.ndim)])]
|
||||
return result
|
||||
|
||||
@halo.remote([DistArray], [DistArray])
|
||||
@ray.remote([DistArray], [DistArray])
|
||||
def transpose(a):
|
||||
if a.ndim != 2:
|
||||
raise Exception("transpose expects its argument to be 2-dimensional, but a.ndim = {}, a.shape = {}.".format(a.ndim, a.shape))
|
||||
@@ -213,7 +213,7 @@ def transpose(a):
|
||||
return result
|
||||
|
||||
# TODO(rkn): support broadcasting?
|
||||
@halo.remote([DistArray, DistArray], [DistArray])
|
||||
@ray.remote([DistArray, DistArray], [DistArray])
|
||||
def add(x1, x2):
|
||||
if x1.shape != x2.shape:
|
||||
raise Exception("add expects arguments `x1` and `x2` to have the same shape, but x1.shape = {}, and x2.shape = {}.".format(x1.shape, x2.shape))
|
||||
@@ -223,7 +223,7 @@ def add(x1, x2):
|
||||
return result
|
||||
|
||||
# TODO(rkn): support broadcasting?
|
||||
@halo.remote([DistArray, DistArray], [DistArray])
|
||||
@ray.remote([DistArray, DistArray], [DistArray])
|
||||
def subtract(x1, x2):
|
||||
if x1.shape != x2.shape:
|
||||
raise Exception("subtract expects arguments `x1` and `x2` to have the same shape, but x1.shape = {}, and x2.shape = {}.".format(x1.shape, x2.shape))
|
||||
+19
-19
@@ -1,14 +1,14 @@
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import halo.arrays.remote as ra
|
||||
import halo
|
||||
import ray.arrays.remote as ra
|
||||
import ray
|
||||
|
||||
from core import *
|
||||
|
||||
__all__ = ["tsqr", "modified_lu", "tsqr_hr", "qr"]
|
||||
|
||||
@halo.remote([DistArray], [DistArray, np.ndarray])
|
||||
@ray.remote([DistArray], [DistArray, np.ndarray])
|
||||
def tsqr(a):
|
||||
"""
|
||||
arguments:
|
||||
@@ -17,10 +17,10 @@ def tsqr(a):
|
||||
a.shape == (M, N)
|
||||
K == min(M, N)
|
||||
return values:
|
||||
q: DistArray, if q_full = halo.context.pull(DistArray, q).assemble(), then
|
||||
q: DistArray, if q_full = ray.context.pull(DistArray, q).assemble(), then
|
||||
q_full.shape == (M, K)
|
||||
np.allclose(np.dot(q_full.T, q_full), np.eye(K)) == True
|
||||
r: np.ndarray, if r_val = halo.context.pull(np.ndarray, r), then
|
||||
r: np.ndarray, if r_val = ray.context.pull(np.ndarray, r), then
|
||||
r_val.shape == (K, N)
|
||||
np.allclose(r, np.triu(r)) == True
|
||||
"""
|
||||
@@ -80,7 +80,7 @@ def tsqr(a):
|
||||
return q_result, r
|
||||
|
||||
# TODO(rkn): This is unoptimized, we really want a block version of this.
|
||||
@halo.remote([DistArray], [DistArray, np.ndarray, np.ndarray])
|
||||
@ray.remote([DistArray], [DistArray, np.ndarray, np.ndarray])
|
||||
def modified_lu(q):
|
||||
"""
|
||||
Algorithm 5 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf
|
||||
@@ -108,39 +108,39 @@ def modified_lu(q):
|
||||
for i in range(b):
|
||||
L[i, i] = 1
|
||||
U = np.triu(q_work)[:b, :]
|
||||
return numpy_to_dist(halo.push(L)), U, S # TODO(rkn): get rid of push and pull
|
||||
return numpy_to_dist(ray.push(L)), U, S # TODO(rkn): get rid of push and pull
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray, np.ndarray, int], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray, np.ndarray, int], [np.ndarray, np.ndarray])
|
||||
def tsqr_hr_helper1(u, s, y_top_block, b):
|
||||
y_top = y_top_block[:b, :b]
|
||||
s_full = np.diag(s)
|
||||
t = -1 * np.dot(u, np.dot(s_full, np.linalg.inv(y_top).T))
|
||||
return t, y_top
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def tsqr_hr_helper2(s, r_temp):
|
||||
s_full = np.diag(s)
|
||||
return np.dot(s_full, r_temp)
|
||||
|
||||
@halo.remote([DistArray], [DistArray, np.ndarray, np.ndarray, np.ndarray])
|
||||
@ray.remote([DistArray], [DistArray, np.ndarray, np.ndarray, np.ndarray])
|
||||
def tsqr_hr(a):
|
||||
"""Algorithm 6 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf"""
|
||||
q, r_temp = tsqr(a)
|
||||
y, u, s = modified_lu(q)
|
||||
y_blocked = halo.pull(y)
|
||||
y_blocked = ray.pull(y)
|
||||
t, y_top = tsqr_hr_helper1(u, s, y_blocked.objrefs[0, 0], a.shape[1])
|
||||
r = tsqr_hr_helper2(s, r_temp)
|
||||
return y, t, y_top, r
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray, np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray, np.ndarray, np.ndarray], [np.ndarray])
|
||||
def qr_helper1(a_rc, y_ri, t, W_c):
|
||||
return a_rc - np.dot(y_ri, np.dot(t.T, W_c))
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def qr_helper2(y_ri, a_rc):
|
||||
return np.dot(y_ri.T, a_rc)
|
||||
|
||||
@halo.remote([DistArray], [DistArray, DistArray])
|
||||
@ray.remote([DistArray], [DistArray, DistArray])
|
||||
def qr(a):
|
||||
"""Algorithm 7 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf"""
|
||||
m, n = a.shape[0], a.shape[1]
|
||||
@@ -150,21 +150,21 @@ def qr(a):
|
||||
a_work = DistArray()
|
||||
a_work.construct(a.shape, np.copy(a.objrefs))
|
||||
|
||||
result_dtype = np.linalg.qr(halo.pull(a.objrefs[0, 0]))[0].dtype.name
|
||||
r_res = halo.pull(zeros([k, n], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
|
||||
y_res = halo.pull(zeros([m, k], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
|
||||
result_dtype = np.linalg.qr(ray.pull(a.objrefs[0, 0]))[0].dtype.name
|
||||
r_res = ray.pull(zeros([k, n], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
|
||||
y_res = ray.pull(zeros([m, k], result_dtype)) # TODO(rkn): It would be preferable not to pull this right after creating it.
|
||||
Ts = []
|
||||
|
||||
for i in range(min(a.num_blocks[0], a.num_blocks[1])): # this differs from the paper, which says "for i in range(a.num_blocks[1])", but that doesn't seem to make any sense when a.num_blocks[1] > a.num_blocks[0]
|
||||
sub_dist_array = subblocks(a_work, range(i, a_work.num_blocks[0]), [i])
|
||||
y, t, _, R = tsqr_hr(sub_dist_array)
|
||||
y_val = halo.pull(y)
|
||||
y_val = ray.pull(y)
|
||||
|
||||
for j in range(i, a.num_blocks[0]):
|
||||
y_res.objrefs[j, i] = y_val.objrefs[j - i, 0]
|
||||
if a.shape[0] > a.shape[1]:
|
||||
# in this case, R needs to be square
|
||||
R_shape = halo.pull(ra.shape(R))
|
||||
R_shape = ray.pull(ra.shape(R))
|
||||
eye_temp = ra.eye(R_shape[1], R_shape[0], dtype_name=result_dtype)
|
||||
r_res.objrefs[i, i] = ra.dot(eye_temp, R)
|
||||
else:
|
||||
+3
-3
@@ -1,12 +1,12 @@
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import halo.arrays.remote as ra
|
||||
import halo
|
||||
import ray.arrays.remote as ra
|
||||
import ray
|
||||
|
||||
from core import *
|
||||
|
||||
@halo.remote([List[int]], [DistArray])
|
||||
@ray.remote([List[int]], [DistArray])
|
||||
def normal(shape):
|
||||
num_blocks = DistArray.compute_num_blocks(shape)
|
||||
objrefs = np.empty(num_blocks, dtype=object)
|
||||
@@ -1,77 +1,77 @@
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import halo
|
||||
import ray
|
||||
|
||||
__all__ = ["zeros", "zeros_like", "ones", "eye", "dot", "vstack", "hstack", "subarray", "copy", "tril", "triu", "diag", "transpose", "add", "subtract", "sum", "shape"]
|
||||
|
||||
@halo.remote([List[int], str, str], [np.ndarray])
|
||||
@ray.remote([List[int], str, str], [np.ndarray])
|
||||
def zeros(shape, dtype_name="float", order="C"):
|
||||
return np.zeros(shape, dtype=np.dtype(dtype_name), order=order)
|
||||
|
||||
@halo.remote([np.ndarray, str, str, bool], [np.ndarray])
|
||||
@ray.remote([np.ndarray, str, str, bool], [np.ndarray])
|
||||
def zeros_like(a, dtype_name="None", order="K", subok=True):
|
||||
dtype_val = None if dtype_name == "None" else np.dtype(dtype_name)
|
||||
return np.zeros_like(a, dtype=dtype_val, order=order, subok=subok)
|
||||
|
||||
@halo.remote([List[int], str, str], [np.ndarray])
|
||||
@ray.remote([List[int], str, str], [np.ndarray])
|
||||
def ones(shape, dtype_name="float", order="C"):
|
||||
return np.ones(shape, dtype=np.dtype(dtype_name), order=order)
|
||||
|
||||
@halo.remote([int, int, int, str], [np.ndarray])
|
||||
@ray.remote([int, int, int, str], [np.ndarray])
|
||||
def eye(N, M=-1, k=0, dtype_name="float"):
|
||||
M = N if M == -1 else M
|
||||
return np.eye(N, M=M, k=k, dtype=np.dtype(dtype_name))
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def dot(a, b):
|
||||
return np.dot(a, b)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def vstack(*xs):
|
||||
return np.vstack(xs)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def hstack(*xs):
|
||||
return np.hstack(xs)
|
||||
|
||||
# TODO(rkn): instead of this, consider implementing slicing
|
||||
@halo.remote([np.ndarray, List[int], List[int]], [np.ndarray])
|
||||
@ray.remote([np.ndarray, List[int], List[int]], [np.ndarray])
|
||||
def subarray(a, lower_indices, upper_indices): # TODO(rkn): be consistent about using "index" versus "indices"
|
||||
return a[[slice(l, u) for (l, u) in zip(lower_indices, upper_indices)]]
|
||||
|
||||
@halo.remote([np.ndarray, str], [np.ndarray])
|
||||
@ray.remote([np.ndarray, str], [np.ndarray])
|
||||
def copy(a, order="K"):
|
||||
return np.copy(a, order=order)
|
||||
|
||||
@halo.remote([np.ndarray, int], [np.ndarray])
|
||||
@ray.remote([np.ndarray, int], [np.ndarray])
|
||||
def tril(m, k=0):
|
||||
return np.tril(m, k=k)
|
||||
|
||||
@halo.remote([np.ndarray, int], [np.ndarray])
|
||||
@ray.remote([np.ndarray, int], [np.ndarray])
|
||||
def triu(m, k=0):
|
||||
return np.triu(m, k=k)
|
||||
|
||||
@halo.remote([np.ndarray, int], [np.ndarray])
|
||||
@ray.remote([np.ndarray, int], [np.ndarray])
|
||||
def diag(v, k=0):
|
||||
return np.diag(v, k=k)
|
||||
|
||||
@halo.remote([np.ndarray, List[int]], [np.ndarray])
|
||||
@ray.remote([np.ndarray, List[int]], [np.ndarray])
|
||||
def transpose(a, axes=[]):
|
||||
axes = None if axes == [] else axes
|
||||
return np.transpose(a, axes=axes)
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def add(x1, x2):
|
||||
return np.add(x1, x2)
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def subtract(x1, x2):
|
||||
return np.subtract(x1, x2)
|
||||
|
||||
@halo.remote([int, np.ndarray], [np.ndarray])
|
||||
@ray.remote([int, np.ndarray], [np.ndarray])
|
||||
def sum(axis, *xs):
|
||||
return np.sum(xs, axis=axis)
|
||||
|
||||
@halo.remote([np.ndarray], [tuple])
|
||||
@ray.remote([np.ndarray], [tuple])
|
||||
def shape(a):
|
||||
return np.shape(a)
|
||||
@@ -1,88 +1,88 @@
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import halo
|
||||
import ray
|
||||
|
||||
__all__ = ["matrix_power", "solve", "tensorsolve", "tensorinv", "inv",
|
||||
"cholesky", "eigvals", "eigvalsh", "pinv", "slogdet", "det",
|
||||
"svd", "eig", "eigh", "lstsq", "norm", "qr", "cond", "matrix_rank",
|
||||
"LinAlgError", "multi_dot"]
|
||||
|
||||
@halo.remote([np.ndarray, int], [np.ndarray])
|
||||
@ray.remote([np.ndarray, int], [np.ndarray])
|
||||
def matrix_power(M, n):
|
||||
return np.linalg.matrix_power(M, n)
|
||||
|
||||
@halo.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray, np.ndarray], [np.ndarray])
|
||||
def solve(a, b):
|
||||
return np.linalg.solve(a, b)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
def tensorsolve(a):
|
||||
raise NotImplementedError
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
def tensorinv(a):
|
||||
raise NotImplementedError
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def inv(a):
|
||||
return np.linalg.inv(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def cholesky(a):
|
||||
return np.linalg.cholesky(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def eigvals(a):
|
||||
return np.linalg.eigvals(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def eigvalsh(a):
|
||||
raise NotImplementedError
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def pinv(a):
|
||||
return np.linalg.pinv(a)
|
||||
|
||||
@halo.remote([np.ndarray], [int])
|
||||
@ray.remote([np.ndarray], [int])
|
||||
def slogdet(a):
|
||||
raise NotImplementedError
|
||||
|
||||
@halo.remote([np.ndarray], [float])
|
||||
@ray.remote([np.ndarray], [float])
|
||||
def det(a):
|
||||
return np.linalg.det(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray, np.ndarray])
|
||||
def svd(a):
|
||||
return np.linalg.svd(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
def eig(a):
|
||||
return np.linalg.eig(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
def eigh(a):
|
||||
return np.linalg.eigh(a)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray, int, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray, int, np.ndarray])
|
||||
def lstsq(a, b):
|
||||
return np.linalg.lstsq(a)
|
||||
|
||||
@halo.remote([np.ndarray], [float])
|
||||
@ray.remote([np.ndarray], [float])
|
||||
def norm(x):
|
||||
return np.linalg.norm(x)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray, np.ndarray])
|
||||
def qr(a):
|
||||
return np.linalg.qr(a)
|
||||
|
||||
@halo.remote([np.ndarray], [float])
|
||||
@ray.remote([np.ndarray], [float])
|
||||
def cond(x):
|
||||
return np.linalg.cond(x)
|
||||
|
||||
@halo.remote([np.ndarray], [int])
|
||||
@ray.remote([np.ndarray], [int])
|
||||
def matrix_rank(M):
|
||||
return np.linalg.matrix_rank(M)
|
||||
|
||||
@halo.remote([np.ndarray], [np.ndarray])
|
||||
@ray.remote([np.ndarray], [np.ndarray])
|
||||
def multi_dot(*a):
|
||||
raise NotImplementedError
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import halo
|
||||
import ray
|
||||
|
||||
@halo.remote([List[int]], [np.ndarray])
|
||||
@ray.remote([List[int]], [np.ndarray])
|
||||
def normal(shape):
|
||||
return np.random.normal(size=shape)
|
||||
@@ -1,6 +1,6 @@
|
||||
import importlib
|
||||
|
||||
import halo
|
||||
import ray
|
||||
|
||||
def to_primitive(obj):
|
||||
if hasattr(obj, "serialize"):
|
||||
@@ -22,18 +22,18 @@ def from_primitive(primitive_obj):
|
||||
|
||||
def serialize(worker_capsule, obj):
|
||||
primitive_obj = to_primitive(obj)
|
||||
obj_capsule, contained_objrefs = halo.lib.serialize_object(worker_capsule, primitive_obj) # contained_objrefs is a list of the objrefs contained in obj
|
||||
obj_capsule, contained_objrefs = ray.lib.serialize_object(worker_capsule, primitive_obj) # contained_objrefs is a list of the objrefs contained in obj
|
||||
return obj_capsule, contained_objrefs
|
||||
|
||||
def deserialize(worker_capsule, capsule):
|
||||
primitive_obj = halo.lib.deserialize_object(worker_capsule, capsule)
|
||||
primitive_obj = ray.lib.deserialize_object(worker_capsule, capsule)
|
||||
return from_primitive(primitive_obj)
|
||||
|
||||
def serialize_task(worker_capsule, func_name, args):
|
||||
primitive_args = [(arg if isinstance(arg, halo.lib.ObjRef) else to_primitive(arg)) for arg in args]
|
||||
return halo.lib.serialize_task(worker_capsule, func_name, primitive_args)
|
||||
primitive_args = [(arg if isinstance(arg, ray.lib.ObjRef) else to_primitive(arg)) for arg in args]
|
||||
return ray.lib.serialize_task(worker_capsule, func_name, primitive_args)
|
||||
|
||||
def deserialize_task(worker_capsule, task):
|
||||
func_name, primitive_args, return_objrefs = halo.lib.deserialize_task(worker_capsule, task)
|
||||
args = [(arg if isinstance(arg, halo.lib.ObjRef) else from_primitive(arg)) for arg in primitive_args]
|
||||
func_name, primitive_args, return_objrefs = ray.lib.deserialize_task(worker_capsule, task)
|
||||
args = [(arg if isinstance(arg, ray.lib.ObjRef) else from_primitive(arg)) for arg in primitive_args]
|
||||
return func_name, args, return_objrefs
|
||||
@@ -3,8 +3,8 @@ import os
|
||||
import atexit
|
||||
import time
|
||||
|
||||
import halo
|
||||
import halo.worker as worker
|
||||
import ray
|
||||
import ray.worker as worker
|
||||
|
||||
_services_path = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
@@ -58,9 +58,9 @@ def cleanup():
|
||||
|
||||
global drivers
|
||||
for driver in drivers:
|
||||
halo.disconnect(driver)
|
||||
ray.disconnect(driver)
|
||||
if len(drivers) == 0:
|
||||
halo.disconnect()
|
||||
ray.disconnect()
|
||||
drivers = []
|
||||
|
||||
# atexit.register(cleanup)
|
||||
@@ -97,7 +97,7 @@ def start_node(scheduler_address, node_ip_address, num_workers, worker_path=None
|
||||
for _ in range(num_workers):
|
||||
start_worker(worker_path, scheduler_address, objstore_address, address(node_ip_address, new_worker_port()))
|
||||
time.sleep(0.3)
|
||||
halo.connect(scheduler_address, objstore_address, address(node_ip_address, new_worker_port()))
|
||||
ray.connect(scheduler_address, objstore_address, address(node_ip_address, new_worker_port()))
|
||||
time.sleep(0.5)
|
||||
|
||||
def start_singlenode_cluster(return_drivers=False, num_objstores=1, num_workers_per_objstore=0, worker_path=None):
|
||||
@@ -124,11 +124,11 @@ def start_singlenode_cluster(return_drivers=False, num_objstores=1, num_workers_
|
||||
driver_workers = []
|
||||
for i in range(num_objstores):
|
||||
driver_worker = worker.Worker()
|
||||
halo.connect(scheduler_address, objstore_address, address(IP_ADDRESS, new_worker_port()), driver_worker)
|
||||
ray.connect(scheduler_address, objstore_address, address(IP_ADDRESS, new_worker_port()), driver_worker)
|
||||
driver_workers.append(driver_worker)
|
||||
drivers.append(driver_worker)
|
||||
time.sleep(0.5)
|
||||
return driver_workers
|
||||
else:
|
||||
halo.connect(scheduler_address, objstore_addresses[0], address(IP_ADDRESS, new_worker_port()))
|
||||
ray.connect(scheduler_address, objstore_addresses[0], address(IP_ADDRESS, new_worker_port()))
|
||||
time.sleep(0.5)
|
||||
@@ -4,7 +4,7 @@ import funcsigs
|
||||
import numpy as np
|
||||
import pynumbuf
|
||||
|
||||
import halo
|
||||
import ray
|
||||
import serialization
|
||||
|
||||
class Worker(object):
|
||||
@@ -17,10 +17,10 @@ class Worker(object):
|
||||
def put_object(self, objref, value):
|
||||
"""Put `value` in the local object store with objref `objref`. This assumes that the value for `objref` has not yet been placed in the local object store."""
|
||||
if pynumbuf.serializable(value):
|
||||
halo.lib.put_arrow(self.handle, objref, value)
|
||||
ray.lib.put_arrow(self.handle, objref, value)
|
||||
else:
|
||||
object_capsule, contained_objrefs = serialization.serialize(self.handle, value) # contained_objrefs is a list of the objrefs contained in object_capsule
|
||||
halo.lib.put_object(self.handle, objref, object_capsule, contained_objrefs)
|
||||
ray.lib.put_object(self.handle, objref, object_capsule, contained_objrefs)
|
||||
|
||||
def get_object(self, objref):
|
||||
"""
|
||||
@@ -29,32 +29,32 @@ class Worker(object):
|
||||
|
||||
WARNING: get_object can only be called on a canonical objref.
|
||||
"""
|
||||
if halo.lib.is_arrow(self.handle, objref):
|
||||
return halo.lib.get_arrow(self.handle, objref)
|
||||
if ray.lib.is_arrow(self.handle, objref):
|
||||
return ray.lib.get_arrow(self.handle, objref)
|
||||
else:
|
||||
object_capsule = halo.lib.get_object(self.handle, objref)
|
||||
object_capsule = ray.lib.get_object(self.handle, objref)
|
||||
return serialization.deserialize(self.handle, object_capsule)
|
||||
|
||||
def alias_objrefs(self, alias_objref, target_objref):
|
||||
"""Make `alias_objref` refer to the same object that `target_objref` refers to."""
|
||||
halo.lib.alias_objrefs(self.handle, alias_objref, target_objref)
|
||||
ray.lib.alias_objrefs(self.handle, alias_objref, target_objref)
|
||||
|
||||
def register_function(self, function):
|
||||
"""Notify the scheduler that this worker can execute the function with name `func_name`. Store the function `function` locally."""
|
||||
halo.lib.register_function(self.handle, function.func_name, len(function.return_types))
|
||||
ray.lib.register_function(self.handle, function.func_name, len(function.return_types))
|
||||
self.functions[function.func_name] = function
|
||||
|
||||
def submit_task(self, func_name, args):
|
||||
"""Tell the scheduler to schedule the execution of the function with name `func_name` with arguments `args`. Retrieve object references for the outputs of the function from the scheduler and immediately return them."""
|
||||
task_capsule = serialization.serialize_task(self.handle, func_name, args)
|
||||
objrefs = halo.lib.submit_task(self.handle, task_capsule)
|
||||
objrefs = ray.lib.submit_task(self.handle, task_capsule)
|
||||
return objrefs
|
||||
|
||||
# We make `global_worker` a global variable so that there is one worker per worker process.
|
||||
global_worker = Worker()
|
||||
|
||||
def scheduler_info(worker=global_worker):
|
||||
return halo.lib.scheduler_info(worker.handle);
|
||||
return ray.lib.scheduler_info(worker.handle);
|
||||
|
||||
def register_module(module, recursive=False, worker=global_worker):
|
||||
print "registering functions in module {}.".format(module.__name__)
|
||||
@@ -69,32 +69,32 @@ def register_module(module, recursive=False, worker=global_worker):
|
||||
def connect(scheduler_addr, objstore_addr, worker_addr, worker=global_worker):
|
||||
if hasattr(worker, "handle"):
|
||||
del worker.handle
|
||||
worker.handle = halo.lib.create_worker(scheduler_addr, objstore_addr, worker_addr)
|
||||
worker.handle = ray.lib.create_worker(scheduler_addr, objstore_addr, worker_addr)
|
||||
|
||||
def disconnect(worker=global_worker):
|
||||
halo.lib.disconnect(worker.handle)
|
||||
ray.lib.disconnect(worker.handle)
|
||||
|
||||
def pull(objref, worker=global_worker):
|
||||
halo.lib.request_object(worker.handle, objref)
|
||||
ray.lib.request_object(worker.handle, objref)
|
||||
return worker.get_object(objref)
|
||||
|
||||
def push(value, worker=global_worker):
|
||||
objref = halo.lib.get_objref(worker.handle)
|
||||
objref = ray.lib.get_objref(worker.handle)
|
||||
worker.put_object(objref, value)
|
||||
return objref
|
||||
|
||||
def main_loop(worker=global_worker):
|
||||
if not halo.lib.connected(worker.handle):
|
||||
if not ray.lib.connected(worker.handle):
|
||||
raise Exception("Worker is attempting to enter main_loop but has not been connected yet.")
|
||||
halo.lib.start_worker_service(worker.handle)
|
||||
ray.lib.start_worker_service(worker.handle)
|
||||
def process_task(task): # wrapping these lines in a function should cause the local variables to go out of scope more quickly, which is useful for inspecting reference counts
|
||||
func_name, args, return_objrefs = serialization.deserialize_task(worker.handle, task)
|
||||
arguments = get_arguments_for_execution(worker.functions[func_name], args, worker) # get args from objstore
|
||||
outputs = worker.functions[func_name].executor(arguments) # execute the function
|
||||
store_outputs_in_objstore(return_objrefs, outputs, worker) # store output in local object store
|
||||
halo.lib.notify_task_completed(worker.handle) # notify the scheduler that the task has completed
|
||||
ray.lib.notify_task_completed(worker.handle) # notify the scheduler that the task has completed
|
||||
while True:
|
||||
task = halo.lib.wait_for_next_task(worker.handle)
|
||||
task = ray.lib.wait_for_next_task(worker.handle)
|
||||
process_task(task)
|
||||
|
||||
def remote(arg_types, return_types, worker=global_worker):
|
||||
@@ -148,7 +148,7 @@ def check_return_values(function, result):
|
||||
if len(result) != len(function.return_types):
|
||||
raise Exception("The @remote decorator for function {} has {} return values with types {}, but {} returned {} values.".format(function.__name__, len(function.return_types), function.return_types, function.__name__, len(result)))
|
||||
for i in range(len(result)):
|
||||
if (not isinstance(result[i], function.return_types[i])) and (not isinstance(result[i], halo.lib.ObjRef)):
|
||||
if (not isinstance(result[i], function.return_types[i])) and (not isinstance(result[i], ray.lib.ObjRef)):
|
||||
raise Exception("The {}th return value for function {} has type {}, but the @remote decorator expected a return value of type {} or an ObjRef.".format(i, function.__name__, type(result[i]), function.return_types[i]))
|
||||
|
||||
# helper method, this should not be called by the user
|
||||
@@ -167,7 +167,7 @@ def check_arguments(function, args):
|
||||
else:
|
||||
assert False, "This code should be unreachable."
|
||||
|
||||
if isinstance(arg, halo.lib.ObjRef):
|
||||
if isinstance(arg, ray.lib.ObjRef):
|
||||
# TODO(rkn): When we have type information in the ObjRef, do type checking here.
|
||||
pass
|
||||
else:
|
||||
@@ -194,7 +194,7 @@ def get_arguments_for_execution(function, args, worker=global_worker):
|
||||
else:
|
||||
assert False, "This code should be unreachable."
|
||||
|
||||
if isinstance(arg, halo.lib.ObjRef):
|
||||
if isinstance(arg, ray.lib.ObjRef):
|
||||
# get the object from the local object store
|
||||
print "Getting argument {} for function {}.".format(i, function.__name__)
|
||||
argument = worker.get_object(arg)
|
||||
@@ -214,7 +214,7 @@ def store_outputs_in_objstore(objrefs, outputs, worker=global_worker):
|
||||
outputs = (outputs,)
|
||||
|
||||
for i in range(len(objrefs)):
|
||||
if isinstance(outputs[i], halo.lib.ObjRef):
|
||||
if isinstance(outputs[i], ray.lib.ObjRef):
|
||||
# An ObjRef is being returned, so we must alias objrefs[i] so that it refers to the same object that outputs[i] refers to
|
||||
print "Aliasing objrefs {} and {}".format(objrefs[i].val, outputs[i].val)
|
||||
worker.alias_objrefs(objrefs[i], outputs[i])
|
||||
+3
-3
@@ -3,15 +3,15 @@ import sys
|
||||
from setuptools import setup, Extension, find_packages
|
||||
import setuptools
|
||||
|
||||
# because of relative paths, this must be run from inside halo/lib/python/
|
||||
# because of relative paths, this must be run from inside ray/lib/python/
|
||||
|
||||
setup(
|
||||
name = "halo",
|
||||
name = "ray",
|
||||
version = "0.1.dev0",
|
||||
use_2to3=True,
|
||||
packages=find_packages(),
|
||||
package_data = {
|
||||
"halo": ["libhalolib.so", "scheduler", "objstore"]
|
||||
"ray": ["libraylib.so", "scheduler", "objstore"]
|
||||
},
|
||||
zip_safe=False
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@ OperationId ComputationGraph::add_operation(std::unique_ptr<Operation> operation
|
||||
OperationId operationid = operations_.size();
|
||||
OperationId creator_operationid = operation->creator_operationid();
|
||||
if (spawned_operations_.size() != operationid) {
|
||||
HALO_LOG(HALO_FATAL, "ComputationGraph is attempting to call add_operation, but spawned_operations_.size() != operationid.");
|
||||
RAY_LOG(RAY_FATAL, "ComputationGraph is attempting to call add_operation, but spawned_operations_.size() != operationid.");
|
||||
}
|
||||
operations_.emplace_back(std::move(operation));
|
||||
if (creator_operationid != NO_OPERATION && creator_operationid != ROOT_OPERATION) {
|
||||
@@ -16,10 +16,10 @@ OperationId ComputationGraph::add_operation(std::unique_ptr<Operation> operation
|
||||
|
||||
const Task& ComputationGraph::get_task(OperationId operationid) {
|
||||
if (operationid >= operations_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "ComputationGraph attempting to get_task with operationid " << operationid << ", but operationid >= operations_.size().");
|
||||
RAY_LOG(RAY_FATAL, "ComputationGraph attempting to get_task with operationid " << operationid << ", but operationid >= operations_.size().");
|
||||
}
|
||||
if (!operations_[operationid]->has_task()) {
|
||||
HALO_LOG(HALO_FATAL, "Calling get_task with operationid " << operationid << ", but this corresponds to a push not a task.");
|
||||
RAY_LOG(RAY_FATAL, "Calling get_task with operationid " << operationid << ", but this corresponds to a push not a task.");
|
||||
}
|
||||
return operations_[operationid]->task();
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef HALO_COMPUTATIONGRAPH_H
|
||||
#define HALO_COMPUTATIONGRAPH_H
|
||||
#ifndef RAY_COMPUTATIONGRAPH_H
|
||||
#define RAY_COMPUTATIONGRAPH_H
|
||||
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
#include "halo/halo.h"
|
||||
#include "halo.grpc.pb.h"
|
||||
#include "ray/ray.h"
|
||||
#include "ray.grpc.pb.h"
|
||||
#include "types.pb.h"
|
||||
|
||||
// used to represent the root operation (that is, the driver code)
|
||||
|
||||
+6
-6
@@ -31,9 +31,9 @@ MemorySegmentPool::MemorySegmentPool(ObjStoreId objstoreid, bool create) : objst
|
||||
// creates a memory segment if it is not already there; if the pool is in create mode,
|
||||
// space is allocated, if it is in open mode, the shared memory is mapped into the process
|
||||
void MemorySegmentPool::open_segment(SegmentId segmentid, size_t size) {
|
||||
HALO_LOG(HALO_DEBUG, "Opening segmentid " << segmentid << " on object store " << objstoreid_ << " with create_mode_ = " << create_mode_);
|
||||
RAY_LOG(RAY_DEBUG, "Opening segmentid " << segmentid << " on object store " << objstoreid_ << " with create_mode_ = " << create_mode_);
|
||||
if (segmentid != segments_.size() && create_mode_) {
|
||||
HALO_LOG(HALO_FATAL, "Object store " << objstoreid_ << " is attempting to open segmentid " << segmentid << " on the object store, but segments_.size() = " << segments_.size());
|
||||
RAY_LOG(RAY_FATAL, "Object store " << objstoreid_ << " is attempting to open segmentid " << segmentid << " on the object store, but segments_.size() = " << segments_.size());
|
||||
}
|
||||
if (segmentid >= segments_.size()) { // resize and initialize segments_
|
||||
int current_size = segments_.size();
|
||||
@@ -47,7 +47,7 @@ void MemorySegmentPool::open_segment(SegmentId segmentid, size_t size) {
|
||||
return;
|
||||
}
|
||||
if (segments_[segmentid].second == SegmentStatusType::CLOSED) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to open segmentid " << segmentid << ", but segments_[segmentid].second == SegmentStatusType::CLOSED.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to open segmentid " << segmentid << ", but segments_[segmentid].second == SegmentStatusType::CLOSED.");
|
||||
}
|
||||
std::string segment_name = get_segment_name(segmentid);
|
||||
if (create_mode_) {
|
||||
@@ -61,7 +61,7 @@ void MemorySegmentPool::open_segment(SegmentId segmentid, size_t size) {
|
||||
}
|
||||
|
||||
void MemorySegmentPool::close_segment(SegmentId segmentid) {
|
||||
HALO_LOG(HALO_DEBUG, "closing segmentid " << segmentid);
|
||||
RAY_LOG(RAY_DEBUG, "closing segmentid " << segmentid);
|
||||
std::string segment_name = get_segment_name(segmentid);
|
||||
shared_memory_object::remove(segment_name.c_str());
|
||||
segments_[segmentid].first.reset();
|
||||
@@ -70,7 +70,7 @@ void MemorySegmentPool::close_segment(SegmentId segmentid) {
|
||||
|
||||
ObjHandle MemorySegmentPool::allocate(size_t size) {
|
||||
if (!create_mode_) { // allocate is called only by the object store
|
||||
HALO_LOG(HALO_FATAL, "Attempting to call allocate, but create_mode_ is false");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to call allocate, but create_mode_ is false");
|
||||
}
|
||||
// TODO(pcm): at the moment, this always creates a new segment, this will be changed
|
||||
SegmentId segmentid = segments_.size();
|
||||
@@ -91,7 +91,7 @@ void MemorySegmentPool::deallocate(ObjHandle pointer) {
|
||||
// the process that will use the address
|
||||
uint8_t* MemorySegmentPool::get_address(ObjHandle pointer) {
|
||||
if (create_mode_ && segments_[pointer.segmentid()].second != SegmentStatusType::OPENED) {
|
||||
HALO_LOG(HALO_FATAL, "Object store " << objstoreid_ << " is attempting to call get_address on segmentid " << pointer.segmentid() << ", which has not been opened yet.");
|
||||
RAY_LOG(RAY_FATAL, "Object store " << objstoreid_ << " is attempting to call get_address on segmentid " << pointer.segmentid() << ", which has not been opened yet.");
|
||||
}
|
||||
if (!create_mode_) {
|
||||
open_segment(pointer.segmentid());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_IPC_H
|
||||
#define HALO_IPC_H
|
||||
#ifndef RAY_IPC_H
|
||||
#define RAY_IPC_H
|
||||
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
@@ -10,7 +10,7 @@
|
||||
#include <arrow/api.h>
|
||||
#include <arrow/ipc/memory.h>
|
||||
|
||||
#include "halo/halo.h"
|
||||
#include "ray/ray.h"
|
||||
|
||||
using namespace boost::interprocess;
|
||||
|
||||
@@ -42,7 +42,7 @@ public:
|
||||
queue_ = std::unique_ptr<message_queue>(new message_queue(open_only, name.c_str()));
|
||||
}
|
||||
} catch(interprocess_exception &ex) {
|
||||
HALO_LOG(HALO_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
RAY_LOG(RAY_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
}
|
||||
return true;
|
||||
};
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
try {
|
||||
queue_->send(object, sizeof(T), 0);
|
||||
} catch(interprocess_exception &ex) {
|
||||
HALO_LOG(HALO_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
RAY_LOG(RAY_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
}
|
||||
return true;
|
||||
};
|
||||
@@ -66,7 +66,7 @@ public:
|
||||
try {
|
||||
queue_->receive(object, sizeof(T), recvd_size, priority);
|
||||
} catch(interprocess_exception &ex) {
|
||||
HALO_LOG(HALO_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
RAY_LOG(RAY_FATAL, "boost::interprocess exception: " << ex.what());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
+32
-32
@@ -8,7 +8,7 @@ const size_t ObjStoreService::CHUNK_SIZE = 8 * 1024;
|
||||
// this method needs to be protected by a objstore_lock_
|
||||
// TODO(rkn): Make sure that we do not in fact need the objstore_lock_. We want multiple deliveries to be able to happen simultaneously.
|
||||
void ObjStoreService::pull_data_from(ObjRef objref, ObjStore::Stub& stub) {
|
||||
HALO_LOG(HALO_DEBUG, "Objstore " << objstoreid_ << " is beginning to pull objref " << objref);
|
||||
RAY_LOG(RAY_DEBUG, "Objstore " << objstoreid_ << " is beginning to pull objref " << objref);
|
||||
ObjChunk chunk;
|
||||
ClientContext context;
|
||||
StreamObjToRequest stream_request;
|
||||
@@ -27,7 +27,7 @@ void ObjStoreService::pull_data_from(ObjRef objref, ObjStore::Stub& stub) {
|
||||
segmentpool_lock_.unlock();
|
||||
do {
|
||||
if (num_bytes + chunk.data().size() > total_size) {
|
||||
HALO_LOG(HALO_FATAL, "The reader attempted to stream too many bytes.");
|
||||
RAY_LOG(RAY_FATAL, "The reader attempted to stream too many bytes.");
|
||||
}
|
||||
std::memcpy(data, chunk.data().c_str(), chunk.data().size());
|
||||
data += chunk.data().size();
|
||||
@@ -37,10 +37,10 @@ void ObjStoreService::pull_data_from(ObjRef objref, ObjStore::Stub& stub) {
|
||||
|
||||
// finalize object
|
||||
if (num_bytes != total_size) {
|
||||
HALO_LOG(HALO_FATAL, "Streamed objref " << objref << ", but num_bytes != total_size");
|
||||
RAY_LOG(RAY_FATAL, "Streamed objref " << objref << ", but num_bytes != total_size");
|
||||
}
|
||||
object_ready(objref, chunk.metadata_offset());
|
||||
HALO_LOG(HALO_DEBUG, "finished streaming data, objref was " << objref << " and size was " << num_bytes);
|
||||
RAY_LOG(RAY_DEBUG, "finished streaming data, objref was " << objref << " and size was " << num_bytes);
|
||||
}
|
||||
|
||||
ObjStoreService::ObjStoreService(const std::string& objstore_address, std::shared_ptr<Channel> scheduler_channel)
|
||||
@@ -79,10 +79,10 @@ Status ObjStoreService::StartDelivery(ServerContext* context, const StartDeliver
|
||||
if (memory_[objref].second == MemoryStatusType::NOT_PRESENT) {
|
||||
}
|
||||
else if (memory_[objref].second == MemoryStatusType::DEALLOCATED) {
|
||||
HALO_LOG(HALO_FATAL, "Objstore " << objstoreid_ << " is attempting to get objref " << objref << ", but memory_[objref] == DEALLOCATED.");
|
||||
RAY_LOG(RAY_FATAL, "Objstore " << objstoreid_ << " is attempting to get objref " << objref << ", but memory_[objref] == DEALLOCATED.");
|
||||
}
|
||||
else {
|
||||
HALO_LOG(HALO_DEBUG, "Objstore " << objstoreid_ << " already has objref " << objref << " or it is already being shipped, so no need to pull it again.");
|
||||
RAY_LOG(RAY_DEBUG, "Objstore " << objstoreid_ << " already has objref " << objref << " or it is already being shipped, so no need to pull it again.");
|
||||
return Status::OK;
|
||||
}
|
||||
memory_[objref].second = MemoryStatusType::PRE_ALLOCED;
|
||||
@@ -115,15 +115,15 @@ Status ObjStoreService::ObjStoreInfo(ServerContext* context, const ObjStoreInfoR
|
||||
}
|
||||
|
||||
Status ObjStoreService::StreamObjTo(ServerContext* context, const StreamObjToRequest* request, ServerWriter<ObjChunk>* writer) {
|
||||
HALO_LOG(HALO_DEBUG, "begin to stream data from object store " << objstoreid_);
|
||||
RAY_LOG(RAY_DEBUG, "begin to stream data from object store " << objstoreid_);
|
||||
ObjChunk chunk;
|
||||
ObjRef objref = request->objref();
|
||||
memory_lock_.lock();
|
||||
if (objref >= memory_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "Objstore " << objstoreid_ << " is attempting to use objref " << objref << " in StreamObjTo, but this objref is not present in the object store.");
|
||||
RAY_LOG(RAY_FATAL, "Objstore " << objstoreid_ << " is attempting to use objref " << objref << " in StreamObjTo, but this objref is not present in the object store.");
|
||||
}
|
||||
if (memory_[objref].second != MemoryStatusType::READY) {
|
||||
HALO_LOG(HALO_FATAL, "Objstore " << objstoreid_ << " is attempting to stream objref " << objref << ", but memory_[objref].second != MemoryStatusType::READY.");
|
||||
RAY_LOG(RAY_FATAL, "Objstore " << objstoreid_ << " is attempting to stream objref " << objref << ", but memory_[objref].second != MemoryStatusType::READY.");
|
||||
}
|
||||
ObjHandle handle = memory_[objref].first;
|
||||
memory_lock_.unlock(); // TODO(rkn): Make sure we don't still need to hold on to this lock.
|
||||
@@ -136,7 +136,7 @@ Status ObjStoreService::StreamObjTo(ServerContext* context, const StreamObjToReq
|
||||
chunk.set_total_size(size);
|
||||
chunk.set_data(head + i, std::min(CHUNK_SIZE, size - i));
|
||||
if (!writer->Write(chunk)) {
|
||||
HALO_LOG(HALO_FATAL, "stream connection prematurely closed")
|
||||
RAY_LOG(RAY_FATAL, "stream connection prematurely closed")
|
||||
}
|
||||
}
|
||||
return Status::OK;
|
||||
@@ -146,20 +146,20 @@ Status ObjStoreService::NotifyAlias(ServerContext* context, const NotifyAliasReq
|
||||
// NotifyAlias assumes that the objstore already holds canonical_objref
|
||||
ObjRef alias_objref = request->alias_objref();
|
||||
ObjRef canonical_objref = request->canonical_objref();
|
||||
HALO_LOG(HALO_DEBUG, "Aliasing objref " << alias_objref << " with objref " << canonical_objref);
|
||||
RAY_LOG(RAY_DEBUG, "Aliasing objref " << alias_objref << " with objref " << canonical_objref);
|
||||
{
|
||||
std::lock_guard<std::mutex> memory_lock(memory_lock_);
|
||||
if (canonical_objref >= memory_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not in the objstore.")
|
||||
RAY_LOG(RAY_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not in the objstore.")
|
||||
}
|
||||
if (memory_[canonical_objref].second == MemoryStatusType::NOT_READY) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not ready yet in the objstore.")
|
||||
RAY_LOG(RAY_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not ready yet in the objstore.")
|
||||
}
|
||||
if (memory_[canonical_objref].second == MemoryStatusType::NOT_PRESENT) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not present in the objstore.")
|
||||
RAY_LOG(RAY_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " is not present in the objstore.")
|
||||
}
|
||||
if (memory_[canonical_objref].second == MemoryStatusType::DEALLOCATED) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " has already been deallocated.")
|
||||
RAY_LOG(RAY_FATAL, "Attempting to alias objref " << alias_objref << " with objref " << canonical_objref << ", but objref " << canonical_objref << " has already been deallocated.")
|
||||
}
|
||||
if (alias_objref >= memory_.size()) {
|
||||
memory_.resize(alias_objref + 1, std::make_pair(ObjHandle(), MemoryStatusType::NOT_PRESENT));
|
||||
@@ -176,13 +176,13 @@ Status ObjStoreService::NotifyAlias(ServerContext* context, const NotifyAliasReq
|
||||
|
||||
Status ObjStoreService::DeallocateObject(ServerContext* context, const DeallocateObjectRequest* request, AckReply* reply) {
|
||||
ObjRef canonical_objref = request->canonical_objref();
|
||||
HALO_LOG(HALO_REFCOUNT, "Deallocating canonical_objref " << canonical_objref);
|
||||
RAY_LOG(RAY_REFCOUNT, "Deallocating canonical_objref " << canonical_objref);
|
||||
std::lock_guard<std::mutex> memory_lock(memory_lock_);
|
||||
if (memory_[canonical_objref].second != MemoryStatusType::READY) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to deallocate canonical_objref " << canonical_objref << ", but memory_[canonical_objref].second = " << memory_[canonical_objref].second);
|
||||
RAY_LOG(RAY_FATAL, "Attempting to deallocate canonical_objref " << canonical_objref << ", but memory_[canonical_objref].second = " << memory_[canonical_objref].second);
|
||||
}
|
||||
if (canonical_objref >= memory_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to deallocate canonical_objref " << canonical_objref << ", but it is not in the objstore.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to deallocate canonical_objref " << canonical_objref << ", but it is not in the objstore.");
|
||||
}
|
||||
segmentpool_lock_.lock();
|
||||
segmentpool_->deallocate(memory_[canonical_objref].first);
|
||||
@@ -208,7 +208,7 @@ void ObjStoreService::process_objstore_request(const ObjRequest request) {
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,13 +237,13 @@ void ObjStoreService::process_worker_request(const ObjRequest request) {
|
||||
std::lock_guard<std::mutex> memory_lock(memory_lock_);
|
||||
std::pair<ObjHandle, MemoryStatusType>& item = memory_[request.objref];
|
||||
if (item.second == MemoryStatusType::READY) {
|
||||
HALO_LOG(HALO_DEBUG, "Responding to GET request: returning objref " << request.objref);
|
||||
RAY_LOG(RAY_DEBUG, "Responding to GET request: returning objref " << request.objref);
|
||||
send_queues_[request.workerid].send(&item.first);
|
||||
} else if (item.second == MemoryStatusType::NOT_READY || item.second == MemoryStatusType::NOT_PRESENT || item.second == MemoryStatusType::PRE_ALLOCED) {
|
||||
std::lock_guard<std::mutex> lock(pull_queue_lock_);
|
||||
pull_queue_.push_back(std::make_pair(request.workerid, request.objref));
|
||||
} else {
|
||||
HALO_LOG(HALO_FATAL, "A worker requested objref " << request.objref << ", but memory_[objref].second = " << memory_[request.objref].second);
|
||||
RAY_LOG(RAY_FATAL, "A worker requested objref " << request.objref << ", but memory_[objref].second = " << memory_[request.objref].second);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -252,7 +252,7 @@ void ObjStoreService::process_worker_request(const ObjRequest request) {
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -264,17 +264,17 @@ void ObjStoreService::process_requests() {
|
||||
recv_queue_.receive(&request);
|
||||
switch (request.type) {
|
||||
case ObjRequestType::ALLOC: {
|
||||
HALO_LOG(HALO_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Allocate object with objref " << request.objref << " and size " << request.size);
|
||||
RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Allocate object with objref " << request.objref << " and size " << request.size);
|
||||
process_worker_request(request);
|
||||
}
|
||||
break;
|
||||
case ObjRequestType::GET: {
|
||||
HALO_LOG(HALO_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Get object with objref " << request.objref);
|
||||
RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Get object with objref " << request.objref);
|
||||
process_worker_request(request);
|
||||
}
|
||||
break;
|
||||
case ObjRequestType::WORKER_DONE: {
|
||||
HALO_LOG(HALO_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Finalize object with objref " << request.objref);
|
||||
RAY_LOG(RAY_VERBOSE, "Request (worker " << request.workerid << " to objstore " << objstoreid_ << "): Finalize object with objref " << request.objref);
|
||||
process_worker_request(request);
|
||||
}
|
||||
break;
|
||||
@@ -283,7 +283,7 @@ void ObjStoreService::process_requests() {
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to process request of type " << request.type << ". This code should be unreachable.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -309,9 +309,9 @@ ObjHandle ObjStoreService::alloc(ObjRef objref, size_t size) {
|
||||
ObjHandle handle = segmentpool_->allocate(size);
|
||||
segmentpool_lock_.unlock();
|
||||
std::lock_guard<std::mutex> memory_lock(memory_lock_);
|
||||
HALO_LOG(HALO_VERBOSE, "Allocating space for objref " << objref << " on object store " << objstoreid_);
|
||||
RAY_LOG(RAY_VERBOSE, "Allocating space for objref " << objref << " on object store " << objstoreid_);
|
||||
if (memory_[objref].second != MemoryStatusType::NOT_PRESENT && memory_[objref].second != MemoryStatusType::PRE_ALLOCED) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to allocate space for objref " << objref << ", but memory_[objref].second = " << memory_[objref].second);
|
||||
RAY_LOG(RAY_FATAL, "Attempting to allocate space for objref " << objref << ", but memory_[objref].second = " << memory_[objref].second);
|
||||
}
|
||||
memory_[objref].first = handle;
|
||||
memory_[objref].second = MemoryStatusType::NOT_READY;
|
||||
@@ -323,7 +323,7 @@ void ObjStoreService::object_ready(ObjRef objref, size_t metadata_offset) {
|
||||
std::lock_guard<std::mutex> memory_lock(memory_lock_);
|
||||
std::pair<ObjHandle, MemoryStatusType>& item = memory_[objref];
|
||||
if (item.second != MemoryStatusType::NOT_READY) {
|
||||
HALO_LOG(HALO_FATAL, "A worker notified the object store that objref " << objref << " has been written to the object store, but memory_[objref].second != NOT_READY.");
|
||||
RAY_LOG(RAY_FATAL, "A worker notified the object store that objref " << objref << " has been written to the object store, but memory_[objref].second != NOT_READY.");
|
||||
}
|
||||
item.first.set_metadata_offset(metadata_offset);
|
||||
item.second = MemoryStatusType::READY;
|
||||
@@ -341,14 +341,14 @@ void ObjStoreService::object_ready(ObjRef objref, size_t metadata_offset) {
|
||||
|
||||
void ObjStoreService::start_objstore_service() {
|
||||
communicator_thread_ = std::thread([this]() {
|
||||
HALO_LOG(HALO_INFO, "started object store communicator server");
|
||||
RAY_LOG(RAY_INFO, "started object store communicator server");
|
||||
process_requests();
|
||||
});
|
||||
}
|
||||
|
||||
void start_objstore(const char* scheduler_addr, const char* objstore_addr) {
|
||||
auto scheduler_channel = grpc::CreateChannel(scheduler_addr, grpc::InsecureChannelCredentials());
|
||||
HALO_LOG(HALO_INFO, "object store " << objstore_addr << " connected to scheduler " << scheduler_addr);
|
||||
RAY_LOG(RAY_INFO, "object store " << objstore_addr << " connected to scheduler " << scheduler_addr);
|
||||
std::string objstore_address(objstore_addr);
|
||||
ObjStoreService service(objstore_address, scheduler_channel);
|
||||
service.start_objstore_service();
|
||||
@@ -365,7 +365,7 @@ void start_objstore(const char* scheduler_addr, const char* objstore_addr) {
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc != 3) {
|
||||
HALO_LOG(HALO_FATAL, "object store: expected two arguments (scheduler ip address and object store ip address)");
|
||||
RAY_LOG(RAY_FATAL, "object store: expected two arguments (scheduler ip address and object store ip address)");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_OBJSTORE_H
|
||||
#define HALO_OBJSTORE_H
|
||||
#ifndef RAY_OBJSTORE_H
|
||||
#define RAY_OBJSTORE_H
|
||||
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
@@ -7,8 +7,8 @@
|
||||
#include <iostream>
|
||||
#include <grpc++/grpc++.h>
|
||||
|
||||
#include "halo/halo.h"
|
||||
#include "halo.grpc.pb.h"
|
||||
#include "ray/ray.h"
|
||||
#include "ray.grpc.pb.h"
|
||||
#include "types.pb.h"
|
||||
#include "ipc.h"
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ static int PyObjRef_init(PyObjRef *self, PyObject *args, PyObject *kwds) {
|
||||
}
|
||||
std::vector<ObjRef> objrefs;
|
||||
objrefs.push_back(self->val);
|
||||
HALO_LOG(HALO_REFCOUNT, "In PyObjRef_init, calling increment_reference_count for objref " << objrefs[0]);
|
||||
RAY_LOG(RAY_REFCOUNT, "In PyObjRef_init, calling increment_reference_count for objref " << objrefs[0]);
|
||||
self->worker->increment_reference_count(objrefs);
|
||||
return 0;
|
||||
};
|
||||
@@ -70,7 +70,7 @@ static PyMemberDef PyObjRef_members[] = {
|
||||
static PyTypeObject PyObjRefType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /* ob_size */
|
||||
"halo.ObjRef", /* tp_name */
|
||||
"ray.ObjRef", /* tp_name */
|
||||
sizeof(PyObjRef), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
(destructor)PyObjRef_dealloc, /* tp_dealloc */
|
||||
@@ -89,7 +89,7 @@ static PyTypeObject PyObjRefType = {
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
||||
"Halo objects", /* tp_doc */
|
||||
"Ray objects", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
@@ -119,7 +119,7 @@ PyObject* make_pyobjref(PyObject* worker_capsule, ObjRef objref) {
|
||||
|
||||
// Error handling
|
||||
|
||||
static PyObject *HaloError;
|
||||
static PyObject *RayError;
|
||||
|
||||
// Pass arguments from Python to C++
|
||||
|
||||
@@ -312,12 +312,12 @@ int serialize(PyObject* worker_capsule, PyObject* val, Obj* obj, std::vector<Obj
|
||||
}
|
||||
break;
|
||||
default:
|
||||
PyErr_SetString(HaloError, "serialization: numpy datatype not know");
|
||||
PyErr_SetString(RayError, "serialization: numpy datatype not know");
|
||||
return -1;
|
||||
}
|
||||
Py_DECREF(array); // TODO(rkn): is this right?
|
||||
} else {
|
||||
PyErr_SetString(HaloError, "serialization: type not know");
|
||||
PyErr_SetString(RayError, "serialization: type not know");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
@@ -402,7 +402,7 @@ PyObject* deserialize(PyObject* worker_capsule, const Obj& obj, std::vector<ObjR
|
||||
}
|
||||
break;
|
||||
default:
|
||||
PyErr_SetString(HaloError, "deserialization: internal error (array type not implemented)");
|
||||
PyErr_SetString(RayError, "deserialization: internal error (array type not implemented)");
|
||||
return NULL;
|
||||
}
|
||||
} else if (array.uint_data_size() > 0) {
|
||||
@@ -423,7 +423,7 @@ PyObject* deserialize(PyObject* worker_capsule, const Obj& obj, std::vector<ObjR
|
||||
}
|
||||
break;
|
||||
default:
|
||||
PyErr_SetString(HaloError, "deserialization: internal error (array type not implemented)");
|
||||
PyErr_SetString(RayError, "deserialization: internal error (array type not implemented)");
|
||||
return NULL;
|
||||
}
|
||||
} else if (array.objref_data_size() > 0) {
|
||||
@@ -434,12 +434,12 @@ PyObject* deserialize(PyObject* worker_capsule, const Obj& obj, std::vector<ObjR
|
||||
objrefs.push_back(array.objref_data(i));
|
||||
}
|
||||
} else {
|
||||
PyErr_SetString(HaloError, "deserialization: internal error (array type not implemented)");
|
||||
PyErr_SetString(RayError, "deserialization: internal error (array type not implemented)");
|
||||
return NULL;
|
||||
}
|
||||
return (PyObject*) pyarray;
|
||||
} else {
|
||||
PyErr_SetString(HaloError, "deserialization: internal error (type not implemented)");
|
||||
PyErr_SetString(RayError, "deserialization: internal error (type not implemented)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@@ -535,13 +535,13 @@ PyObject* serialize_task(PyObject* self, PyObject* args) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PyErr_SetString(HaloError, "serialize_task: second argument needs to be a list");
|
||||
PyErr_SetString(RayError, "serialize_task: second argument needs to be a list");
|
||||
return NULL;
|
||||
}
|
||||
Worker* worker;
|
||||
PyObjectToWorker(worker_capsule, &worker);
|
||||
if (objrefs.size() > 0) {
|
||||
HALO_LOG(HALO_REFCOUNT, "In serialize_task, calling increment_reference_count for contained objrefs");
|
||||
RAY_LOG(RAY_REFCOUNT, "In serialize_task, calling increment_reference_count for contained objrefs");
|
||||
worker->increment_reference_count(objrefs);
|
||||
}
|
||||
return PyCapsule_New(static_cast<void*>(task), "task", &TaskCapsule_Destructor);
|
||||
@@ -584,7 +584,7 @@ PyObject* deserialize_task(PyObject* self, PyObject* args) {
|
||||
return t;
|
||||
}
|
||||
|
||||
// Halo Python API
|
||||
// Ray Python API
|
||||
|
||||
PyObject* create_worker(PyObject* self, PyObject* args) {
|
||||
const char* scheduler_addr;
|
||||
@@ -692,7 +692,7 @@ PyObject* put_object(PyObject* self, PyObject* args) {
|
||||
return NULL;
|
||||
}
|
||||
if (!PyList_Check(contained_objrefs)) {
|
||||
HALO_LOG(HALO_FATAL, "The contained_objrefs argument must be a list.")
|
||||
RAY_LOG(RAY_FATAL, "The contained_objrefs argument must be a list.")
|
||||
}
|
||||
std::vector<ObjRef> vec_contained_objrefs;
|
||||
size_t size = PyList_Size(contained_objrefs);
|
||||
@@ -773,7 +773,7 @@ PyObject* scheduler_info(PyObject* self, PyObject* args) {
|
||||
return dict;
|
||||
}
|
||||
|
||||
static PyMethodDef HaloLibMethods[] = {
|
||||
static PyMethodDef RayLibMethods[] = {
|
||||
{ "serialize_object", serialize_object, METH_VARARGS, "serialize an object to protocol buffers" },
|
||||
{ "deserialize_object", deserialize_object, METH_VARARGS, "deserialize an object from protocol buffers" },
|
||||
{ "put_arrow", put_arrow, METH_VARARGS, "put an arrow array on the local object store"},
|
||||
@@ -798,18 +798,18 @@ static PyMethodDef HaloLibMethods[] = {
|
||||
{ NULL, NULL, 0, NULL }
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC initlibhalolib(void) {
|
||||
PyMODINIT_FUNC initlibraylib(void) {
|
||||
PyObject* m;
|
||||
PyObjRefType.tp_new = PyType_GenericNew;
|
||||
if (PyType_Ready(&PyObjRefType) < 0) {
|
||||
return;
|
||||
}
|
||||
m = Py_InitModule3("libhalolib", HaloLibMethods, "Python C Extension for Halo");
|
||||
m = Py_InitModule3("libraylib", RayLibMethods, "Python C Extension for Ray");
|
||||
Py_INCREF(&PyObjRefType);
|
||||
PyModule_AddObject(m, "ObjRef", (PyObject *)&PyObjRefType);
|
||||
HaloError = PyErr_NewException("halo.error", NULL, NULL);
|
||||
Py_INCREF(HaloError);
|
||||
PyModule_AddObject(m, "error", HaloError);
|
||||
RayError = PyErr_NewException("ray.error", NULL, NULL);
|
||||
Py_INCREF(RayError);
|
||||
PyModule_AddObject(m, "error", RayError);
|
||||
import_array();
|
||||
}
|
||||
|
||||
+51
-51
@@ -14,7 +14,7 @@ Status SchedulerService::SubmitTask(ServerContext* context, const SubmitTaskRequ
|
||||
|
||||
if (fntable_.find(task->name()) == fntable_.end()) {
|
||||
// TODO(rkn): In the future, this should probably not be fatal. Instead, propagate the error back to the worker.
|
||||
HALO_LOG(HALO_FATAL, "The function " << task->name() << " has not been registered by any worker.");
|
||||
RAY_LOG(RAY_FATAL, "The function " << task->name() << " has not been registered by any worker.");
|
||||
}
|
||||
|
||||
size_t num_return_vals = fntable_[task->name()].num_return_vals();
|
||||
@@ -29,8 +29,8 @@ Status SchedulerService::SubmitTask(ServerContext* context, const SubmitTaskRequ
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> reference_counts_lock(reference_counts_lock_); // we grab this lock because increment_ref_count assumes it has been acquired
|
||||
increment_ref_count(result_objrefs); // We increment once so the objrefs don't go out of scope before we reply to the worker that called SubmitTask. The corresponding decrement will happen in submit_task in halolib.
|
||||
increment_ref_count(result_objrefs); // We increment once so the objrefs don't go out of scope before the task is scheduled on the worker. The corresponding decrement will happen in deserialize_task in halolib.
|
||||
increment_ref_count(result_objrefs); // We increment once so the objrefs don't go out of scope before we reply to the worker that called SubmitTask. The corresponding decrement will happen in submit_task in raylib.
|
||||
increment_ref_count(result_objrefs); // We increment once so the objrefs don't go out of scope before the task is scheduled on the worker. The corresponding decrement will happen in deserialize_task in raylib.
|
||||
}
|
||||
|
||||
auto operation = std::unique_ptr<Operation>(new Operation());
|
||||
@@ -64,7 +64,7 @@ Status SchedulerService::RequestObj(ServerContext* context, const RequestObjRequ
|
||||
|
||||
ObjRef objref = request->objref();
|
||||
if (objref >= size) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: no object with objref " << objref << " exists");
|
||||
RAY_LOG(RAY_FATAL, "internal error: no object with objref " << objref << " exists");
|
||||
}
|
||||
|
||||
pull_queue_lock_.lock();
|
||||
@@ -77,23 +77,23 @@ Status SchedulerService::RequestObj(ServerContext* context, const RequestObjRequ
|
||||
Status SchedulerService::AliasObjRefs(ServerContext* context, const AliasObjRefsRequest* request, AckReply* reply) {
|
||||
ObjRef alias_objref = request->alias_objref();
|
||||
ObjRef target_objref = request->target_objref();
|
||||
HALO_LOG(HALO_ALIAS, "Aliasing objref " << alias_objref << " with objref " << target_objref);
|
||||
RAY_LOG(RAY_ALIAS, "Aliasing objref " << alias_objref << " with objref " << target_objref);
|
||||
if (alias_objref == target_objref) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: attempting to alias objref " << alias_objref << " with itself.");
|
||||
RAY_LOG(RAY_FATAL, "internal error: attempting to alias objref " << alias_objref << " with itself.");
|
||||
}
|
||||
objtable_lock_.lock();
|
||||
size_t size = objtable_.size();
|
||||
objtable_lock_.unlock();
|
||||
if (alias_objref >= size) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: no object with objref " << alias_objref << " exists");
|
||||
RAY_LOG(RAY_FATAL, "internal error: no object with objref " << alias_objref << " exists");
|
||||
}
|
||||
if (target_objref >= size) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: no object with objref " << target_objref << " exists");
|
||||
RAY_LOG(RAY_FATAL, "internal error: no object with objref " << target_objref << " exists");
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> target_objrefs_lock(target_objrefs_lock_);
|
||||
if (target_objrefs_[alias_objref] != UNITIALIZED_ALIAS) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: attempting to alias objref " << alias_objref << " with objref " << target_objref << ", but objref " << alias_objref << " has already been aliased with objref " << target_objrefs_[alias_objref]);
|
||||
RAY_LOG(RAY_FATAL, "internal error: attempting to alias objref " << alias_objref << " with objref " << target_objref << ", but objref " << alias_objref << " has already been aliased with objref " << target_objrefs_[alias_objref]);
|
||||
}
|
||||
target_objrefs_[alias_objref] = target_objref;
|
||||
}
|
||||
@@ -121,7 +121,7 @@ Status SchedulerService::RegisterWorker(ServerContext* context, const RegisterWo
|
||||
std::pair<WorkerId, ObjStoreId> info = register_worker(request->worker_address(), request->objstore_address());
|
||||
WorkerId workerid = info.first;
|
||||
ObjStoreId objstoreid = info.second;
|
||||
HALO_LOG(HALO_INFO, "registered worker with workerid " << workerid);
|
||||
RAY_LOG(RAY_INFO, "registered worker with workerid " << workerid);
|
||||
reply->set_workerid(workerid);
|
||||
reply->set_objstoreid(objstoreid);
|
||||
schedule();
|
||||
@@ -129,7 +129,7 @@ Status SchedulerService::RegisterWorker(ServerContext* context, const RegisterWo
|
||||
}
|
||||
|
||||
Status SchedulerService::RegisterFunction(ServerContext* context, const RegisterFunctionRequest* request, AckReply* reply) {
|
||||
HALO_LOG(HALO_INFO, "register function " << request->fnname() << " from workerid " << request->workerid());
|
||||
RAY_LOG(RAY_INFO, "register function " << request->fnname() << " from workerid " << request->workerid());
|
||||
register_function(request->fnname(), request->workerid(), request->num_return_vals());
|
||||
schedule();
|
||||
return Status::OK;
|
||||
@@ -137,7 +137,7 @@ Status SchedulerService::RegisterFunction(ServerContext* context, const Register
|
||||
|
||||
Status SchedulerService::ObjReady(ServerContext* context, const ObjReadyRequest* request, AckReply* reply) {
|
||||
ObjRef objref = request->objref();
|
||||
HALO_LOG(HALO_DEBUG, "object " << objref << " ready on store " << request->objstoreid());
|
||||
RAY_LOG(RAY_DEBUG, "object " << objref << " ready on store " << request->objstoreid());
|
||||
add_canonical_objref(objref);
|
||||
add_location(objref, request->objstoreid());
|
||||
schedule();
|
||||
@@ -145,7 +145,7 @@ Status SchedulerService::ObjReady(ServerContext* context, const ObjReadyRequest*
|
||||
}
|
||||
|
||||
Status SchedulerService::WorkerReady(ServerContext* context, const WorkerReadyRequest* request, AckReply* reply) {
|
||||
HALO_LOG(HALO_INFO, "worker " << request->workerid() << " reported back");
|
||||
RAY_LOG(RAY_INFO, "worker " << request->workerid() << " reported back");
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(avail_workers_lock_);
|
||||
avail_workers_.push_back(request->workerid());
|
||||
@@ -157,7 +157,7 @@ Status SchedulerService::WorkerReady(ServerContext* context, const WorkerReadyRe
|
||||
Status SchedulerService::IncrementRefCount(ServerContext* context, const IncrementRefCountRequest* request, AckReply* reply) {
|
||||
int num_objrefs = request->objref_size();
|
||||
if (num_objrefs == 0) {
|
||||
HALO_LOG(HALO_FATAL, "Scheduler received IncrementRefCountRequest with 0 objrefs.");
|
||||
RAY_LOG(RAY_FATAL, "Scheduler received IncrementRefCountRequest with 0 objrefs.");
|
||||
}
|
||||
std::vector<ObjRef> objrefs;
|
||||
for (int i = 0; i < num_objrefs; ++i) {
|
||||
@@ -171,7 +171,7 @@ Status SchedulerService::IncrementRefCount(ServerContext* context, const Increme
|
||||
Status SchedulerService::DecrementRefCount(ServerContext* context, const DecrementRefCountRequest* request, AckReply* reply) {
|
||||
int num_objrefs = request->objref_size();
|
||||
if (num_objrefs == 0) {
|
||||
HALO_LOG(HALO_FATAL, "Scheduler received DecrementRefCountRequest with 0 objrefs.");
|
||||
RAY_LOG(RAY_FATAL, "Scheduler received DecrementRefCountRequest with 0 objrefs.");
|
||||
}
|
||||
std::vector<ObjRef> objrefs;
|
||||
for (int i = 0; i < num_objrefs; ++i) {
|
||||
@@ -186,11 +186,11 @@ Status SchedulerService::AddContainedObjRefs(ServerContext* context, const AddCo
|
||||
ObjRef objref = request->objref();
|
||||
// if (!is_canonical(objref)) {
|
||||
// TODO(rkn): Perhaps we don't need this check. It won't work because the objstore may not have called ObjReady yet.
|
||||
// HALO_LOG(HALO_FATAL, "Attempting to add contained objrefs for non-canonical objref " << objref);
|
||||
// RAY_LOG(RAY_FATAL, "Attempting to add contained objrefs for non-canonical objref " << objref);
|
||||
// }
|
||||
std::lock_guard<std::mutex> contained_objrefs_lock(contained_objrefs_lock_);
|
||||
if (contained_objrefs_[objref].size() != 0) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to add contained objrefs for objref " << objref << ", but contained_objrefs_[objref].size() != 0.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to add contained objrefs for objref " << objref << ", but contained_objrefs_[objref].size() != 0.");
|
||||
}
|
||||
for (int i = 0; i < request->contained_objref_size(); ++i) {
|
||||
contained_objrefs_[objref].push_back(request->contained_objref(i));
|
||||
@@ -212,10 +212,10 @@ Status SchedulerService::SchedulerInfo(ServerContext* context, const SchedulerIn
|
||||
// deliver_object assumes that the aliasing for objref has already been completed. That is, has_canonical_objref(objref) == true
|
||||
void SchedulerService::deliver_object(ObjRef objref, ObjStoreId from, ObjStoreId to) {
|
||||
if (from == to) {
|
||||
HALO_LOG(HALO_FATAL, "attempting to deliver objref " << objref << " from objstore " << from << " to itself.");
|
||||
RAY_LOG(RAY_FATAL, "attempting to deliver objref " << objref << " from objstore " << from << " to itself.");
|
||||
}
|
||||
if (!has_canonical_objref(objref)) {
|
||||
HALO_LOG(HALO_FATAL, "attempting to deliver objref " << objref << ", but this objref does not yet have a canonical objref.");
|
||||
RAY_LOG(RAY_FATAL, "attempting to deliver objref " << objref << ", but this objref does not yet have a canonical objref.");
|
||||
}
|
||||
ClientContext context;
|
||||
AckReply reply;
|
||||
@@ -235,7 +235,7 @@ void SchedulerService::schedule() {
|
||||
} else if (scheduling_algorithm_ == SCHEDULING_ALGORITHM_LOCALITY_AWARE) {
|
||||
schedule_tasks_location_aware(); // See what we can do in task_queue_
|
||||
} else {
|
||||
HALO_LOG(HALO_FATAL, "scheduling algorithm not known");
|
||||
RAY_LOG(RAY_FATAL, "scheduling algorithm not known");
|
||||
}
|
||||
perform_notify_aliases(); // See what we can do in alias_notification_queue_
|
||||
}
|
||||
@@ -247,7 +247,7 @@ void SchedulerService::assign_task(OperationId operationid, WorkerId workerid) {
|
||||
ClientContext context;
|
||||
ExecuteTaskRequest request;
|
||||
ExecuteTaskReply reply;
|
||||
HALO_LOG(HALO_INFO, "starting to send arguments");
|
||||
RAY_LOG(RAY_INFO, "starting to send arguments");
|
||||
for (size_t i = 0; i < task.arg_size(); ++i) {
|
||||
if (!task.arg(i).has_obj()) {
|
||||
ObjRef objref = task.arg(i).ref();
|
||||
@@ -259,7 +259,7 @@ void SchedulerService::assign_task(OperationId operationid, WorkerId workerid) {
|
||||
}
|
||||
attempt_notify_alias(get_store(workerid), objref, canonical_objref);
|
||||
|
||||
HALO_LOG(HALO_DEBUG, "task contains object ref " << canonical_objref);
|
||||
RAY_LOG(RAY_DEBUG, "task contains object ref " << canonical_objref);
|
||||
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
|
||||
auto &objstores = objtable_[canonical_objref];
|
||||
std::lock_guard<std::mutex> workers_lock(workers_lock_);
|
||||
@@ -290,7 +290,7 @@ bool SchedulerService::can_run(const Task& task) {
|
||||
}
|
||||
|
||||
std::pair<WorkerId, ObjStoreId> SchedulerService::register_worker(const std::string& worker_address, const std::string& objstore_address) {
|
||||
HALO_LOG(HALO_INFO, "registering worker " << worker_address << " connected to object store " << objstore_address);
|
||||
RAY_LOG(RAY_INFO, "registering worker " << worker_address << " connected to object store " << objstore_address);
|
||||
ObjStoreId objstoreid = std::numeric_limits<size_t>::max();
|
||||
for (int num_attempts = 0; num_attempts < 5; ++num_attempts) {
|
||||
std::lock_guard<std::mutex> lock(objstores_lock_);
|
||||
@@ -304,7 +304,7 @@ std::pair<WorkerId, ObjStoreId> SchedulerService::register_worker(const std::str
|
||||
}
|
||||
}
|
||||
if (objstoreid == std::numeric_limits<size_t>::max()) {
|
||||
HALO_LOG(HALO_FATAL, "object store with address " << objstore_address << " not yet registered");
|
||||
RAY_LOG(RAY_FATAL, "object store with address " << objstore_address << " not yet registered");
|
||||
}
|
||||
workers_lock_.lock();
|
||||
WorkerId workerid = workers_.size();
|
||||
@@ -334,16 +334,16 @@ ObjRef SchedulerService::register_new_object() {
|
||||
ObjRef reference_counts_size = reference_counts_.size();
|
||||
ObjRef contained_objrefs_size = contained_objrefs_.size();
|
||||
if (objtable_size != target_objrefs_size) {
|
||||
HALO_LOG(HALO_FATAL, "objtable_ and target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and target_objrefs_.size() = " << target_objrefs_size);
|
||||
RAY_LOG(RAY_FATAL, "objtable_ and target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and target_objrefs_.size() = " << target_objrefs_size);
|
||||
}
|
||||
if (objtable_size != reverse_target_objrefs_size) {
|
||||
HALO_LOG(HALO_FATAL, "objtable_ and reverse_target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and reverse_target_objrefs_.size() = " << reverse_target_objrefs_size);
|
||||
RAY_LOG(RAY_FATAL, "objtable_ and reverse_target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and reverse_target_objrefs_.size() = " << reverse_target_objrefs_size);
|
||||
}
|
||||
if (objtable_size != reference_counts_size) {
|
||||
HALO_LOG(HALO_FATAL, "objtable_ and reference_counts_ should have the same size, but objtable_.size() = " << objtable_size << " and reference_counts_.size() = " << reference_counts_size);
|
||||
RAY_LOG(RAY_FATAL, "objtable_ and reference_counts_ should have the same size, but objtable_.size() = " << objtable_size << " and reference_counts_.size() = " << reference_counts_size);
|
||||
}
|
||||
if (objtable_size != contained_objrefs_size) {
|
||||
HALO_LOG(HALO_FATAL, "objtable_ and contained_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and contained_objrefs_.size() = " << contained_objrefs_size);
|
||||
RAY_LOG(RAY_FATAL, "objtable_ and contained_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and contained_objrefs_.size() = " << contained_objrefs_size);
|
||||
}
|
||||
objtable_.push_back(std::vector<ObjStoreId>());
|
||||
target_objrefs_.push_back(UNITIALIZED_ALIAS);
|
||||
@@ -356,11 +356,11 @@ ObjRef SchedulerService::register_new_object() {
|
||||
void SchedulerService::add_location(ObjRef canonical_objref, ObjStoreId objstoreid) {
|
||||
// add_location must be called with a canonical objref
|
||||
if (!is_canonical(canonical_objref)) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to call add_location with a non-canonical objref (objref " << canonical_objref << ")");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to call add_location with a non-canonical objref (objref " << canonical_objref << ")");
|
||||
}
|
||||
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
|
||||
if (canonical_objref >= objtable_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "trying to put an object in the object store that was not registered with the scheduler (objref " << canonical_objref << ")");
|
||||
RAY_LOG(RAY_FATAL, "trying to put an object in the object store that was not registered with the scheduler (objref " << canonical_objref << ")");
|
||||
}
|
||||
// do a binary search
|
||||
auto pos = std::lower_bound(objtable_[canonical_objref].begin(), objtable_[canonical_objref].end(), objstoreid);
|
||||
@@ -372,10 +372,10 @@ void SchedulerService::add_location(ObjRef canonical_objref, ObjStoreId objstore
|
||||
void SchedulerService::add_canonical_objref(ObjRef objref) {
|
||||
std::lock_guard<std::mutex> lock(target_objrefs_lock_);
|
||||
if (objref >= target_objrefs_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: attempting to insert objref " << objref << " in target_objrefs_, but target_objrefs_.size() is " << target_objrefs_.size());
|
||||
RAY_LOG(RAY_FATAL, "internal error: attempting to insert objref " << objref << " in target_objrefs_, but target_objrefs_.size() is " << target_objrefs_.size());
|
||||
}
|
||||
if (target_objrefs_[objref] != UNITIALIZED_ALIAS && target_objrefs_[objref] != objref) {
|
||||
HALO_LOG(HALO_FATAL, "internal error: attempting to declare objref " << objref << " as a canonical objref, but target_objrefs_[objref] is already aliased with objref " << target_objrefs_[objref]);
|
||||
RAY_LOG(RAY_FATAL, "internal error: attempting to declare objref " << objref << " as a canonical objref, but target_objrefs_[objref] is already aliased with objref " << target_objrefs_[objref]);
|
||||
}
|
||||
target_objrefs_[objref] = objref;
|
||||
}
|
||||
@@ -433,7 +433,7 @@ void SchedulerService::get_info(const SchedulerInfoRequest& request, SchedulerIn
|
||||
ObjStoreId SchedulerService::pick_objstore(ObjRef canonical_objref) {
|
||||
std::mt19937 rng;
|
||||
if (!is_canonical(canonical_objref)) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to call pick_objstore with a non-canonical objref, (objref " << canonical_objref << ")");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to call pick_objstore with a non-canonical objref, (objref " << canonical_objref << ")");
|
||||
}
|
||||
std::uniform_int_distribution<int> uni(0, objtable_[canonical_objref].size() - 1);
|
||||
ObjStoreId objstoreid = objtable_[canonical_objref][uni(rng)];
|
||||
@@ -443,7 +443,7 @@ ObjStoreId SchedulerService::pick_objstore(ObjRef canonical_objref) {
|
||||
bool SchedulerService::is_canonical(ObjRef objref) {
|
||||
std::lock_guard<std::mutex> lock(target_objrefs_lock_);
|
||||
if (target_objrefs_[objref] == UNITIALIZED_ALIAS) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to call is_canonical on an objref for which aliasing is not complete or the object is not ready, target_objrefs_[objref] == UNITIALIZED_ALIAS for objref " << objref << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to call is_canonical on an objref for which aliasing is not complete or the object is not ready, target_objrefs_[objref] == UNITIALIZED_ALIAS for objref " << objref << ".");
|
||||
}
|
||||
return objref == target_objrefs_[objref];
|
||||
}
|
||||
@@ -456,11 +456,11 @@ void SchedulerService::perform_pulls() {
|
||||
ObjRef objref = pull.second;
|
||||
WorkerId workerid = pull.first;
|
||||
if (!has_canonical_objref(objref)) {
|
||||
HALO_LOG(HALO_ALIAS, "objref " << objref << " does not have a canonical_objref, so continuing");
|
||||
RAY_LOG(RAY_ALIAS, "objref " << objref << " does not have a canonical_objref, so continuing");
|
||||
continue;
|
||||
}
|
||||
ObjRef canonical_objref = get_canonical_objref(objref);
|
||||
HALO_LOG(HALO_DEBUG, "attempting to pull objref " << pull.second << " with canonical objref " << canonical_objref << " to objstore " << get_store(workerid));
|
||||
RAY_LOG(RAY_DEBUG, "attempting to pull objref " << pull.second << " with canonical objref " << canonical_objref << " to objstore " << get_store(workerid));
|
||||
|
||||
objtable_lock_.lock();
|
||||
int num_stores = objtable_[canonical_objref].size();
|
||||
@@ -538,7 +538,7 @@ void SchedulerService::schedule_tasks_location_aware() {
|
||||
if (!task.arg(j).has_obj()) {
|
||||
ObjRef objref = task.arg(j).ref();
|
||||
if (!has_canonical_objref(objref)) {
|
||||
HALO_LOG(HALO_FATAL, "no canonical object ref found even though task is ready; that should not be possible!");
|
||||
RAY_LOG(RAY_FATAL, "no canonical object ref found even though task is ready; that should not be possible!");
|
||||
}
|
||||
ObjRef canonical_objref = get_canonical_objref(objref);
|
||||
// check if the object is already in the local object store
|
||||
@@ -585,7 +585,7 @@ bool SchedulerService::has_canonical_objref(ObjRef objref) {
|
||||
ObjRef objref_temp = objref;
|
||||
while (true) {
|
||||
if (objref_temp >= target_objrefs_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to index target_objrefs_ with objref " << objref_temp << ", but target_objrefs_.size() = " << target_objrefs_.size());
|
||||
RAY_LOG(RAY_FATAL, "Attempting to index target_objrefs_ with objref " << objref_temp << ", but target_objrefs_.size() = " << target_objrefs_.size());
|
||||
}
|
||||
if (target_objrefs_[objref_temp] == UNITIALIZED_ALIAS) {
|
||||
return false;
|
||||
@@ -603,16 +603,16 @@ ObjRef SchedulerService::get_canonical_objref(ObjRef objref) {
|
||||
ObjRef objref_temp = objref;
|
||||
while (true) {
|
||||
if (objref_temp >= target_objrefs_.size()) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to index target_objrefs_ with objref " << objref_temp << ", but target_objrefs_.size() = " << target_objrefs_.size());
|
||||
RAY_LOG(RAY_FATAL, "Attempting to index target_objrefs_ with objref " << objref_temp << ", but target_objrefs_.size() = " << target_objrefs_.size());
|
||||
}
|
||||
if (target_objrefs_[objref_temp] == UNITIALIZED_ALIAS) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to get canonical objref for objref " << objref << ", which aliases, objref " << objref_temp << ", but target_objrefs_[objref_temp] == UNITIALIZED_ALIAS for objref_temp = " << objref_temp << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to get canonical objref for objref " << objref << ", which aliases, objref " << objref_temp << ", but target_objrefs_[objref_temp] == UNITIALIZED_ALIAS for objref_temp = " << objref_temp << ".");
|
||||
}
|
||||
if (target_objrefs_[objref_temp] == objref_temp) {
|
||||
return objref_temp;
|
||||
}
|
||||
objref_temp = target_objrefs_[objref_temp];
|
||||
HALO_LOG(HALO_ALIAS, "Looping in get_canonical_objref.");
|
||||
RAY_LOG(RAY_ALIAS, "Looping in get_canonical_objref.");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -646,7 +646,7 @@ void SchedulerService::deallocate_object(ObjRef canonical_objref) {
|
||||
// these methods require reference_counts_lock_ to have been acquired, and
|
||||
// so the lock must before outside of these methods (it is acquired in
|
||||
// DecrementRefCount).
|
||||
HALO_LOG(HALO_REFCOUNT, "Deallocating canonical_objref " << canonical_objref << ".");
|
||||
RAY_LOG(RAY_REFCOUNT, "Deallocating canonical_objref " << canonical_objref << ".");
|
||||
{
|
||||
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
|
||||
auto &objstores = objtable_[canonical_objref];
|
||||
@@ -657,7 +657,7 @@ void SchedulerService::deallocate_object(ObjRef canonical_objref) {
|
||||
DeallocateObjectRequest request;
|
||||
request.set_canonical_objref(canonical_objref);
|
||||
ObjStoreId objstoreid = objstores[i];
|
||||
HALO_LOG(HALO_REFCOUNT, "Attempting to deallocate canonical_objref " << canonical_objref << " from objstore " << objstoreid);
|
||||
RAY_LOG(RAY_REFCOUNT, "Attempting to deallocate canonical_objref " << canonical_objref << " from objstore " << objstoreid);
|
||||
objstores_[objstoreid].objstore_stub->DeallocateObject(&context, request, &reply);
|
||||
}
|
||||
objtable_[canonical_objref].clear();
|
||||
@@ -670,10 +670,10 @@ void SchedulerService::increment_ref_count(std::vector<ObjRef> &objrefs) {
|
||||
for (int i = 0; i < objrefs.size(); ++i) {
|
||||
ObjRef objref = objrefs[i];
|
||||
if (reference_counts_[objref] == DEALLOCATED) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to increment the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to increment the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
|
||||
}
|
||||
reference_counts_[objref] += 1;
|
||||
HALO_LOG(HALO_REFCOUNT, "Incremented ref count for objref " << objref <<". New reference count is " << reference_counts_[objref]);
|
||||
RAY_LOG(RAY_REFCOUNT, "Incremented ref count for objref " << objref <<". New reference count is " << reference_counts_[objref]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -682,13 +682,13 @@ void SchedulerService::decrement_ref_count(std::vector<ObjRef> &objrefs) {
|
||||
for (int i = 0; i < objrefs.size(); ++i) {
|
||||
ObjRef objref = objrefs[i];
|
||||
if (reference_counts_[objref] == DEALLOCATED) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
|
||||
}
|
||||
if (reference_counts_[objref] == 0) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but the reference count for this object is already 0.");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but the reference count for this object is already 0.");
|
||||
}
|
||||
reference_counts_[objref] -= 1;
|
||||
HALO_LOG(HALO_REFCOUNT, "Decremented ref count for objref " << objref << ". New reference count is " << reference_counts_[objref]);
|
||||
RAY_LOG(RAY_REFCOUNT, "Decremented ref count for objref " << objref << ". New reference count is " << reference_counts_[objref]);
|
||||
// See if we can deallocate the object
|
||||
std::vector<ObjRef> equivalent_objrefs;
|
||||
get_equivalent_objrefs(objref, equivalent_objrefs);
|
||||
@@ -702,7 +702,7 @@ void SchedulerService::decrement_ref_count(std::vector<ObjRef> &objrefs) {
|
||||
if (can_deallocate) {
|
||||
ObjRef canonical_objref = equivalent_objrefs[0];
|
||||
if (!is_canonical(canonical_objref)) {
|
||||
HALO_LOG(HALO_FATAL, "canonical_objref is not canonical.");
|
||||
RAY_LOG(RAY_FATAL, "canonical_objref is not canonical.");
|
||||
}
|
||||
deallocate_object(canonical_objref);
|
||||
for (int j = 0; j < equivalent_objrefs.size(); ++j) {
|
||||
@@ -724,7 +724,7 @@ void SchedulerService::get_equivalent_objrefs(ObjRef objref, std::vector<ObjRef>
|
||||
std::lock_guard<std::mutex> target_objrefs_lock(target_objrefs_lock_);
|
||||
ObjRef downstream_objref = objref;
|
||||
while (target_objrefs_[downstream_objref] != downstream_objref && target_objrefs_[downstream_objref] != UNITIALIZED_ALIAS) {
|
||||
HALO_LOG(HALO_ALIAS, "Looping in get_equivalent_objrefs");
|
||||
RAY_LOG(RAY_ALIAS, "Looping in get_equivalent_objrefs");
|
||||
downstream_objref = target_objrefs_[downstream_objref];
|
||||
}
|
||||
std::lock_guard<std::mutex> reverse_target_objrefs_lock(reverse_target_objrefs_lock_);
|
||||
@@ -755,7 +755,7 @@ char* get_cmd_option(char** begin, char** end, const std::string& option) {
|
||||
int main(int argc, char** argv) {
|
||||
SchedulingAlgorithmType scheduling_algorithm = SCHEDULING_ALGORITHM_LOCALITY_AWARE;
|
||||
if (argc < 2) {
|
||||
HALO_LOG(HALO_FATAL, "scheduler: expected at least one argument (scheduler ip address)");
|
||||
RAY_LOG(RAY_FATAL, "scheduler: expected at least one argument (scheduler ip address)");
|
||||
return 1;
|
||||
}
|
||||
if (argc > 2) {
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_SCHEDULER_H
|
||||
#define HALO_SCHEDULER_H
|
||||
#ifndef RAY_SCHEDULER_H
|
||||
#define RAY_SCHEDULER_H
|
||||
|
||||
|
||||
#include <deque>
|
||||
@@ -10,8 +10,8 @@
|
||||
|
||||
#include <grpc++/grpc++.h>
|
||||
|
||||
#include "halo/halo.h"
|
||||
#include "halo.grpc.pb.h"
|
||||
#include "ray/ray.h"
|
||||
#include "ray.grpc.pb.h"
|
||||
#include "types.pb.h"
|
||||
|
||||
#include "computation_graph.h"
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_UTILS_H
|
||||
#define HALO_UTILS_H
|
||||
#ifndef RAY_UTILS_H
|
||||
#define RAY_UTILS_H
|
||||
|
||||
inline std::string::iterator split_ip_address(std::string& ip_address) {
|
||||
if (ip_address[0] == '[') { // IPv6
|
||||
@@ -10,11 +10,11 @@ inline std::string::iterator split_ip_address(std::string& ip_address) {
|
||||
if(split_end != ip_address.end() && *split_end == ':') {
|
||||
return split_end;
|
||||
}
|
||||
HALO_LOG(HALO_FATAL, "ip address should contain a port number");
|
||||
RAY_LOG(RAY_FATAL, "ip address should contain a port number");
|
||||
} else { // IPv4
|
||||
auto split_point = std::find(ip_address.rbegin(), ip_address.rend(), ':').base();
|
||||
if (split_point == ip_address.begin()) {
|
||||
HALO_LOG(HALO_FATAL, "ip address should contain a port number");
|
||||
RAY_LOG(RAY_FATAL, "ip address should contain a port number");
|
||||
} else {
|
||||
return split_point;
|
||||
}
|
||||
|
||||
+21
-21
@@ -5,12 +5,12 @@
|
||||
#include <pynumbuf/serialize.h>
|
||||
|
||||
extern "C" {
|
||||
static PyObject *HaloError;
|
||||
static PyObject *RayError;
|
||||
}
|
||||
|
||||
Status WorkerServiceImpl::ExecuteTask(ServerContext* context, const ExecuteTaskRequest* request, ExecuteTaskReply* reply) {
|
||||
task_ = request->task(); // Copy task
|
||||
HALO_LOG(HALO_INFO, "invoked task " << request->task().name());
|
||||
RAY_LOG(RAY_INFO, "invoked task " << request->task().name());
|
||||
Task* taskptr = &task_;
|
||||
send_queue_.send(&taskptr);
|
||||
return Status::OK;
|
||||
@@ -26,7 +26,7 @@ Worker::Worker(const std::string& worker_address, std::shared_ptr<Channel> sched
|
||||
|
||||
SubmitTaskReply Worker::submit_task(SubmitTaskRequest* request) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform submit_task, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform submit_task, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
SubmitTaskReply reply;
|
||||
ClientContext context;
|
||||
@@ -52,7 +52,7 @@ void Worker::register_worker(const std::string& worker_address, const std::strin
|
||||
|
||||
void Worker::request_object(ObjRef objref) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform request_object, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform request_object, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
RequestObjRequest request;
|
||||
request.set_workerid(workerid_);
|
||||
@@ -66,7 +66,7 @@ void Worker::request_object(ObjRef objref) {
|
||||
ObjRef Worker::get_objref() {
|
||||
// first get objref for the new object
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform get_objref, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform get_objref, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
PushObjRequest push_request;
|
||||
PushObjReply push_reply;
|
||||
@@ -78,7 +78,7 @@ ObjRef Worker::get_objref() {
|
||||
slice Worker::get_object(ObjRef objref) {
|
||||
// get_object assumes that objref is a canonical objref
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform get_object, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform get_object, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ObjRequest request;
|
||||
request.workerid = workerid_;
|
||||
@@ -97,7 +97,7 @@ slice Worker::get_object(ObjRef objref) {
|
||||
// contained_objrefs is a vector of all the objrefs contained in obj
|
||||
void Worker::put_object(ObjRef objref, const Obj* obj, std::vector<ObjRef> &contained_objrefs) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform put_object, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform put_object, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
std::string data;
|
||||
obj->SerializeToString(&data); // TODO(pcm): get rid of this serialization
|
||||
@@ -108,7 +108,7 @@ void Worker::put_object(ObjRef objref, const Obj* obj, std::vector<ObjRef> &cont
|
||||
request.size = data.size();
|
||||
request_obj_queue_.send(&request);
|
||||
if (contained_objrefs.size() > 0) {
|
||||
HALO_LOG(HALO_REFCOUNT, "In put_object, calling increment_reference_count for contained objrefs");
|
||||
RAY_LOG(RAY_REFCOUNT, "In put_object, calling increment_reference_count for contained objrefs");
|
||||
increment_reference_count(contained_objrefs); // Notify the scheduler that some object references are serialized in the objstore.
|
||||
}
|
||||
ObjHandle result;
|
||||
@@ -135,14 +135,14 @@ void Worker::put_object(ObjRef objref, const Obj* obj, std::vector<ObjRef> &cont
|
||||
arrow::Status _s = (s); \
|
||||
if (!_s.ok()) { \
|
||||
std::string _errmsg = std::string(msg) + _s.ToString(); \
|
||||
PyErr_SetString(HaloError, _errmsg.c_str()); \
|
||||
PyErr_SetString(RayError, _errmsg.c_str()); \
|
||||
return NULL; \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
PyObject* Worker::put_arrow(ObjRef objref, PyObject* value) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform put_arrow, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform put_arrow, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ObjRequest request;
|
||||
pynumbuf::PythonObjectWriter writer;
|
||||
@@ -168,7 +168,7 @@ PyObject* Worker::put_arrow(ObjRef objref, PyObject* value) {
|
||||
|
||||
PyObject* Worker::get_arrow(ObjRef objref) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform get_arrow, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform get_arrow, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ObjRequest request;
|
||||
request.workerid = workerid_;
|
||||
@@ -186,7 +186,7 @@ PyObject* Worker::get_arrow(ObjRef objref) {
|
||||
|
||||
bool Worker::is_arrow(ObjRef objref) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform is_arrow, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform is_arrow, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ObjRequest request;
|
||||
request.workerid = workerid_;
|
||||
@@ -200,7 +200,7 @@ bool Worker::is_arrow(ObjRef objref) {
|
||||
|
||||
void Worker::alias_objrefs(ObjRef alias_objref, ObjRef target_objref) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform alias_objrefs, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform alias_objrefs, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ClientContext context;
|
||||
AliasObjRefsRequest request;
|
||||
@@ -212,14 +212,14 @@ void Worker::alias_objrefs(ObjRef alias_objref, ObjRef target_objref) {
|
||||
|
||||
void Worker::increment_reference_count(std::vector<ObjRef> &objrefs) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_DEBUG, "Attempting to increment_reference_count for objrefs, but connected_ = " << connected_ << " so returning instead.");
|
||||
RAY_LOG(RAY_DEBUG, "Attempting to increment_reference_count for objrefs, but connected_ = " << connected_ << " so returning instead.");
|
||||
return;
|
||||
}
|
||||
if (objrefs.size() > 0) {
|
||||
ClientContext context;
|
||||
IncrementRefCountRequest request;
|
||||
for (int i = 0; i < objrefs.size(); ++i) {
|
||||
HALO_LOG(HALO_REFCOUNT, "Incrementing reference count for objref " << objrefs[i]);
|
||||
RAY_LOG(RAY_REFCOUNT, "Incrementing reference count for objref " << objrefs[i]);
|
||||
request.add_objref(objrefs[i]);
|
||||
}
|
||||
AckReply reply;
|
||||
@@ -229,14 +229,14 @@ void Worker::increment_reference_count(std::vector<ObjRef> &objrefs) {
|
||||
|
||||
void Worker::decrement_reference_count(std::vector<ObjRef> &objrefs) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_DEBUG, "Attempting to decrement_reference_count, but connected_ = " << connected_ << " so returning instead.");
|
||||
RAY_LOG(RAY_DEBUG, "Attempting to decrement_reference_count, but connected_ = " << connected_ << " so returning instead.");
|
||||
return;
|
||||
}
|
||||
if (objrefs.size() > 0) {
|
||||
ClientContext context;
|
||||
DecrementRefCountRequest request;
|
||||
for (int i = 0; i < objrefs.size(); ++i) {
|
||||
HALO_LOG(HALO_REFCOUNT, "Decrementing reference count for objref " << objrefs[i]);
|
||||
RAY_LOG(RAY_REFCOUNT, "Decrementing reference count for objref " << objrefs[i]);
|
||||
request.add_objref(objrefs[i]);
|
||||
}
|
||||
AckReply reply;
|
||||
@@ -246,7 +246,7 @@ void Worker::decrement_reference_count(std::vector<ObjRef> &objrefs) {
|
||||
|
||||
void Worker::register_function(const std::string& name, size_t num_return_vals) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform register_function, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform register_function, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ClientContext context;
|
||||
RegisterFunctionRequest request;
|
||||
@@ -265,7 +265,7 @@ Task* Worker::receive_next_task() {
|
||||
|
||||
void Worker::notify_task_completed() {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to perform notify_task_completed, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to perform notify_task_completed, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
ClientContext context;
|
||||
WorkerReadyRequest request;
|
||||
@@ -285,7 +285,7 @@ bool Worker::connected() {
|
||||
// TODO(rkn): Should we be using pointers or references? And should they be const?
|
||||
void Worker::scheduler_info(ClientContext &context, SchedulerInfoRequest &request, SchedulerInfoReply &reply) {
|
||||
if (!connected_) {
|
||||
HALO_LOG(HALO_FATAL, "Attempting to get scheduler info, but connected_ = " << connected_ << ".");
|
||||
RAY_LOG(RAY_FATAL, "Attempting to get scheduler info, but connected_ = " << connected_ << ".");
|
||||
}
|
||||
scheduler_stub_->SchedulerInfo(&context, request, &reply);
|
||||
}
|
||||
@@ -306,7 +306,7 @@ void Worker::start_worker_service() {
|
||||
builder.AddListeningPort(std::string("0.0.0.0:") + port, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
std::unique_ptr<Server> server(builder.BuildAndStart());
|
||||
HALO_LOG(HALO_INFO, "worker server listening on " << service_address);
|
||||
RAY_LOG(RAY_INFO, "worker server listening on " << service_address);
|
||||
server->Wait();
|
||||
});
|
||||
}
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
#ifndef HALO_WORKER_H
|
||||
#define HALO_WORKER_H
|
||||
#ifndef RAY_WORKER_H
|
||||
#define RAY_WORKER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
@@ -15,8 +15,8 @@ using grpc::ServerBuilder;
|
||||
using grpc::ServerContext;
|
||||
using grpc::Status;
|
||||
|
||||
#include "halo.grpc.pb.h"
|
||||
#include "halo/halo.h"
|
||||
#include "ray.grpc.pb.h"
|
||||
#include "ray/ray.h"
|
||||
#include "ipc.h"
|
||||
|
||||
using grpc::Channel;
|
||||
|
||||
+44
-44
@@ -1,15 +1,15 @@
|
||||
import unittest
|
||||
import halo
|
||||
import halo.serialization as serialization
|
||||
import halo.services as services
|
||||
import halo.worker as worker
|
||||
import ray
|
||||
import ray.serialization as serialization
|
||||
import ray.services as services
|
||||
import ray.worker as worker
|
||||
import numpy as np
|
||||
import time
|
||||
import subprocess32 as subprocess
|
||||
import os
|
||||
|
||||
import halo.arrays.remote as ra
|
||||
import halo.arrays.distributed as da
|
||||
import ray.arrays.remote as ra
|
||||
import ray.arrays.distributed as da
|
||||
|
||||
class ArraysSingleTest(unittest.TestCase):
|
||||
|
||||
@@ -20,27 +20,27 @@ class ArraysSingleTest(unittest.TestCase):
|
||||
|
||||
# test eye
|
||||
ref = ra.eye(3)
|
||||
val = halo.pull(ref)
|
||||
val = ray.pull(ref)
|
||||
self.assertTrue(np.alltrue(val == np.eye(3)))
|
||||
|
||||
# test zeros
|
||||
ref = ra.zeros([3, 4, 5])
|
||||
val = halo.pull(ref)
|
||||
val = ray.pull(ref)
|
||||
self.assertTrue(np.alltrue(val == np.zeros([3, 4, 5])))
|
||||
|
||||
# test qr - pass by value
|
||||
val_a = np.random.normal(size=[10, 13])
|
||||
ref_q, ref_r = ra.linalg.qr(val_a)
|
||||
val_q = halo.pull(ref_q)
|
||||
val_r = halo.pull(ref_r)
|
||||
val_q = ray.pull(ref_q)
|
||||
val_r = ray.pull(ref_r)
|
||||
self.assertTrue(np.allclose(np.dot(val_q, val_r), val_a))
|
||||
|
||||
# test qr - pass by objref
|
||||
a = ra.random.normal([10, 13])
|
||||
ref_q, ref_r = ra.linalg.qr(a)
|
||||
val_a = halo.pull(a)
|
||||
val_q = halo.pull(ref_q)
|
||||
val_r = halo.pull(ref_r)
|
||||
val_a = ray.pull(a)
|
||||
val_q = ray.pull(ref_q)
|
||||
val_r = ray.pull(ref_r)
|
||||
self.assertTrue(np.allclose(np.dot(val_q, val_r), val_a))
|
||||
|
||||
services.cleanup()
|
||||
@@ -51,7 +51,7 @@ class ArraysDistTest(unittest.TestCase):
|
||||
[w] = services.start_singlenode_cluster(return_drivers=True)
|
||||
|
||||
x = da.DistArray()
|
||||
x.construct([2, 3, 4], np.array([[[halo.push(0, w)]]]))
|
||||
x.construct([2, 3, 4], np.array([[[ray.push(0, w)]]]))
|
||||
capsule, _ = serialization.serialize(w.handle, x) # TODO(rkn): THIS REQUIRES A WORKER_HANDLE
|
||||
y = serialization.deserialize(w.handle, capsule) # TODO(rkn): THIS REQUIRES A WORKER_HANDLE
|
||||
self.assertEqual(x.shape, y.shape)
|
||||
@@ -79,33 +79,33 @@ class ArraysDistTest(unittest.TestCase):
|
||||
|
||||
x = da.zeros([9, 25, 51], "float")
|
||||
y = da.assemble(x)
|
||||
self.assertTrue(np.alltrue(halo.pull(y) == np.zeros([9, 25, 51])))
|
||||
self.assertTrue(np.alltrue(ray.pull(y) == np.zeros([9, 25, 51])))
|
||||
|
||||
x = da.ones([11, 25, 49], dtype_name="float")
|
||||
y = da.assemble(x)
|
||||
self.assertTrue(np.alltrue(halo.pull(y) == np.ones([11, 25, 49])))
|
||||
self.assertTrue(np.alltrue(ray.pull(y) == np.ones([11, 25, 49])))
|
||||
|
||||
x = da.random.normal([11, 25, 49])
|
||||
y = da.copy(x)
|
||||
z = da.assemble(x)
|
||||
w = da.assemble(y)
|
||||
self.assertTrue(np.alltrue(halo.pull(z) == halo.pull(w)))
|
||||
self.assertTrue(np.alltrue(ray.pull(z) == ray.pull(w)))
|
||||
|
||||
x = da.eye(25, dtype_name="float")
|
||||
y = da.assemble(x)
|
||||
self.assertTrue(np.alltrue(halo.pull(y) == np.eye(25)))
|
||||
self.assertTrue(np.alltrue(ray.pull(y) == np.eye(25)))
|
||||
|
||||
x = da.random.normal([25, 49])
|
||||
y = da.triu(x)
|
||||
z = da.assemble(y)
|
||||
w = da.assemble(x)
|
||||
self.assertTrue(np.alltrue(halo.pull(z) == np.triu(halo.pull(w))))
|
||||
self.assertTrue(np.alltrue(ray.pull(z) == np.triu(ray.pull(w))))
|
||||
|
||||
x = da.random.normal([25, 49])
|
||||
y = da.tril(x)
|
||||
z = da.assemble(y)
|
||||
w = da.assemble(x)
|
||||
self.assertTrue(np.alltrue(halo.pull(z) == np.tril(halo.pull(w))))
|
||||
self.assertTrue(np.alltrue(ray.pull(z) == np.tril(ray.pull(w))))
|
||||
|
||||
x = da.random.normal([25, 49])
|
||||
y = da.random.normal([49, 18])
|
||||
@@ -113,8 +113,8 @@ class ArraysDistTest(unittest.TestCase):
|
||||
w = da.assemble(z)
|
||||
u = da.assemble(x)
|
||||
v = da.assemble(y)
|
||||
np.allclose(halo.pull(w), np.dot(halo.pull(u), halo.pull(v)))
|
||||
self.assertTrue(np.allclose(halo.pull(w), np.dot(halo.pull(u), halo.pull(v))))
|
||||
np.allclose(ray.pull(w), np.dot(ray.pull(u), ray.pull(v)))
|
||||
self.assertTrue(np.allclose(ray.pull(w), np.dot(ray.pull(u), ray.pull(v))))
|
||||
|
||||
# test add
|
||||
x = da.random.normal([23, 42])
|
||||
@@ -123,7 +123,7 @@ class ArraysDistTest(unittest.TestCase):
|
||||
z_full = da.assemble(z)
|
||||
x_full = da.assemble(x)
|
||||
y_full = da.assemble(y)
|
||||
self.assertTrue(np.allclose(halo.pull(z_full), halo.pull(x_full) + halo.pull(y_full)))
|
||||
self.assertTrue(np.allclose(ray.pull(z_full), ray.pull(x_full) + ray.pull(y_full)))
|
||||
|
||||
# test subtract
|
||||
x = da.random.normal([33, 40])
|
||||
@@ -132,14 +132,14 @@ class ArraysDistTest(unittest.TestCase):
|
||||
z_full = da.assemble(z)
|
||||
x_full = da.assemble(x)
|
||||
y_full = da.assemble(y)
|
||||
self.assertTrue(np.allclose(halo.pull(z_full), halo.pull(x_full) - halo.pull(y_full)))
|
||||
self.assertTrue(np.allclose(ray.pull(z_full), ray.pull(x_full) - ray.pull(y_full)))
|
||||
|
||||
# test transpose
|
||||
x = da.random.normal([234, 432])
|
||||
y = da.transpose(x)
|
||||
x_full = da.assemble(x)
|
||||
y_full = da.assemble(y)
|
||||
self.assertTrue(np.alltrue(halo.pull(x_full).T == halo.pull(y_full)))
|
||||
self.assertTrue(np.alltrue(ray.pull(x_full).T == ray.pull(y_full)))
|
||||
|
||||
# test numpy_to_dist
|
||||
x = da.random.normal([23, 45])
|
||||
@@ -148,8 +148,8 @@ class ArraysDistTest(unittest.TestCase):
|
||||
w = da.assemble(z)
|
||||
x_full = da.assemble(x)
|
||||
z_full = da.assemble(z)
|
||||
self.assertTrue(np.alltrue(halo.pull(x_full) == halo.pull(z_full)))
|
||||
self.assertTrue(np.alltrue(halo.pull(y) == halo.pull(w)))
|
||||
self.assertTrue(np.alltrue(ray.pull(x_full) == ray.pull(z_full)))
|
||||
self.assertTrue(np.alltrue(ray.pull(y) == ray.pull(w)))
|
||||
|
||||
# test da.tsqr
|
||||
for shape in [[123, da.BLOCK_SIZE], [7, da.BLOCK_SIZE], [da.BLOCK_SIZE, da.BLOCK_SIZE], [da.BLOCK_SIZE, 7], [10 * da.BLOCK_SIZE, da.BLOCK_SIZE]]:
|
||||
@@ -157,10 +157,10 @@ class ArraysDistTest(unittest.TestCase):
|
||||
K = min(shape)
|
||||
q, r = da.linalg.tsqr(x)
|
||||
x_full = da.assemble(x)
|
||||
x_val = halo.pull(x_full)
|
||||
x_val = ray.pull(x_full)
|
||||
q_full = da.assemble(q)
|
||||
q_val = halo.pull(q_full)
|
||||
r_val = halo.pull(r)
|
||||
q_val = ray.pull(q_full)
|
||||
r_val = ray.pull(r)
|
||||
self.assertTrue(r_val.shape == (K, shape[1]))
|
||||
self.assertTrue(np.alltrue(r_val == np.triu(r_val)))
|
||||
self.assertTrue(np.allclose(x_val, np.dot(q_val, r_val)))
|
||||
@@ -174,12 +174,12 @@ class ArraysDistTest(unittest.TestCase):
|
||||
m = ra.random.normal([d1, d2])
|
||||
q, r = ra.linalg.qr(m)
|
||||
l, u, s = da.linalg.modified_lu(da.numpy_to_dist(q))
|
||||
q_val = halo.pull(q)
|
||||
r_val = halo.pull(r)
|
||||
q_val = ray.pull(q)
|
||||
r_val = ray.pull(r)
|
||||
l_full = da.assemble(l)
|
||||
l_val = halo.pull(l_full)
|
||||
u_val = halo.pull(u)
|
||||
s_val = halo.pull(s)
|
||||
l_val = ray.pull(l_full)
|
||||
u_val = ray.pull(u)
|
||||
s_val = ray.pull(s)
|
||||
s_mat = np.zeros((d1, d2))
|
||||
for i in range(len(s_val)):
|
||||
s_mat[i, i] = s_val[i]
|
||||
@@ -196,17 +196,17 @@ class ArraysDistTest(unittest.TestCase):
|
||||
a = da.random.normal([d1, d2])
|
||||
y, t, y_top, r = da.linalg.tsqr_hr(a)
|
||||
a_full = da.assemble(a)
|
||||
a_val = halo.pull(a_full)
|
||||
a_val = ray.pull(a_full)
|
||||
y_full = da.assemble(y)
|
||||
y_val = halo.pull(y_full)
|
||||
t_val = halo.pull(t)
|
||||
y_top_val = halo.pull(y_top)
|
||||
r_val = halo.pull(r)
|
||||
y_val = ray.pull(y_full)
|
||||
t_val = ray.pull(t)
|
||||
y_top_val = ray.pull(y_top)
|
||||
r_val = ray.pull(r)
|
||||
tall_eye = np.zeros((d1, min(d1, d2)))
|
||||
np.fill_diagonal(tall_eye, 1)
|
||||
q = tall_eye - np.dot(y_val, np.dot(t_val, y_top_val.T))
|
||||
self.assertTrue(np.allclose(np.dot(q.T, q), np.eye(min(d1, d2)))) # check that q.T * q = I
|
||||
self.assertTrue(np.allclose(np.dot(q, r_val), a_val)) # check that a = (I - y * t * y_thalo.T) * r
|
||||
self.assertTrue(np.allclose(np.dot(q, r_val), a_val)) # check that a = (I - y * t * y_top.T) * r
|
||||
|
||||
for d1, d2 in [(123, da.BLOCK_SIZE), (7, da.BLOCK_SIZE), (da.BLOCK_SIZE, da.BLOCK_SIZE), (da.BLOCK_SIZE, 7), (10 * da.BLOCK_SIZE, da.BLOCK_SIZE)]:
|
||||
test_dist_tsqr_hr(d1, d2)
|
||||
@@ -219,9 +219,9 @@ class ArraysDistTest(unittest.TestCase):
|
||||
a_full = da.assemble(a)
|
||||
q_full = da.assemble(q)
|
||||
r_full = da.assemble(r)
|
||||
a_val = halo.pull(a_full)
|
||||
q_val = halo.pull(q_full)
|
||||
r_val = halo.pull(r_full)
|
||||
a_val = ray.pull(a_full)
|
||||
q_val = ray.pull(q_full)
|
||||
r_val = ray.pull(r_full)
|
||||
|
||||
self.assertTrue(q_val.shape == (d1, K))
|
||||
self.assertTrue(r_val.shape == (K, d2))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# For running the python tests
|
||||
|
||||
protoc -I ../protos/ --python_out=. --grpc_out=. --plugin=protoc-gen-grpc=`which grpc_python_plugin` ../protos/halo.proto
|
||||
protoc -I ../protos/ --python_out=. --grpc_out=. --plugin=protoc-gen-grpc=`which grpc_python_plugin` ../protos/ray.proto
|
||||
protoc -I ../protos/ --python_out=. --grpc_out=. --plugin=protoc-gen-grpc=`which grpc_python_plugin` ../protos/types.proto
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import unittest
|
||||
import halo
|
||||
import halo.services as services
|
||||
import ray
|
||||
import ray.services as services
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
@@ -51,7 +51,7 @@ class MicroBenchmarkTest(unittest.TestCase):
|
||||
for _ in range(1000):
|
||||
start_time = time.time()
|
||||
x = test_functions.trivial_function()
|
||||
halo.pull(x)
|
||||
ray.pull(x)
|
||||
end_time = time.time()
|
||||
elapsed_times.append(end_time - start_time)
|
||||
elapsed_times = np.sort(elapsed_times)
|
||||
@@ -67,7 +67,7 @@ class MicroBenchmarkTest(unittest.TestCase):
|
||||
elapsed_times = []
|
||||
for _ in range(1000):
|
||||
start_time = time.time()
|
||||
halo.push(1)
|
||||
ray.push(1)
|
||||
end_time = time.time()
|
||||
elapsed_times.append(end_time - start_time)
|
||||
elapsed_times = np.sort(elapsed_times)
|
||||
|
||||
+52
-52
@@ -1,16 +1,16 @@
|
||||
import unittest
|
||||
import halo
|
||||
import halo.serialization as serialization
|
||||
import halo.services as services
|
||||
import halo.worker as worker
|
||||
import ray
|
||||
import ray.serialization as serialization
|
||||
import ray.services as services
|
||||
import ray.worker as worker
|
||||
import numpy as np
|
||||
import time
|
||||
import subprocess32 as subprocess
|
||||
import os
|
||||
|
||||
import test_functions
|
||||
import halo.arrays.remote as ra
|
||||
import halo.arrays.distributed as da
|
||||
import ray.arrays.remote as ra
|
||||
import ray.arrays.distributed as da
|
||||
|
||||
class SerializationTest(unittest.TestCase):
|
||||
|
||||
@@ -56,10 +56,10 @@ class SerializationTest(unittest.TestCase):
|
||||
self.numpyTypeTest(w, 'float32')
|
||||
self.numpyTypeTest(w, 'float64')
|
||||
|
||||
ref0 = halo.push(0, w)
|
||||
ref1 = halo.push(0, w)
|
||||
ref2 = halo.push(0, w)
|
||||
ref3 = halo.push(0, w)
|
||||
ref0 = ray.push(0, w)
|
||||
ref1 = ray.push(0, w)
|
||||
ref2 = ray.push(0, w)
|
||||
ref3 = ray.push(0, w)
|
||||
a = np.array([[ref0, ref1], [ref2, ref3]])
|
||||
capsule, _ = serialization.serialize(w.handle, a)
|
||||
result = serialization.deserialize(w.handle, capsule)
|
||||
@@ -75,8 +75,8 @@ class ObjStoreTest(unittest.TestCase):
|
||||
|
||||
# pushing and pulling an object shouldn't change it
|
||||
for data in ["h", "h" * 10000, 0, 0.0]:
|
||||
objref = halo.push(data, w1)
|
||||
result = halo.pull(objref, w1)
|
||||
objref = ray.push(data, w1)
|
||||
result = ray.pull(objref, w1)
|
||||
self.assertEqual(result, data)
|
||||
|
||||
# pushing an object, shipping it to another worker, and pulling it shouldn't change it
|
||||
@@ -129,7 +129,7 @@ class SchedulerTest(unittest.TestCase):
|
||||
|
||||
time.sleep(0.2)
|
||||
|
||||
value_after = halo.pull(objref[0], w)
|
||||
value_after = ray.pull(objref[0], w)
|
||||
self.assertEqual(value_before, value_after)
|
||||
|
||||
time.sleep(0.1)
|
||||
@@ -143,26 +143,26 @@ class WorkerTest(unittest.TestCase):
|
||||
|
||||
for i in range(100):
|
||||
value_before = i * 10 ** 6
|
||||
objref = halo.push(value_before, w)
|
||||
value_after = halo.pull(objref, w)
|
||||
objref = ray.push(value_before, w)
|
||||
value_after = ray.pull(objref, w)
|
||||
self.assertEqual(value_before, value_after)
|
||||
|
||||
for i in range(100):
|
||||
value_before = i * 10 ** 6 * 1.0
|
||||
objref = halo.push(value_before, w)
|
||||
value_after = halo.pull(objref, w)
|
||||
objref = ray.push(value_before, w)
|
||||
value_after = ray.pull(objref, w)
|
||||
self.assertEqual(value_before, value_after)
|
||||
|
||||
for i in range(100):
|
||||
value_before = "h" * i
|
||||
objref = halo.push(value_before, w)
|
||||
value_after = halo.pull(objref, w)
|
||||
objref = ray.push(value_before, w)
|
||||
value_after = ray.pull(objref, w)
|
||||
self.assertEqual(value_before, value_after)
|
||||
|
||||
for i in range(100):
|
||||
value_before = [1] * i
|
||||
objref = halo.push(value_before, w)
|
||||
value_after = halo.pull(objref, w)
|
||||
objref = ray.push(value_before, w)
|
||||
value_after = ray.pull(objref, w)
|
||||
self.assertEqual(value_before, value_after)
|
||||
|
||||
services.cleanup()
|
||||
@@ -175,11 +175,11 @@ class APITest(unittest.TestCase):
|
||||
[w] = services.start_singlenode_cluster(return_drivers=True, num_workers_per_objstore=3, worker_path=test_path)
|
||||
|
||||
objref = w.submit_task("test_functions.test_alias_f", [])
|
||||
self.assertTrue(np.alltrue(halo.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
self.assertTrue(np.alltrue(ray.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
objref = w.submit_task("test_functions.test_alias_g", [])
|
||||
self.assertTrue(np.alltrue(halo.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
self.assertTrue(np.alltrue(ray.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
objref = w.submit_task("test_functions.test_alias_h", [])
|
||||
self.assertTrue(np.alltrue(halo.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
self.assertTrue(np.alltrue(ray.pull(objref[0], w) == np.ones([3, 4, 5])))
|
||||
|
||||
services.cleanup()
|
||||
|
||||
@@ -189,35 +189,35 @@ class APITest(unittest.TestCase):
|
||||
services.start_singlenode_cluster(return_drivers=False, num_workers_per_objstore=1, worker_path=test_path)
|
||||
|
||||
x = test_functions.keyword_fct1(1)
|
||||
self.assertEqual(halo.pull(x), "1 hello")
|
||||
self.assertEqual(ray.pull(x), "1 hello")
|
||||
x = test_functions.keyword_fct1(1, "hi")
|
||||
self.assertEqual(halo.pull(x), "1 hi")
|
||||
self.assertEqual(ray.pull(x), "1 hi")
|
||||
x = test_functions.keyword_fct1(1, b="world")
|
||||
self.assertEqual(halo.pull(x), "1 world")
|
||||
self.assertEqual(ray.pull(x), "1 world")
|
||||
|
||||
x = test_functions.keyword_fct2(a="w", b="hi")
|
||||
self.assertEqual(halo.pull(x), "w hi")
|
||||
self.assertEqual(ray.pull(x), "w hi")
|
||||
x = test_functions.keyword_fct2(b="hi", a="w")
|
||||
self.assertEqual(halo.pull(x), "w hi")
|
||||
self.assertEqual(ray.pull(x), "w hi")
|
||||
x = test_functions.keyword_fct2(a="w")
|
||||
self.assertEqual(halo.pull(x), "w world")
|
||||
self.assertEqual(ray.pull(x), "w world")
|
||||
x = test_functions.keyword_fct2(b="hi")
|
||||
self.assertEqual(halo.pull(x), "hello hi")
|
||||
self.assertEqual(ray.pull(x), "hello hi")
|
||||
x = test_functions.keyword_fct2("w")
|
||||
self.assertEqual(halo.pull(x), "w world")
|
||||
self.assertEqual(ray.pull(x), "w world")
|
||||
x = test_functions.keyword_fct2("w", "hi")
|
||||
self.assertEqual(halo.pull(x), "w hi")
|
||||
self.assertEqual(ray.pull(x), "w hi")
|
||||
|
||||
x = test_functions.keyword_fct3(0, 1, c="w", d="hi")
|
||||
self.assertEqual(halo.pull(x), "0 1 w hi")
|
||||
self.assertEqual(ray.pull(x), "0 1 w hi")
|
||||
x = test_functions.keyword_fct3(0, 1, d="hi", c="w")
|
||||
self.assertEqual(halo.pull(x), "0 1 w hi")
|
||||
self.assertEqual(ray.pull(x), "0 1 w hi")
|
||||
x = test_functions.keyword_fct3(0, 1, c="w")
|
||||
self.assertEqual(halo.pull(x), "0 1 w world")
|
||||
self.assertEqual(ray.pull(x), "0 1 w world")
|
||||
x = test_functions.keyword_fct3(0, 1, d="hi")
|
||||
self.assertEqual(halo.pull(x), "0 1 hello hi")
|
||||
self.assertEqual(ray.pull(x), "0 1 hello hi")
|
||||
x = test_functions.keyword_fct3(0, 1)
|
||||
self.assertEqual(halo.pull(x), "0 1 hello world")
|
||||
self.assertEqual(ray.pull(x), "0 1 hello world")
|
||||
|
||||
services.cleanup()
|
||||
|
||||
@@ -227,9 +227,9 @@ class APITest(unittest.TestCase):
|
||||
services.start_singlenode_cluster(return_drivers=False, num_workers_per_objstore=1, worker_path=test_path)
|
||||
|
||||
x = test_functions.varargs_fct1(0, 1, 2)
|
||||
self.assertEqual(halo.pull(x), "0 1 2")
|
||||
self.assertEqual(ray.pull(x), "0 1 2")
|
||||
x = test_functions.varargs_fct2(0, 1, 2)
|
||||
self.assertEqual(halo.pull(x), "1 2")
|
||||
self.assertEqual(ray.pull(x), "1 2")
|
||||
|
||||
self.assertTrue(test_functions.kwargs_exception_thrown)
|
||||
self.assertTrue(test_functions.varargs_and_kwargs_exception_thrown)
|
||||
@@ -244,48 +244,48 @@ class ReferenceCountingTest(unittest.TestCase):
|
||||
services.start_singlenode_cluster(return_drivers=False, num_workers_per_objstore=3, worker_path=test_path)
|
||||
|
||||
x = test_functions.test_alias_f()
|
||||
halo.pull(x)
|
||||
ray.pull(x)
|
||||
time.sleep(0.1)
|
||||
objref_val = x.val
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val] == 1)
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val] == 1)
|
||||
|
||||
del x
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val] == -1) # -1 indicates deallocated
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val] == -1) # -1 indicates deallocated
|
||||
|
||||
y = test_functions.test_alias_h()
|
||||
halo.pull(y)
|
||||
ray.pull(y)
|
||||
time.sleep(0.1)
|
||||
objref_val = y.val
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 0, 0])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 0, 0])
|
||||
|
||||
del y
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
|
||||
z = da.zeros([da.BLOCK_SIZE, 2 * da.BLOCK_SIZE], "float")
|
||||
time.sleep(0.1)
|
||||
objref_val = z.val
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 1, 1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 1, 1])
|
||||
|
||||
del z
|
||||
time.sleep(0.1)
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
|
||||
x = ra.zeros([10, 10], "float")
|
||||
y = ra.zeros([10, 10], "float")
|
||||
z = ra.dot(x, y)
|
||||
objref_val = x.val
|
||||
time.sleep(0.1)
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 1, 1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [1, 1, 1])
|
||||
|
||||
del x
|
||||
time.sleep(0.1)
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, 1, 1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, 1, 1])
|
||||
del y
|
||||
time.sleep(0.1)
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, 1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, 1])
|
||||
del z
|
||||
time.sleep(0.1)
|
||||
self.assertTrue(halo.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
self.assertTrue(ray.scheduler_info()["reference_counts"][objref_val:(objref_val + 3)] == [-1, -1, -1])
|
||||
|
||||
services.cleanup()
|
||||
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
import halo
|
||||
import halo.services as services
|
||||
import halo.worker as worker
|
||||
|
||||
import test_functions
|
||||
import halo.arrays.remote as ra
|
||||
import halo.arrays.distributed as da
|
||||
|
||||
from grpc.beta import implementations
|
||||
import halo_pb2
|
||||
import types_pb2
|
||||
|
||||
TIMEOUT_SECONDS = 5
|
||||
|
||||
parser = argparse.ArgumentParser(description='Parse addresses for the worker to connect to.')
|
||||
parser.add_argument("--scheduler-address", default="127.0.0.1:10001", type=str, help="the scheduler's address")
|
||||
parser.add_argument("--objstore-address", default="127.0.0.1:20001", type=str, help="the objstore's address")
|
||||
parser.add_argument("--worker-address", default="127.0.0.1:30001", type=str, help="the worker's address")
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parser.parse_args()
|
||||
worker.connect(args.scheduler_address, args.objstore_address, args.worker_address)
|
||||
|
||||
import IPython
|
||||
IPython.embed()
|
||||
+15
-15
@@ -1,70 +1,70 @@
|
||||
import halo
|
||||
import ray
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Test simple functionality
|
||||
|
||||
@halo.remote([str], [str])
|
||||
@ray.remote([str], [str])
|
||||
def print_string(string):
|
||||
print "called print_string with", string
|
||||
f = open("asdfasdf.txt", "w")
|
||||
f.write("successfully called print_string with argument {}.".format(string))
|
||||
return string
|
||||
|
||||
@halo.remote([int, int], [int, int])
|
||||
@ray.remote([int, int], [int, int])
|
||||
def handle_int(a, b):
|
||||
return a + 1, b + 1
|
||||
|
||||
# Test aliasing
|
||||
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def test_alias_f():
|
||||
return np.ones([3, 4, 5])
|
||||
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def test_alias_g():
|
||||
return test_alias_f()
|
||||
|
||||
@halo.remote([], [np.ndarray])
|
||||
@ray.remote([], [np.ndarray])
|
||||
def test_alias_h():
|
||||
return test_alias_g()
|
||||
|
||||
# Test timing
|
||||
|
||||
@halo.remote([], [])
|
||||
@ray.remote([], [])
|
||||
def empty_function():
|
||||
return ()
|
||||
|
||||
@halo.remote([], [int])
|
||||
@ray.remote([], [int])
|
||||
def trivial_function():
|
||||
return 1
|
||||
|
||||
# Test keyword arguments
|
||||
|
||||
@halo.remote([int, str], [str])
|
||||
@ray.remote([int, str], [str])
|
||||
def keyword_fct1(a, b="hello"):
|
||||
return "{} {}".format(a, b)
|
||||
|
||||
@halo.remote([str, str], [str])
|
||||
@ray.remote([str, str], [str])
|
||||
def keyword_fct2(a="hello", b="world"):
|
||||
return "{} {}".format(a, b)
|
||||
|
||||
@halo.remote([int, int, str, str], [str])
|
||||
@ray.remote([int, int, str, str], [str])
|
||||
def keyword_fct3(a, b, c="hello", d="world"):
|
||||
return "{} {} {} {}".format(a, b, c, d)
|
||||
|
||||
# Test variable numbers of arguments
|
||||
|
||||
@halo.remote([int], [str])
|
||||
@ray.remote([int], [str])
|
||||
def varargs_fct1(*a):
|
||||
return " ".join(map(str, a))
|
||||
|
||||
@halo.remote([int, int], [str])
|
||||
@ray.remote([int, int], [str])
|
||||
def varargs_fct2(a, *b):
|
||||
return " ".join(map(str, b))
|
||||
|
||||
try:
|
||||
@halo.remote([int], [])
|
||||
@ray.remote([int], [])
|
||||
def kwargs_throw_exception(**c):
|
||||
return ()
|
||||
kwargs_exception_thrown = False
|
||||
@@ -72,7 +72,7 @@ except:
|
||||
kwargs_exception_thrown = True
|
||||
|
||||
try:
|
||||
@halo.remote([int, str, int], [str])
|
||||
@ray.remote([int, str, int], [str])
|
||||
def varargs_and_kwargs_throw_exception(a, b="hi", *c):
|
||||
return "{} {} {}".format(a, b, c)
|
||||
varargs_and_kwargs_exception_thrown = False
|
||||
|
||||
+13
-13
@@ -3,12 +3,12 @@ import argparse
|
||||
import numpy as np
|
||||
|
||||
import test_functions
|
||||
import halo.arrays.remote as ra
|
||||
import halo.arrays.distributed as da
|
||||
import ray.arrays.remote as ra
|
||||
import ray.arrays.distributed as da
|
||||
|
||||
import halo
|
||||
import halo.services as services
|
||||
import halo.worker as worker
|
||||
import ray
|
||||
import ray.services as services
|
||||
import ray.worker as worker
|
||||
|
||||
parser = argparse.ArgumentParser(description='Parse addresses for the worker to connect to.')
|
||||
parser.add_argument("--scheduler-address", default="127.0.0.1:10001", type=str, help="the scheduler's address")
|
||||
@@ -19,13 +19,13 @@ if __name__ == '__main__':
|
||||
args = parser.parse_args()
|
||||
worker.connect(args.scheduler_address, args.objstore_address, args.worker_address)
|
||||
|
||||
halo.register_module(test_functions)
|
||||
halo.register_module(ra)
|
||||
halo.register_module(ra.random)
|
||||
halo.register_module(ra.linalg)
|
||||
halo.register_module(da)
|
||||
halo.register_module(da.random)
|
||||
halo.register_module(da.linalg)
|
||||
halo.register_module(sys.modules[__name__])
|
||||
ray.register_module(test_functions)
|
||||
ray.register_module(ra)
|
||||
ray.register_module(ra.random)
|
||||
ray.register_module(ra.linalg)
|
||||
ray.register_module(da)
|
||||
ray.register_module(da.random)
|
||||
ray.register_module(da.linalg)
|
||||
ray.register_module(sys.modules[__name__])
|
||||
|
||||
worker.main_loop()
|
||||
|
||||
Reference in New Issue
Block a user