[JavaWorker] Enable java worker support (#2094)

* Enable java worker support
--------------------------
This commit includes a tailored version of the Java worker implementation from Ant Financial.
The changes for build system, python module, src module and arrow are in other commits, this commit consists of the following modules:
 - java/api: Ray API definition
 - java/common: utilities
 - java/hook: binary rewrite of the Java byte-code for remote execution
 - java/runtime-common: common implementation of the runtime in worker
 - java/runtime-dev: a pure-java mock implementation of the runtime for fast development
 - java/runtime-native: a native implementation of the runtime
 - java/test: various tests

Contributors for this work:
 Guyang Song, Peng Cao, Senlin Zhu,Xiaoying Chu, Yiming Yu, Yujie Liu, Zhenyu Guo

* change the format of java help document from markdown to RST

* update the vesion of Arrow for java worker

* adapt the new version of plasma java client from arrow which use byte[] instead of custom type

* add java worker test to ci

* add the example module for better usage guide
This commit is contained in:
Yujie Liu
2018-05-27 05:38:50 +08:00
committed by Philipp Moritz
parent 74cca3b284
commit a8d3c057c1
193 changed files with 22675 additions and 5 deletions
+15
View File
@@ -0,0 +1,15 @@
<assembly>
<id>ear</id>
<formats>
<format>zip</format>
</formats>
<dependencySets>
<dependencySet>
<useProjectArtifact>true</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
</dependencySet>
</dependencySets>
<fileSets>
</fileSets>
</assembly>
+82
View File
@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<groupId>org.ray.parent</groupId>
<artifactId>ray-superpom</artifactId>
<version>1.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.ray</groupId>
<artifactId>ray-runtime-native</artifactId>
<name>native runtime for ray</name>
<description>native runtime for ray</description>
<url></url>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.ray</groupId>
<artifactId>ray-runtime-common</artifactId>
<version>1.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>net.lingala.zip4j</groupId>
<artifactId>zip4j</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-plasma</artifactId>
</dependency>
</dependencies>
<build>
<finalName>ray-runtime-deploy</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>assembly.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
@@ -0,0 +1,251 @@
package org.ray.core.impl;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.arrow.plasma.ObjectStoreLink;
import org.apache.arrow.plasma.PlasmaClient;
import org.ray.api.*;
import org.ray.api.funcs.RayFunc_2_1;
import org.ray.core.RayRuntime;
import org.ray.core.UniqueIdHelper;
import org.ray.core.WorkerContext;
import org.ray.core.model.RayParameters;
import org.ray.core.model.WorkerMode;
import org.ray.runner.RunManager;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.LocalSchedulerLink;
import org.ray.spi.NopRemoteFunctionManager;
import org.ray.spi.PathConfig;
import org.ray.spi.RemoteFunctionManager;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.impl.DefaultLocalSchedulerClient;
import org.ray.spi.impl.NativeRemoteFunctionManager;
import org.ray.spi.impl.RedisClient;
import org.ray.spi.impl.StateStoreProxyImpl;
import org.ray.spi.model.AddressInfo;
import org.ray.util.exception.TaskExecutionException;
import org.ray.util.logger.RayLog;
/**
* native runtime for local box and cluster run
*/
public class RayNativeRuntime extends RayRuntime {
static {
System.err.println("Current working directory is " + System.getProperty("user.dir"));
System.loadLibrary("local_scheduler_library_java");
System.loadLibrary("plasma_java");
}
private StateStoreProxy stateStoreProxy;
private KeyValueStoreLink kvStore = null;
private RunManager manager = null;
@Override
public void cleanUp() {
if (null != manager) {
manager.cleanup(true);
}
}
protected RayNativeRuntime() {
}
private void initStateStore(String redisAddress) throws Exception {
kvStore = new RedisClient();
kvStore.SetAddr(redisAddress);
stateStoreProxy = new StateStoreProxyImpl(kvStore);
//stateStoreProxy.setStore(kvStore);
stateStoreProxy.initializeGlobalState();
}
@Override
public void start(RayParameters params) throws Exception {
boolean isWorker = (params.worker_mode == WorkerMode.WORKER);
PathConfig pathConfig = new PathConfig(configReader);
// initialize params
if (params.redis_address.length() == 0) {
if (isWorker) {
throw new Error("Redis address must be configured under Worker mode.");
}
startOnebox(params, pathConfig);
initStateStore(params.redis_address);
} else {
initStateStore(params.redis_address);
if (!isWorker) {
List<AddressInfo> nodes = stateStoreProxy.getAddressInfo(params.node_ip_address, 5);
params.object_store_name = nodes.get(0).storeName;
params.object_store_manager_name = nodes.get(0).managerName;
params.local_scheduler_name = nodes.get(0).schedulerName;
}
}
// initialize remote function manager
RemoteFunctionManager funcMgr = params.run_mode.isStaticRewrite() ?
new NativeRemoteFunctionManager(kvStore) :
new NopRemoteFunctionManager(params.driver_id);
// initialize worker context
if (params.worker_mode == WorkerMode.DRIVER) {
// TODO: The relationship between workerID, driver_id and dummy_task.driver_id should be recheck carefully
WorkerContext.workerID = params.driver_id;
}
WorkerContext.init(params);
if (params.onebox_delay_seconds_before_run_app_logic > 0) {
for (int i = 0; i < params.onebox_delay_seconds_before_run_app_logic; ++i) {
System.err.println("Pause for debugger, "
+ (params.onebox_delay_seconds_before_run_app_logic - i)
+ " seconds left ...");
Thread.sleep(1000);
}
}
if (params.worker_mode != WorkerMode.NONE) {
String overwrites = "";
// initialize the links
int release_delay = RayRuntime.configReader
.getIntegerValue("ray", "plasma_default_release_delay", 0,
"how many release requests should be delayed in plasma client");
ObjectStoreLink pLink = new PlasmaClient(params.object_store_name, params.object_store_manager_name, release_delay);
LocalSchedulerLink sLink = new DefaultLocalSchedulerClient(
params.local_scheduler_name,
WorkerContext.currentWorkerID(),
UniqueID.nil,
isWorker,
0
);
init(sLink, pLink, funcMgr, pathConfig);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.object_store_manager_name, params.local_scheduler_name);
}
RayLog.core.info("RayNativeRuntime start with "
+ "store " + params.object_store_name
+ ", manager " + params.object_store_manager_name
+ ", scheduler " + params.local_scheduler_name
);
}
private void startOnebox(RayParameters params, PathConfig paths) throws Exception {
params.cleanup = true;
manager = new RunManager(params, paths, RayRuntime.configReader);
manager.startRayHead();
params.redis_address = manager.info().redisAddress;
params.object_store_name = manager.info().local_stores.get(0).storeName;
params.object_store_manager_name = manager.info().local_stores.get(0).managerName;
params.local_scheduler_name = manager.info().local_stores.get(0).schedulerName;
//params.node_ip_address = NetworkUtil.getIpAddress();
}
private void registerWorker(boolean isWorker, String node_ip_address, String storeName,
String managerName, String schedulerName) {
Map<String, String> workerInfo = new HashMap<>();
String workerId = new String(WorkerContext.currentWorkerID().getBytes());
if (!isWorker) {
workerInfo.put("node_ip_address", node_ip_address);
workerInfo.put("driver_id", workerId);
workerInfo.put("start_time", String.valueOf(System.currentTimeMillis()));
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("plasma_manager_socket", managerName);
workerInfo.put("local_scheduler_socket", schedulerName);
workerInfo.put("name", System.getProperty("user.dir"));
//TODO: worker.redis_client.hmset(b"Drivers:" + worker.workerId, driver_info)
kvStore.Hmset("Drivers:" + workerId, workerInfo);
} else {
workerInfo.put("node_ip_address", node_ip_address);
//TODO:
/*
"stdout_file": os.path.abspath(log_stdout_file.name),
"stderr_file": os.path.abspath(log_stderr_file.name),
*/
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("plasma_manager_socket", managerName);
workerInfo.put("local_scheduler_socket", schedulerName);
//TODO: b"Workers:" + worker.workerId,
kvStore.Hmset("Workers:" + workerId, workerInfo);
}
}
private Object _actor = null;
private UniqueID actorID = UniqueID.nil;
@RayRemote
public static byte[] createActorInActor(byte[] actorId, String className) {
((RayNativeRuntime) RayRuntime.getInstance()).localCreateActorInActor(actorId, className);
return actorId;
}
@SuppressWarnings("unchecked")
@Override
public <T> RayActor<T> create(Class<T> cls) {
UniqueID createTaskId = UniqueIdHelper.nextTaskId(-1);
UniqueID actorId = UniqueIdHelper.taskComputeReturnId(createTaskId, 0, false);
RayActor<T> actor = new RayActor<>(actorId);
UniqueID cursorId;
if (params.run_mode.isRemoteLambda()) {
RayFunc_2_1<byte[], String, byte[]> createActorLambda = RayNativeRuntime::createActorInActor;
cursorId = worker.rpcCreateActor(
createTaskId,
actorId,
UniqueID.nil,
RayFunc_2_1.class,
createActorLambda,
1,
new Object[]{actorId.getBytes(), cls.getName()}
).getObjs()[0].getId();
} else {
cursorId = worker.rpcCreateActor(
createTaskId,
actorId,
() -> RayNativeRuntime.createActorInActor(null, null),
1,
new Object[]{actorId.getBytes(), cls.getName()}
).getObjs()[0].getId();
}
actor.setTaskCursor(cursorId);
return actor;
}
public Object localCreateActorInActor(byte[] actorId, String className) {
try {
actorID = new UniqueID(actorId);
Class<?> cls = Class.forName(className, true, Thread.currentThread().getContextClassLoader());
Constructor<?>[] cts = cls.getConstructors();
for (Constructor<?> ct : cts) {
System.err.println(ct.getName() + ", param count = " + ct.getParameterCount());
}
_actor = cls.getConstructor(new Class<?>[0]).newInstance();
RayLog.core.info("create actor " + actorID + " inside actor ok");
return _actor;
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
e.printStackTrace();
String log = "create actor " + actorID + " for " + className + " failed, ex = " + e
.getMessage();
System.err.println(log);
RayLog.core.error(log, e);
throw new TaskExecutionException(log, e);
}
}
@Override
public Object getLocalActor(UniqueID id) {
if (actorID.equals(id)) {
return _actor;
} else {
return null;
}
}
}
@@ -0,0 +1,13 @@
package org.ray.runner;
public class ProcessInfo {
public Process process;
public String[] cmd;
public RunInfo.ProcessType type;
public String workDir;
public String redisAddress;
public String ip;
public boolean redirect;
public boolean cleanup;
}
@@ -0,0 +1,41 @@
package org.ray.runner;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.ray.spi.model.AddressInfo;
/**
* information of kinds of processes
*/
public class RunInfo {
public String redisAddress;
public List<String> redisShards;
public List<AddressInfo> local_stores = new ArrayList<>();
public ArrayList<List<ProcessInfo>> allProcesses = initProcessInfoArray();
public ArrayList<List<Process>> toBeCleanedProcesses = initProcessArray();
public ArrayList<ProcessInfo> deadProcess = new ArrayList<>();
public enum ProcessType {
PT_WORKER, PT_LOCAL_SCHEDULER, PT_PLASMA_MANAGER, PT_PLASMA_STORE,
PT_GLOBAL_SCHEDULER, PT_REDIS_SERVER, PT_WEB_UI,
PT_DRIVER
}
private ArrayList<List<Process>> initProcessArray() {
ArrayList<List<Process>> processes = new ArrayList<>();
for (ProcessType ignored : ProcessType.values()) {
processes.add(Collections.synchronizedList(new ArrayList<>()));
}
return processes;
}
private ArrayList<List<ProcessInfo>> initProcessInfoArray() {
ArrayList<List<ProcessInfo>> processes = new ArrayList<>();
for (ProcessType ignored : ProcessType.values()) {
processes.add(Collections.synchronizedList(new ArrayList<>()));
}
return processes;
}
}
@@ -0,0 +1,766 @@
package org.ray.runner;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.ray.api.UniqueID;
import org.ray.core.model.RayParameters;
import org.ray.core.model.RunMode;
import org.ray.runner.RunInfo.ProcessType;
import org.ray.spi.PathConfig;
import org.ray.spi.model.AddressInfo;
import org.ray.util.StringUtil;
import org.ray.util.config.ConfigReader;
import org.ray.util.logger.RayLog;
import redis.clients.jedis.Jedis;
/**
* Ray service management on one box
*/
public class RunManager {
public static final int INT16_MAX = 32767;
private RayParameters params;
private PathConfig paths;
private ConfigReader configReader;
private String procStdoutFileName = "";
private String procStderrFileName = "";
private RunInfo runInfo = new RunInfo();
public RunInfo info() {
return runInfo;
}
public PathConfig getPathManager() {
return paths;
}
public String getProcStdoutFileName() {
return procStdoutFileName;
}
public String getProcStderrFileName() {
return procStderrFileName;
}
public RunManager(RayParameters params, PathConfig paths, ConfigReader configReader) {
this.params = params;
this.paths = paths;
this.configReader = configReader;
}
public void startRayHead() throws Exception {
if (params.redis_address.length() != 0) {
throw new Exception("Redis address must be empty in head node.");
}
if (params.num_redis_shards <= 0) {
params.num_redis_shards = 1;
}
if (params.num_local_schedulers <= 0) {
params.num_local_schedulers = 1;
}
params.start_workers_from_local_scheduler = params.run_mode != RunMode.SINGLE_BOX;
params.include_global_scheduler = true;
params.start_redis_shards = true;
startRayProcesses();
}
public void startRayNode() throws Exception {
if (params.redis_address.length() == 0) {
throw new Exception("Redis address cannot be empty in non-head node.");
}
if (params.num_redis_shards != 0) {
throw new Exception("Number of redis shards should be zero in non-head node.");
}
if (params.num_local_schedulers <= 0) {
params.num_local_schedulers = 1;
}
//params.start_workers_from_local_scheduler = true;
params.include_global_scheduler = false;
params.start_redis_shards = false;
startRayProcesses();
}
private String buildJavaProcessCommand(
RunInfo.ProcessType pt, String mainClass, String additionalClassPaths,
String additionalConfigs,
String additionalJvmArgs, String workDir, String ip, String redisAddr, String agentlibAddr) {
String cmd = "java -ea -noverify " + params.jvm_parameters + " ";
if (agentlibAddr != null && !agentlibAddr.equals("")) {
cmd += " -agentlib:jdwp=transport=dt_socket,address=" + agentlibAddr + ",server=y,suspend=n";
}
cmd += " -Djava.library.path=" + StringUtil.mergeArray(paths.java_jnilib_paths, ":");
cmd += " -classpath " + StringUtil.mergeArray(paths.java_class_paths, ":");
if (additionalClassPaths.length() > 0) {
cmd += ":" + additionalClassPaths;
}
if (additionalJvmArgs.length() > 0) {
cmd += " " + additionalJvmArgs;
}
cmd += " " + mainClass;
String section = "ray.java.start.";
cmd += " --config=" + configReader.filePath();
cmd += " --overwrite="
+ section + "node_ip_address=" + ip + ";"
+ section + "redis_address=" + redisAddr + ";"
+ section + "working_directory=" + workDir + ";"
+ section + "logging_directory=" + params.logging_directory + ";"
+ section + "working_directory=" + workDir;
if (additionalConfigs.length() > 0) {
cmd += ";" + additionalConfigs;
}
return cmd;
}
private Process startJavaProcess(RunInfo.ProcessType pt, String mainClass,
String additonalClassPaths, String additionalConfigs,
String additionalJvmArgs, String workDir, String ip, String redisAddr, boolean redirect,
boolean cleanup, String agentlibAddr) {
String cmd = buildJavaProcessCommand(pt, mainClass, additonalClassPaths, additionalConfigs,
additionalJvmArgs, workDir, ip, redisAddr, agentlibAddr);
return startProcess(cmd.split(" "), null, pt, workDir, redisAddr, ip, redirect, cleanup);
}
public Process startDriver(String mainClass, String redisAddress, UniqueID driverId,
String workDir, String ip,
String driverClass, String additonalClassPaths, String additionalConfigs) {
String driverConfigs =
"ray.java.start.driver_id=" + driverId + ";ray.java.start.driver_class=" + driverClass;
if (null != additionalConfigs) {
additionalConfigs += ";" + driverConfigs;
} else {
additionalConfigs = driverConfigs;
}
return startJavaProcess(
RunInfo.ProcessType.PT_DRIVER,
mainClass,
additonalClassPaths,
additionalConfigs,
"",
workDir,
ip,
redisAddress,
true,
false,
null
);
}
public void startRayProcesses() {
Jedis _redis_client = null;
RayLog.core.info("start ray processes @ " + params.node_ip_address + " ...");
// start primary redis
if (params.redis_address.length() == 0) {
List<String> primaryShards = startRedis(params.working_directory + "/redis",
params.node_ip_address, params.redis_port, 1, params.redirect, params.cleanup);
params.redis_address = primaryShards.get(0);
String[] args = params.redis_address.split(":");
_redis_client = new Jedis(args[0], Integer.parseInt(args[1]));
// Register the number of Redis shards in the primary shard, so that clients
// know how many redis shards to expect under RedisShards.
_redis_client.set("NumRedisShards", Integer.toString(params.num_redis_shards));
} else {
String[] args = params.redis_address.split(":");
_redis_client = new Jedis(args[0], Integer.parseInt(args[1]));
}
runInfo.redisAddress = params.redis_address;
// start redis shards
if (params.start_redis_shards) {
runInfo.redisShards = startRedis(params.working_directory + "/redis/shards",
params.node_ip_address, params.redis_port + 1, params.num_redis_shards,
params.redirect,
params.cleanup);
// Store redis shard information in the primary redis shard.
for (int i = 0; i < runInfo.redisShards.size(); i++) {
String addr = runInfo.redisShards.get(i);
_redis_client.rpush("RedisShards", addr);
}
}
_redis_client.close();
// start global scheduler
if (params.include_global_scheduler) {
startGlobalScheduler(params.working_directory + "/globalScheduler",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
}
// prepare parameters for node processes
if (params.num_cpus.length == 0) {
params.num_cpus = new int[params.num_local_schedulers];
for (int i = 0; i < params.num_local_schedulers; i++) {
params.num_cpus[i] = 1;
}
} else {
assert (params.num_cpus.length == params.num_local_schedulers);
}
if (params.num_gpus.length == 0) {
params.num_gpus = new int[params.num_local_schedulers];
for (int i = 0; i < params.num_local_schedulers; i++) {
params.num_gpus[i] = 0;
}
} else {
assert (params.num_gpus.length == params.num_local_schedulers);
}
int[] local_num_workers = new int[params.num_local_schedulers];
if (params.num_workers == 0) {
System.arraycopy(params.num_cpus, 0, local_num_workers, 0, params.num_local_schedulers);
} else {
for (int i = 0; i < params.num_local_schedulers; i++) {
local_num_workers[i] = params.num_workers;
}
}
// start object stores
for (int i = 0; i < params.num_local_schedulers; i++) {
AddressInfo info = new AddressInfo();
// store
startObjectStore(i, info, params.working_directory + "/store",
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
// store manager
startObjectManager(i, info,
params.working_directory + "/storeManager", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
runInfo.local_stores.add(info);
}
// start local scheduler
for (int i = 0; i < params.num_local_schedulers; i++) {
int workerCount = 0;
if (params.start_workers_from_local_scheduler) {
workerCount = local_num_workers[i];
local_num_workers[i] = 0;
}
startLocalScheduler(i, runInfo.local_stores.get(i),
params.num_cpus[i], params.num_gpus[i], workerCount,
params.working_directory + "/localScheduler", params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
}
// start local workers
for (int i = 0; i < params.num_local_schedulers; i++) {
runInfo.local_stores.get(i).workerCount = local_num_workers[i];
for (int j = 0; j < local_num_workers[i]; j++) {
startWorker(runInfo.local_stores.get(i).storeName,
runInfo.local_stores.get(i).managerName, runInfo.local_stores.get(i).schedulerName,
params.working_directory + "/worker" + i + "." + j, params.redis_address,
params.node_ip_address, UniqueID.nil, "",
params.redirect, params.cleanup);
}
}
HashSet<RunInfo.ProcessType> excludeTypes = new HashSet<>();
if (!checkAlive(excludeTypes)) {
cleanup(true);
throw new RuntimeException("Start Ray processes failed");
}
}
public boolean checkAlive(HashSet<RunInfo.ProcessType> excludeTypes) {
RunInfo.ProcessType[] types = RunInfo.ProcessType.values();
for (int i = 0; i < types.length; i++) {
if (excludeTypes.contains(types[i])) {
continue;
}
ProcessInfo p;
for (int j = 0; j < runInfo.allProcesses.get(i).size();) {
p = runInfo.allProcesses.get(i).get(j);
if (!p.process.isAlive()) {
RayLog.core.error("Process " + p.hashCode() + " is not alive!" + " Process Type " + types[i].name());
runInfo.deadProcess.add(p);
runInfo.allProcesses.get(i).remove(j);
} else {
j++;
}
}
}
return runInfo.deadProcess.isEmpty();
}
public boolean tryRecoverDeadProcess() {
if (runInfo.deadProcess.isEmpty()) {
return true;
}
/* check the dead process */
for (ProcessInfo info : runInfo.deadProcess) {
if (info.type == RunInfo.ProcessType.PT_LOCAL_SCHEDULER
|| info.type == RunInfo.ProcessType.PT_PLASMA_STORE
|| info.type == RunInfo.ProcessType.PT_PLASMA_MANAGER) {
/* When local scheduler or plasma store or plasma manager process dead, we can not
* recover this node simply by restarting the dead process. Instead, We need to restart
* all the node processes
* */
RayLog.core
.error(info.type.name() + "process dead, we can not simply restart this process");
return false;
}
}
/* try to recover */
ProcessInfo info;
for (int i = 0; i < runInfo.deadProcess.size(); i++) {
info = runInfo.deadProcess.get(i);
if (info.type == RunInfo.ProcessType.PT_GLOBAL_SCHEDULER) {
RayLog.core.error(info.type.name() + "process dead, restart this process");
startProcess(info.cmd, null, info.type, info.workDir, info.redisAddress, info.ip,
info.redirect, info.cleanup);
} else {
RayLog.core.error(info.type.name() + "process dead, we don't deal with it");
}
}
runInfo.deadProcess.clear();
return true;
}
// kill all processes started by startRayHead
public void cleanup(boolean killAll) {
// clean up the process in reverse order
for (int i = ProcessType.values().length - 1; i >= 0; i--) {
if (killAll) {
runInfo.allProcesses.get(i).forEach(p -> {
if (killProcess(p.process)) {
RayLog.core.info("Kill process " + p.hashCode() + " forcely");
}
});
} else {
runInfo.toBeCleanedProcesses.get(i).forEach(p -> {
if (killProcess(p)) {
RayLog.core.info("Kill process " + p.hashCode() + " forcely");
}
});
}
runInfo.toBeCleanedProcesses.get(i).clear();
runInfo.allProcesses.get(i).clear();
runInfo.deadProcess.clear();
}
if (killAll) {
// kill all workers that are forked by local scheduler
// ps aux | grep DefaultWorker | awk '{system("kill "$2);}'
String[] cmd = {"/bin/sh", "-c", ""};
cmd[2] = "ps aux | grep DefaultWorker | grep -v grep | awk \"{print \\$2}\" | xargs kill";
try {
Runtime.getRuntime().exec(cmd);
} catch (IOException e) {
}
}
}
private void record_log_files_in_redis(String redis_address, String node_ip_address,
List<String> logfiles) {
if (redis_address != null && !redis_address.isEmpty() && node_ip_address != null
&& !node_ip_address.isEmpty() && logfiles.size() > 0) {
String[] ip_port = redis_address.split(":");
Jedis jedis_client = new Jedis(ip_port[0], Integer.parseInt(ip_port[1]));
String log_file_list_key = String.format("LOG_FILENAMES:{%s}", node_ip_address);
for (String logfile : logfiles) {
jedis_client.rpush(log_file_list_key, logfile);
}
jedis_client.close();
}
}
private Process startProcess(String[] cmd, Map<String, String> env, RunInfo.ProcessType type,
String workDir,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
File wdir = new File(workDir);
if (!wdir.exists()) {
wdir.mkdirs();
}
int processIndex = runInfo.allProcesses.get(type.ordinal()).size();
ProcessBuilder builder;
List<String> newCmd = Arrays.stream(cmd).filter(s -> s.length() > 0)
.collect(Collectors.toList());
builder = new ProcessBuilder(newCmd);
builder.directory(new File(workDir));
if (redirect) {
String stdout_file;
String stderr_file;
stdout_file = workDir + "/" + processIndex + ".out.txt";
stderr_file = workDir + "/" + processIndex + ".err.txt";
builder.redirectOutput(new File(stdout_file));
builder.redirectError(new File(stderr_file));
List<String> std_file_list = new ArrayList<>();
std_file_list.add(stdout_file);
std_file_list.add(stderr_file);
record_log_files_in_redis(redisAddress, ip, std_file_list);
procStdoutFileName = stdout_file;
procStderrFileName = stderr_file;
}
if (env != null && !env.isEmpty()) {
builder.environment().putAll(env);
}
Process p = null;
try {
p = builder.start();
} catch (IOException e) {
RayLog.core
.error("Start process " + Arrays.toString(cmd).replace(',', ' ') + " in working dir '"
+ workDir + "' failed",
e);
return null;
}
RayLog.core.info(
"Start process " + p.hashCode() + " OK, cmd = " + Arrays.toString(cmd).replace(',', ' ')
+ ", working dir = '" + workDir + "'" + (redirect ? ", redirect" : ", no redirect"));
if (cleanup) {
runInfo.toBeCleanedProcesses.get(type.ordinal()).add(p);
}
ProcessInfo processInfo = new ProcessInfo();
processInfo.cmd = cmd;
processInfo.type = type;
processInfo.workDir = workDir;
processInfo.redisAddress = redisAddress;
processInfo.ip = ip;
processInfo.redirect = redirect;
processInfo.cleanup = cleanup;
processInfo.process = p;
runInfo.allProcesses.get(type.ordinal()).add(processInfo);
return p;
}
private static boolean killProcess(Process p) {
if (p.isAlive()) {
p.destroyForcibly();
return true;
} else {
return false;
}
}
//
// start a redis server
//
// @param ip the IP address of the local node
// @param port port to be opended for redis traffic
// @param numOfShards the number of redis shards to start
// @param redirect whether to redirect the output/err to the log files
// @param cleanup true if using ray in local mode. If cleanup is true, when
// all Redis processes started by this method will be killed by @cleanup
// when the worker exits
// @return primary redis shard address
//
private List<String> startRedis(String workDir, String ip, int port, int numOfShards,
boolean redirect, boolean cleanup) {
ArrayList<String> shards = new ArrayList<>();
String addr;
for (int i = 0; i < numOfShards; i++) {
addr = startRedisInstance(workDir, ip, port + i, redirect, cleanup);
if (addr.length() == 0) {
cleanup(cleanup);
shards.clear();
return shards;
} else {
shards.add(addr);
}
}
for (String shard : shards) {
// TODO: wait for redis server to start
}
return shards;
}
//
// @param ip local node ip, only used for logging purpose
// @param port given port for this redis instance, 0 for auto-selected port
// @return redis server address
//
private String startRedisInstance(String workDir, String ip, int port,
boolean redirect, boolean cleanup) {
String redisFilePath = paths.redis_server;
String redisModule = paths.redis_module;
assert (new File(redisFilePath).exists()) : "file don't exsits : " + redisFilePath;
assert (new File(redisModule).exists()) : "file don't exsits : " + redisModule;
String cmd = redisFilePath + " --protected-mode no --port " + port + " --loglevel warning"
+ " --loadmodule " + redisModule;
Map<String, String> env = null;
Process p = startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_REDIS_SERVER,
workDir + port, "", ip, redirect, cleanup);
if (p == null || !p.isAlive()) {
return "";
}
try {
TimeUnit.MILLISECONDS.sleep(300);
} catch (InterruptedException e) {
}
Jedis client = new Jedis(params.node_ip_address, port);
// Configure Redis to only generate notifications for the export keys.
client.configSet("notify-keyspace-events", "Kl");
// Put a time stamp in Redis to indicate when it was started.
client.set("redis_start_time", LocalDateTime.now().toString());
client.close();
return ip + ":" + port;
}
private void startGlobalScheduler(String workDir, String redisAddress, String ip,
boolean redirect, boolean cleanup) {
String filePath = paths.global_scheduler;
String cmd = filePath + " -r " + redisAddress + " -h " + ip;
Map<String, String> env = null;
startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_GLOBAL_SCHEDULER, workDir,
redisAddress,
ip, redirect, cleanup);
}
private Map<String, String> retrieveEnv(String conf, Map<String, String> env) {
String[] splits = conf.split(" ");
for (String item : splits) {
int idx = item.trim().indexOf('=');
if (idx == -1) {
continue;
}
String key = item.substring(0, idx);
String val = item.substring(idx + 1);
env.put(key, val);
}
return env;
}
/*
* @param storeName The name of the plasma store socket to connect to
*
* @param storeManagerName The name of the plasma manager socket to connect
* to
*
* @param storeManagerAddress the address of the plasma manager to connect
* to
*
* @param workerPath The path of the script to use when the local scheduler
* starts up new workers
*
* @param numCpus The number of CPUs the local scheduler should be
* configured with
*
* @param numGpus The number of GPUs the local scheduler should be
* configured with
*
* @param numWorkers The number of workers that the local scheduler should
* start
*/
private void startLocalScheduler(int index, AddressInfo info, int numCpus,
int numGpus, int numWorkers, String workDir,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
//if (numCpus <= 0)
// numCpus = Runtime.getRuntime().availableProcessors();
if (numGpus <= 0) {
numGpus = 0;
}
String filePath = paths.local_scheduler;
int rpcPort = params.local_scheduler_rpc_port + index;
String name = "/tmp/scheduler" + rpcPort;
String rpcAddr = "";
String cmd = filePath + " -s " + name + " -p " + info.storeName + " -h " + ip + " -n "
+ numWorkers + " -c " + "CPU," + INT16_MAX +",GPU,0";
assert (info.managerName.length() > 0);
assert (info.storeName.length() > 0);
assert (redisAddress.length() > 0);
cmd += " -m " + info.managerName;
String workerCmd = null;
workerCmd = buildWorkerCommand(true, info.storeName, info.managerName, name, UniqueID.nil,
"", workDir + rpcPort, ip, redisAddress);
cmd += " -w \"" + workerCmd + "\"";
if (redisAddress.length() > 0) {
cmd += " -r " + redisAddress;
}
if (info.managerPort > 0) {
cmd += " -a " + params.node_ip_address + ":" + info.managerPort;
}
Map<String, String> env = null;
String[] cmds = StringUtil.Split(cmd, " ", "\"", "\"").toArray(new String[0]);
Process p = startProcess(cmds, env, RunInfo.ProcessType.PT_LOCAL_SCHEDULER,
workDir + rpcPort, redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
}
}
if (p == null || !p.isAlive()) {
info.schedulerName = "";
info.schedulerRpcAddr = "";
throw new RuntimeException("Start local scheduler failed ...");
} else {
info.schedulerName = name;
info.schedulerRpcAddr = rpcAddr;
}
}
private String buildWorkerCommand(boolean isFromLocalScheduler, String storeName,
String storeManagerName, String localSchedulerName,
UniqueID actorId, String actorClass, String workDir, String ip, String redisAddress) {
String workerConfigs = "ray.java.start.object_store_name=" + storeName
+ ";ray.java.start.object_store_manager_name=" + storeManagerName
+ ";ray.java.start.worker_mode=WORKER"
+ ";ray.java.start.local_scheduler_name=" + localSchedulerName;
workerConfigs += ";ray.java.start.deploy=" + params.deploy;
if (!actorId.equals(UniqueID.nil)) {
workerConfigs += ";ray.java.start.actor_id=" + actorId;
}
if (!actorClass.equals("")) {
workerConfigs += ";ray.java.start.driver_class=" + actorClass;
}
String jvmArgs = "";
jvmArgs += " -DlogOutput=" + params.logging_directory + "/workers/*pid_suffix*";
return buildJavaProcessCommand(
RunInfo.ProcessType.PT_WORKER,
"org.ray.runner.worker.DefaultWorker",
"",
workerConfigs,
jvmArgs,
workDir,
ip,
redisAddress,
null
);
}
private void startObjectStore(int index, AddressInfo info, String workDir, String redisAddress,
String ip, boolean redirect, boolean cleanup) {
int occupiedMemoryMB = params.object_store_occupied_memory_MB;
long memoryBytes = occupiedMemoryMB * 1000000;
String filePath = paths.store;
int rpcPort = params.object_store_rpc_port + index;
String name = "/tmp/plasma_store" + rpcPort;
String rpcAddr = "";
String cmd = filePath + " -s " + name + " -m " + memoryBytes;
Map<String, String> env = null;
Process p = startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_PLASMA_STORE,
workDir + rpcPort, redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
}
}
if (p == null || !p.isAlive()) {
info.storeName = "";
info.storeRpcAddr = "";
throw new RuntimeException("Start object store failed ...");
} else {
info.storeName = name;
info.storeRpcAddr = rpcAddr;
}
}
private AddressInfo startObjectManager(int index, AddressInfo info, String workDir,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
String filePath = paths.store_manager;
int rpcPort = params.object_store_manager_rpc_port + index;
String name = "/tmp/plasma_manager" + rpcPort;
String rpcAddr = "";
String cmd = filePath + " -s " + info.storeName + " -m " + name + " -h " + ip + " -p "
+ (params.object_store_manager_ray_listen_port + index)
+ " -r " + redisAddress;
Map<String, String> env = null;
Process p = startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_PLASMA_MANAGER,
workDir + rpcPort, redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
}
}
if (p == null || !p.isAlive()) {
throw new RuntimeException("Start object manager failed ...");
} else {
info.managerName = name;
info.managerPort = params.object_store_manager_ray_listen_port + index;
info.managerRpcAddr = rpcAddr;
return info;
}
}
public void startWorker(String storeName, String storeManagerName,
String localSchedulerName, String workDir, String redisAddress,
String ip, UniqueID actorId, String actorClass,
boolean redirect, boolean cleanup) {
String cmd = buildWorkerCommand(false, storeName, storeManagerName, localSchedulerName, actorId,
actorClass, workDir, ip, redisAddress);
startProcess(cmd.split(" "), null, RunInfo.ProcessType.PT_WORKER, workDir, redisAddress, ip,
redirect, cleanup);
}
}
@@ -0,0 +1,31 @@
package org.ray.runner.worker;
import org.ray.core.RayRuntime;
import org.ray.core.model.WorkerMode;
/**
*
*/
public class DefaultDriver {
//
// " --node-ip-address=" + ip
// + " --redis-address=" + redisAddress
// + " --driver-class" + className
//
public static void main(String[] args) {
try {
RayRuntime.init(args);
assert RayRuntime.getParams().worker_mode == WorkerMode.DRIVER;
String driverClass = RayRuntime.configReader
.getStringValue("ray.java.start", "driver_class", "",
"java class which main is served as the driver in a java worker");
Class<?> cls = Class.forName(driverClass);
cls.getMethod("main", String[].class).invoke(null, (Object) new String[]{});
} catch (Throwable e) {
e.printStackTrace();
System.exit(-1);
}
}
}
@@ -0,0 +1,31 @@
package org.ray.runner.worker;
import org.ray.core.RayRuntime;
import org.ray.core.model.WorkerMode;
/**
* default worker implementation
*/
public class DefaultWorker {
//
// String workerCmd = "java" + " -jarls " + workerPath + " --node-ip-address=" + ip
// + " --object-store-name=" + storeName
// + " --object-store-manager-name=" + storeManagerName
// + " --local-scheduler-name=" + name + " --redis-address=" + redisAddress
//
public static void main(String[] args) {
try {
RayRuntime.init(args);
assert RayRuntime.getParams().worker_mode == WorkerMode.WORKER;
RayRuntime.getInstance().loop();
throw new RuntimeException("Control flow should never reach here");
} catch (Throwable e) {
e.printStackTrace();
System.err
.println("--config=ray.config.ini --overwrite=ray.java.start.worker_mode=WORKER;...");
System.exit(-1);
}
}
}
@@ -0,0 +1,90 @@
package org.ray.spi;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public interface FileStoreLink {
/**
*
*/
boolean mkdirs(String f);
/**
* Check if exists.
*
* @param f source file
*/
boolean exists(String f);
/**
* True if the named path is a directory.
*
* @param f path to check
*/
boolean isDirectory(String f);
/**
* True if the named path is a regular file.
*
* @param f path to check
*/
boolean isFile(String f);
/**
* Delete a file.
*
* @param f the path to delete.
* @param recursive if path is a directory and set to true, the directory is deleted else throws
* an exception. In case of a file the recursive can be set to either true or false.
* @return true if delete is successful else false.
*/
boolean delete(String f, boolean recursive);
/**
* The src file is on the local disk. Add it to FS at the given dst name and the source is kept
* intact afterwards
*
* @param src path
* @param dst path
*/
void copyFromLocalFile(String src, String dst);
/**
* The src file is under FS, and the dst is on the local disk. Copy it from FS control to the
* local dst name.
*
* @param src path
* @param dst path
*/
void copyToLocalFile(String src, String dst);
/**
* Create an FSDataOutputStream at the indicated Path. Files are overwritten by default.
*
* @param f the file to create
*/
DataOutputStream create(String f, boolean overwrite);
/**
* Opens an FSDataInputStream at the indicated Path.
*
* @param f the file name to open
*/
DataInputStream open(String f);
/**
* Append to an existing file (optional operation).
*
* @param f the existing file to be appended.
*/
DataOutputStream append(String f);
/**
* get the file length which is located in the file store.
*
* @param f the existing file path.
*/
int fileLength(String f);
}
@@ -0,0 +1,125 @@
package org.ray.spi;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Ray K/V abstraction
*/
public interface KeyValueStoreLink {
/**
* set address of kv store: format "ip:port"
*/
void SetAddr(String addr);
/**
* check if the kvstore client connected
*/
void CheckConnected() throws Exception;
/**
* Set Key-value into State Store, such as redis
*
* @param key the key to set
* @param value the value to set
* @param field the field is being set when the item is a hash If it is not hash field should be
* filled with null
* @return If the key(or field) already exists, and the StateStoreSet just produced an update of
* the value, 0 is returned, otherwise if a new key(or field) is created 1 is returned.
*/
Long Set(final String key, final String value, final String field);
Long Set(final byte[] key, final byte[] value, final byte[] field);
/**
* multi hash value set
*
* @param key the key in kvStore
* @param hash the multi hash value to be set
* @return Return OK or Exception if hash is empty
*/
String Hmset(final String key, final Map<String, String> hash);
String Hmset(final byte[] key, final Map<byte[], byte[]> hash);
/**
* multi hash value get
*
* @param key the key in kvStore
* @param fields the fields to be get
* @return Multi Bulk Reply specifically a list of all the values associated with the specified
* fields, in the same order of the request.
*/
List<String> Hmget(final String key, final String... fields);
List<byte[]> Hmget(final byte[] key, final byte[]... fields);
/**
* Get the value of the specified key from State Store
*
* @param key the key to get
* @param field the field is being got when the item is a hash If it is not hash field should be
* filled with null
* @return Bulk reply If the key does not exist null is returned.
*/
String Get(final String key, final String field);
byte[] Get(final byte[] key, final byte[] field);
/**
* Delete the key(or the specified field of the key) from State Store
*
* @param key the key to delete
* @param field the field is to delete when the item is a hash If it is not hash field should be
* filled with null
* @return Integer reply, specifically: an integer greater than 0 if the key(or the field) was
* removed 0 if none of the specified key existed
*/
Long Delete(final String key, final String field);
Long Delete(final byte[] key, final byte[] field);
/**
* get all keys which fit the pattern
*/
Set<byte[]> Keys(final byte[] pattern);
/**
* get all keys which fit the pattern
*/
Set<String> Keys(String pattern);
/**
* get all hash of the key
*/
Map<byte[], byte[]> hgetAll(final byte[] key);
/**
* Return the specified elements of the list stored at the specified key.
*
* @return Multi bulk reply, specifically a list of elements in the specified range.
*/
List<String> Lrange(final String key, final long start, final long end);
/**
* @return Integer reply, specifically, the number of elements inside the list after the push
* operation.
*/
Long Rpush(final String key, final String... strings);
Long Rpush(final byte[] key, final byte[]... strings);
/**
*
* @param channel To which channel the message will be published
* @param message What to publish
* @return the number of clients that received the message
*/
Long Publish(final String channel, final String message);
Long Publish(byte[] channel, byte[] message);
Object GetImpl();
}
@@ -0,0 +1,32 @@
package org.ray.spi;
import java.util.List;
import java.util.Set;
import org.ray.spi.model.AddressInfo;
/**
* Proxy client for state store, for instance redis
*/
public interface StateStoreProxy {
/**
* @param rayKvStore the underlying kv store used to store states
*/
void setStore(KeyValueStoreLink rayKvStore);
/**
* initialize the store
*/
void initializeGlobalState() throws Exception;
/**
* @param pattern filter which keys you are interested in.
*/
Set<String> keys(final String pattern);
/**
* @return list of address information
*/
List<AddressInfo> getAddressInfo(final String node_ip_address, int num_retries);
}
@@ -0,0 +1,58 @@
package org.ray.spi.impl;
// automatically generated by the FlatBuffers compiler, do not modify
import java.nio.*;
import java.lang.*;
import java.util.*;
import com.google.flatbuffers.*;
@SuppressWarnings("unused")
public final class Arg extends Table {
public static Arg getRootAsArg(ByteBuffer _bb) { return getRootAsArg(_bb, new Arg()); }
public static Arg getRootAsArg(ByteBuffer _bb, Arg obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { bb_pos = _i; bb = _bb; }
public Arg __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
public String objectIds(int j) { int o = __offset(4); return o != 0 ? __string(__vector(o) + j * 4) : null; }
public int objectIdsLength() { int o = __offset(4); return o != 0 ? __vector_len(o) : 0; }
public String data() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer dataAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
public static int createArg(FlatBufferBuilder builder,
int object_idsOffset,
int dataOffset) {
builder.startObject(2);
Arg.addData(builder, dataOffset);
Arg.addObjectIds(builder, object_idsOffset);
return Arg.endArg(builder);
}
public static void startArg(FlatBufferBuilder builder) { builder.startObject(2); }
public static void addObjectIds(FlatBufferBuilder builder, int objectIdsOffset) { builder.addOffset(0, objectIdsOffset, 0); }
public static int createObjectIdsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
public static void startObjectIdsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addOffset(1, dataOffset, 0); }
public static int endArg(FlatBufferBuilder builder) {
int o = builder.endObject();
return o;
}
//this is manually added to avoid encoding/decoding cost as our object id is a byte array instead of a string
public ByteBuffer objectIdAsByteBuffer(int j) {
int o = __offset(4);
if (o == 0) {
return null;
}
int offset = __vector(o) + j * 4;
offset += bb.getInt(offset);
ByteBuffer src = bb.duplicate().order(ByteOrder.LITTLE_ENDIAN);
int length = src.getInt(offset);
src.position(offset + 4);
src.limit(offset + 4 + length);
return src;
}
}
@@ -0,0 +1,226 @@
package org.ray.spi.impl;
import com.google.flatbuffers.FlatBufferBuilder;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.UniqueID;
import org.ray.core.RayRuntime;
import org.ray.spi.LocalSchedulerLink;
import org.ray.spi.model.FunctionArg;
import org.ray.spi.model.TaskSpec;
import org.ray.util.logger.RayLog;
/**
* JNI-based local scheduler link provider
*/
public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
public DefaultLocalSchedulerClient(String schedulerSockName, UniqueID clientId, UniqueID actorId,
boolean isWorker, long numGpus) {
_client = _init(schedulerSockName, clientId.getBytes(), actorId.getBytes(), isWorker,
numGpus);
}
@Override
public void submitTask(TaskSpec task) {
ByteBuffer info = TaskSpec2Info(task);
byte[] a = null;
if(!task.actorId.isNil()) {
a = task.cursorId.getBytes();
}
_submitTask(_client, a, info, info.position(), info.remaining());
}
@Override
public TaskSpec getTaskTodo() {
byte[] bytes = _getTaskTodo(_client);
assert (null != bytes);
ByteBuffer bb = ByteBuffer.wrap(bytes);
return TaskInfo2Spec(bb);
}
public void destroy() {
_destroy(_client);
}
@Override
public void notifyUnblocked() {
_notify_unblocked(_client);
}
@Override
public void reconstructObject(UniqueID objectId) {
_reconstruct_object(_client, objectId.getBytes());
}
@Override
public void markTaskPutDependency(UniqueID taskId, UniqueID objectId) {
_put_object(_client, taskId.getBytes(), objectId.getBytes());
}
private long _client = 0;
private static ThreadLocal<ByteBuffer> _taskBuffer = ThreadLocal.withInitial(() -> {
ByteBuffer bb = ByteBuffer
.allocateDirect(RayRuntime.getParams().max_submit_task_buffer_size_bytes);
bb.order(ByteOrder.LITTLE_ENDIAN);
return bb;
});
public static ByteBuffer TaskSpec2Info(TaskSpec task) {
ByteBuffer bb = _taskBuffer.get();
bb.clear();
FlatBufferBuilder fbb = new FlatBufferBuilder(bb);
int driver_idOffset = fbb.createString(task.driverId.ToByteBuffer());
int task_idOffset = fbb.createString(task.taskId.ToByteBuffer());
int parent_task_idOffset = fbb.createString(task.parentTaskId.ToByteBuffer());
int parent_counter = task.parentCounter;
int actorCreate_idOffset = fbb.createString(task.createActorId.ToByteBuffer());
int actorCreateDummy_idOffset = fbb.createString(UniqueID.nil.ToByteBuffer());
int actor_idOffset = fbb.createString(task.actorId.ToByteBuffer());
int actor_handle_idOffset = fbb.createString(task.actorHandleId.ToByteBuffer());
int actor_counter = task.actorCounter;
int function_idOffset = fbb.createString(task.functionId.ToByteBuffer());
// serialize args
int[] argsOffsets = new int[task.args.length];
for (int i = 0; i < argsOffsets.length; i++) {
int object_idOffset = 0;
int dataOffset = 0;
if (task.args[i].ids != null) {
int id_count = task.args[i].ids.size();
int[] idOffsets = new int[id_count];
for (int k = 0; k < id_count; k++) {
idOffsets[k] = fbb.createString(task.args[i].ids.get(k).ToByteBuffer());
}
object_idOffset = fbb.createVectorOfTables(idOffsets);
}
if (task.args[i].data != null) {
dataOffset = fbb.createString(ByteBuffer.wrap(task.args[i].data));
}
argsOffsets[i] = Arg.createArg(fbb, object_idOffset, dataOffset);
}
int argsOffset = fbb.createVectorOfTables(argsOffsets);
// serialize returns
int return_count = task.returnIds.length;
int[] returnsOffsets = new int[return_count];
for (int k = 0; k < return_count; k++) {
returnsOffsets[k] = fbb.createString(task.returnIds[k].ToByteBuffer());
}
int returnsOffset = fbb.createVectorOfTables(returnsOffsets);
// serialize required resources
// The required_resources vector indicates the quantities of the different
// resources required by this task. The index in this vector corresponds to
// the resource type defined in the ResourceIndex enum. For example,
int[] required_resourcesOffsets = new int[1];
for (int i = 0; i < required_resourcesOffsets.length; i++) {
int keyOffset = 0;
keyOffset = fbb.createString(ByteBuffer.wrap("CPU".getBytes()));
required_resourcesOffsets[i] = ResourcePair.createResourcePair(fbb,keyOffset,0.0);
}
int requiredResourcesOffset = fbb.createVectorOfTables(required_resourcesOffsets);
int root = TaskInfo.createTaskInfo(
fbb, driver_idOffset, task_idOffset,
parent_task_idOffset, parent_counter,
actorCreate_idOffset, actorCreateDummy_idOffset,
actor_idOffset, actor_handle_idOffset, actor_counter,
false, function_idOffset,
argsOffset, returnsOffset, requiredResourcesOffset);
fbb.finish(root);
ByteBuffer buffer = fbb.dataBuffer();
if (buffer.remaining() > RayRuntime.getParams().max_submit_task_buffer_size_bytes) {
RayLog.core.error(
"Allocated buffer is not enough to transfer the task specification: " + RayRuntime
.getParams().max_submit_task_buffer_size_bytes + " vs " + buffer.remaining());
assert (false);
}
return buffer;
}
public static TaskSpec TaskInfo2Spec(ByteBuffer bb) {
bb.order(ByteOrder.LITTLE_ENDIAN);
TaskInfo info = TaskInfo.getRootAsTaskInfo(bb);
TaskSpec spec = new TaskSpec();
spec.driverId = new UniqueID(info.driverIdAsByteBuffer());
spec.taskId = new UniqueID(info.taskIdAsByteBuffer());
spec.parentTaskId = new UniqueID(info.parentTaskIdAsByteBuffer());
spec.parentCounter = info.parentCounter();
spec.actorId = new UniqueID(info.actorIdAsByteBuffer());
spec.actorCounter = info.actorCounter();
spec.createActorId = new UniqueID(info.actorCreationIdAsByteBuffer());
spec.functionId = new UniqueID(info.functionIdAsByteBuffer());
List<FunctionArg> args = new ArrayList<>();
for (int i = 0; i < info.argsLength(); i++) {
FunctionArg darg = new FunctionArg();
Arg sarg = info.args(i);
int id_count = sarg.objectIdsLength();
if (id_count > 0) {
darg.ids = new ArrayList<>();
for (int j = 0; j < id_count; j++) {
ByteBuffer lbb = sarg.objectIdAsByteBuffer(j);
assert (lbb != null && lbb.remaining() > 0);
darg.ids.add(new UniqueID(lbb));
}
}
ByteBuffer lbb = sarg.dataAsByteBuffer();
if (lbb != null && lbb.remaining() > 0) {
// TODO: how to avoid memory copy
darg.data = new byte[lbb.remaining()];
lbb.get(darg.data);
}
args.add(darg);
}
spec.args = args.toArray(new FunctionArg[0]);
List<UniqueID> rids = new ArrayList<>();
for (int i = 0; i < info.returnsLength(); i++) {
ByteBuffer lbb = info.returnsAsByteBuffer(i);
assert (lbb != null && lbb.remaining() > 0);
rids.add(new UniqueID(lbb));
}
spec.returnIds = rids.toArray(new UniqueID[0]);
return spec;
}
native private static long _init(String localSchedulerSocket, byte[] workerId, byte[] actorId,
boolean isWorker, long numGpus);
// task -> TaskInfo (with FlatBuffer)
native private static void _submitTask(long client, byte[] cursorId, /*Direct*/ByteBuffer task, int pos, int sz);
// return TaskInfo (in FlatBuffer)
native private static byte[] _getTaskTodo(long client);
native private static byte[] _computePutId(long client, byte[] taskId, int putIndex);
native private static void _destroy(long client);
native private static void _task_done(long client);
native private static void _reconstruct_object(long client, byte[] objectId);
native private static void _notify_unblocked(long client);
native private static void _put_object(long client, byte[] taskId, byte[] objectId);
}
@@ -0,0 +1,135 @@
package org.ray.spi.impl;
import java.io.File;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.ConcurrentHashMap;
import net.lingala.zip4j.core.ZipFile;
import org.ray.api.UniqueID;
import org.ray.core.RayRuntime;
import org.ray.hook.JarRewriter;
import org.ray.hook.runtime.JarLoader;
import org.ray.hook.runtime.LoadedFunctions;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.RemoteFunctionManager;
import org.ray.util.FileUtil;
import org.ray.util.SystemUtil;
import org.ray.util.logger.RayLog;
/**
* native implementation of remote function manager
*/
public class NativeRemoteFunctionManager implements RemoteFunctionManager {
public NativeRemoteFunctionManager(KeyValueStoreLink kvStore) throws NoSuchAlgorithmException {
this.kvStore = kvStore;
md = MessageDigest.getInstance("SHA-1");
File appDir = new File(this.appDir);
if (!appDir.exists()) {
appDir.mkdirs();
}
}
@Override
public UniqueID registerResource(byte[] resourceZip) {
byte[] digest = md.digest(resourceZip);
assert (digest.length == UniqueID.LENGTH);
UniqueID resourceId = new UniqueID(digest);
// TODO: resources must be saved in persistent store
// instead of cache
//if (!Ray.exist(resourceId)) {
//Ray.put(resourceId, resourceZip);
kvStore.Set(resourceId.getBytes(), resourceZip, null);
//}
return resourceId;
}
@Override
public byte[] getResource(UniqueID resourceId) {
return kvStore.Get(resourceId.getBytes(), null);
//return (byte[])Ray.get(resourceId);
}
@Override
public void unregisterResource(UniqueID resourceId) {
kvStore.Delete(resourceId.getBytes(), null);
}
@Override
public void registerApp(UniqueID driverId, UniqueID resourceId) {
//Ray.put(driverId, resourceId);
kvStore.Set("App2ResMap", resourceId.toString(), driverId.toString());
}
@Override
public UniqueID getAppResourceId(UniqueID driverId) {
return new UniqueID(kvStore.Get("App2ResMap", driverId.toString()));
}
@Override
public void unregisterApp(UniqueID driverId) {
kvStore.Delete("App2ResMap", driverId.toString());
}
@Override
public LoadedFunctions loadFunctions(UniqueID driverId) {
LoadedFunctions rf = loadedApps.get(driverId);
if (rf == null) {
rf = initLoadedApps(driverId);
}
return rf;
}
private synchronized LoadedFunctions initLoadedApps(UniqueID driverId) {
try {
RayLog.core.info("initLoadedApps" + driverId.toString());
LoadedFunctions rf = loadedApps.get(driverId);
if (rf == null) {
UniqueID resId = new UniqueID(kvStore.Get("App2ResMap", driverId.toString()));
//UniqueID resId = Ray.get(driverId);
byte[] res = getResource(resId);
if (res == null) {
throw new RuntimeException("get resource null, the resId " + resId.toString());
}
RayLog.core.info("ger resource of " + resId.toString() + ", result len " + res.length);
String resPath =
appDir + "/" + driverId.toString() + "/" + String.valueOf(SystemUtil.pid());
File dir = new File(resPath);
if (!dir.exists()) {
dir.mkdirs();
}
String zipPath = resPath + ".zip";
RayLog.rapp.info("unzip app file: zipPath " + zipPath + " resPath " + resPath);
FileUtil.bytesToFile(res, zipPath);
ZipFile zipFile = new ZipFile(zipPath);
zipFile.extractAll(resPath);
rf = JarRewriter
.load(resPath, RayRuntime.getInstance().getPaths().java_runtime_rewritten_jars_dir);
loadedApps.put(driverId, rf);
}
return rf;
} catch (Exception e) {
RayLog.rapp.error("load function for " + driverId + " failed, ex = " + e.getMessage(), e);
return null;
}
}
@Override
public synchronized void unloadFunctions(UniqueID driverId) {
LoadedFunctions rf = loadedApps.get(driverId);
try {
JarLoader.unloadJars(rf.loader);
} catch (Exception e) {
RayLog.rapp.error("unload function for " + driverId + " failed, ex = " + e.getMessage(), e);
}
}
private ConcurrentHashMap<UniqueID, LoadedFunctions> loadedApps = new ConcurrentHashMap<>();
private MessageDigest md;
private String appDir = System.getProperty("user.dir") + "/apps";
private KeyValueStoreLink kvStore;
}
@@ -0,0 +1,35 @@
package org.ray.spi.impl;
/**
* This exception is raised if the object could not be created because there already is an object
* with the same ID in the plasma store.
*/
public class PlasmaObjectExistsException extends Exception {
/**
*
*/
private static final long serialVersionUID = 9128880292504270291L;
public PlasmaObjectExistsException() {
super();
}
public PlasmaObjectExistsException(String message, Throwable cause, boolean enableSuppression,
boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
public PlasmaObjectExistsException(String message, Throwable cause) {
super(message, cause);
}
public PlasmaObjectExistsException(String message) {
super(message);
}
public PlasmaObjectExistsException(Throwable cause) {
super(cause);
}
}
@@ -0,0 +1,35 @@
package org.ray.spi.impl;
/**
* This exception is raised if the object could not be created because the plasma store is unable to
* evict enough objects to create room for it.
*/
public class PlasmaOutOfMemoryException extends Exception {
/**
*
*/
private static final long serialVersionUID = -2786069077559520659L;
public PlasmaOutOfMemoryException() {
super();
}
public PlasmaOutOfMemoryException(String message, Throwable cause, boolean enableSuppression,
boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
public PlasmaOutOfMemoryException(String message, Throwable cause) {
super(message, cause);
}
public PlasmaOutOfMemoryException(String message) {
super(message);
}
public PlasmaOutOfMemoryException(Throwable cause) {
super(cause);
}
}
@@ -0,0 +1,206 @@
package org.ray.spi.impl;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.ray.spi.KeyValueStoreLink;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
public class RedisClient implements KeyValueStoreLink {
private String redisAddress;
private JedisPool jedisPool;
public RedisClient() {
}
public RedisClient(String addr) {
SetAddr(addr);
}
@Override
public synchronized void SetAddr(String addr) {
if (StringUtils.isEmpty(redisAddress)) {
redisAddress = addr;
String[] ipPort = addr.split(":");
JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
//TODO NUM maybe equels to the thread num
jedisPoolConfig.setMaxTotal(1);
jedisPool = new JedisPool(jedisPoolConfig, ipPort[0], Integer.parseInt(ipPort[1]), 30000);
}
}
@Override
public void CheckConnected() throws Exception {
if (jedisPool == null) {
throw new Exception("the GlobalState API can't be used before ray init.");
}
}
@Override
public Long Set(final String key, final String value, final String field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
jedis.set(key, value);
return (long) 1;
} else {
return jedis.hset(key, field, value);
}
}
}
@Override
public Long Set(byte[] key, byte[] value, byte[] field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
jedis.set(key, value);
return (long) 1;
} else {
return jedis.hset(key, field, value);
}
}
}
@Override
public String Get(final String key, final String field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
return jedis.get(key);
} else {
return jedis.hget(key, field);
}
}
}
@Override
public byte[] Get(byte[] key, byte[] field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
return jedis.get(key);
} else {
return jedis.hget(key, field);
}
}
}
@Override
public Long Delete(final String key, final String field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
return jedis.del(key);
} else {
return jedis.hdel(key, field);
}
}
}
@Override
public Long Delete(byte[] key, byte[] field) {
try (Jedis jedis = jedisPool.getResource()) {
if (field == null) {
return jedis.del(key);
} else {
return jedis.hdel(key, field);
}
}
}
@Override
public String Hmset(String key, Map<String, String> hash) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.hmset(key, hash);
}
}
@Override
public String Hmset(byte[] key, Map<byte[], byte[]> hash) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.hmset(key, hash);
}
}
@Override
public List<String> Hmget(String key, String... fields) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.hmget(key, fields);
}
}
@Override
public List<byte[]> Hmget(byte[] key, byte[]... fields) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.hmget(key, fields);
}
}
@Override
public Set<byte[]> Keys(byte[] pattern) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.keys(pattern);
}
}
@Override
public Set<String> Keys(String pattern) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.keys(pattern);
}
}
@Override
public Map<byte[], byte[]> hgetAll(byte[] key) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.hgetAll(key);
}
}
@Override
public List<String> Lrange(String key, long start, long end) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.lrange(key, start, end);
}
}
@Override
public Long Rpush(String key, String... strings) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.rpush(key, strings);
}
}
@Override
public Long Rpush(byte[] key, byte[]... strings) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.rpush(key, strings);
}
}
@Override
public Long Publish(String channel, String message) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.publish(channel, message);
}
}
@Override
public Long Publish(byte[] channel, byte[] message) {
try (Jedis jedis = jedisPool.getResource()) {
return jedis.publish(channel, message);
}
}
@Override
public Object GetImpl() {
return jedisPool;
}
}
@@ -0,0 +1,38 @@
package org.ray.spi.impl;
// automatically generated by the FlatBuffers compiler, do not modify
import java.nio.*;
import java.lang.*;
import java.util.*;
import com.google.flatbuffers.*;
@SuppressWarnings("unused")
public final class ResourcePair extends Table {
public static ResourcePair getRootAsResourcePair(ByteBuffer _bb) { return getRootAsResourcePair(_bb, new ResourcePair()); }
public static ResourcePair getRootAsResourcePair(ByteBuffer _bb, ResourcePair obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { bb_pos = _i; bb = _bb; }
public ResourcePair __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
public String key() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer keyAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
public double value() { int o = __offset(6); return o != 0 ? bb.getDouble(o + bb_pos) : 0.0; }
public static int createResourcePair(FlatBufferBuilder builder,
int keyOffset,
double value) {
builder.startObject(2);
ResourcePair.addValue(builder, value);
ResourcePair.addKey(builder, keyOffset);
return ResourcePair.endResourcePair(builder);
}
public static void startResourcePair(FlatBufferBuilder builder) { builder.startObject(2); }
public static void addKey(FlatBufferBuilder builder, int keyOffset) { builder.addOffset(0, keyOffset, 0); }
public static void addValue(FlatBufferBuilder builder, double value) { builder.addDouble(1, value, 0.0); }
public static int endResourcePair(FlatBufferBuilder builder) {
int o = builder.endObject();
return o;
}
}
@@ -0,0 +1,197 @@
package org.ray.spi.impl;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.model.AddressInfo;
import org.ray.util.logger.RayLog;
/**
* A class used to interface with the Ray control state
*/
public class StateStoreProxyImpl implements StateStoreProxy {
public KeyValueStoreLink rayKvStore;
public ArrayList<KeyValueStoreLink> shardStoreList = new ArrayList<>();
public StateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
public void setStore(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
public void checkConnected() throws Exception {
rayKvStore.CheckConnected();
}
public synchronized void initializeGlobalState() throws Exception {
String es;
checkConnected();
String s = rayKvStore.Get("NumRedisShards", null);
if (s == null) {
throw new Exception("NumRedisShards not found in redis.");
}
int numRedisShards = Integer.parseInt(s);
if (numRedisShards < 1) {
es = String.format("Expected at least one Redis shard, found %d", numRedisShards);
throw new Exception(es);
}
List<String> ipAddressPorts = rayKvStore.Lrange("RedisShards", 0, -1);
if (ipAddressPorts.size() != numRedisShards) {
es = String.format("Expected %d Redis shard addresses, found %d.", numRedisShards,
ipAddressPorts.size());
throw new Exception(es);
}
shardStoreList.clear();
for (String ipPort : ipAddressPorts) {
shardStoreList.add(new RedisClient(ipPort));
}
}
public synchronized Set<String> keys(final String pattern) {
Set<String> allKeys = new HashSet<>();
Set<String> tmpKey;
for (KeyValueStoreLink a_shardStoreList : shardStoreList) {
tmpKey = a_shardStoreList.Keys(pattern);
allKeys.addAll(tmpKey);
}
return allKeys;
}
private byte[] CharsetEncode(String str, String Charset) throws UnsupportedEncodingException {
if (str != null) {
return str.getBytes(Charset);
}
return null;
}
private String CharsetDecode(byte[] bs, String Charset) throws UnsupportedEncodingException {
return new String(bs, Charset);
}
/*
* get address info of one node from primary redis
* @param: node ip address, usually local ip address
* @return: a list of SchedulerInfo which contains storeName, managerName, managerPort and schedulerName
* @noteRedis data key is "CL:*", redis data value is a hash.
* The hash contains the following
* "deleted" : 0/1
* "ray_client_id"
* "nodeIpAddress"
* "client_type" : plasma_manager/local_scheduler
* "store_socket_name"(op)
* "manager_socket_name"(op)
* "local_scheduler_socket_name"(op)
*/
public List<AddressInfo> getAddressInfoHelper(final String nodeIpAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use getAddressInfoHelper");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
Set<byte[]> cks = rayKvStore.Keys("CL:*".getBytes());
byte[] key;
List<Map<byte[], byte[]>> plasmaManager = new ArrayList<>();
List<Map<byte[], byte[]>> localScheduler = new ArrayList<>();
for (byte[] ck : cks) {
key = ck;
Map<byte[], byte[]> info = rayKvStore.hgetAll(key);
String deleted = CharsetDecode(info.get("deleted".getBytes()), "US-ASCII");
if (deleted != null) {
if (Boolean.getBoolean(deleted)) {
continue;
}
}
if (!info.containsKey("ray_client_id".getBytes())) {
throw new Exception("no ray_client_id in any client");
} else if (!info.containsKey("nodeIpAddress".getBytes())) {
throw new Exception("no nodeIpAddress in any client");
} else if (!info.containsKey("client_type".getBytes())) {
throw new Exception("no client_type in any client");
}
if (CharsetDecode(info.get("nodeIpAddress".getBytes()), "US-ASCII")
.equals(nodeIpAddress)) {
String clientType = CharsetDecode(info.get("client_type".getBytes()), "US-ASCII");
if (clientType.equals("plasmaManager")) {
plasmaManager.add(info);
} else if (clientType.equals("localScheduler")) {
localScheduler.add(info);
}
}
}
if (plasmaManager.size() < 1 || localScheduler.size() < 1) {
throw new Exception("no plasmaManager or localScheduler");
} else if (plasmaManager.size() != localScheduler.size()) {
throw new Exception("plasmaManager number not Equal localScheduler number");
}
for (int i = 0; i < plasmaManager.size(); i++) {
AddressInfo si = new AddressInfo();
si.storeName = CharsetDecode(plasmaManager.get(i).get("store_socket_name".getBytes()),
"US-ASCII");
si.managerName = CharsetDecode(plasmaManager.get(i).get("manager_socket_name".getBytes()),
"US-ASCII");
byte[] rpc = plasmaManager.get(i).get("manager_rpc_name".getBytes());
if (rpc != null) {
si.managerRpcAddr = CharsetDecode(rpc, "US-ASCII");
}
rpc = plasmaManager.get(i).get("store_rpc_name".getBytes());
if (rpc != null) {
si.storeRpcAddr = CharsetDecode(rpc, "US-ASCII");
}
String managerAddr = CharsetDecode(plasmaManager.get(i).get("manager_address".getBytes()),
"US-ASCII");
si.managerPort = Integer.parseInt(managerAddr.split(":")[1]);
si.schedulerName = CharsetDecode(
localScheduler.get(i).get("local_scheduler_socket_name".getBytes()), "US-ASCII");
rpc = localScheduler.get(i).get("local_scheduler_rpc_name".getBytes());
if (rpc != null) {
si.schedulerRpcAddr = CharsetDecode(rpc, "US-ASCII");
}
schedulerInfo.add(si);
}
return schedulerInfo;
}
public List<AddressInfo> getAddressInfo(final String node_ip_address, int numRetries) {
int count = 0;
while (count < numRetries) {
try {
return getAddressInfoHelper(node_ip_address);
} catch (Exception e) {
try {
TimeUnit.MILLISECONDS.sleep(1000);
} catch (InterruptedException ie) {
RayLog.core.error("error at StateStoreProxyImpl getAddressInfo", e);
throw new RuntimeException(e);
}
}
count++;
}
throw new RuntimeException("cannot get address info from state store");
}
}
@@ -0,0 +1,119 @@
package org.ray.spi.impl;
// automatically generated by the FlatBuffers compiler, do not modify
import java.nio.*;
import java.lang.*;
import java.util.*;
import com.google.flatbuffers.*;
@SuppressWarnings("unused")
public final class TaskInfo extends Table {
public static TaskInfo getRootAsTaskInfo(ByteBuffer _bb) { return getRootAsTaskInfo(_bb, new TaskInfo()); }
public static TaskInfo getRootAsTaskInfo(ByteBuffer _bb, TaskInfo obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { bb_pos = _i; bb = _bb; }
public TaskInfo __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
public String driverId() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer driverIdAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
public String taskId() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer taskIdAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
public String parentTaskId() { int o = __offset(8); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer parentTaskIdAsByteBuffer() { return __vector_as_bytebuffer(8, 1); }
public int parentCounter() { int o = __offset(10); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
public String actorCreationId() { int o = __offset(12); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer actorCreationIdAsByteBuffer() { return __vector_as_bytebuffer(12, 1); }
public String actorCreationDummyObjectId() { int o = __offset(14); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer actorCreationDummyObjectIdAsByteBuffer() { return __vector_as_bytebuffer(14, 1); }
public String actorId() { int o = __offset(16); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer actorIdAsByteBuffer() { return __vector_as_bytebuffer(16, 1); }
public String actorHandleId() { int o = __offset(18); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer actorHandleIdAsByteBuffer() { return __vector_as_bytebuffer(18, 1); }
public int actorCounter() { int o = __offset(20); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
public boolean isActorCheckpointMethod() { int o = __offset(22); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
public String functionId() { int o = __offset(24); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer functionIdAsByteBuffer() { return __vector_as_bytebuffer(24, 1); }
public Arg args(int j) { return args(new Arg(), j); }
public Arg args(Arg obj, int j) { int o = __offset(26); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
public int argsLength() { int o = __offset(26); return o != 0 ? __vector_len(o) : 0; }
public String returns(int j) { int o = __offset(28); return o != 0 ? __string(__vector(o) + j * 4) : null; }
public int returnsLength() { int o = __offset(28); return o != 0 ? __vector_len(o) : 0; }
public ResourcePair requiredResources(int j) { return requiredResources(new ResourcePair(), j); }
public ResourcePair requiredResources(ResourcePair obj, int j) { int o = __offset(30); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
public int requiredResourcesLength() { int o = __offset(30); return o != 0 ? __vector_len(o) : 0; }
public static int createTaskInfo(FlatBufferBuilder builder,
int driver_idOffset,
int task_idOffset,
int parent_task_idOffset,
int parent_counter,
int actor_creation_idOffset,
int actor_creation_dummy_object_idOffset,
int actor_idOffset,
int actor_handle_idOffset,
int actor_counter,
boolean is_actor_checkpoint_method,
int function_idOffset,
int argsOffset,
int returnsOffset,
int required_resourcesOffset) {
builder.startObject(14);
TaskInfo.addRequiredResources(builder, required_resourcesOffset);
TaskInfo.addReturns(builder, returnsOffset);
TaskInfo.addArgs(builder, argsOffset);
TaskInfo.addFunctionId(builder, function_idOffset);
TaskInfo.addActorCounter(builder, actor_counter);
TaskInfo.addActorHandleId(builder, actor_handle_idOffset);
TaskInfo.addActorId(builder, actor_idOffset);
TaskInfo.addActorCreationDummyObjectId(builder, actor_creation_dummy_object_idOffset);
TaskInfo.addActorCreationId(builder, actor_creation_idOffset);
TaskInfo.addParentCounter(builder, parent_counter);
TaskInfo.addParentTaskId(builder, parent_task_idOffset);
TaskInfo.addTaskId(builder, task_idOffset);
TaskInfo.addDriverId(builder, driver_idOffset);
TaskInfo.addIsActorCheckpointMethod(builder, is_actor_checkpoint_method);
return TaskInfo.endTaskInfo(builder);
}
public static void startTaskInfo(FlatBufferBuilder builder) { builder.startObject(14); }
public static void addDriverId(FlatBufferBuilder builder, int driverIdOffset) { builder.addOffset(0, driverIdOffset, 0); }
public static void addTaskId(FlatBufferBuilder builder, int taskIdOffset) { builder.addOffset(1, taskIdOffset, 0); }
public static void addParentTaskId(FlatBufferBuilder builder, int parentTaskIdOffset) { builder.addOffset(2, parentTaskIdOffset, 0); }
public static void addParentCounter(FlatBufferBuilder builder, int parentCounter) { builder.addInt(3, parentCounter, 0); }
public static void addActorCreationId(FlatBufferBuilder builder, int actorCreationIdOffset) { builder.addOffset(4, actorCreationIdOffset, 0); }
public static void addActorCreationDummyObjectId(FlatBufferBuilder builder, int actorCreationDummyObjectIdOffset) { builder.addOffset(5, actorCreationDummyObjectIdOffset, 0); }
public static void addActorId(FlatBufferBuilder builder, int actorIdOffset) { builder.addOffset(6, actorIdOffset, 0); }
public static void addActorHandleId(FlatBufferBuilder builder, int actorHandleIdOffset) { builder.addOffset(7, actorHandleIdOffset, 0); }
public static void addActorCounter(FlatBufferBuilder builder, int actorCounter) { builder.addInt(8, actorCounter, 0); }
public static void addIsActorCheckpointMethod(FlatBufferBuilder builder, boolean isActorCheckpointMethod) { builder.addBoolean(9, isActorCheckpointMethod, false); }
public static void addFunctionId(FlatBufferBuilder builder, int functionIdOffset) { builder.addOffset(10, functionIdOffset, 0); }
public static void addArgs(FlatBufferBuilder builder, int argsOffset) { builder.addOffset(11, argsOffset, 0); }
public static int createArgsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
public static void startArgsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
public static void addReturns(FlatBufferBuilder builder, int returnsOffset) { builder.addOffset(12, returnsOffset, 0); }
public static int createReturnsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
public static void startReturnsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
public static void addRequiredResources(FlatBufferBuilder builder, int requiredResourcesOffset) { builder.addOffset(13, requiredResourcesOffset, 0); }
public static int createRequiredResourcesVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
public static void startRequiredResourcesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
public static int endTaskInfo(FlatBufferBuilder builder) {
int o = builder.endObject();
return o;
}
//this is manually added to avoid encoding/decoding cost as our object id is a byte array instead of a string
public ByteBuffer returnsAsByteBuffer(int j) {
int o = __offset(28);
if (o == 0) {
return null;
}
int offset = __vector(o) + j * 4;
offset += bb.getInt(offset);
ByteBuffer src = bb.duplicate().order(ByteOrder.LITTLE_ENDIAN);
int length = src.getInt(offset);
src.position(offset + 4);
src.limit(offset + 4 + length);
return src;
}
}