[Java] Java worker cluster support (#2359)

This commit is contained in:
Zhijun Fu
2018-07-10 01:20:41 +08:00
committed by Robert Nishihara
parent 4ef9d15315
commit fa33ea5283
16 changed files with 661 additions and 19 deletions
@@ -110,10 +110,14 @@ public class RunManager {
public Process startDriver(String mainClass, String redisAddress, UniqueID driverId,
String workDir, String ip,
String driverClass, String additonalClassPaths, String
additionalConfigs) {
String driverClass, String driverArgs, String additonalClassPaths,
String additionalConfigs) {
String driverConfigs =
"ray.java.start.driver_id=" + driverId + ";ray.java.start.driver_class=" + driverClass;
if (driverArgs != null) {
driverConfigs += ";ray.java.start.driver_args=" + driverArgs;
}
if (null != additionalConfigs) {
additionalConfigs += ";" + driverConfigs;
} else {
@@ -175,7 +179,8 @@ public class RunManager {
+ section + "redis_address=" + redisAddr + ";"
+ section + "working_directory=" + workDir + ";"
+ section + "logging_directory=" + params.logging_directory + ";"
+ section + "working_directory=" + workDir;
+ section + "working_directory=" + workDir + ";"
+ section + "run_mode=" + params.run_mode;
if (additionalConfigs.length() > 0) {
cmd += ";" + additionalConfigs;
@@ -21,8 +21,12 @@ public class DefaultDriver {
String driverClass = RayRuntime.configReader
.getStringValue("ray.java.start", "driver_class", "",
"java class which main is served as the driver in a java worker");
String driverArgs = RayRuntime.configReader
.getStringValue("ray.java.start", "driver_args", "",
"arguments for the java class main function which is served at the driver");
Class<?> cls = Class.forName(driverClass);
cls.getMethod("main", String[].class).invoke(null, (Object) new String[] {});
String[] argsArray = (driverArgs != null) ? driverArgs.split(",") : (new String[] {});
cls.getMethod("main", String[].class).invoke(null, (Object) argsArray);
} catch (Throwable e) {
e.printStackTrace();
System.exit(-1);
@@ -80,6 +80,8 @@ public class StateStoreProxyImpl implements StateStoreProxy {
return getAddressInfoHelper(nodeIpAddress);
} catch (Exception e) {
try {
RayLog.core.warn("Error occurred in StateStoreProxyImpl getAddressInfo, "
+ (numRetries - count) + " retries remaining", e);
TimeUnit.MILLISECONDS.sleep(1000);
} catch (InterruptedException ie) {
RayLog.core.error("error at StateStoreProxyImpl getAddressInfo", e);
@@ -100,7 +102,7 @@ public class StateStoreProxyImpl implements StateStoreProxy {
* The hash contains the following
* "deleted" : 0/1
* "ray_client_id"
* "nodeIpAddress"
* "node_ip_address"
* "client_type" : plasma_manager/local_scheduler
* "store_socket_name"(op)
* "manager_socket_name"(op)
@@ -129,27 +131,27 @@ public class StateStoreProxyImpl implements StateStoreProxy {
if (!info.containsKey("ray_client_id".getBytes())) {
throw new Exception("no ray_client_id in any client");
} else if (!info.containsKey("nodeIpAddress".getBytes())) {
throw new Exception("no nodeIpAddress in any client");
} else if (!info.containsKey("node_ip_address".getBytes())) {
throw new Exception("no node_ip_address in any client");
} else if (!info.containsKey("client_type".getBytes())) {
throw new Exception("no client_type in any client");
}
if (charsetDecode(info.get("nodeIpAddress".getBytes()), "US-ASCII")
if (charsetDecode(info.get("node_ip_address".getBytes()), "US-ASCII")
.equals(nodeIpAddress)) {
String clientType = charsetDecode(info.get("client_type".getBytes()), "US-ASCII");
if (clientType.equals("plasmaManager")) {
if (clientType.equals("plasma_manager")) {
plasmaManager.add(info);
} else if (clientType.equals("localScheduler")) {
} else if (clientType.equals("local_scheduler")) {
localScheduler.add(info);
}
}
}
if (plasmaManager.size() < 1 || localScheduler.size() < 1) {
throw new Exception("no plasmaManager or localScheduler");
throw new Exception("no plasma_manager or local_scheduler");
} else if (plasmaManager.size() != localScheduler.size()) {
throw new Exception("plasmaManager number not Equal localScheduler number");
throw new Exception("plasma_manager number not Equal local_scheduler number");
}
for (int i = 0; i < plasmaManager.size(); i++) {
@@ -173,7 +175,7 @@ public class StateStoreProxyImpl implements StateStoreProxy {
"US-ASCII");
si.managerPort = Integer.parseInt(managerAddr.split(":")[1]);
si.schedulerName = charsetDecode(
localScheduler.get(i).get("local_scheduler_socket_name".getBytes()), "US-ASCII");
localScheduler.get(i).get("local_scheduler_socket_name".getBytes()), "US-ASCII");
rpc = localScheduler.get(i).get("local_scheduler_rpc_name".getBytes());
if (rpc != null) {