[Java] Remove non-raylet code in Java. (#2828)

This commit is contained in:
Wang Qing
2018-09-06 14:54:13 +08:00
committed by Hao Chen
parent d81605e9e7
commit 7e13e1fd49
23 changed files with 159 additions and 785 deletions
@@ -18,9 +18,8 @@ import org.ray.spi.RemoteFunctionManager;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.impl.DefaultLocalSchedulerClient;
import org.ray.spi.impl.NativeRemoteFunctionManager;
import org.ray.spi.impl.NonRayletStateStoreProxyImpl;
import org.ray.spi.impl.RayletStateStoreProxyImpl;
import org.ray.spi.impl.RedisClient;
import org.ray.spi.impl.StateStoreProxyImpl;
import org.ray.spi.model.AddressInfo;
import org.ray.util.logger.RayLog;
@@ -53,19 +52,14 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
throw new Error("Redis address must be configured under Worker mode.");
}
startOnebox(params, pathConfig);
initStateStore(params.redis_address, params.use_raylet);
initStateStore(params.redis_address);
} else {
initStateStore(params.redis_address, params.use_raylet);
initStateStore(params.redis_address);
if (!isWorker) {
List<AddressInfo> nodes = stateStoreProxy.getAddressInfo(
params.node_ip_address, params.redis_address, 5);
params.object_store_name = nodes.get(0).storeName;
if (!params.use_raylet) {
params.object_store_manager_name = nodes.get(0).managerName;
params.local_scheduler_name = nodes.get(0).schedulerName;
} else {
params.raylet_socket_name = nodes.get(0).rayletSocketName;
}
params.raylet_socket_name = nodes.get(0).rayletSocketName;
}
}
@@ -91,54 +85,29 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
}
if (params.worker_mode != WorkerMode.NONE) {
String overwrites = "";
// initialize the links
int releaseDelay = AbstractRayRuntime.configReader
.getIntegerValue("ray", "plasma_default_release_delay", 0,
"how many release requests should be delayed in plasma client");
if (!params.use_raylet) {
ObjectStoreLink plink = new PlasmaClient(params.object_store_name,
params.object_store_manager_name, releaseDelay);
ObjectStoreLink plink = new PlasmaClient(params.object_store_name, "", releaseDelay);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.raylet_socket_name,
WorkerContext.currentWorkerId(),
isWorker,
WorkerContext.currentTask().taskId
);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.local_scheduler_name,
WorkerContext.currentWorkerId(),
isWorker,
WorkerContext.currentTask().taskId,
false
);
init(slink, plink, funcMgr, pathConfig);
init(slink, plink, funcMgr, pathConfig);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.raylet_socket_name);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.object_store_manager_name, params.local_scheduler_name);
} else {
ObjectStoreLink plink = new PlasmaClient(params.object_store_name, "", releaseDelay);
LocalSchedulerLink slink = new DefaultLocalSchedulerClient(
params.raylet_socket_name,
WorkerContext.currentWorkerId(),
isWorker,
WorkerContext.currentTask().taskId,
true
);
init(slink, plink, funcMgr, pathConfig);
// register
registerWorker(isWorker, params.node_ip_address, params.object_store_name,
params.raylet_socket_name);
}
}
RayLog.core.info("RayNativeRuntime start with "
+ "store " + params.object_store_name
+ ", manager " + params.object_store_manager_name
+ ", scheduler " + params.local_scheduler_name
);
RayLog.core.info("RayNativeRuntime started with store {}, raylet {}",
params.object_store_name, params.raylet_socket_name);
}
@Override
@@ -155,19 +124,14 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
params.redis_address = manager.info().redisAddress;
params.object_store_name = manager.info().localStores.get(0).storeName;
params.object_store_manager_name = manager.info().localStores.get(0).managerName;
params.local_scheduler_name = manager.info().localStores.get(0).schedulerName;
params.raylet_socket_name = manager.info().localStores.get(0).rayletSocketName;
//params.node_ip_address = NetworkUtil.getIpAddress();
}
private void initStateStore(String redisAddress, boolean useRaylet) throws Exception {
private void initStateStore(String redisAddress) throws Exception {
kvStore = new RedisClient();
kvStore.setAddr(redisAddress);
stateStoreProxy = useRaylet
? new RayletStateStoreProxyImpl(kvStore)
: new NonRayletStateStoreProxyImpl(kvStore);
//stateStoreProxy.setStore(kvStore);
stateStoreProxy = new StateStoreProxyImpl(kvStore);
stateStoreProxy.initializeGlobalState();
}
@@ -193,27 +157,4 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
}
}
private void registerWorker(boolean isWorker, String nodeIpAddress, String storeName,
String managerName, String schedulerName) {
Map<String, String> workerInfo = new HashMap<>();
String workerId = new String(WorkerContext.currentWorkerId().getBytes());
if (!isWorker) {
workerInfo.put("node_ip_address", nodeIpAddress);
workerInfo.put("driver_id", workerId);
workerInfo.put("start_time", String.valueOf(System.currentTimeMillis()));
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("plasma_manager_socket", managerName);
workerInfo.put("local_scheduler_socket", schedulerName);
workerInfo.put("name", System.getProperty("user.dir"));
//TODO: worker.redis_client.hmset(b"Drivers:" + worker.workerId, driver_info)
kvStore.hmset("Drivers:" + workerId, workerInfo);
} else {
workerInfo.put("node_ip_address", nodeIpAddress);
workerInfo.put("plasma_store_socket", storeName);
workerInfo.put("plasma_manager_socket", managerName);
workerInfo.put("local_scheduler_socket", schedulerName);
//TODO: b"Workers:" + worker.workerId,
kvStore.hmset("Workers:" + workerId, workerInfo);
}
}
}
@@ -34,8 +34,12 @@ public class RunInfo {
}
public enum ProcessType {
PT_WORKER, PT_LOCAL_SCHEDULER, PT_PLASMA_MANAGER, PT_PLASMA_STORE,
PT_GLOBAL_SCHEDULER, PT_REDIS_SERVER, PT_WEB_UI, PT_RAYLET,
PT_WORKER,
PT_PLASMA_STORE,
PT_REDIS_SERVER,
PT_WEB_UI,
PT_RAYLET,
PT_DRIVER
}
}
@@ -29,8 +29,6 @@ import redis.clients.jedis.Jedis;
*/
public class RunManager {
public static final int INT16_MAX = 32767;
private static final DateTimeFormatter DATE_TIME_FORMATTER =
DateTimeFormatter.ofPattern("Y-m-d_H-M-S");
@@ -71,13 +69,7 @@ public class RunManager {
if (params.num_redis_shards <= 0) {
params.num_redis_shards = 1;
}
if (params.num_local_schedulers <= 0) {
params.num_local_schedulers = 1;
}
params.start_workers_from_local_scheduler = params.run_mode != RunMode.SINGLE_BOX;
params.include_global_scheduler = true;
params.start_redis_shards = true;
startRayProcesses();
@@ -90,12 +82,7 @@ public class RunManager {
if (params.num_redis_shards != 0) {
throw new Exception("Number of redis shards should be zero in non-head node.");
}
if (params.num_local_schedulers <= 0) {
params.num_local_schedulers = 1;
}
//params.start_workers_from_local_scheduler = true;
params.include_global_scheduler = false;
params.start_redis_shards = false;
startRayProcesses();
@@ -281,120 +268,34 @@ public class RunManager {
}
redisClient.close();
// start global scheduler
if (params.include_global_scheduler && !params.use_raylet) {
startGlobalScheduler(
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
}
// prepare parameters for node processes
if (params.num_cpus.length == 0) {
params.num_cpus = new int[params.num_local_schedulers];
for (int i = 0; i < params.num_local_schedulers; i++) {
params.num_cpus[i] = 1;
}
} else {
assert (params.num_cpus.length == params.num_local_schedulers);
}
if (params.num_gpus.length == 0) {
params.num_gpus = new int[params.num_local_schedulers];
for (int i = 0; i < params.num_local_schedulers; i++) {
params.num_gpus[i] = 0;
}
} else {
assert (params.num_gpus.length == params.num_local_schedulers);
}
int[] localNumWorkers = new int[params.num_local_schedulers];
if (params.num_workers == 0) {
System.arraycopy(params.num_cpus, 0, localNumWorkers, 0, params.num_local_schedulers);
} else {
for (int i = 0; i < params.num_local_schedulers; i++) {
localNumWorkers[i] = params.num_workers;
}
}
AddressInfo info = new AddressInfo();
if (params.use_raylet) {
// Start object store
int rpcPort = params.object_store_rpc_port;
String storeName = "/tmp/plasma_store" + rpcPort;
// Start object store
int rpcPort = params.object_store_rpc_port;
String storeName = "/tmp/plasma_store" + rpcPort;
startObjectStore(0, info,
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
Map<String, Double> staticResources =
ResourceUtil.getResourcesMapFromString(params.static_resources);
//Start raylet
startRaylet(storeName, info, params.num_workers,
params.redis_address,
params.node_ip_address, params.redirect, staticResources, params.cleanup);
runInfo.localStores.add(info);
} else {
for (int i = 0; i < params.num_local_schedulers; i++) {
// Start object stores
startObjectStore(i, info,
startObjectStore(0, info,
params.redis_address, params.node_ip_address, params.redirect, params.cleanup);
startObjectManager(i, info,
Map<String, Double> staticResources =
ResourceUtil.getResourcesMapFromString(params.static_resources);
//Start raylet
startRaylet(storeName, info, params.num_workers,
params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
params.node_ip_address, params.redirect, staticResources, params.cleanup);
// Start local scheduler
int workerCount = 0;
runInfo.localStores.add(info);
if (params.start_workers_from_local_scheduler) {
workerCount = localNumWorkers[i];
localNumWorkers[i] = 0;
}
startLocalScheduler(i, info,
params.num_cpus[i], params.num_gpus[i], workerCount,
params.redis_address,
params.node_ip_address, params.redirect, params.cleanup);
runInfo.localStores.add(info);
}
}
// start local workers
if (!params.use_raylet) {
for (int i = 0; i < params.num_local_schedulers; i++) {
AddressInfo localStores = runInfo.localStores.get(i);
localStores.workerCount = localNumWorkers[i];
for (int j = 0; j < localNumWorkers[i]; j++) {
startWorker(localStores.storeName, localStores.managerName, localStores.schedulerName,
"/worker" + i + "." + j, params.redis_address,
params.node_ip_address, UniqueId.NIL, "", params.redirect, params.cleanup);
}
}
}
HashSet<RunInfo.ProcessType> excludeTypes = new HashSet<>();
if (!params.use_raylet) {
excludeTypes.add(RunInfo.ProcessType.PT_RAYLET);
} else {
excludeTypes.add(RunInfo.ProcessType.PT_LOCAL_SCHEDULER);
excludeTypes.add(RunInfo.ProcessType.PT_GLOBAL_SCHEDULER);
excludeTypes.add(RunInfo.ProcessType.PT_PLASMA_MANAGER);
}
if (!checkAlive(excludeTypes)) {
if (!checkAlive()) {
cleanup(true);
throw new RuntimeException("Start Ray processes failed");
}
}
private boolean checkAlive(HashSet<RunInfo.ProcessType> excludeTypes) {
private boolean checkAlive() {
RunInfo.ProcessType[] types = RunInfo.ProcessType.values();
for (int i = 0; i < types.length; i++) {
if (excludeTypes.contains(types[i])) {
continue;
}
ProcessInfo p;
for (int j = 0; j < runInfo.allProcesses.get(i).size(); ) {
p = runInfo.allProcesses.get(i).get(j);
@@ -513,96 +414,6 @@ public class RunManager {
return ip + ":" + port;
}
private void startGlobalScheduler(String redisAddress, String ip,
boolean redirect, boolean cleanup) {
String filePath = paths.global_scheduler;
String cmd = filePath + " -r " + redisAddress + " -h " + ip;
Map<String, String> env = null;
startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_GLOBAL_SCHEDULER, "global_scheduler",
redisAddress,
ip, redirect, cleanup);
}
/*
* @param storeName The name of the plasma store socket to connect to
*
* @param storeManagerName The name of the plasma manager socket to connect
* to
*
* @param storeManagerAddress the address of the plasma manager to connect
* to
*
* @param workerPath The path of the script to use when the local scheduler
* starts up new workers
*
* @param numCpus The number of CPUs the local scheduler should be
* configured with
*
* @param numGpus The number of GPUs the local scheduler should be
* configured with
*
* @param numWorkers The number of workers that the local scheduler should
* start
*/
private void startLocalScheduler(int index, AddressInfo info, int numCpus,
int numGpus, int numWorkers,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
//if (numCpus <= 0)
// numCpus = Runtime.getRuntime().availableProcessors();
if (numGpus <= 0) {
numGpus = 0;
}
String filePath = paths.local_scheduler;
int rpcPort = params.local_scheduler_rpc_port + index;
String name = "/tmp/scheduler" + rpcPort;
String rpcAddr = "";
String cmd = filePath + " -s " + name + " -p " + info.storeName + " -h " + ip + " -n "
+ numWorkers + " -c " + "CPU," + INT16_MAX + ",GPU,0";
assert (info.managerName.length() > 0);
assert (info.storeName.length() > 0);
assert (redisAddress.length() > 0);
cmd += " -m " + info.managerName;
String workerCmd = null;
workerCmd = buildWorkerCommand(true, info.storeName, info.managerName, name,
UniqueId.NIL, "", ip, redisAddress);
cmd += " -w \"" + workerCmd + "\"";
if (redisAddress.length() > 0) {
cmd += " -r " + redisAddress;
}
if (info.managerPort > 0) {
cmd += " -a " + params.node_ip_address + ":" + info.managerPort;
}
Map<String, String> env = null;
String[] cmds = StringUtil.split(cmd, " ", "\"", "\"").toArray(new String[0]);
Process p = startProcess(cmds, env, RunInfo.ProcessType.PT_LOCAL_SCHEDULER,
"local_scheduler", redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if (p == null || !p.isAlive()) {
info.schedulerName = "";
info.schedulerRpcAddr = "";
throw new RuntimeException("Start local scheduler failed ...");
} else {
info.schedulerName = name;
info.schedulerRpcAddr = rpcAddr;
}
}
private void startRaylet(String storeName, AddressInfo info, int numWorkers,
String redisAddress, String ip, boolean redirect,
Map<String, Double> staticResources, boolean cleanup) {
@@ -658,39 +469,7 @@ public class RunManager {
String ip, String redisAddress) {
String workerConfigs = "ray.java.start.object_store_name=" + storeName
+ ";ray.java.start.raylet_socket_name=" + rayletSocketName
+ ";ray.java.start.worker_mode=WORKER;ray.java.start.use_raylet=true";
workerConfigs += ";ray.java.start.deploy=" + params.deploy;
if (!actorId.equals(UniqueId.NIL)) {
workerConfigs += ";ray.java.start.actor_id=" + actorId;
}
if (!actorClass.equals("")) {
workerConfigs += ";ray.java.start.driver_class=" + actorClass;
}
String jvmArgs = "";
jvmArgs += " -Dlogging.path=" + params.log_dir;
jvmArgs += " -Dlogging.file.name=core-*pid_suffix*";
return buildJavaProcessCommand(
RunInfo.ProcessType.PT_WORKER,
"org.ray.runner.worker.DefaultWorker",
"",
workerConfigs,
jvmArgs,
ip,
redisAddress,
null
);
}
private String buildWorkerCommand(boolean isFromLocalScheduler, String storeName,
String storeManagerName, String localSchedulerName,
UniqueId actorId, String actorClass, String
ip, String redisAddress) {
String workerConfigs = "ray.java.start.object_store_name=" + storeName
+ ";ray.java.start.object_store_manager_name=" + storeManagerName
+ ";ray.java.start.worker_mode=WORKER"
+ ";ray.java.start.local_scheduler_name=" + localSchedulerName;
+ ";ray.java.start.worker_mode=WORKER";
workerConfigs += ";ray.java.start.deploy=" + params.deploy;
if (!actorId.equals(UniqueId.NIL)) {
workerConfigs += ";ray.java.start.actor_id=" + actorId;
@@ -747,47 +526,4 @@ public class RunManager {
}
}
private AddressInfo startObjectManager(int index, AddressInfo info,
String redisAddress, String ip, boolean redirect,
boolean cleanup) {
String filePath = paths.store_manager;
int rpcPort = params.object_store_manager_rpc_port + index;
String name = "/tmp/plasma_manager" + rpcPort;
String rpcAddr = "";
String cmd = filePath + " -s " + info.storeName + " -m " + name + " -h " + ip + " -p "
+ (params.object_store_manager_ray_listen_port + index)
+ " -r " + redisAddress;
Map<String, String> env = null;
Process p = startProcess(cmd.split(" "), env, RunInfo.ProcessType.PT_PLASMA_MANAGER,
"object_manager", redisAddress, ip, redirect, cleanup);
if (p != null && p.isAlive()) {
try {
TimeUnit.MILLISECONDS.sleep(100);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if (p == null || !p.isAlive()) {
throw new RuntimeException("Start object manager failed ...");
} else {
info.managerName = name;
info.managerPort = params.object_store_manager_ray_listen_port + index;
info.managerRpcAddr = rpcAddr;
return info;
}
}
public void startWorker(String storeName, String storeManagerName,
String localSchedulerName, String workerName, String redisAddress,
String ip, UniqueId actorId, String actorClass,
boolean redirect, boolean cleanup) {
String cmd = buildWorkerCommand(false, storeName, storeManagerName, localSchedulerName, actorId,
actorClass, ip, redisAddress);
startProcess(cmd.split(" "), null, RunInfo.ProcessType.PT_WORKER, workerName, redisAddress, ip,
redirect, cleanup);
}
}
@@ -7,6 +7,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.ray.api.RayObject;
import org.ray.api.WaitResult;
import org.ray.api.id.UniqueId;
import org.ray.core.AbstractRayRuntime;
import org.ray.core.UniqueIdHelper;
@@ -28,48 +30,38 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
return bb;
});
private long client = 0;
boolean useRaylet = false;
public DefaultLocalSchedulerClient(String schedulerSockName, UniqueId clientId,
boolean isWorker, UniqueId driverId, boolean useRaylet) {
boolean isWorker, UniqueId driverId) {
client = nativeInit(schedulerSockName, clientId.getBytes(),
isWorker, driverId.getBytes(), useRaylet);
this.useRaylet = useRaylet;
isWorker, driverId.getBytes());
}
@Override
public List<byte[]> wait(byte[][] objectIds, int timeoutMs, int numReturns) {
assert (useRaylet == true);
public <T> WaitResult<T> wait(List<RayObject<T>> waitFor, int numReturns, int timeoutMs) {
List<UniqueId> ids = new ArrayList<>();
for (RayObject<T> element : waitFor) {
ids.add(element.getId());
}
boolean[] readys = nativeWaitObject(client, objectIds, numReturns, timeoutMs, false);
assert (readys.length == objectIds.length);
boolean[] ready = nativeWaitObject(client, getIdBytes(ids), numReturns, timeoutMs, false);
List<RayObject<T>> readyList = new ArrayList<>();
List<RayObject<T>> unreadyList = new ArrayList<>();
List<byte[]> ret = new ArrayList<>();
for (int i = 0; i < readys.length; i++) {
if (readys[i]) {
ret.add(objectIds[i]);
for (int i = 0; i < ready.length; i++) {
if (ready[i]) {
readyList.add(waitFor.get(i));
} else {
unreadyList.add(waitFor.get(i));
}
}
return ret;
return new WaitResult<>(readyList, unreadyList);
}
@Override
public void submitTask(TaskSpec task) {
RayLog.core.debug("Submitting task: {}", task);
// We don't support resources management in non raylet mode.
if (!useRaylet) {
task.resources.clear();
task.resources.put(ResourceUtil.CPU_LITERAL, 0.0);
} else {
if (!task.resources.containsKey(ResourceUtil.CPU_LITERAL)) {
task.resources.put(ResourceUtil.CPU_LITERAL, 0.0);
}
if (!task.resources.containsKey(ResourceUtil.GPU_LITERAL)) {
task.resources.put(ResourceUtil.GPU_LITERAL, 0.0);
}
}
ByteBuffer info = taskSpec2Info(task);
byte[] cursorId = null;
@@ -77,29 +69,17 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
cursorId = task.cursorId.getBytes();
}
nativeSubmitTask(client, cursorId, info, info.position(), info.remaining(), useRaylet);
nativeSubmitTask(client, cursorId, info, info.position(), info.remaining());
}
@Override
public TaskSpec getTask() {
byte[] bytes = nativeGetTask(client, useRaylet);
byte[] bytes = nativeGetTask(client);
assert (null != bytes);
ByteBuffer bb = ByteBuffer.wrap(bytes);
return taskInfo2Spec(bb);
}
@Override
public void markTaskPutDependency(UniqueId taskId, UniqueId objectId) {
nativePutObject(client, taskId.getBytes(), objectId.getBytes());
}
@Override
public void reconstructObject(UniqueId objectId, boolean fetchOnly) {
List<UniqueId> objects = new ArrayList<>();
objects.add(objectId);
nativeReconstructObjects(client, getIdBytes(objects), fetchOnly);
}
@Override
public void reconstructObjects(List<UniqueId> objectIds, boolean fetchOnly) {
if (RayLog.core.isInfoEnabled()) {
@@ -289,13 +269,13 @@ public class DefaultLocalSchedulerClient implements LocalSchedulerLink {
/// 6) popd
private static native long nativeInit(String localSchedulerSocket, byte[] workerId,
boolean isWorker, byte[] driverTaskId, boolean useRaylet);
boolean isWorker, byte[] driverTaskId);
private static native void nativeSubmitTask(long client, byte[] cursorId, ByteBuffer taskBuff,
int pos, int taskSize, boolean useRaylet);
int pos, int taskSize);
// return TaskInfo (in FlatBuffer)
private static native byte[] nativeGetTask(long client, boolean useRaylet);
private static native byte[] nativeGetTask(long client);
private static native void nativeDestroy(long client);
@@ -1,115 +0,0 @@
package org.ray.spi.impl;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.model.AddressInfo;
/**
* A class used to interface with the Ray control state for non-raylet.
*/
public class NonRayletStateStoreProxyImpl extends BaseStateStoreProxyImpl {
public NonRayletStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
super(rayKvStore);
}
/*
* get address info of one node from primary redis
* @param: node ip address, usually local ip address
* @return: a list of SchedulerInfo which contains storeName, managerName, managerPort and
* schedulerName
* @noteRedis data key is "CL:*", redis data value is a hash.
* The hash contains the following
* "deleted" : 0/1
* "ray_client_id"
* "node_ip_address"
* "client_type" : plasma_manager/local_scheduler
* "store_socket_name"(op)
* "manager_socket_name"(op)
* "local_scheduler_socket_name"(op)
*/
@Override
public List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use doGetAddressInfo");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
Set<byte[]> cks = rayKvStore.keys("CL:*".getBytes());
byte[] key;
List<Map<byte[], byte[]>> plasmaManager = new ArrayList<>();
List<Map<byte[], byte[]>> localScheduler = new ArrayList<>();
for (byte[] ck : cks) {
key = ck;
Map<byte[], byte[]> info = rayKvStore.hgetAll(key);
String deleted = charsetDecode(info.get("deleted".getBytes()), "US-ASCII");
if (deleted != null) {
if (Boolean.getBoolean(deleted)) {
continue;
}
}
if (!info.containsKey("ray_client_id".getBytes())) {
throw new Exception("no ray_client_id in any client");
} else if (!info.containsKey("node_ip_address".getBytes())) {
throw new Exception("no node_ip_address in any client");
} else if (!info.containsKey("client_type".getBytes())) {
throw new Exception("no client_type in any client");
}
if (charsetDecode(info.get("node_ip_address".getBytes()), "US-ASCII")
.equals(nodeIpAddress)) {
String clientType = charsetDecode(info.get("client_type".getBytes()), "US-ASCII");
if ("plasma_manager".equals(clientType)) {
plasmaManager.add(info);
} else if ("local_scheduler".equals(clientType)) {
localScheduler.add(info);
}
}
}
if (plasmaManager.size() < 1 || localScheduler.size() < 1) {
throw new Exception("no plasma_manager or local_scheduler");
} else if (plasmaManager.size() != localScheduler.size()) {
throw new Exception("plasma_manager number not Equal local_scheduler number");
}
for (int i = 0; i < plasmaManager.size(); i++) {
AddressInfo si = new AddressInfo();
si.storeName = charsetDecode(plasmaManager.get(i).get("store_socket_name".getBytes()),
"US-ASCII");
si.managerName = charsetDecode(plasmaManager.get(i).get("manager_socket_name".getBytes()),
"US-ASCII");
byte[] rpc = plasmaManager.get(i).get("manager_rpc_name".getBytes());
if (rpc != null) {
si.managerRpcAddr = charsetDecode(rpc, "US-ASCII");
}
rpc = plasmaManager.get(i).get("store_rpc_name".getBytes());
if (rpc != null) {
si.storeRpcAddr = charsetDecode(rpc, "US-ASCII");
}
String managerAddr = charsetDecode(plasmaManager.get(i).get("manager_address".getBytes()),
"US-ASCII");
si.managerPort = Integer.parseInt(managerAddr.split(":")[1]);
si.schedulerName = charsetDecode(
localScheduler.get(i).get("local_scheduler_socket_name".getBytes()), "US-ASCII");
rpc = localScheduler.get(i).get("local_scheduler_rpc_name".getBytes());
if (rpc != null) {
si.schedulerRpcAddr = charsetDecode(rpc, "US-ASCII");
}
schedulerInfo.add(si);
}
return schedulerInfo;
}
}
@@ -1,62 +0,0 @@
package org.ray.spi.impl;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.ray.api.id.UniqueId;
import org.ray.format.gcs.ClientTableData;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.model.AddressInfo;
import org.ray.util.NetworkUtil;
/**
* A class used to interface with the Ray control state for raylet.
*/
public class RayletStateStoreProxyImpl extends BaseStateStoreProxyImpl {
public RayletStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
super(rayKvStore);
}
@Override
public List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use doGetAddressInfo");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
byte[] prefix = "CLIENT".getBytes();
byte[] postfix = UniqueId.genNil().getBytes();
byte[] clientKey = new byte[prefix.length + postfix.length];
System.arraycopy(prefix, 0, clientKey, 0, prefix.length);
System.arraycopy(postfix, 0, clientKey, prefix.length, postfix.length);
Set<byte[]> clients = rayKvStore.zrange(clientKey, 0, -1);
for (byte[] clientMessage : clients) {
ByteBuffer bb = ByteBuffer.wrap(clientMessage);
ClientTableData client = ClientTableData.getRootAsClientTableData(bb);
String clientNodeIpAddress = client.nodeManagerAddress();
String localIpAddress = NetworkUtil.getIpAddress(null);
String redisIpAddress = redisAddress.substring(0, redisAddress.indexOf(':'));
boolean headNodeAddress = "127.0.0.1".equals(clientNodeIpAddress)
&& Objects.equals(redisIpAddress, localIpAddress);
boolean notHeadNodeAddress = Objects.equals(clientNodeIpAddress, nodeIpAddress);
if (headNodeAddress || notHeadNodeAddress) {
AddressInfo si = new AddressInfo();
si.storeName = client.objectStoreSocketName();
si.rayletSocketName = client.rayletSocketName();
si.managerRpcAddr = client.nodeManagerAddress();
si.managerPort = client.nodeManagerPort();
schedulerInfo.add(si);
}
}
return schedulerInfo;
}
}
@@ -1,26 +1,30 @@
package org.ray.spi.impl;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.ray.api.id.UniqueId;
import org.ray.format.gcs.ClientTableData;
import org.ray.spi.KeyValueStoreLink;
import org.ray.spi.StateStoreProxy;
import org.ray.spi.model.AddressInfo;
import org.ray.util.NetworkUtil;
import org.ray.util.logger.RayLog;
/**
* Base class used to interface with the Ray control state.
* A class used to interface with the Ray control state.
*/
public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
public class StateStoreProxyImpl implements StateStoreProxy {
public KeyValueStoreLink rayKvStore;
public ArrayList<KeyValueStoreLink> shardStoreList = new ArrayList<>();
public BaseStateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
public StateStoreProxyImpl(KeyValueStoreLink rayKvStore) {
this.rayKvStore = rayKvStore;
}
@@ -49,7 +53,7 @@ public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
Set<String> distinctIpAddress = new HashSet<String>(ipAddressPorts);
if (distinctIpAddress.size() != numRedisShards) {
es = String.format("Expected %d Redis shard addresses, found2 %d.", numRedisShards,
distinctIpAddress.size());
distinctIpAddress.size());
throw new Exception(es);
}
@@ -78,7 +82,7 @@ public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
@Override
public List<AddressInfo> getAddressInfo(final String nodeIpAddress,
final String redisAddress,
final String redisAddress,
int numRetries) {
int count = 0;
while (count < numRetries) {
@@ -86,11 +90,11 @@ public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
return doGetAddressInfo(nodeIpAddress, redisAddress);
} catch (Exception e) {
try {
RayLog.core.warn("Error occurred in BaseStateStoreProxyImpl getAddressInfo, "
+ (numRetries - count) + " retries remaining", e);
RayLog.core.warn("Error occurred in StateStoreProxyImpl getAddressInfo, "
+ (numRetries - count) + " retries remaining", e);
TimeUnit.MILLISECONDS.sleep(1000);
} catch (InterruptedException ie) {
RayLog.core.error("error at BaseStateStoreProxyImpl getAddressInfo", e);
RayLog.core.error("error at StateStoreProxyImpl getAddressInfo", e);
throw new RuntimeException(e);
}
}
@@ -108,8 +112,44 @@ public abstract class BaseStateStoreProxyImpl implements StateStoreProxy {
* @return A list of SchedulerInfo which contains node manager or local scheduler address info.
* @throws Exception No redis client exception.
*/
protected abstract List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception;
public List<AddressInfo> doGetAddressInfo(final String nodeIpAddress,
final String redisAddress) throws Exception {
if (this.rayKvStore == null) {
throw new Exception("no redis client when use doGetAddressInfo");
}
List<AddressInfo> schedulerInfo = new ArrayList<>();
byte[] prefix = "CLIENT".getBytes();
byte[] postfix = UniqueId.genNil().getBytes();
byte[] clientKey = new byte[prefix.length + postfix.length];
System.arraycopy(prefix, 0, clientKey, 0, prefix.length);
System.arraycopy(postfix, 0, clientKey, prefix.length, postfix.length);
Set<byte[]> clients = rayKvStore.zrange(clientKey, 0, -1);
for (byte[] clientMessage : clients) {
ByteBuffer bb = ByteBuffer.wrap(clientMessage);
ClientTableData client = ClientTableData.getRootAsClientTableData(bb);
String clientNodeIpAddress = client.nodeManagerAddress();
String localIpAddress = NetworkUtil.getIpAddress(null);
String redisIpAddress = redisAddress.substring(0, redisAddress.indexOf(':'));
boolean headNodeAddress = "127.0.0.1".equals(clientNodeIpAddress)
&& Objects.equals(redisIpAddress, localIpAddress);
boolean notHeadNodeAddress = Objects.equals(clientNodeIpAddress, nodeIpAddress);
if (headNodeAddress || notHeadNodeAddress) {
AddressInfo si = new AddressInfo();
si.storeName = client.objectStoreSocketName();
si.rayletSocketName = client.rayletSocketName();
si.managerRpcAddr = client.nodeManagerAddress();
si.managerPort = client.nodeManagerPort();
schedulerInfo.add(si);
}
}
return schedulerInfo;
}
protected String charsetDecode(byte[] bs, String charset) throws UnsupportedEncodingException {
return new String(bs, charset);