mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 18:06:25 +08:00
[Java] Simplify Ray.init() by invoking ray start internally (#10762)
This commit is contained in:
+2
-100
@@ -1,30 +1,16 @@
|
||||
package io.ray.streaming.api.context;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.gson.Gson;
|
||||
import io.ray.api.Ray;
|
||||
import io.ray.runtime.config.RayConfig;
|
||||
import io.ray.runtime.util.NetworkUtil;
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
class ClusterStarter {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ClusterStarter.class);
|
||||
private static final String PLASMA_STORE_SOCKET_NAME = "/tmp/ray/plasma_store_socket";
|
||||
private static final String RAYLET_SOCKET_NAME = "/tmp/ray/raylet_socket";
|
||||
|
||||
static synchronized void startCluster(boolean isCrossLanguage, boolean isLocal) {
|
||||
static synchronized void startCluster(boolean isLocal) {
|
||||
Preconditions.checkArgument(!Ray.isInitialized());
|
||||
RayConfig.reset();
|
||||
if (!isLocal) {
|
||||
System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
|
||||
System.setProperty("ray.run-mode", "CLUSTER");
|
||||
@@ -33,97 +19,13 @@ class ClusterStarter {
|
||||
System.setProperty("ray.run-mode", "SINGLE_PROCESS");
|
||||
}
|
||||
|
||||
if (!isCrossLanguage) {
|
||||
Ray.init();
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing socket files.
|
||||
for (String socket : ImmutableList.of(RAYLET_SOCKET_NAME, PLASMA_STORE_SOCKET_NAME)) {
|
||||
File file = new File(socket);
|
||||
if (file.exists()) {
|
||||
LOG.info("Delete existing socket file {}", file);
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
|
||||
String nodeManagerPort = String.valueOf(NetworkUtil.getUnusedPort());
|
||||
|
||||
// Jars in the `ray` wheel don't contain test classes, so we add test classes explicitly.
|
||||
// Since mvn test classes contain `test` in their path and bazel test classes are located in a jar
|
||||
// with `test` included in the name, we can check the classpath for `test` to select test classes.
|
||||
String classpath = Stream.of(System.getProperty("java.class.path").split(":"))
|
||||
.filter(s -> !s.contains(" ") && s.contains("test"))
|
||||
.collect(Collectors.joining(":"));
|
||||
String workerOptions = new Gson().toJson(ImmutableList.of("-classpath", classpath));
|
||||
Map<String, String> config = new HashMap<>(RayConfig.create().rayletConfigParameters);
|
||||
config.put("num_workers_per_process_java", "1");
|
||||
// Start ray cluster.
|
||||
List<String> startCommand = ImmutableList.of(
|
||||
"ray",
|
||||
"start",
|
||||
"--head",
|
||||
"--port=6379",
|
||||
String.format("--plasma-store-socket-name=%s", PLASMA_STORE_SOCKET_NAME),
|
||||
String.format("--raylet-socket-name=%s", RAYLET_SOCKET_NAME),
|
||||
String.format("--node-manager-port=%s", nodeManagerPort),
|
||||
"--load-code-from-local",
|
||||
"--java-worker-options=" + workerOptions,
|
||||
"--system-config=" + new Gson().toJson(config)
|
||||
);
|
||||
if (!executeCommand(startCommand, 10)) {
|
||||
throw new RuntimeException("Couldn't start ray cluster.");
|
||||
}
|
||||
|
||||
// Connect to the cluster.
|
||||
System.setProperty("ray.address", "127.0.0.1:6379");
|
||||
System.setProperty("ray.object-store.socket-name", PLASMA_STORE_SOCKET_NAME);
|
||||
System.setProperty("ray.raylet.socket-name", RAYLET_SOCKET_NAME);
|
||||
System.setProperty("ray.raylet.node-manager-port", nodeManagerPort);
|
||||
Ray.init();
|
||||
}
|
||||
|
||||
public static synchronized void stopCluster(boolean isCrossLanguage) {
|
||||
public static synchronized void stopCluster() {
|
||||
// Disconnect from the cluster.
|
||||
Ray.shutdown();
|
||||
System.clearProperty("ray.address");
|
||||
System.clearProperty("ray.object-store.socket-name");
|
||||
System.clearProperty("ray.raylet.socket-name");
|
||||
System.clearProperty("ray.raylet.node-manager-port");
|
||||
System.clearProperty("ray.raylet.config.num_workers_per_process_java");
|
||||
System.clearProperty("ray.run-mode");
|
||||
|
||||
if (isCrossLanguage) {
|
||||
// Stop ray cluster.
|
||||
final List<String> stopCommand = ImmutableList.of(
|
||||
"ray",
|
||||
"stop"
|
||||
);
|
||||
if (!executeCommand(stopCommand, 10)) {
|
||||
throw new RuntimeException("Couldn't stop ray cluster");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute an external command.
|
||||
*
|
||||
* @return Whether the command succeeded.
|
||||
*/
|
||||
private static boolean executeCommand(List<String> command, int waitTimeoutSeconds) {
|
||||
LOG.info("Executing command: {}", String.join(" ", command));
|
||||
try {
|
||||
ProcessBuilder processBuilder = new ProcessBuilder(command)
|
||||
.redirectOutput(ProcessBuilder.Redirect.INHERIT)
|
||||
.redirectError(ProcessBuilder.Redirect.INHERIT);
|
||||
Process process = processBuilder.start();
|
||||
boolean exit = process.waitFor(waitTimeoutSeconds, TimeUnit.SECONDS);
|
||||
if (!exit) {
|
||||
process.destroyForcibly();
|
||||
}
|
||||
return process.exitValue() == 0;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error executing command " + String.join(" ", command), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+3
-4
@@ -65,11 +65,10 @@ public class StreamingContext implements Serializable {
|
||||
|
||||
if (!Ray.isInitialized()) {
|
||||
if (Config.MEMORY_CHANNEL.equalsIgnoreCase(jobConfig.get(Config.CHANNEL_TYPE))) {
|
||||
Preconditions.checkArgument(!jobGraph.isCrossLanguageGraph());
|
||||
ClusterStarter.startCluster(false, true);
|
||||
ClusterStarter.startCluster(true);
|
||||
LOG.info("Created local cluster for job {}.", jobName);
|
||||
} else {
|
||||
ClusterStarter.startCluster(jobGraph.isCrossLanguageGraph(), false);
|
||||
ClusterStarter.startCluster(false);
|
||||
LOG.info("Created multi process cluster for job {}.", jobName);
|
||||
}
|
||||
Runtime.getRuntime().addShutdownHook(new Thread(StreamingContext.this::stop));
|
||||
@@ -103,7 +102,7 @@ public class StreamingContext implements Serializable {
|
||||
|
||||
public void stop() {
|
||||
if (Ray.isInitialized()) {
|
||||
ClusterStarter.stopCluster(jobGraph.isCrossLanguageGraph());
|
||||
ClusterStarter.stopCluster();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package io.ray.streaming.jobgraph;
|
||||
|
||||
import io.ray.streaming.api.Language;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -138,14 +137,4 @@ public class JobGraph implements Serializable {
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isCrossLanguageGraph() {
|
||||
Language language = jobVertices.get(0).getLanguage();
|
||||
for (JobVertex jobVertex : jobVertices) {
|
||||
if (jobVertex.getLanguage() != language) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-5
@@ -1,6 +1,7 @@
|
||||
package io.ray.streaming.runtime.transfer;
|
||||
|
||||
import io.ray.runtime.RayNativeRuntime;
|
||||
import io.ray.runtime.util.BinaryFileUtil;
|
||||
import io.ray.runtime.util.JniUtils;
|
||||
|
||||
/**
|
||||
@@ -10,11 +11,7 @@ import io.ray.runtime.util.JniUtils;
|
||||
public class TransferHandler {
|
||||
|
||||
static {
|
||||
try {
|
||||
Class.forName(RayNativeRuntime.class.getName());
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
JniUtils.loadLibrary(BinaryFileUtil.CORE_WORKER_JAVA_LIBRARY, true);
|
||||
JniUtils.loadLibrary("streaming_java");
|
||||
}
|
||||
|
||||
|
||||
+2
-7
@@ -1,6 +1,7 @@
|
||||
package io.ray.streaming.runtime.util;
|
||||
|
||||
import io.ray.runtime.RayNativeRuntime;
|
||||
import io.ray.runtime.util.BinaryFileUtil;
|
||||
import io.ray.runtime.util.JniUtils;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.net.InetAddress;
|
||||
@@ -29,13 +30,7 @@ public class EnvUtil {
|
||||
}
|
||||
|
||||
public static void loadNativeLibraries() {
|
||||
// Explicitly load `RayNativeRuntime`, to make sure `core_worker_library_java`
|
||||
// is loaded before `streaming_java`.
|
||||
try {
|
||||
Class.forName(RayNativeRuntime.class.getName());
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
JniUtils.loadLibrary(BinaryFileUtil.CORE_WORKER_JAVA_LIBRARY, true);
|
||||
JniUtils.loadLibrary("streaming_java");
|
||||
}
|
||||
|
||||
|
||||
+8
-4
@@ -58,11 +58,11 @@ public class StreamingQueueTest extends BaseUnitTest implements Serializable {
|
||||
void beforeMethod() {
|
||||
LOGGER.info("beforeTest");
|
||||
Ray.shutdown();
|
||||
System.setProperty("ray.resources", "CPU:4,RES-A:4");
|
||||
System.setProperty("ray.head-args.0", "--num-cpus=4");
|
||||
System.setProperty("ray.head-args.1", "--resources={\"RES-A\":4}");
|
||||
System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
|
||||
System.setProperty("ray.run-mode", "CLUSTER");
|
||||
System.setProperty("ray.redirect-output", "true");
|
||||
RayConfig.reset();
|
||||
Ray.init();
|
||||
}
|
||||
|
||||
@@ -71,6 +71,8 @@ public class StreamingQueueTest extends BaseUnitTest implements Serializable {
|
||||
LOGGER.info("afterTest");
|
||||
Ray.shutdown();
|
||||
System.clearProperty("ray.run-mode");
|
||||
System.clearProperty("ray.head-args.0");
|
||||
System.clearProperty("ray.head-args.1");
|
||||
}
|
||||
|
||||
@Test(timeOut = 300000)
|
||||
@@ -78,7 +80,8 @@ public class StreamingQueueTest extends BaseUnitTest implements Serializable {
|
||||
LOGGER.info("StreamingQueueTest.testReaderWriter run-mode: {}",
|
||||
System.getProperty("ray.run-mode"));
|
||||
Ray.shutdown();
|
||||
System.setProperty("ray.resources", "CPU:4,RES-A:4");
|
||||
System.setProperty("ray.head-args.0", "--num-cpus=4");
|
||||
System.setProperty("ray.head-args.1", "--resources={\"RES-A\":4}");
|
||||
System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
|
||||
|
||||
System.setProperty("ray.run-mode", "CLUSTER");
|
||||
@@ -134,7 +137,8 @@ public class StreamingQueueTest extends BaseUnitTest implements Serializable {
|
||||
@Test(timeOut = 60000)
|
||||
public void testWordCount() {
|
||||
Ray.shutdown();
|
||||
System.setProperty("ray.resources", "CPU:4,RES-A:4");
|
||||
System.setProperty("ray.head-args.0", "--num-cpus=4");
|
||||
System.setProperty("ray.head-args.1", "--resources={\"RES-A\":4}");
|
||||
System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
|
||||
|
||||
System.setProperty("ray.run-mode", "CLUSTER");
|
||||
|
||||
Reference in New Issue
Block a user