[Core] Multi-tenancy: Job isolation & implement per job config (except for env variables) (#9500)

This commit is contained in:
Kai Yang
2020-08-04 15:51:29 +08:00
committed by GitHub
parent 28b1f7710c
commit 27cd323ce1
35 changed files with 969 additions and 184 deletions
@@ -0,0 +1,69 @@
package io.ray.test;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
 * Checks that the job-level settings supplied through {@code ray.job.*} system properties are
 * observed by remote tasks and actor methods submitted by this test.
 *
 * <p>Two settings are exercised: the number of Java workers per process and a JVM option
 * ({@code -DX=999}) that the remote code reads back via {@code System.getProperty("X")}.
 */
@Test(groups = {"cluster"})
public class JobConfigTest extends BaseTest {

  /** Sets the multi-tenancy flag and the job config properties before any test method runs. */
  @BeforeClass
  public void setupJobConfig() {
    System.setProperty("ray.raylet.config.enable_multi_tenancy", "true");
    System.setProperty("ray.job.num-java-workers-per-process", "3");
    System.setProperty("ray.job.jvm-options.0", "-DX=999");
  }

  /** Clears every property set by {@link #setupJobConfig()} so it cannot leak to other tests. */
  @AfterClass
  public void tearDownJobConfig() {
    System.clearProperty("ray.raylet.config.enable_multi_tenancy");
    System.clearProperty("ray.job.num-java-workers-per-process");
    System.clearProperty("ray.job.jvm-options.0");
  }

  /**
   * Remote function: reads the {@code X} system property of the JVM it runs in.
   *
   * @return the value of system property {@code X}, or {@code null} if it is not set
   */
  public static String getJvmOptions() {
    return System.getProperty("X");
  }

  /**
   * Remote function: reads {@code numWorkersPerProcess} from the Ray config of the runtime it
   * runs in.
   *
   * @return the configured number of workers per process
   */
  public static Integer getWorkersNum() {
    return TestUtils.getRuntime().getRayConfig().numWorkersPerProcess;
  }

  /** Actor exposing the same two probes as the static remote functions above. */
  public static class MyActor {

    /**
     * @return {@code numWorkersPerProcess} from the Ray config of the runtime hosting this actor
     */
    public Integer getWorkersNum() {
      return TestUtils.getRuntime().getRayConfig().numWorkersPerProcess;
    }

    /**
     * @return the value of system property {@code X} in the JVM hosting this actor, or
     *     {@code null} if it is not set
     */
    public String getJvmOptions() {
      return System.getProperty("X");
    }
  }

  /** A normal task must see the JVM option configured for the job. */
  public void testJvmOptions() {
    ObjectRef<String> obj = Ray.task(JobConfigTest::getJvmOptions).remote();
    // TestNG's argument order is (actual, expected).
    Assert.assertEquals(obj.get(), "999");
  }

  /** A normal task must see the workers-per-process value configured for the job. */
  public void testNumJavaWorkerPerProcess() {
    ObjectRef<Integer> obj = Ray.task(JobConfigTest::getWorkersNum).remote();
    // TestNG's argument order is (actual, expected).
    Assert.assertEquals((int) obj.get(), 3);
  }

  /** Actor methods must see both job-level settings as well. */
  public void testInActor() {
    ActorHandle<MyActor> actor = Ray.actor(MyActor::new).remote();

    // test jvm options.
    ObjectRef<String> obj1 = actor.task(MyActor::getJvmOptions).remote();
    Assert.assertEquals(obj1.get(), "999");

    // test workers number.
    ObjectRef<Integer> obj2 = actor.task(MyActor::getWorkersNum).remote();
    Assert.assertEquals((int) obj2.get(), 3);
  }
}
@@ -0,0 +1,130 @@
package io.ray.test;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.runtime.config.RayConfig;
import io.ray.runtime.util.SystemUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.ProcessBuilder.Redirect;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
 * Checks that tasks and actors owned by different drivers are never executed by the same worker
 * process.
 *
 * <p>This class plays two roles: {@link #testMultiDrivers()} is the TestNG test, and
 * {@link #main(String[])} is the driver program that the test launches in child JVMs.
 */
@Test(groups = {"cluster"})
public class MultiDriverTest extends BaseTest {

  /** Number of driver processes started by the test. */
  private static final int DRIVER_COUNT = 10;
  /** Number of normal tasks each driver submits. */
  private static final int NORMAL_TASK_COUNT_PER_DRIVER = 100;
  /** Number of actors each driver creates. */
  private static final int ACTOR_COUNT_PER_DRIVER = 10;
  /** Marker that prefixes the stdout line on which a driver reports its worker PIDs. */
  private static final String PID_LIST_PREFIX = "PID: ";

  /** Enables multi-tenancy through a system property before any test method runs. */
  @BeforeClass
  public void setUpClass() {
    System.setProperty("ray.raylet.config.enable_multi_tenancy", "true");
  }

  /** Clears the property set by {@link #setUpClass()}. */
  @AfterClass
  public void tearDownClass() {
    System.clearProperty("ray.raylet.config.enable_multi_tenancy");
  }

  /**
   * Remote function: reports the PID of the process that executes it.
   *
   * @return the PID returned by {@code SystemUtil.pid()}
   */
  static int getPid() {
    return SystemUtil.pid();
  }

  /** Actor whose only method reports the PID of the process hosting it. */
  public static class Actor {

    /**
     * @return the PID returned by {@code SystemUtil.pid()}
     */
    public int getPid() {
      return SystemUtil.pid();
    }
  }

  /**
   * Driver entry point. Submits normal tasks and actor tasks, collects the distinct PIDs they
   * report, and prints them on a single stdout line of the form {@code "PID: p1,p2,..."}.
   *
   * @param args unused
   * @throws IOException declared for callers; not thrown by the visible code
   */
  public static void main(String[] args) throws IOException {
    Ray.init();

    List<ObjectRef<Integer>> pidObjectList = new ArrayList<>();
    // Submit some normal tasks and get the PIDs of workers which execute the tasks.
    for (int i = 0; i < NORMAL_TASK_COUNT_PER_DRIVER; ++i) {
      pidObjectList.add(Ray.task(MultiDriverTest::getPid).remote());
    }
    // Create some actors and get the PIDs of actors.
    for (int i = 0; i < ACTOR_COUNT_PER_DRIVER; ++i) {
      ActorHandle<Actor> actor = Ray.actor(Actor::new).remote();
      pidObjectList.add(actor.task(Actor::getPid).remote());
    }

    // A set is used because many tasks may report the same worker PID.
    Set<Integer> pids = new HashSet<>();
    for (ObjectRef<Integer> object : pidObjectList) {
      pids.add(object.get());
    }

    // Write pids to stdout
    System.out.println(
        PID_LIST_PREFIX + pids.stream().map(String::valueOf).collect(Collectors.joining(",")));
  }

  /**
   * Starts {@link #DRIVER_COUNT} driver processes, waits for them to exit successfully and
   * asserts that no worker PID is reported by more than one driver.
   *
   * @throws InterruptedException if interrupted while waiting for a driver to exit
   * @throws IOException if a driver cannot be started or its output cannot be read
   */
  public void testMultiDrivers() throws InterruptedException, IOException {
    // This test case starts some driver processes. Each driver process submits some tasks and
    // collects the PIDs of the workers used by the driver. The drivers output the PID list
    // which will be read by the test case itself. The test case will compare the PIDs used by
    // different drivers and make sure that all the PIDs don't overlap. If overlapped, it means that
    // tasks owned by different drivers were scheduled to the same worker process, that is, tasks
    // of different jobs were not correctly isolated during execution.
    List<Process> drivers = new ArrayList<>();
    for (int i = 0; i < DRIVER_COUNT; ++i) {
      drivers.add(startDriver());
    }

    // Wait for drivers to finish.
    // NOTE(review): stdout is only read after the driver has exited, which presumably relies on
    // each driver's stdout fitting into the OS pipe buffer -- confirm if drivers become chattier.
    for (Process driver : drivers) {
      driver.waitFor();
      Assert.assertEquals(driver.exitValue(), 0,
          "The driver exited with code " + driver.exitValue());
    }

    // Read driver outputs and check for any PID overlap.
    Set<Integer> pids = new HashSet<>();
    for (Process driver : drivers) {
      try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(driver.getInputStream()))) {
        String line;
        int previousSize = pids.size();
        while ((line = reader.readLine()) != null) {
          if (line.startsWith(PID_LIST_PREFIX)) {
            for (String pidString : line.substring(PID_LIST_PREFIX.length()).split(",")) {
              // Make sure the PIDs don't overlap. Report the single offending PID rather than
              // the whole output line.
              Assert.assertTrue(pids.add(Integer.valueOf(pidString)),
                  "Worker process with PID " + pidString + " is shared by multiple drivers.");
            }
            // Only the first PID line of a driver is considered.
            break;
          }
        }
        int nowSize = pids.size();
        // Every driver must have contributed at least one PID.
        Assert.assertTrue(nowSize > previousSize,
            "The driver did not report any worker PID.");
      }
    }
  }

  /**
   * Launches a child JVM that runs {@link #main(String[])} with the current class path, passing
   * the Redis address, socket names and node manager port of the current runtime as system
   * properties. The child's stderr is inherited; its stdout stays piped so the test can read it.
   *
   * @return the started driver process
   * @throws IOException if the process cannot be started
   */
  private Process startDriver() throws IOException {
    RayConfig rayConfig = TestUtils.getRuntime().getRayConfig();

    ProcessBuilder builder = new ProcessBuilder(
        "java",
        "-cp",
        System.getProperty("java.class.path"),
        "-Dray.redis.address=" + rayConfig.getRedisAddress(),
        "-Dray.object-store.socket-name=" + rayConfig.objectStoreSocketName,
        "-Dray.raylet.socket-name=" + rayConfig.rayletSocketName,
        "-Dray.raylet.node-manager-port=" + rayConfig.getNodeManagerPort(),
        MultiDriverTest.class.getName());
    builder.redirectError(Redirect.INHERIT);
    return builder.start();
  }
}
@@ -9,8 +9,8 @@ import org.testng.annotations.Test;
public class RayletConfigTest extends BaseTest {
private static final String RAY_CONFIG_KEY = "num_workers_per_process_java";
private static final String RAY_CONFIG_VALUE = "2";
private static final String RAY_CONFIG_KEY = "get_timeout_milliseconds";
private static final String RAY_CONFIG_VALUE = "1234";
@BeforeClass
public void beforeClass() {