[Streaming] Streaming data transfer java (#6474)

This commit is contained in:
Chaokun Yang
2019-12-22 10:56:05 +08:00
committed by Hao Chen
parent 1b14fbe179
commit 7bbfa85c66
146 changed files with 3923 additions and 786 deletions
@@ -0,0 +1,77 @@
package org.ray.streaming.runtime.demo;
import com.google.common.collect.ImmutableMap;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.function.impl.FlatMapFunction;
import org.ray.streaming.api.function.impl.ReduceFunction;
import org.ray.streaming.api.function.impl.SinkFunction;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.util.Config;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * End-to-end word-count job executed over the in-memory channel.
 *
 * <p>The job splits one sentence into words, keys by word, sums the counts with a
 * reduce and writes every intermediate result into {@link #wordCount}. The test then
 * waits until that map holds the final counts.
 */
public class WordCountTest implements Serializable {

  private static final Logger LOGGER = LoggerFactory.getLogger(WordCountTest.class);

  /** Upper bound, in milliseconds, on how long the test waits for the final counts. */
  private static final long RESULT_TIMEOUT_MS = 60_000L;

  /** Interval, in milliseconds, between two checks of the result map. */
  private static final long POLL_INTERVAL_MS = 100L;

  // TODO(zhenxuanpan): this test only works in single-process mode, because we put
  // results in this in-memory map.
  static Map<String, Integer> wordCount = new ConcurrentHashMap<>();

  @Test
  public void testWordCount() {
    // The map is static; drop anything left over from an earlier invocation.
    wordCount.clear();

    StreamingContext streamingContext = StreamingContext.buildContext();
    Map<String, Object> config = new HashMap<>();
    config.put(Config.STREAMING_BATCH_MAX_COUNT, 1);
    config.put(Config.CHANNEL_TYPE, Config.MEMORY_CHANNEL);
    streamingContext.withConfig(config);

    List<String> text = new ArrayList<>();
    text.add("hello world eagle eagle eagle");
    StreamSource<String> streamSource = StreamSource.buildSource(streamingContext, text);
    streamSource
        .flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
          String[] records = value.split(" ");
          for (String record : records) {
            collector.collect(new WordAndCount(record, 1));
          }
        })
        .keyBy(pair -> pair.word)
        .reduce((ReduceFunction<WordAndCount>) (oldValue, newValue) ->
            new WordAndCount(oldValue.word, oldValue.count + newValue.count))
        .sink((SinkFunction<WordAndCount>)
            result -> wordCount.put(result.word, result.count));
    streamingContext.execute();

    Map<String, Integer> expected = ImmutableMap.of("eagle", 3, "hello", 1, "world", 1);

    // Wait for the exact final counts rather than for "three keys present": the sink
    // also receives the intermediate counts eagle=1 and eagle=2, so the map can already
    // have three entries before the last update has arrived.
    long deadline = System.currentTimeMillis() + RESULT_TIMEOUT_MS;
    while (!expected.equals(wordCount) && System.currentTimeMillis() < deadline) {
      try {
        Thread.sleep(POLL_INTERVAL_MS);
      } catch (InterruptedException e) {
        LOGGER.warn("Got an exception while sleeping.", e);
        // Keep the interrupt visible to the test runner and stop waiting.
        Thread.currentThread().interrupt();
        Assert.fail("Interrupted while waiting for the word counts", e);
      }
    }
    // On timeout this reports the difference between the actual and expected counts.
    Assert.assertEquals(wordCount, expected);
  }

  /** Immutable (word, count) pair flowing through the job. */
  private static class WordAndCount implements Serializable {

    public final String word;
    public final Integer count;

    public WordAndCount(String key, Integer count) {
      this.word = key;
      this.count = count;
    }
  }
}
@@ -0,0 +1,75 @@
package org.ray.streaming.runtime.schedule;
import java.util.ArrayList;
import java.util.List;
import com.google.common.collect.Lists;
import org.ray.api.RayActor;
import org.ray.api.id.ActorId;
import org.ray.api.id.ObjectId;
import org.ray.runtime.actor.LocalModeRayActor;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.partition.impl.RoundRobinPartition;
import org.ray.streaming.api.stream.DataStream;
import org.ray.streaming.api.stream.StreamSink;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.runtime.core.graph.ExecutionEdge;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.runtime.worker.JobWorker;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Checks that {@link TaskAssignImpl} turns a source-to-sink plan into an execution
 * graph with one source node and one sink node joined by a round-robin edge.
 */
public class TaskAssignImplTest {

  private static final Logger LOGGER = LoggerFactory.getLogger(TaskAssignImplTest.class);

  @Test
  public void testTaskAssignImpl() {
    Plan plan = buildDataSyncPlan();

    // One local-mode actor per plan vertex.
    List<RayActor<JobWorker>> workers = new ArrayList<>();
    int vertexCount = plan.getPlanVertexList().size();
    for (int created = 0; created < vertexCount; created++) {
      workers.add(new LocalModeRayActor(ActorId.fromRandom(), ObjectId.fromRandom()));
    }

    ITaskAssign taskAssign = new TaskAssignImpl();
    ExecutionGraph graph = taskAssign.assign(plan, workers);
    List<ExecutionNode> nodes = graph.getExecutionNodeList();
    Assert.assertEquals(nodes.size(), 2);

    // First node: the source, with a single task and a single outgoing edge.
    ExecutionNode source = nodes.get(0);
    Assert.assertEquals(source.getNodeType(), NodeType.SOURCE);
    Assert.assertEquals(source.getExecutionTasks().size(), 1);
    Assert.assertEquals(source.getOutputEdges().size(), 1);

    List<ExecutionEdge> outgoing = source.getOutputEdges();
    Assert.assertEquals(outgoing.size(), 1);
    ExecutionEdge sourceToSink = outgoing.get(0);
    Assert.assertEquals(sourceToSink.getPartition().getClass(), RoundRobinPartition.class);

    // Second node: the sink, with a single task and no outgoing edges.
    ExecutionNode sink = nodes.get(1);
    Assert.assertEquals(sink.getNodeType(), NodeType.SINK);
    Assert.assertEquals(sink.getExecutionTasks().size(), 1);
    Assert.assertEquals(sink.getOutputEdges().size(), 0);
  }

  /**
   * Builds a plan in which a three-element source feeds a sink that logs each element.
   *
   * @return the plan produced by {@link PlanBuilder} for that single sink
   */
  public Plan buildDataSyncPlan() {
    StreamingContext context = StreamingContext.buildContext();
    DataStream<String> letters =
        StreamSource.buildSource(context, Lists.newArrayList("a", "b", "c"));
    StreamSink loggingSink = letters.sink(element -> LOGGER.info(element));
    return new PlanBuilder(Lists.newArrayList(loggingSink)).buildPlan();
  }
}
@@ -0,0 +1,234 @@
package org.ray.streaming.runtime.streamingqueue;
import com.google.common.collect.ImmutableMap;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.options.ActorCreationOptions;
import org.ray.api.options.ActorCreationOptions.Builder;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.function.impl.FlatMapFunction;
import org.ray.streaming.api.function.impl.ReduceFunction;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.util.EnvUtil;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Cluster-mode tests for the native streaming queue.
 *
 * <p>{@link #testReaderWriter()} wires a {@code WriterWorker} actor to a
 * {@code ReaderWorker} actor and checks that every message arrives.
 * {@link #testWordCount()} runs a word-count job over the native channel; the sink
 * writes its running result to a file, which the test polls.
 */
public class StreamingQueueTest implements Serializable {

  private static final Logger LOGGER = LoggerFactory.getLogger(StreamingQueueTest.class);

  static {
    EnvUtil.loadNativeLibraries();
  }

  @org.testng.annotations.BeforeSuite
  public void suiteSetUp() throws Exception {
    LOGGER.info("Do set up");
    // RuntimeMXBean#getName is split on '@' and the first part is logged as the pid.
    String management = ManagementFactory.getRuntimeMXBean().getName();
    String pid = management.split("@")[0];
    LOGGER.info("StreamingQueueTest pid: {}", pid);
    LOGGER.info("java.library.path = {}", System.getProperty("java.library.path"));
  }

  @org.testng.annotations.AfterSuite
  public void suiteTearDown() throws Exception {
    LOGGER.warn("Do tear down");
  }

  @BeforeClass
  public void setUp() {
  }

  @BeforeMethod
  void beforeMethod() {
    LOGGER.info("beforeTest");
    startRayCluster();
  }

  @AfterMethod
  void afterMethod() {
    LOGGER.info("afterTest");
    Ray.shutdown();
    System.clearProperty("ray.run-mode");
  }

  /**
   * Shuts down any running Ray runtime, sets the system properties these tests rely
   * on, and initializes Ray again.
   */
  private static void startRayCluster() {
    Ray.shutdown();
    System.setProperty("ray.resources", "CPU:4,RES-A:4");
    System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
    System.setProperty("ray.run-mode", "CLUSTER");
    System.setProperty("ray.redirect-output", "true");
    // ray init
    Ray.init();
  }

  @Test(timeOut = 3000000)
  public void testReaderWriter() {
    LOGGER.info("StreamingQueueTest.testReaderWriter run-mode: {}",
        System.getProperty("ray.run-mode"));
    // This test has always restarted Ray itself in addition to beforeMethod(); that
    // is kept, but through the shared helper.
    startRayCluster();

    ActorCreationOptions.Builder builder = new Builder();
    RayActor<WriterWorker> writerActor = Ray.createActor(WriterWorker::new, "writer",
        builder.createActorCreationOptions());
    RayActor<ReaderWorker> readerActor = Ray.createActor(ReaderWorker::new, "reader",
        builder.createActorCreationOptions());

    LOGGER.info("call getName on writerActor: {}",
        Ray.call(WriterWorker::getName, writerActor).get());
    LOGGER.info("call getName on readerActor: {}",
        Ray.call(ReaderWorker::getName, readerActor).get());

    // Both sides use the same queue ids: what the writer outputs, the reader inputs.
    List<String> outputQueueList = new ArrayList<>();
    List<String> inputQueueList = new ArrayList<>();
    int queueNum = 2;
    for (int i = 0; i < queueNum; ++i) {
      String qid = ChannelID.genRandomIdStr();
      LOGGER.info("getRandomQueueId: {}", qid);
      inputQueueList.add(qid);
      outputQueueList.add(qid);
    }

    final int msgCount = 100;
    Ray.call(ReaderWorker::init, readerActor, inputQueueList, writerActor, msgCount);
    // Pause between the two init calls. NOTE(review): presumably this gives the
    // reader time to come up before the writer starts - confirm.
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting for the reader to initialize.", e);
      Thread.currentThread().interrupt();
    }
    Ray.call(WriterWorker::init, writerActor, outputQueueList, readerActor, msgCount);

    // Poll the reader for up to 20 seconds until it has seen every message.
    long waitedMs = 0;
    while (waitedMs < 20000
        && Ray.call(ReaderWorker::getTotalMsg, readerActor).get() < msgCount * queueNum) {
      try {
        Thread.sleep(1000);
        waitedMs += 1000;
      } catch (InterruptedException e) {
        LOGGER.warn("Interrupted while waiting for the reader to finish.", e);
        // Stop polling; the assertion below reports what has been received so far.
        Thread.currentThread().interrupt();
        break;
      }
    }

    Assert.assertEquals(
        Ray.call(ReaderWorker::getTotalMsg, readerActor).get().intValue(),
        msgCount * queueNum);
  }

  @Test(timeOut = 60000)
  public void testWordCount() {
    LOGGER.info("StreamingQueueTest.testWordCount run-mode: {}",
        System.getProperty("ray.run-mode"));
    String resultFile = "/tmp/org.ray.streaming.runtime.streamingqueue.testWordCount.txt";
    deleteResultFile(resultFile);

    Map<String, Integer> wordCount = new ConcurrentHashMap<>();
    StreamingContext streamingContext = StreamingContext.buildContext();
    Map<String, Object> config = new HashMap<>();
    config.put(Config.STREAMING_BATCH_MAX_COUNT, 1);
    config.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
    config.put(Config.CHANNEL_SIZE, "100000");
    streamingContext.withConfig(config);

    List<String> text = new ArrayList<>();
    text.add("hello world eagle eagle eagle");
    StreamSource<String> streamSource = StreamSource.buildSource(streamingContext, text);
    streamSource
        .flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
          String[] records = value.split(" ");
          for (String record : records) {
            collector.collect(new WordAndCount(record, 1));
          }
        })
        .keyBy(pair -> pair.word)
        .reduce((ReduceFunction<WordAndCount>) (oldValue, newValue) -> {
          LOGGER.info("reduce: {} {}", oldValue, newValue);
          return new WordAndCount(oldValue.word, oldValue.count + newValue.count);
        })
        .sink(s -> {
          LOGGER.info("sink {} {}", s.word, s.count);
          wordCount.put(s.word, s.count);
          // The running result goes through a file so the test can read it back.
          serializeResultToFile(resultFile, wordCount);
        });
    streamingContext.execute();

    Map<String, Integer> expected = ImmutableMap.of("eagle", 3, "hello", 1, "world", 1);

    // Sleep until the count for every word is computed. The file is rewritten after
    // every sink call, so it passes through partial states (missing words, eagle=1,
    // eagle=2). Poll for the exact final counts; the loop is bounded by the timeOut
    // of this test.
    Map<String, Integer> checkWordCount = deserializeResultFromFile(resultFile);
    while (!expected.equals(checkWordCount)) {
      LOGGER.info("sleep");
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        LOGGER.warn("Got an exception while sleeping.", e);
        Thread.currentThread().interrupt();
        Assert.fail("Interrupted while waiting for the word counts", e);
      }
      checkWordCount = deserializeResultFromFile(resultFile);
    }
    LOGGER.info("check");
    Assert.assertEquals(checkWordCount, expected);
  }

  /**
   * Writes {@code obj} to {@code fileName} with Java serialization, replacing any
   * previous content. Failures are logged and otherwise ignored.
   *
   * @param fileName path of the file to write
   * @param obj object to serialize
   */
  private void serializeResultToFile(String fileName, Object obj) {
    // Both streams are declared as resources so they are closed (and flushed) even
    // when writing fails.
    try (FileOutputStream fileOut = new FileOutputStream(fileName);
        ObjectOutputStream out = new ObjectOutputStream(fileOut)) {
      out.writeObject(obj);
    } catch (Exception e) {
      LOGGER.error(String.valueOf(e));
    }
  }

  /**
   * Reads back the map written by {@link #serializeResultToFile(String, Object)}.
   *
   * <p>This method only reads and never asserts on the content: the caller polls it
   * while the file may still hold a partial result.
   *
   * @param fileName path of the file to read
   * @return the deserialized map, or {@code null} when the file is missing or cannot
   *     be deserialized (the failure is logged)
   */
  @SuppressWarnings("unchecked")
  private Map<String, Integer> deserializeResultFromFile(String fileName) {
    try (FileInputStream fileIn = new FileInputStream(fileName);
        ObjectInputStream in = new ObjectInputStream(fileIn)) {
      return (Map<String, Integer>) in.readObject();
    } catch (Exception e) {
      LOGGER.error(String.valueOf(e));
      return null;
    }
  }

  /** Immutable (word, count) pair flowing through the word-count job. */
  private static class WordAndCount implements Serializable {

    public final String word;
    public final Integer count;

    public WordAndCount(String key, Integer count) {
      this.word = key;
      this.count = count;
    }
  }

  /**
   * Removes a result file left behind by an earlier run and registers the path for
   * deletion when this JVM exits.
   *
   * @param path path of the result file
   */
  private void deleteResultFile(String path) {
    File file = new File(path);
    // deleteOnExit() alone would leave a stale file in place for the whole run.
    if (file.exists() && !file.delete()) {
      LOGGER.warn("Failed to delete stale result file {}", path);
    }
    file.deleteOnExit();
  }
}
@@ -0,0 +1,280 @@
package org.ray.streaming.runtime.streamingqueue;
import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.annotation.RayRemote;
import org.ray.api.id.ActorId;
import org.ray.runtime.RayMultiWorkerNativeRuntime;
import org.ray.runtime.actor.NativeRayActor;
import org.ray.runtime.functionmanager.JavaFunctionDescriptor;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.transfer.DataMessage;
import org.ray.streaming.runtime.transfer.DataReader;
import org.ray.streaming.runtime.transfer.DataWriter;
import org.ray.streaming.runtime.transfer.TransferHandler;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
/**
 * Common base of the reader and writer test actors.
 *
 * <p>The constructor creates a {@link TransferHandler} and hands it function
 * descriptors naming the four {@code on*Message} methods of this class; each of
 * those methods simply forwards its buffer to the handler.
 */
public class Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(Worker.class);

  protected TransferHandler transferHandler;

  public Worker() {
    this.transferHandler = new TransferHandler(
        ((RayMultiWorkerNativeRuntime) Ray.internal())
            .getCurrentRuntime().getNativeCoreWorkerPointer(),
        callback("onWriterMessage", "([B)V"),
        callback("onWriterMessageSync", "([B)[B"),
        callback("onReaderMessage", "([B)V"),
        callback("onReaderMessageSync", "([B)[B"));
  }

  /**
   * Builds the descriptor of one callback method declared on this class.
   *
   * @param methodName name of the callback method
   * @param typeDescriptor JVM method descriptor, e.g. {@code ([B)V} for a method
   *     that takes a {@code byte[]} and returns {@code void}
   */
  private static JavaFunctionDescriptor callback(String methodName, String typeDescriptor) {
    return new JavaFunctionDescriptor(Worker.class.getName(), methodName, typeDescriptor);
  }

  /** Forwards a reader-side message to the transfer handler. */
  public void onReaderMessage(byte[] buffer) {
    transferHandler.onReaderMessage(buffer);
  }

  /** Forwards a reader-side message to the transfer handler and returns its reply. */
  public byte[] onReaderMessageSync(byte[] buffer) {
    byte[] reply = transferHandler.onReaderMessageSync(buffer);
    return reply;
  }

  /** Forwards a writer-side message to the transfer handler. */
  public void onWriterMessage(byte[] buffer) {
    transferHandler.onWriterMessage(buffer);
  }

  /** Forwards a writer-side message to the transfer handler and returns its reply. */
  public byte[] onWriterMessageSync(byte[] buffer) {
    byte[] reply = transferHandler.onWriterMessageSync(buffer);
    return reply;
  }
}
/**
 * Test actor that reads messages from a set of queues and verifies their content.
 *
 * <p>{@link #init(List, RayActor, int)} creates the {@link DataReader} and starts a
 * background thread running {@link #consume()}. Progress is exposed through
 * {@link #getTotalMsg()} and {@link #done()}.
 */
@RayRemote
class ReaderWorker extends Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(ReaderWorker.class);

  private String name = null;
  private List<String> inputQueueList = null;
  private List<ActorId> inputActorIds = new ArrayList<>();
  private DataReader dataReader = null;
  private RayActor peerActor = null;
  private int msgCount = 0;
  // Written only by the consumer thread and read from other threads through
  // getTotalMsg()/done(), hence volatile.
  private volatile int totalMsg = 0;

  public ReaderWorker(String name) {
    LOGGER.info("ReaderWorker constructor");
    this.name = name;
  }

  /** Logs the pid of the hosting process and returns the name given at construction. */
  public String getName() {
    String management = ManagementFactory.getRuntimeMXBean().getName();
    String pid = management.split("@")[0];
    LOGGER.info("pid: {} name: {}", pid, name);
    return name;
  }

  /** Trivial remote method; returns the constant {@code "testRayCall"}. */
  public String testRayCall() {
    LOGGER.info("testRayCall called");
    return "testRayCall";
  }

  /**
   * Creates the reader for the given queues and starts consuming on a new thread.
   *
   * @param inputQueueList ids of the queues to read from
   * @param peer the actor on the other end; its id is registered once per queue
   * @param msgCount number of messages expected on each queue
   * @return always {@code true}
   */
  public boolean init(List<String> inputQueueList, RayActor peer, int msgCount) {
    this.inputQueueList = inputQueueList;
    this.peerActor = peer;
    this.msgCount = msgCount;

    LOGGER.info("ReaderWorker init");
    LOGGER.info("java.library.path = {}", System.getProperty("java.library.path"));

    // Every queue is served by the same peer, so its id is added once per queue.
    for (String queue : this.inputQueueList) {
      inputActorIds.add(this.peerActor.getId());
      LOGGER.info("ReaderWorker actorId: {}", this.peerActor.getId());
    }

    Map<String, String> conf = new HashMap<>();
    conf.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
    conf.put(Config.CHANNEL_SIZE, "100000");
    conf.put(Config.STREAMING_JOB_NAME, "integrationTest1");
    dataReader = new DataReader(inputQueueList, inputActorIds, conf);

    // Should not GetBundle in RayCall thread
    Thread readThread = new Thread(Ray.wrapRunnable(this::consume));
    readThread.start();

    LOGGER.info("ReaderWorker init done");
    return true;
  }

  /**
   * Reads {@code msgCount} messages per input queue and checks each one.
   *
   * <p>A message body is expected to start with an int equal to the body length,
   * followed by the bytes {@code 0, 1, 2, ...}. A {@code null} read is logged and
   * retried without counting.
   */
  public final void consume() {
    final int expectedTotal = msgCount * inputQueueList.size();
    int received = 0;
    while (received < expectedTotal) {
      // NOTE(review): 100 is presumably a read timeout in milliseconds - confirm.
      DataMessage dataMessage = dataReader.read(100);
      if (dataMessage == null) {
        LOGGER.error("dataMessage is null");
        continue;
      }

      ByteBuffer body = dataMessage.body();
      int bufferSize = body.remaining();
      int dataSize = body.getInt();

      // check size
      LOGGER.info("capacity {} bufferSize {} dataSize {}",
          body.capacity(), bufferSize, dataSize);
      Assert.assertEquals(bufferSize, dataSize);

      LOGGER.info("{} : {} message.", received, dataMessage);

      // check content: everything after the 4-byte length prefix
      for (int j = 0; j < dataSize - 4; ++j) {
        Assert.assertEquals(body.get(), (byte) j);
      }

      // Single writer, so a plain volatile store is enough to publish the progress.
      totalMsg = ++received;
    }
    LOGGER.info("ReaderWorker consume data done.");
  }

  void onQueueTransfer(long handler, byte[] buffer) {
  }

  /**
   * @return {@code true} once {@link #init} has run and every expected message
   *     ({@code msgCount} per input queue) has been consumed
   */
  public boolean done() {
    List<String> queues = inputQueueList;
    return queues != null && totalMsg == msgCount * queues.size();
  }

  /** @return the number of messages consumed and verified so far */
  public int getTotalMsg() {
    return totalMsg;
  }
}
/**
 * Test actor that writes messages of random size to a set of queues.
 *
 * <p>{@link #init(List, RayActor, int)} creates the {@link DataWriter} and starts a
 * background thread running {@link #produce()}.
 */
@RayRemote
class WriterWorker extends Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(WriterWorker.class);

  private String name = null;
  private List<String> outputQueueList = null;
  private List<ActorId> outputActorIds = new ArrayList<>();
  DataWriter dataWriter = null;
  RayActor peerActor = null;
  int msgCount = 0;

  public WriterWorker(String name) {
    this.name = name;
  }

  /** Logs the pid of the hosting process and returns the name given at construction. */
  public String getName() {
    String management = ManagementFactory.getRuntimeMXBean().getName();
    String pid = management.split("@")[0];
    LOGGER.info("pid: {} name: {}", pid, name);
    return name;
  }

  /**
   * Calls {@code getName} on the given reader actor and returns the result.
   *
   * @param readerActor handle of a {@code ReaderWorker} actor
   * @return the name reported by the reader
   */
  public String testCallReader(RayActor readerActor) {
    String name = (String) Ray.call(ReaderWorker::getName, readerActor).get();
    LOGGER.info("testCallReader: {}", name);
    return name;
  }

  /**
   * Creates the writer for the given queues and starts producing on a new thread.
   *
   * @param outputQueueList ids of the queues to write to
   * @param peer the actor on the other end; its id is registered once per queue
   * @param msgCount number of messages to write to each queue
   * @return always {@code true}
   */
  public boolean init(List<String> outputQueueList, RayActor peer, int msgCount) {
    this.outputQueueList = outputQueueList;
    this.peerActor = peer;
    this.msgCount = msgCount;

    LOGGER.info("WriterWorker init:");

    // Every queue is served by the same peer, so its id is added once per queue.
    for (String queue : this.outputQueueList) {
      outputActorIds.add(this.peerActor.getId());
      LOGGER.info("WriterWorker actorId: {}", this.peerActor.getId());
    }

    LOGGER.info("Peer isDirectActorCall: {}", ((NativeRayActor) peer).isDirectCallActor());
    // Three blocking calls to the peer before the writer is created.
    int count = 3;
    while (count-- != 0) {
      Ray.call(ReaderWorker::testRayCall, peer).get();
    }

    try {
      Thread.sleep(2 * 1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting before creating the writer.", e);
      Thread.currentThread().interrupt();
    }

    Map<String, String> conf = new HashMap<>();
    conf.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
    conf.put(Config.CHANNEL_SIZE, "100000");
    conf.put(Config.STREAMING_JOB_NAME, "integrationTest1");

    dataWriter = new DataWriter(this.outputQueueList, this.outputActorIds, conf);
    Thread writerThread = new Thread(Ray.wrapRunnable(this::produce));
    writerThread.start();

    LOGGER.info("WriterWorker init done");
    return true;
  }

  /**
   * Writes {@code msgCount} messages to every output queue, then sleeps for 20
   * seconds before returning.
   *
   * <p>Each message is between 10 and 109 bytes long: a 4-byte int holding the total
   * length, followed by the bytes {@code 0, 1, 2, ...} up to that length.
   */
  public final void produce() {
    Random random = new Random();

    // msgCount comes from init(); it is not overridden here.
    for (int i = 0; i < this.msgCount; ++i) {
      for (int j = 0; j < outputQueueList.size(); ++j) {
        LOGGER.info("WriterWorker produce");

        int dataSize = (random.nextInt(100)) + 10;
        LOGGER.info("dataSize: {}", dataSize);

        ByteBuffer bb = ByteBuffer.allocate(dataSize);
        bb.putInt(dataSize);
        for (int k = 0; k < dataSize - 4; ++k) {
          bb.put((byte) k);
        }
        // Switch the buffer from writing to reading before handing it over.
        bb.flip();

        ChannelID qid = ChannelID.from(outputQueueList.get(j));
        dataWriter.write(qid, bb);
      }
    }

    // NOTE(review): presumably keeps this thread alive while the reader drains the
    // queues - confirm.
    try {
      Thread.sleep(20 * 1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting after producing.", e);
      Thread.currentThread().interrupt();
    }
  }
}
@@ -0,0 +1,22 @@
package org.ray.streaming.runtime.transfer;
import static org.testng.Assert.assertEquals;
import org.ray.streaming.runtime.util.EnvUtil;
import org.testng.annotations.Test;
/** Checks the length of a random channel id in its string and byte forms. */
public class ChannelIDTest {

  static {
    EnvUtil.loadNativeLibraries();
  }

  @Test
  public void testIdStrToBytes() {
    final String randomId = ChannelID.genRandomIdStr();
    final int expectedByteLength = ChannelID.ID_LENGTH;

    // The string form is twice as long as the byte form.
    assertEquals(randomId.length(), expectedByteLength * 2);

    final byte[] idBytes = ChannelID.idStrToBytes(randomId);
    assertEquals(idBytes.length, expectedByteLength);
  }
}
@@ -0,0 +1,6 @@
log4j.rootLogger=INFO, stdout
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
@@ -0,0 +1,3 @@
ray {
run-mode = SINGLE_PROCESS
}