[Streaming] operator chain (#8910)

This commit is contained in:
chaokunyang
2020-06-18 15:11:07 +08:00
committed by GitHub
parent 003cec87b4
commit 5edddf6eac
39 changed files with 1058 additions and 140 deletions
@@ -2,8 +2,8 @@ package io.ray.streaming.jobgraph;
import com.google.common.collect.Lists;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.api.partition.impl.ForwardPartition;
import io.ray.streaming.api.partition.impl.KeyPartition;
import io.ray.streaming.api.partition.impl.RoundRobinPartition;
import io.ray.streaming.api.stream.DataStream;
import io.ray.streaming.api.stream.DataStreamSource;
import io.ray.streaming.api.stream.StreamSink;
@@ -20,14 +20,14 @@ public class JobGraphBuilderTest {
@Test
public void testDataSync() {
JobGraph jobGraph = buildDataSyncJobGraph();
List<JobVertex> jobVertexList = jobGraph.getJobVertexList();
List<JobEdge> jobEdgeList = jobGraph.getJobEdgeList();
List<JobVertex> jobVertexList = jobGraph.getJobVertices();
List<JobEdge> jobEdgeList = jobGraph.getJobEdges();
Assert.assertEquals(jobVertexList.size(), 2);
Assert.assertEquals(jobEdgeList.size(), 1);
JobEdge jobEdge = jobEdgeList.get(0);
Assert.assertEquals(jobEdge.getPartition().getClass(), RoundRobinPartition.class);
Assert.assertEquals(jobEdge.getPartition().getClass(), ForwardPartition.class);
JobVertex sinkVertex = jobVertexList.get(1);
JobVertex sourceVertex = jobVertexList.get(0);
@@ -50,8 +50,8 @@ public class JobGraphBuilderTest {
@Test
public void testKeyByJobGraph() {
JobGraph jobGraph = buildKeyByJobGraph();
List<JobVertex> jobVertexList = jobGraph.getJobVertexList();
List<JobEdge> jobEdgeList = jobGraph.getJobEdgeList();
List<JobVertex> jobVertexList = jobGraph.getJobVertices();
List<JobEdge> jobEdgeList = jobGraph.getJobEdges();
Assert.assertEquals(jobVertexList.size(), 3);
Assert.assertEquals(jobEdgeList.size(), 2);
@@ -68,7 +68,7 @@ public class JobGraphBuilderTest {
JobEdge source2KeyBy = jobEdgeList.get(1);
Assert.assertEquals(keyBy2Sink.getPartition().getClass(), KeyPartition.class);
Assert.assertEquals(source2KeyBy.getPartition().getClass(), RoundRobinPartition.class);
Assert.assertEquals(source2KeyBy.getPartition().getClass(), ForwardPartition.class);
}
public JobGraph buildKeyByJobGraph() {
@@ -88,8 +88,8 @@ public class JobGraphBuilderTest {
JobGraph jobGraph = buildKeyByJobGraph();
jobGraph.generateDigraph();
String diGraph = jobGraph.getDigraph();
System.out.println(diGraph);
Assert.assertTrue(diGraph.contains("1-SourceOperator -> 2-KeyByOperator"));
Assert.assertTrue(diGraph.contains("2-KeyByOperator -> 3-SinkOperator"));
LOG.info(diGraph);
Assert.assertTrue(diGraph.contains("\"1-SourceOperatorImpl\" -> \"2-KeyByOperator\""));
Assert.assertTrue(diGraph.contains("\"2-KeyByOperator\" -> \"3-SinkOperator\""));
}
}
@@ -0,0 +1,70 @@
package io.ray.streaming.jobgraph;
import static org.testng.Assert.assertEquals;
import com.google.common.collect.Lists;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.api.stream.DataStream;
import io.ray.streaming.api.stream.DataStreamSource;
import io.ray.streaming.python.PythonFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.Test;
/**
 * Tests for {@link JobGraphOptimizer}: each case builds a job graph from a stream pipeline,
 * runs the optimizer on it and checks the number of job vertices before and after.
 */
public class JobGraphOptimizerTest {
  private static final Logger LOG = LoggerFactory.getLogger(JobGraphOptimizerTest.class);

  /**
   * Pure Java pipeline: three collection sources combined through
   * filter, map, union, join and sink. Expects 8 vertices before optimization and 5 after.
   */
  @Test
  public void testOptimize() {
    StreamingContext streamingContext = StreamingContext.buildContext();
    DataStream<Integer> intSource = DataStreamSource.fromCollection(streamingContext,
        Lists.newArrayList(1, 2, 3));
    DataStream<String> unionSource = DataStreamSource.fromCollection(streamingContext,
        Lists.newArrayList("1", "2", "3"));
    DataStream<String> joinSource = DataStreamSource.fromCollection(streamingContext,
        Lists.newArrayList("2", "3", "4"));

    intSource.filter(value -> value > 1)
        .map(String::valueOf)
        .union(unionSource)
        .join(joinSource)
        .sink(item -> System.out.println("Sink " + item));

    assertVertexCounts(streamingContext, 8, 5);
  }

  /**
   * Hybrid pipeline: both sources are converted to Python streams, transformed with Python
   * functions, unioned, converted back to a Java stream and sunk.
   * Expects 8 vertices before optimization and 6 after.
   */
  @Test
  public void testOptimizeHybridStream() {
    StreamingContext streamingContext = StreamingContext.buildContext();
    DataStream<Integer> intSource = DataStreamSource.fromCollection(streamingContext,
        Lists.newArrayList(1, 2, 3));
    DataStream<String> stringSource = DataStreamSource.fromCollection(streamingContext,
        Lists.newArrayList("1", "2", "3"));

    intSource.asPythonStream()
        .map(pyFunc(1))
        .filter(pyFunc(2))
        .union(stringSource.asPythonStream().filter(pyFunc(3)).map(pyFunc(4)))
        .asJavaStream()
        .sink(item -> System.out.println("Sink " + item));

    assertVertexCounts(streamingContext, 8, 6);
  }

  /**
   * Builds the job graph from the sinks registered on {@code streamingContext}, checks its
   * vertex count, then optimizes it and checks the vertex count of the optimized graph.
   * Both graphs are logged as digraphs, and the optimized one is also printed.
   *
   * @param streamingContext context on which the pipeline under test was defined
   * @param expectedVertices expected number of job vertices before optimization
   * @param expectedOptimizedVertices expected number of job vertices after optimization
   */
  private static void assertVertexCounts(
      StreamingContext streamingContext, int expectedVertices, int expectedOptimizedVertices) {
    JobGraph originalGraph = new JobGraphBuilder(streamingContext.getStreamSinks()).build();
    LOG.info("Digraph {}", originalGraph.generateDigraph());
    assertEquals(originalGraph.getJobVertices().size(), expectedVertices);

    JobGraph optimizedGraph = new JobGraphOptimizer(originalGraph).optimize();
    optimizedGraph.printJobGraph();
    LOG.info("Optimized graph {}", optimizedGraph.generateDigraph());
    assertEquals(optimizedGraph.getJobVertices().size(), expectedOptimizedVertices);
  }

  /**
   * Creates a {@link PythonFunction} for module {@code "module"} whose function name is
   * {@code "func"} followed by the given number.
   */
  private PythonFunction pyFunc(int number) {
    String functionName = "func" + number;
    return new PythonFunction("module", functionName);
  }
}