[Streaming] operator chain (#8910)

This commit is contained in:
chaokunyang
2020-06-18 15:11:07 +08:00
committed by GitHub
parent 003cec87b4
commit 5edddf6eac
39 changed files with 1058 additions and 140 deletions
@@ -2,9 +2,9 @@ package io.ray.streaming.runtime.core.processor;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.OperatorType;
import io.ray.streaming.operator.SourceOperator;
import io.ray.streaming.operator.StreamOperator;
import io.ray.streaming.operator.TwoInputOperator;
import io.ray.streaming.operator.impl.SourceOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1,7 +1,7 @@
package io.ray.streaming.runtime.core.processor;
import io.ray.streaming.message.Record;
import io.ray.streaming.operator.impl.SourceOperator;
import io.ray.streaming.operator.SourceOperator;
/**
* The processor for the stream sources, containing a SourceOperator.
@@ -30,7 +30,7 @@ public class GraphManagerImpl implements GraphManager {
ExecutionGraph executionGraph = setupStructure(jobGraph);
// set max parallelism
int maxParallelism = jobGraph.getJobVertexList().stream()
int maxParallelism = jobGraph.getJobVertices().stream()
.map(JobVertex::getParallelism)
.max(Integer::compareTo).get();
executionGraph.setMaxParallelism(maxParallelism);
@@ -49,7 +49,7 @@ public class GraphManagerImpl implements GraphManager {
// create vertex
Map<Integer, ExecutionJobVertex> exeJobVertexMap = new LinkedHashMap<>();
long buildTime = executionGraph.getBuildTime();
for (JobVertex jobVertex : jobGraph.getJobVertexList()) {
for (JobVertex jobVertex : jobGraph.getJobVertices()) {
int jobVertexId = jobVertex.getVertexId();
exeJobVertexMap.put(jobVertexId,
new ExecutionJobVertex(
@@ -60,7 +60,7 @@ public class GraphManagerImpl implements GraphManager {
}
// connect vertex
jobGraph.getJobEdgeList().stream().forEach(jobEdge -> {
jobGraph.getJobEdges().forEach(jobEdge -> {
ExecutionJobVertex source = exeJobVertexMap.get(jobEdge.getSrcVertexId());
ExecutionJobVertex target = exeJobVertexMap.get(jobEdge.getTargetVertexId());
@@ -70,8 +70,8 @@ public class GraphManagerImpl implements GraphManager {
source.getOutputEdges().add(executionJobEdge);
target.getInputEdges().add(executionJobEdge);
source.getExecutionVertices().stream().forEach(vertex -> {
target.getExecutionVertices().stream().forEach(outputVertex -> {
source.getExecutionVertices().forEach(vertex -> {
target.getExecutionVertices().forEach(outputVertex -> {
ExecutionEdge executionEdge = new ExecutionEdge(vertex, outputVertex, executionJobEdge);
vertex.getOutputEdges().add(executionEdge);
outputVertex.getInputEdges().add(executionEdge);
@@ -7,6 +7,7 @@ import io.ray.streaming.api.partition.Partition;
import io.ray.streaming.operator.Operator;
import io.ray.streaming.python.PythonFunction;
import io.ray.streaming.python.PythonOperator;
import io.ray.streaming.python.PythonOperator.ChainedPythonOperator;
import io.ray.streaming.python.PythonPartition;
import io.ray.streaming.runtime.core.graph.executiongraph.ExecutionEdge;
import io.ray.streaming.runtime.core.graph.executiongraph.ExecutionVertex;
@@ -77,6 +78,7 @@ public class GraphPbBuilder {
executionVertexBuilder.setOperator(
ByteString.copyFrom(
serializeOperator(executionVertex.getStreamOperator())));
executionVertexBuilder.setChained(isPythonChainedOperator(executionVertex.getStreamOperator()));
executionVertexBuilder.setWorkerActor(
ByteString.copyFrom(
((NativeActorHandle) (executionVertex.getWorkerActor())).toBytes()));
@@ -104,17 +106,35 @@ public class GraphPbBuilder {
private byte[] serializeOperator(Operator operator) {
if (operator instanceof PythonOperator) {
PythonOperator pythonOperator = (PythonOperator) operator;
return serializer.serialize(Arrays.asList(
serializeFunction(pythonOperator.getFunction()),
pythonOperator.getModuleName(),
pythonOperator.getClassName()
));
if (isPythonChainedOperator(operator)) {
return serializePythonChainedOperator((ChainedPythonOperator) operator);
} else {
PythonOperator pythonOperator = (PythonOperator) operator;
return serializer.serialize(Arrays.asList(
serializeFunction(pythonOperator.getFunction()),
pythonOperator.getModuleName(),
pythonOperator.getClassName()
));
}
} else {
return new byte[0];
}
}
/**
 * Tells whether the given operator is a chained Python operator.
 *
 * @param operator the operator to inspect
 * @return {@code true} if {@code operator} is an instance of
 *     {@link ChainedPythonOperator}, {@code false} otherwise (including {@code null})
 */
private boolean isPythonChainedOperator(Operator operator) {
return operator instanceof ChainedPythonOperator;
}
/**
 * Serializes a chained Python operator.
 *
 * <p>The payload handed to the serializer is a two-element list: first the list of
 * serialized bytes for each operator in the chain, then the chain's configs.
 *
 * @param operator the chained Python operator to serialize
 * @return the bytes produced by the serializer for the payload described above
 */
private byte[] serializePythonChainedOperator(ChainedPythonOperator operator) {
  // Every operator in the chain goes through serializeOperator, one entry per operator.
  List<byte[]> operatorPayloads = operator.getOperators()
      .stream()
      .map(this::serializeOperator)
      .collect(Collectors.toList());
  return serializer.serialize(Arrays.asList(operatorPayloads, operator.getConfigs()));
}
private byte[] serializeFunction(Function function) {
if (function instanceof PythonFunction) {
PythonFunction pyFunc = (PythonFunction) function;
@@ -1,6 +1,6 @@
package io.ray.streaming.runtime.worker.tasks;
import io.ray.streaming.operator.impl.SourceOperator;
import io.ray.streaming.operator.SourceOperator;
import io.ray.streaming.runtime.core.processor.Processor;
import io.ray.streaming.runtime.core.processor.SourceProcessor;
import io.ray.streaming.runtime.worker.JobWorker;
@@ -7,6 +7,7 @@ import io.ray.streaming.api.stream.DataStreamSource;
import io.ray.streaming.api.stream.StreamSink;
import io.ray.streaming.jobgraph.JobGraph;
import io.ray.streaming.jobgraph.JobGraphBuilder;
import io.ray.streaming.jobgraph.JobVertex;
import io.ray.streaming.runtime.BaseUnitTest;
import io.ray.streaming.runtime.config.StreamingConfig;
import io.ray.streaming.runtime.config.master.ResourceConfig;
@@ -40,10 +41,10 @@ public class ExecutionGraphTest extends BaseUnitTest {
ExecutionGraph executionGraph = buildExecutionGraph(graphManager, jobGraph);
List<ExecutionJobVertex> executionJobVertices = executionGraph.getExecutionJobVertexList();
Assert.assertEquals(executionJobVertices.size(), jobGraph.getJobVertexList().size());
Assert.assertEquals(executionJobVertices.size(), jobGraph.getJobVertices().size());
int totalVertexNum = jobGraph.getJobVertexList().stream()
.mapToInt(vertex -> vertex.getParallelism()).sum();
int totalVertexNum = jobGraph.getJobVertices().stream()
.mapToInt(JobVertex::getParallelism).sum();
Assert.assertEquals(executionGraph.getAllExecutionVertices().size(), totalVertexNum);
Assert.assertEquals(executionGraph.getAllExecutionVertices().size(),
executionGraph.getExecutionVertexIdGenerator().get());
@@ -66,7 +67,7 @@ public class ExecutionGraphTest extends BaseUnitTest {
List<ExecutionVertex> downStreamVertices = downStream.getExecutionVertices();
upStreamVertices.forEach(vertex -> {
Assert.assertEquals(vertex.getResource().get(ResourceType.CPU.name()), 2.0);
vertex.getOutputEdges().stream().forEach(upStreamOutPutEdge -> {
vertex.getOutputEdges().forEach(upStreamOutPutEdge -> {
Assert.assertTrue(downStreamVertices.contains(upStreamOutPutEdge.getTargetExecutionVertex()));
});
});