[Streaming] Streaming Python API (#6755)

This commit is contained in:
chaokunyang
2020-02-25 10:33:33 +08:00
committed by GitHub
parent 2c1f4fd82c
commit 8b6784de06
71 changed files with 2701 additions and 1928 deletions
@@ -0,0 +1,6 @@
package org.ray.streaming.api;
/**
 * Identifies the programming language an operator is implemented in.
 *
 * <p>Operators report one of these values through {@code getLanguage()}.
 */
public enum Language {
  /** The operator is implemented in Java. */
  JAVA,

  /** The operator is implemented in Python. */
  PYTHON
}
@@ -70,6 +70,10 @@ public class StreamingContext implements Serializable {
streamSinks.add(streamSink);
}
/**
 * Returns the stream sinks held by this context.
 *
 * <p>The internal list instance itself is returned, not a copy, so changes made by the
 * caller are visible to this context.
 *
 * @return the list of stream sinks of this context
 */
public List<StreamSink> getStreamSinks() {
  return this.streamSinks;
}
/**
 * Sets the job configuration of this context.
 *
 * <p>The given map reference is stored as-is (no defensive copy) and replaces any
 * previously stored configuration.
 *
 * @param jobConfig the job configuration to use
 */
public void withConfig(final Map<String, String> jobConfig) {
  this.jobConfig = jobConfig;
}
@@ -27,7 +27,7 @@ public class JobGraphBuilder {
}
public JobGraphBuilder(List<StreamSink> streamSinkList, String jobName,
Map<String, String> jobConfig) {
Map<String, String> jobConfig) {
this.jobGraph = new JobGraph(jobName, jobConfig);
this.streamSinkList = streamSinkList;
this.edgeIdGenerator = new AtomicInteger(0);
@@ -63,6 +63,8 @@ public class JobGraphBuilder {
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
this.jobGraph.addEdge(jobEdge);
processStream(parentStream);
} else {
throw new UnsupportedOperationException("Unsupported stream: " + stream);
}
this.jobGraph.addVertex(jobVertex);
}
@@ -2,6 +2,7 @@ package org.ray.streaming.jobgraph;
import com.google.common.base.MoreObjects;
import java.io.Serializable;
import org.ray.streaming.api.Language;
import org.ray.streaming.operator.StreamOperator;
/**
@@ -12,6 +13,7 @@ public class JobVertex implements Serializable {
private int vertexId;
private int parallelism;
private VertexType vertexType;
private Language language;
private StreamOperator streamOperator;
public JobVertex(int vertexId, int parallelism, VertexType vertexType,
@@ -20,6 +22,7 @@ public class JobVertex implements Serializable {
this.parallelism = parallelism;
this.vertexType = vertexType;
this.streamOperator = streamOperator;
this.language = streamOperator.getLanguage();
}
public int getVertexId() {
@@ -38,12 +41,17 @@ public class JobVertex implements Serializable {
return vertexType;
}
/**
 * Returns the language of this vertex, as captured from its stream operator when the
 * vertex was constructed.
 *
 * @return the language recorded for this vertex
 */
public Language getLanguage() {
  return this.language;
}
/**
 * Builds a string representation of this vertex listing, in order, its vertex id,
 * parallelism, vertex type, language and stream operator.
 *
 * @return the string produced by the {@link MoreObjects} to-string helper
 */
@Override
public String toString() {
  MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(this);
  description.add("vertexId", vertexId);
  description.add("parallelism", parallelism);
  description.add("vertexType", vertexType);
  description.add("language", language);
  description.add("streamOperator", streamOperator);
  return description.toString();
}
@@ -4,7 +4,6 @@ package org.ray.streaming.jobgraph;
* Different roles for a node.
*/
public enum VertexType {
MASTER,
SOURCE,
TRANSFORMATION,
SINK,
@@ -2,8 +2,10 @@ package org.ray.streaming.operator;
import java.io.Serializable;
import java.util.List;
import org.ray.streaming.api.Language;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.Function;
public interface Operator extends Serializable {
@@ -13,5 +15,9 @@ public interface Operator extends Serializable {
void close();
Function getFunction();
Language getLanguage();
OperatorType getOpType();
}
@@ -1,6 +1,5 @@
package org.ray.streaming.operator;
public enum OperatorType {
SOURCE,
ONE_INPUT,
@@ -1,6 +1,7 @@
package org.ray.streaming.operator;
import java.util.List;
import org.ray.streaming.api.Language;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.api.function.Function;
@@ -8,7 +9,6 @@ import org.ray.streaming.message.KeyRecord;
import org.ray.streaming.message.Record;
public abstract class StreamOperator<F extends Function> implements Operator {
protected String name;
protected F function;
protected List<Collector> collectorList;
@@ -35,6 +35,16 @@ public abstract class StreamOperator<F extends Function> implements Operator {
}
/**
 * Returns the function held by this operator.
 *
 * @return the {@code function} field of this operator
 */
@Override
public Function getFunction() {
  return this.function;
}
/**
 * Returns the language of this operator.
 *
 * <p>This base implementation always reports {@link Language#JAVA}; a subclass may
 * override it to report a different language.
 *
 * @return {@link Language#JAVA}
 */
@Override
public Language getLanguage() {
return Language.JAVA;
}
protected void collect(Record record) {
for (Collector collector : this.collectorList) {
collector.collect(record);
@@ -20,16 +20,19 @@ import org.ray.streaming.api.function.Function;
*/
public class PythonFunction implements Function {
public enum FunctionInterface {
SOURCE_FUNCTION("ray.streaming.function.SourceFunction"),
MAP_FUNCTION("ray.streaming.function.MapFunction"),
FLAT_MAP_FUNCTION("ray.streaming.function.FlatMapFunction"),
FILTER_FUNCTION("ray.streaming.function.FilterFunction"),
KEY_FUNCTION("ray.streaming.function.KeyFunction"),
REDUCE_FUNCTION("ray.streaming.function.ReduceFunction"),
SINK_FUNCTION("ray.streaming.function.SinkFunction");
SOURCE_FUNCTION("SourceFunction"),
MAP_FUNCTION("MapFunction"),
FLAT_MAP_FUNCTION("FlatMapFunction"),
FILTER_FUNCTION("FilterFunction"),
KEY_FUNCTION("KeyFunction"),
REDUCE_FUNCTION("ReduceFunction"),
SINK_FUNCTION("SinkFunction");
private String functionInterface;
/**
* @param functionInterface function class name in `ray.streaming.function` module.
*/
FunctionInterface(String functionInterface) {
this.functionInterface = functionInterface;
}
@@ -59,6 +62,26 @@ public class PythonFunction implements Function {
this.functionInterface = functionInterface.functionInterface;
}
/**
 * Returns the {@code function} byte array held by this object.
 *
 * <p>The internal array itself is returned, not a copy.
 * NOTE(review): presumably this is the serialized python function and may be unset when
 * the function is described by module/class/function name instead; confirm against the
 * constructors.
 *
 * @return the {@code function} byte array
 */
public byte[] getFunction() {
  return this.function;
}
/**
 * Returns the module name stored in this python function descriptor.
 *
 * @return the {@code moduleName} field
 */
public String getModuleName() {
  return this.moduleName;
}
/**
 * Returns the class name stored in this python function descriptor.
 *
 * @return the {@code className} field
 */
public String getClassName() {
  return this.className;
}
/**
 * Returns the function name stored in this python function descriptor.
 *
 * @return the {@code functionName} field
 */
public String getFunctionName() {
  return this.functionName;
}
/**
 * Returns the name of the function interface associated with this python function, as
 * taken from the {@code FunctionInterface} enum value it was configured with.
 *
 * @return the {@code functionInterface} field
 */
public String getFunctionInterface() {
  return this.functionInterface;
}
/**
* Create a {@link PythonFunction} using python serialized function
*
@@ -1,6 +1,7 @@
package org.ray.streaming.python;
import java.util.List;
import org.ray.streaming.api.Language;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.operator.OperatorType;
import org.ray.streaming.operator.StreamOperator;
@@ -39,5 +40,8 @@ public class PythonOperator extends StreamOperator {
throw new UnsupportedOperationException(msg);
}
/**
 * Returns the language of this operator, overriding the inherited implementation.
 *
 * @return {@link Language#PYTHON}
 */
@Override
public Language getLanguage() {
return Language.PYTHON;
}
}
@@ -45,4 +45,19 @@ public class PythonPartition implements Partition {
throw new UnsupportedOperationException(msg);
}
/**
 * Returns the {@code partition} byte array held by this object.
 *
 * <p>The internal array itself is returned, not a copy.
 * NOTE(review): presumably the serialized python partition function; confirm against
 * the constructors.
 *
 * @return the {@code partition} byte array
 */
public byte[] getPartition() {
  return this.partition;
}
/**
 * Returns the module name stored in this python partition descriptor.
 *
 * @return the {@code moduleName} field
 */
public String getModuleName() {
  return this.moduleName;
}
/**
 * Returns the class name stored in this python partition descriptor.
 *
 * @return the {@code className} field
 */
public String getClassName() {
  return this.className;
}
/**
 * Returns the function name stored in this python partition descriptor.
 *
 * @return the {@code functionName} field
 */
public String getFunctionName() {
  return this.functionName;
}
}
@@ -17,6 +17,10 @@ public class PythonDataStream extends Stream implements PythonStream {
super(streamingContext, pythonOperator);
}
/**
 * Creates a python data stream from an upstream python data stream.
 *
 * <p>Both arguments are passed unchanged to the superclass constructor.
 *
 * @param input the upstream python data stream
 * @param pythonOperator the python operator associated with the new stream
 */
public PythonDataStream(PythonDataStream input, PythonOperator pythonOperator) {
super(input, pythonOperator);
}
/**
 * Creates a python data stream from an arbitrary upstream {@link Stream}.
 *
 * <p>This constructor is {@code protected}, so it is reachable only from subclasses and
 * from code in the same package. Both arguments are passed unchanged to the superclass
 * constructor.
 *
 * @param inputStream the upstream stream
 * @param pythonOperator the python operator associated with the new stream
 */
protected PythonDataStream(Stream inputStream, PythonOperator pythonOperator) {
super(inputStream, pythonOperator);
}
@@ -1,7 +1,5 @@
package org.ray.streaming.python.stream;
import org.ray.streaming.api.stream.Stream;
import org.ray.streaming.operator.StreamOperator;
import org.ray.streaming.python.PythonFunction;
import org.ray.streaming.python.PythonFunction.FunctionInterface;
import org.ray.streaming.python.PythonOperator;
@@ -10,10 +8,10 @@ import org.ray.streaming.python.PythonPartition;
/**
* Represents a python DataStream returned by a key-by operation.
*/
public class PythonKeyDataStream extends Stream implements PythonStream {
public class PythonKeyDataStream extends PythonDataStream implements PythonStream {
public PythonKeyDataStream(PythonDataStream input, StreamOperator streamOperator) {
super(input, streamOperator);
public PythonKeyDataStream(PythonDataStream input, PythonOperator pythonOperator) {
super(input, pythonOperator);
this.partition = PythonPartition.KeyPartition;
}
@@ -8,7 +8,7 @@ import org.ray.streaming.python.PythonOperator;
*/
public class PythonStreamSink extends StreamSink implements PythonStream {
public PythonStreamSink(PythonDataStream input, PythonOperator sinkOperator) {
super(input, null);
super(input, sinkOperator);
this.streamingContext.addSink(this);
}