diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/StreamingContext.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/StreamingContext.java
index 5f1ab4d4e..c18fb7cef 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/StreamingContext.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/StreamingContext.java
@@ -58,6 +58,7 @@ public class StreamingContext implements Serializable {
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(this.streamSinks, jobName);
this.jobGraph = jobGraphBuilder.build();
jobGraph.printJobGraph();
+ LOG.info("JobGraph digraph\n{}", jobGraph.generateDigraph());
if (Ray.internal() == null) {
if (Config.MEMORY_CHANNEL.equalsIgnoreCase(jobConfig.get(Config.CHANNEL_TYPE))) {
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/function/internal/Functions.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/function/internal/Functions.java
index 3472da79e..94441076f 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/function/internal/Functions.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/function/internal/Functions.java
@@ -37,4 +37,8 @@ public class Functions {
}
}
+  /**
+   * Creates a function object intended as a placeholder where no user-defined logic is needed.
+   *
+   * @return A new {@link DefaultRichFunction} constructed with a {@code null} argument.
+   */
+  public static RichFunction emptyFunction() {
+    final RichFunction placeholder = new DefaultRichFunction(null);
+    return placeholder;
+  }
+
}
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/DataStream.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/DataStream.java
index fe0a3af1b..bd90b0e25 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/DataStream.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/DataStream.java
@@ -1,6 +1,5 @@
package io.ray.streaming.api.stream;
-
import io.ray.streaming.api.Language;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.api.function.impl.FilterFunction;
@@ -17,9 +16,13 @@ import io.ray.streaming.operator.impl.KeyByOperator;
import io.ray.streaming.operator.impl.MapOperator;
import io.ray.streaming.operator.impl.SinkOperator;
import io.ray.streaming.python.stream.PythonDataStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
/**
* Represents a stream of data.
+ *
* <p>This class defines all the streaming operations.
*
* @param <T> Type of data in the stream.
@@ -81,13 +84,36 @@ public class DataStream extends Stream, T> {
}
/**
- * Apply a union transformation to this stream, with another stream.
+ * Apply union transformations to this stream by merging {@link DataStream} outputs of
+ * the same type with each other.
*
- * @param other Another stream.
+ * @param stream The DataStream to union output with.
+ * @param others The other DataStreams to union output with.
* @return A new UnionStream.
*/
- public UnionStream union(DataStream other) {
- return new UnionStream<>(this, null, other);
+  @SafeVarargs
+  public final DataStream<T> union(DataStream<T> stream, DataStream<T>... others) {
+    // Gather the mandatory first stream and the optional remaining ones into a single list,
+    // then delegate to the list-based overload so both entry points share one code path.
+    List<DataStream<T>> streams = new ArrayList<>(others.length + 1);
+    streams.add(stream);
+    streams.addAll(Arrays.asList(others));
+    return union(streams);
+  }
+
+  /**
+   * Apply union transformations to this stream by merging {@link DataStream} outputs of
+   * the same type with each other.
+   *
+   * <p>If this stream is already a {@link UnionStream}, the given streams are added to it and
+   * this same instance is returned. Otherwise a new {@link UnionStream} is created with this
+   * stream as its input.
+   *
+   * @param streams The DataStreams to union output with.
+   * @return This stream if it is already a UnionStream, otherwise a new UnionStream.
+   */
+  public final DataStream<T> union(List<DataStream<T>> streams) {
+    if (this instanceof UnionStream) {
+      // Extend the existing union in place instead of stacking another union on top of it.
+      UnionStream<T> unionStream = (UnionStream<T>) this;
+      streams.forEach(unionStream::addStream);
+      return unionStream;
+    } else {
+      return new UnionStream<>(this, streams);
+    }
   }
/**
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/UnionStream.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/UnionStream.java
index 6dd559ce7..833cddaa8 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/UnionStream.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/stream/UnionStream.java
@@ -1,22 +1,32 @@
package io.ray.streaming.api.stream;
-import io.ray.streaming.operator.StreamOperator;
+import io.ray.streaming.operator.impl.UnionOperator;
import java.util.ArrayList;
import java.util.List;
/**
* Represents a union DataStream.
*
+ * <p>This stream does not create a physical operation, it only affects how upstream data are
+ * connected to downstream data.
+ *
* @param <T> The type of union data.
*/
public class UnionStream extends DataStream {
-
private List> unionStreams;
+  /**
+   * Creates a union of {@code input} with the given streams.
+   *
+   * @param input The stream passed to the superclass as this stream's input.
+   * @param streams The streams to merge with {@code input}; each one is registered through
+   *     {@code addStream}.
+   */
-  public UnionStream(DataStream input, StreamOperator streamOperator, DataStream other) {
-    super(input, streamOperator);
+  public UnionStream(DataStream<T> input, List<DataStream<T>> streams) {
+    super(input, new UnionOperator());
     this.unionStreams = new ArrayList<>();
-    this.unionStreams.add(other);
+    streams.forEach(this::addStream);
+  }
+
+  /**
+   * Adds a stream whose output is merged into this union.
+   *
+   * <p>A {@link UnionStream} argument is flattened: the streams returned by its
+   * {@link #getUnionStreams()} are added instead of the argument itself.
+   *
+   * <p>NOTE(review): the nested union's own input stream is not copied here; confirm that
+   * callers do not rely on it being part of the merged result.
+   *
+   * @param stream The stream to add.
+   */
+  void addStream(DataStream<T> stream) {
+    if (stream instanceof UnionStream) {
+      this.unionStreams.addAll(((UnionStream<T>) stream).getUnionStreams());
+    } else {
+      this.unionStreams.add(stream);
+    }
   }
public List> getUnionStreams() {
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/jobgraph/JobGraphBuilder.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/jobgraph/JobGraphBuilder.java
index 30e26ce9b..2a2d02ebf 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/jobgraph/JobGraphBuilder.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/jobgraph/JobGraphBuilder.java
@@ -5,8 +5,11 @@ import io.ray.streaming.api.stream.DataStream;
import io.ray.streaming.api.stream.Stream;
import io.ray.streaming.api.stream.StreamSink;
import io.ray.streaming.api.stream.StreamSource;
+import io.ray.streaming.api.stream.UnionStream;
import io.ray.streaming.operator.StreamOperator;
import io.ray.streaming.python.stream.PythonDataStream;
+import io.ray.streaming.python.stream.PythonUnionStream;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -44,6 +47,7 @@ public class JobGraphBuilder {
return this.jobGraph;
}
+ @SuppressWarnings("unchecked")
private void processStream(Stream stream) {
while (stream.isProxyStream()) {
// Proxy stream and original stream are the same logical stream, both refer to the
@@ -74,6 +78,20 @@ public class JobGraphBuilder {
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
this.jobGraph.addEdge(jobEdge);
processStream(parentStream);
+
+ // process union stream
+ List streams = new ArrayList<>();
+ if (stream instanceof UnionStream) {
+ streams.addAll(((UnionStream) stream).getUnionStreams());
+ }
+ if (stream instanceof PythonUnionStream) {
+ streams.addAll(((PythonUnionStream) stream).getUnionStreams());
+ }
+ for (Stream otherStream : streams) {
+ JobEdge otherEdge = new JobEdge(otherStream.getId(), vertexId, otherStream.getPartition());
+ this.jobGraph.addEdge(otherEdge);
+ processStream(otherStream);
+ }
} else {
throw new UnsupportedOperationException("Unsupported stream: " + stream);
}
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/operator/impl/UnionOperator.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/operator/impl/UnionOperator.java
new file mode 100644
index 000000000..c3467582f
--- /dev/null
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/operator/impl/UnionOperator.java
@@ -0,0 +1,21 @@
+package io.ray.streaming.operator.impl;
+
+import io.ray.streaming.api.function.Function;
+import io.ray.streaming.api.function.internal.Functions;
+import io.ray.streaming.message.Record;
+import io.ray.streaming.operator.OneInputOperator;
+import io.ray.streaming.operator.StreamOperator;
+
+/**
+ * Operator used by union streams. It applies no transformation: every record it receives is
+ * handed to {@code collect} as is.
+ *
+ * @param <T> Type of the records passing through the union.
+ */
+public class UnionOperator<T> extends StreamOperator<Function> implements
+    OneInputOperator<T> {
+
+  /**
+   * Creates the operator with the function returned by {@link Functions#emptyFunction()}, since
+   * a union has no function of its own to run.
+   */
+  public UnionOperator() {
+    super(Functions.emptyFunction());
+  }
+
+  /**
+   * Passes the record on unchanged.
+   *
+   * @param record The incoming record.
+   */
+  @Override
+  public void processElement(Record<T> record) {
+    collect(record);
+  }
+
+}
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonFunction.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonFunction.java
index 21533de70..aac706d2f 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonFunction.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonFunction.java
@@ -2,6 +2,7 @@ package io.ray.streaming.python;
import com.google.common.base.Preconditions;
import io.ray.streaming.api.function.Function;
+import java.util.StringJoiner;
import org.apache.commons.lang3.StringUtils;
/**
@@ -99,4 +100,17 @@ public class PythonFunction implements Function {
return functionInterface;
}
+  /**
+   * Builds a one-line description of this function.
+   *
+   * <p>When {@code function} is set, a fixed placeholder is printed instead of the field's
+   * content; otherwise the module name and function name are printed. The function interface
+   * is always included.
+   *
+   * @return A string of the form {@code PythonFunction[...]}.
+   */
+  @Override
+  public String toString() {
+    final String prefix = PythonFunction.class.getSimpleName() + "[";
+    final StringJoiner description = new StringJoiner(", ", prefix, "]");
+    if (function == null) {
+      description.add("moduleName='" + moduleName + "'");
+      description.add("functionName='" + functionName + "'");
+    } else {
+      description.add("function=binary function");
+    }
+    description.add("functionInterface='" + functionInterface + "'");
+    return description.toString();
+  }
}
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonOperator.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonOperator.java
index d0eec6e3b..045814c7e 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonOperator.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonOperator.java
@@ -5,15 +5,26 @@ import io.ray.streaming.api.context.RuntimeContext;
import io.ray.streaming.operator.OperatorType;
import io.ray.streaming.operator.StreamOperator;
import java.util.List;
+import java.util.StringJoiner;
/**
* Represents a {@link StreamOperator} that wraps python {@link PythonFunction}.
*/
@SuppressWarnings("unchecked")
public class PythonOperator extends StreamOperator {
+ private final String moduleName;
+ private final String className;
+
+  /**
+   * Creates an operator described by a module name and a class name rather than by a
+   * {@link PythonFunction}; the superclass is initialized with a {@code null} function.
+   *
+   * @param moduleName Module name stored on this operator (presumably the python module that
+   *     defines the operator class; confirm against the caller).
+   * @param className Class name stored on this operator.
+   */
+  public PythonOperator(String moduleName, String className) {
+    super(null);
+    this.className = className;
+    this.moduleName = moduleName;
+  }
+  /**
+   * Creates an operator that wraps the given python function; the module name and class name
+   * are left {@code null}.
+   *
+   * @param function The python function handed to the superclass.
+   */
   public PythonOperator(PythonFunction function) {
     super(function);
+    this.className = null;
+    this.moduleName = null;
   }
@Override
@@ -44,4 +55,25 @@ public class PythonOperator extends StreamOperator {
public Language getLanguage() {
return Language.PYTHON;
}
+
+  /**
+   * Returns the module name given at construction.
+   *
+   * @return The module name, or {@code null} when this operator was built from a
+   *     {@link PythonFunction}.
+   */
+  public String getModuleName() {
+    return this.moduleName;
+  }
+
+  /**
+   * Returns the class name given at construction.
+   *
+   * @return The class name, or {@code null} when this operator was built from a
+   *     {@link PythonFunction}.
+   */
+  public String getClassName() {
+    return this.className;
+  }
+
+  /**
+   * Builds a one-line description of this operator.
+   *
+   * <p>The wrapped function is printed when it is set; otherwise the module name and class
+   * name are printed.
+   *
+   * @return A string of the form {@code PythonOperator[...]}.
+   */
+  @Override
+  public String toString() {
+    final String prefix = PythonOperator.class.getSimpleName() + "[";
+    final StringJoiner description = new StringJoiner(", ", prefix, "]");
+    if (function == null) {
+      description.add("moduleName='" + moduleName + "'");
+      description.add("className='" + className + "'");
+    } else {
+      description.add("function='" + function + "'");
+    }
+    return description.toString();
+  }
}
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonPartition.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonPartition.java
index 6d8de051f..9f3bcd7a1 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonPartition.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/python/PythonPartition.java
@@ -2,6 +2,7 @@ package io.ray.streaming.python;
import com.google.common.base.Preconditions;
import io.ray.streaming.api.partition.Partition;
+import java.util.StringJoiner;
import org.apache.commons.lang3.StringUtils;
/**
@@ -35,6 +36,7 @@ public class PythonPartition implements Partition