mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 14:06:42 +08:00
[Java] Format ray java code (#13056)
This commit is contained in:
-1
@@ -9,5 +9,4 @@ package io.ray.streaming.api.collector;
|
||||
public interface Collector<T> {
|
||||
|
||||
void collect(T value);
|
||||
|
||||
}
|
||||
|
||||
+3
-9
@@ -9,9 +9,7 @@ import io.ray.streaming.state.keystate.state.MapState;
|
||||
import io.ray.streaming.state.keystate.state.ValueState;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Encapsulate the runtime information of a streaming task.
|
||||
*/
|
||||
/** Encapsulate the runtime information of a streaming task. */
|
||||
public interface RuntimeContext {
|
||||
|
||||
int getTaskId();
|
||||
@@ -20,14 +18,10 @@ public interface RuntimeContext {
|
||||
|
||||
int getParallelism();
|
||||
|
||||
/**
|
||||
* @return config of current function
|
||||
*/
|
||||
/** Returns config of current function */
|
||||
Map<String, String> getConfig();
|
||||
|
||||
/**
|
||||
* @return config of the job
|
||||
*/
|
||||
/** Returns config of the job */
|
||||
Map<String, String> getJobConfig();
|
||||
|
||||
Long getCheckpointId();
|
||||
|
||||
+7
-17
@@ -19,28 +19,20 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Encapsulate the context information of a streaming Job.
|
||||
*/
|
||||
/** Encapsulate the context information of a streaming Job. */
|
||||
public class StreamingContext implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(StreamingContext.class);
|
||||
|
||||
private transient AtomicInteger idGenerator;
|
||||
|
||||
/**
|
||||
* The sinks of this streaming job.
|
||||
*/
|
||||
/** The sinks of this streaming job. */
|
||||
private List<StreamSink> streamSinks;
|
||||
|
||||
/**
|
||||
* The user custom streaming job configuration.
|
||||
*/
|
||||
/** The user custom streaming job configuration. */
|
||||
private Map<String, String> jobConfig;
|
||||
|
||||
/**
|
||||
* The logic plan.
|
||||
*/
|
||||
/** The logic plan. */
|
||||
private JobGraph jobGraph;
|
||||
|
||||
private StreamingContext() {
|
||||
@@ -53,9 +45,7 @@ public class StreamingContext implements Serializable {
|
||||
return new StreamingContext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct job DAG, and execute the job.
|
||||
*/
|
||||
/** Construct job DAG, and execute the job. */
|
||||
public void execute(String jobName) {
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(this.streamSinks, jobName);
|
||||
JobGraph originalJobGraph = jobGraphBuilder.build();
|
||||
@@ -78,8 +68,8 @@ public class StreamingContext implements Serializable {
|
||||
|
||||
ServiceLoader<JobClient> serviceLoader = ServiceLoader.load(JobClient.class);
|
||||
Iterator<JobClient> iterator = serviceLoader.iterator();
|
||||
Preconditions.checkArgument(iterator.hasNext(),
|
||||
"No JobClient implementation has been provided.");
|
||||
Preconditions.checkArgument(
|
||||
iterator.hasNext(), "No JobClient implementation has been provided.");
|
||||
JobClient jobClient = iterator.next();
|
||||
jobClient.submit(jobGraph, jobConfig);
|
||||
}
|
||||
|
||||
+10
-14
@@ -2,31 +2,27 @@ package io.ray.streaming.api.function;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Interface of streaming functions.
|
||||
*/
|
||||
/** Interface of streaming functions. */
|
||||
public interface Function extends Serializable {
|
||||
|
||||
/**
|
||||
* This method will be called periodically by framework, you should return a a serializable
|
||||
* object which represents function state, framework will help you to serialize this object, save
|
||||
* it to storage, and load it back when in fail-over through.
|
||||
* {@link Function#loadCheckpoint(Serializable)}.
|
||||
* This method will be called periodically by framework, you should return a a serializable object
|
||||
* which represents function state, framework will help you to serialize this object, save it to
|
||||
* storage, and load it back when in fail-over through. {@link
|
||||
* Function#loadCheckpoint(Serializable)}.
|
||||
*
|
||||
* @return A serializable object which represents function state.
|
||||
* <p>Returns A serializable object which represents function state.
|
||||
*/
|
||||
default Serializable saveCheckpoint() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method will be called by framework when a worker died and been restarted.
|
||||
* We will pass the last object you returned in {@link Function#saveCheckpoint()} when
|
||||
* doing checkpoint, you are responsible to load this object back to you function.
|
||||
* This method will be called by framework when a worker died and been restarted. We will pass the
|
||||
* last object you returned in {@link Function#saveCheckpoint()} when doing checkpoint, you are
|
||||
* responsible to load this object back to you function.
|
||||
*
|
||||
* @param checkpointObject the last object you returned in {@link Function#saveCheckpoint()}
|
||||
*/
|
||||
default void loadCheckpoint(Serializable checkpointObject) {
|
||||
}
|
||||
|
||||
default void loadCheckpoint(Serializable checkpointObject) {}
|
||||
}
|
||||
|
||||
-1
@@ -20,5 +20,4 @@ public interface RichFunction extends Function {
|
||||
* Tear-down method for the user function which called after the last call to the user function.
|
||||
*/
|
||||
void close();
|
||||
|
||||
}
|
||||
|
||||
+2
-2
@@ -14,8 +14,8 @@ public interface FilterFunction<T> extends Function {
|
||||
/**
|
||||
* The filter function that evaluates the predicate.
|
||||
*
|
||||
* @param value The value to be filtered.
|
||||
* @return True for values that should be retained, false for values to be filtered out.
|
||||
* @param value The value to be filtered. Returns True for values that should be retained, false
|
||||
* for values to be filtered out.
|
||||
*/
|
||||
boolean filter(T value) throws Exception;
|
||||
}
|
||||
|
||||
-1
@@ -13,5 +13,4 @@ import io.ray.streaming.api.function.Function;
|
||||
public interface JoinFunction<T, O, R> extends Function {
|
||||
|
||||
R join(T left, O right);
|
||||
|
||||
}
|
||||
|
||||
-1
@@ -18,6 +18,5 @@ public interface SourceFunction<T> extends Function {
|
||||
interface SourceContext<T> {
|
||||
|
||||
void collect(T element) throws Exception;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
+2
-5
@@ -18,8 +18,7 @@ public class CollectionSourceFunction<T> implements SourceFunction<T> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(int totalParallel, int currentIndex) {
|
||||
}
|
||||
public void init(int totalParallel, int currentIndex) {}
|
||||
|
||||
@Override
|
||||
public void fetch(SourceContext<T> ctx) throws Exception {
|
||||
@@ -33,7 +32,5 @@ public class CollectionSourceFunction<T> implements SourceFunction<T> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
public void close() {}
|
||||
}
|
||||
|
||||
+3
-8
@@ -4,9 +4,7 @@ import io.ray.streaming.api.context.RuntimeContext;
|
||||
import io.ray.streaming.api.function.Function;
|
||||
import io.ray.streaming.api.function.RichFunction;
|
||||
|
||||
/**
|
||||
* A util class for {@link Function}
|
||||
*/
|
||||
/** A util class for {@link Function} */
|
||||
public class Functions {
|
||||
|
||||
private static class DefaultRichFunction implements RichFunction {
|
||||
@@ -18,12 +16,10 @@ public class Functions {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(RuntimeContext runtimeContext) {
|
||||
}
|
||||
public void open(RuntimeContext runtimeContext) {}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
public void close() {}
|
||||
|
||||
public Function getFunction() {
|
||||
return function;
|
||||
@@ -41,5 +37,4 @@ public class Functions {
|
||||
public static RichFunction emptyFunction() {
|
||||
return new DefaultRichFunction(null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-3
@@ -15,9 +15,8 @@ public interface Partition<T> extends Function {
|
||||
* record.
|
||||
*
|
||||
* @param record The record.
|
||||
* @param numPartition num of partitions
|
||||
* @return IDs of the downstream partitions that should receive the record.
|
||||
* @param numPartition num of partitions Returns IDs of the downstream partitions that should
|
||||
* receive the record.
|
||||
*/
|
||||
int[] partition(T record, int numPartition);
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -3,15 +3,12 @@ package io.ray.streaming.api.partition.impl;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Broadcast the record to all downstream partitions.
|
||||
*/
|
||||
/** Broadcast the record to all downstream partitions. */
|
||||
public class BroadcastPartition<T> implements Partition<T> {
|
||||
|
||||
private int[] partitions = new int[0];
|
||||
|
||||
public BroadcastPartition() {
|
||||
}
|
||||
public BroadcastPartition() {}
|
||||
|
||||
@Override
|
||||
public int[] partition(T value, int numPartition) {
|
||||
@@ -20,5 +17,4 @@ public class BroadcastPartition<T> implements Partition<T> {
|
||||
}
|
||||
return partitions;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+15
-25
@@ -22,6 +22,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a stream of data.
|
||||
*
|
||||
* <p>This class defines all the streaming operations.
|
||||
*
|
||||
* @param <T> Type of data in the stream.
|
||||
@@ -33,9 +34,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
}
|
||||
|
||||
public DataStream(
|
||||
StreamingContext streamingContext,
|
||||
StreamOperator streamOperator,
|
||||
Partition<T> partition) {
|
||||
StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
|
||||
super(streamingContext, streamOperator, partition);
|
||||
}
|
||||
|
||||
@@ -44,9 +43,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
}
|
||||
|
||||
public <R> DataStream(
|
||||
DataStream<R> input,
|
||||
StreamOperator streamOperator,
|
||||
Partition<T> partition) {
|
||||
DataStream<R> input, StreamOperator streamOperator, Partition<T> partition) {
|
||||
super(input, streamOperator, partition);
|
||||
}
|
||||
|
||||
@@ -62,8 +59,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
* Apply a map function to this stream.
|
||||
*
|
||||
* @param mapFunction The map function.
|
||||
* @param <R> Type of data returned by the map function.
|
||||
* @return A new DataStream.
|
||||
* @param <R> Type of data returned by the map function. Returns A new DataStream.
|
||||
*/
|
||||
public <R> DataStream<R> map(MapFunction<T, R> mapFunction) {
|
||||
return new DataStream<>(this, new MapOperator<>(mapFunction));
|
||||
@@ -73,8 +69,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
* Apply a flat-map function to this stream.
|
||||
*
|
||||
* @param flatMapFunction The FlatMapFunction
|
||||
* @param <R> Type of data returned by the flatmap function.
|
||||
* @return A new DataStream
|
||||
* @param <R> Type of data returned by the flatmap function. Returns A new DataStream
|
||||
*/
|
||||
public <R> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapFunction) {
|
||||
return new DataStream<>(this, new FlatMapOperator<>(flatMapFunction));
|
||||
@@ -89,8 +84,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
* type with each other.
|
||||
*
|
||||
* @param stream The DataStream to union output with.
|
||||
* @param others The other DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
* @param others The other DataStreams to union output with. Returns A new UnionStream.
|
||||
*/
|
||||
@SafeVarargs
|
||||
public final DataStream<T> union(DataStream<T> stream, DataStream<T>... others) {
|
||||
@@ -104,8 +98,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
* Apply union transformations to this stream by merging {@link DataStream} outputs of the same
|
||||
* type with each other.
|
||||
*
|
||||
* @param streams The DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
* @param streams The DataStreams to union output with. Returns A new UnionStream.
|
||||
*/
|
||||
public final DataStream<T> union(List<DataStream<T>> streams) {
|
||||
if (this instanceof UnionStream) {
|
||||
@@ -122,8 +115,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
*
|
||||
* @param other Another stream.
|
||||
* @param <O> The type of the other stream data.
|
||||
* @param <R> The type of the data in the joined stream.
|
||||
* @return A new JoinStream.
|
||||
* @param <R> The type of the data in the joined stream. Returns A new JoinStream.
|
||||
*/
|
||||
public <O, R> JoinStream<T, O, R> join(DataStream<O> other) {
|
||||
return new JoinStream<>(this, other);
|
||||
@@ -137,8 +129,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
/**
|
||||
* Apply a sink function and get a StreamSink.
|
||||
*
|
||||
* @param sinkFunction The sink function.
|
||||
* @return A new StreamSink.
|
||||
* @param sinkFunction The sink function. Returns A new StreamSink.
|
||||
*/
|
||||
public DataStreamSink<T> sink(SinkFunction<T> sinkFunction) {
|
||||
return new DataStreamSink<>(this, new SinkOperator<>(sinkFunction));
|
||||
@@ -148,8 +139,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
* Apply a key-by function to this stream.
|
||||
*
|
||||
* @param keyFunction the key function.
|
||||
* @param <K> The type of the key.
|
||||
* @return A new KeyDataStream.
|
||||
* @param <K> The type of the key. Returns A new KeyDataStream.
|
||||
*/
|
||||
public <K> KeyDataStream<K, T> keyBy(KeyFunction<T, K> keyFunction) {
|
||||
checkPartitionCall();
|
||||
@@ -159,7 +149,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
/**
|
||||
* Apply broadcast to this stream.
|
||||
*
|
||||
* @return This stream.
|
||||
* <p>Returns This stream.
|
||||
*/
|
||||
public DataStream<T> broadcast() {
|
||||
checkPartitionCall();
|
||||
@@ -169,8 +159,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
/**
|
||||
* Apply a partition to this stream.
|
||||
*
|
||||
* @param partition The partitioning strategy.
|
||||
* @return This stream.
|
||||
* @param partition The partitioning strategy. Returns This stream.
|
||||
*/
|
||||
public DataStream<T> partitionBy(Partition<T> partition) {
|
||||
checkPartitionCall();
|
||||
@@ -183,8 +172,9 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
|
||||
*/
|
||||
private void checkPartitionCall() {
|
||||
if (getInputStream() != null && getInputStream().getLanguage() == Language.PYTHON) {
|
||||
throw new RuntimeException("Partition related methods can't be called on a " +
|
||||
"java stream if parent stream is a python stream.");
|
||||
throw new RuntimeException(
|
||||
"Partition related methods can't be called on a "
|
||||
+ "java stream if parent stream is a python stream.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+1
-3
@@ -27,12 +27,10 @@ public class DataStreamSource<T> extends DataStream<T> implements StreamSource<T
|
||||
*
|
||||
* @param context Stream context.
|
||||
* @param values A collection of values.
|
||||
* @param <T> The type of source data.
|
||||
* @return A DataStreamSource.
|
||||
* @param <T> The type of source data. Returns A DataStreamSource.
|
||||
*/
|
||||
public static <T> DataStreamSource<T> fromCollection(
|
||||
StreamingContext context, Collection<T> values) {
|
||||
return new DataStreamSource<>(context, new CollectionSourceFunction<>(values));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+3
-5
@@ -25,9 +25,7 @@ public class JoinStream<L, R, O> extends DataStream<L> {
|
||||
return rightStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply key-by to the left join stream.
|
||||
*/
|
||||
/** Apply key-by to the left join stream. */
|
||||
public <K> Where<K> where(KeyFunction<L, K> keyFunction) {
|
||||
return new Where<>(this, keyFunction);
|
||||
}
|
||||
@@ -64,7 +62,8 @@ public class JoinStream<L, R, O> extends DataStream<L> {
|
||||
private KeyFunction<R, K> rightKeyByFunction;
|
||||
|
||||
Equal(
|
||||
JoinStream<L, R, O> joinStream, KeyFunction<L, K> leftKeyByFunction,
|
||||
JoinStream<L, R, O> joinStream,
|
||||
KeyFunction<L, K> leftKeyByFunction,
|
||||
KeyFunction<R, K> rightKeyByFunction) {
|
||||
this.joinStream = joinStream;
|
||||
this.leftKeyByFunction = leftKeyByFunction;
|
||||
@@ -78,5 +77,4 @@ public class JoinStream<L, R, O> extends DataStream<L> {
|
||||
return (DataStream<O>) joinStream;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-5
@@ -33,8 +33,7 @@ public class KeyDataStream<K, T> extends DataStream<T> {
|
||||
/**
|
||||
* Apply a reduce function to this stream.
|
||||
*
|
||||
* @param reduceFunction The reduce function.
|
||||
* @return A new DataStream.
|
||||
* @param reduceFunction The reduce function. Returns A new DataStream.
|
||||
*/
|
||||
public DataStream<T> reduce(ReduceFunction reduceFunction) {
|
||||
return new DataStream<>(this, new ReduceOperator(reduceFunction));
|
||||
@@ -45,8 +44,7 @@ public class KeyDataStream<K, T> extends DataStream<T> {
|
||||
*
|
||||
* @param aggregateFunction The aggregate function
|
||||
* @param <A> The type of aggregated intermediate data.
|
||||
* @param <O> The type of result data.
|
||||
* @return A new DataStream.
|
||||
* @param <O> The type of result data. Returns A new DataStream.
|
||||
*/
|
||||
public <A, O> DataStream<O> aggregate(AggregateFunction<T, A, O> aggregateFunction) {
|
||||
return new DataStream<>(this, null);
|
||||
@@ -60,5 +58,4 @@ public class KeyDataStream<K, T> extends DataStream<T> {
|
||||
public PythonKeyDataStream asPythonStream() {
|
||||
return new PythonKeyDataStream(this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -19,8 +19,7 @@ import java.util.Map;
|
||||
* @param <S> Type of stream class
|
||||
* @param <T> Type of the data in the stream.
|
||||
*/
|
||||
public abstract class Stream<S extends Stream<S, T>, T>
|
||||
implements Serializable {
|
||||
public abstract class Stream<S extends Stream<S, T>, T> implements Serializable {
|
||||
|
||||
private final int id;
|
||||
private final StreamingContext streamingContext;
|
||||
@@ -36,14 +35,15 @@ public abstract class Stream<S extends Stream<S, T>, T>
|
||||
}
|
||||
|
||||
public Stream(
|
||||
StreamingContext streamingContext,
|
||||
StreamOperator streamOperator,
|
||||
Partition<T> partition) {
|
||||
StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
|
||||
this(streamingContext, null, streamOperator, partition);
|
||||
}
|
||||
|
||||
public Stream(Stream inputStream, StreamOperator streamOperator) {
|
||||
this(inputStream.getStreamingContext(), inputStream, streamOperator,
|
||||
this(
|
||||
inputStream.getStreamingContext(),
|
||||
inputStream,
|
||||
streamOperator,
|
||||
getForwardPartition(streamOperator));
|
||||
}
|
||||
|
||||
@@ -87,8 +87,7 @@ public abstract class Stream<S extends Stream<S, T>, T>
|
||||
case JAVA:
|
||||
return new ForwardPartition<>();
|
||||
default:
|
||||
throw new UnsupportedOperationException(
|
||||
"Unsupported language " + operator.getLanguage());
|
||||
throw new UnsupportedOperationException("Unsupported language " + operator.getLanguage());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,18 +168,14 @@ public abstract class Stream<S extends Stream<S, T>, T>
|
||||
return originalStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set chain strategy for this stream
|
||||
*/
|
||||
/** Set chain strategy for this stream */
|
||||
public S withChainStrategy(ChainStrategy chainStrategy) {
|
||||
Preconditions.checkArgument(!isProxyStream());
|
||||
operator.setChainStrategy(chainStrategy);
|
||||
return self();
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable chain for this stream
|
||||
*/
|
||||
/** Disable chain for this stream */
|
||||
public S disableChain() {
|
||||
return withChainStrategy(ChainStrategy.NEVER);
|
||||
}
|
||||
|
||||
+1
-3
@@ -5,6 +5,4 @@ package io.ray.streaming.api.stream;
|
||||
*
|
||||
* @param <T> The type of StreamSource data.
|
||||
*/
|
||||
public interface StreamSource<T> {
|
||||
|
||||
}
|
||||
public interface StreamSource<T> {}
|
||||
|
||||
+1
@@ -6,6 +6,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a union DataStream.
|
||||
*
|
||||
* <p>This stream does not create a physical operation, it only affects how upstream data are
|
||||
* connected to downstream data.
|
||||
*
|
||||
|
||||
@@ -3,9 +3,7 @@ package io.ray.streaming.client;
|
||||
import io.ray.streaming.jobgraph.JobGraph;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Interface of the job client.
|
||||
*/
|
||||
/** Interface of the job client. */
|
||||
public interface JobClient {
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,9 +3,7 @@ package io.ray.streaming.jobgraph;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Job edge is connection and partition rules of upstream and downstream execution nodes.
|
||||
*/
|
||||
/** Job edge is connection and partition rules of upstream and downstream execution nodes. */
|
||||
public class JobEdge implements Serializable {
|
||||
|
||||
private int srcVertexId;
|
||||
@@ -44,7 +42,13 @@ public class JobEdge implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Edge(" + "from:" + srcVertexId + "-" + targetVertexId + "-" + this.partition.getClass()
|
||||
return "Edge("
|
||||
+ "from:"
|
||||
+ srcVertexId
|
||||
+ "-"
|
||||
+ targetVertexId
|
||||
+ "-"
|
||||
+ this.partition.getClass()
|
||||
+ ")";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,9 +9,7 @@ import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Job graph, the logical plan of streaming job.
|
||||
*/
|
||||
/** Job graph, the logical plan of streaming job. */
|
||||
public class JobGraph implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobGraph.class);
|
||||
@@ -30,8 +28,10 @@ public class JobGraph implements Serializable {
|
||||
}
|
||||
|
||||
public JobGraph(
|
||||
String jobName, Map<String, String> jobConfig,
|
||||
List<JobVertex> jobVertices, List<JobEdge> jobEdges) {
|
||||
String jobName,
|
||||
Map<String, String> jobConfig,
|
||||
List<JobVertex> jobVertices,
|
||||
List<JobEdge> jobEdges) {
|
||||
this.jobName = jobName;
|
||||
this.jobConfig = jobConfig;
|
||||
this.jobVertices = jobVertices;
|
||||
@@ -43,7 +43,7 @@ public class JobGraph implements Serializable {
|
||||
* Generate direct-graph(made up of a set of vertices and connected by edges) by current job graph
|
||||
* for simple log printing.
|
||||
*
|
||||
* @return Digraph in string type.
|
||||
* <p>Returns Digraph in string type.
|
||||
*/
|
||||
public String generateDigraph() {
|
||||
StringBuilder digraph = new StringBuilder();
|
||||
@@ -136,5 +136,4 @@ public class JobGraph implements Serializable {
|
||||
LOG.info(jobEdge.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+5
-6
@@ -36,8 +36,7 @@ public class JobGraphBuilder {
|
||||
}
|
||||
|
||||
public JobGraphBuilder(
|
||||
List<StreamSink> streamSinkList, String jobName,
|
||||
Map<String, String> jobConfig) {
|
||||
List<StreamSink> streamSinkList, String jobName, Map<String, String> jobConfig) {
|
||||
this.jobGraph = new JobGraph(jobName, jobConfig);
|
||||
this.streamSinkList = streamSinkList;
|
||||
this.edgeIdGenerator = new AtomicInteger(0);
|
||||
@@ -60,7 +59,8 @@ public class JobGraphBuilder {
|
||||
stream = stream.getOriginalStream();
|
||||
}
|
||||
StreamOperator streamOperator = stream.getOperator();
|
||||
Preconditions.checkArgument(stream.getLanguage() == streamOperator.getLanguage(),
|
||||
Preconditions.checkArgument(
|
||||
stream.getLanguage() == streamOperator.getLanguage(),
|
||||
"Reference stream should be skipped.");
|
||||
int vertexId = stream.getId();
|
||||
int parallelism = stream.getParallelism();
|
||||
@@ -76,8 +76,8 @@ public class JobGraphBuilder {
|
||||
} else if (stream instanceof StreamSource) {
|
||||
jobVertex = new JobVertex(vertexId, parallelism, VertexType.SOURCE, streamOperator, config);
|
||||
} else if (stream instanceof DataStream || stream instanceof PythonDataStream) {
|
||||
jobVertex = new JobVertex(
|
||||
vertexId, parallelism, VertexType.TRANSFORMATION, streamOperator, config);
|
||||
jobVertex =
|
||||
new JobVertex(vertexId, parallelism, VertexType.TRANSFORMATION, streamOperator, config);
|
||||
Stream parentStream = stream.getInputStream();
|
||||
int inputVertexId = parentStream.getId();
|
||||
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
|
||||
@@ -114,5 +114,4 @@ public class JobGraphBuilder {
|
||||
private int getEdgeId() {
|
||||
return this.edgeIdGenerator.incrementAndGet();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+80
-67
@@ -36,24 +36,32 @@ public class JobGraphOptimizer {
|
||||
|
||||
public JobGraphOptimizer(JobGraph jobGraph) {
|
||||
this.jobGraph = jobGraph;
|
||||
vertexMap = jobGraph.getJobVertices().stream()
|
||||
.collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
|
||||
outputEdgesMap = vertexMap.keySet().stream().collect(Collectors.toMap(
|
||||
id -> vertexMap.get(id), id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
|
||||
vertexMap =
|
||||
jobGraph.getJobVertices().stream()
|
||||
.collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
|
||||
outputEdgesMap =
|
||||
vertexMap.keySet().stream()
|
||||
.collect(
|
||||
Collectors.toMap(
|
||||
id -> vertexMap.get(id),
|
||||
id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
|
||||
mergedVertexMap = new HashMap<>();
|
||||
}
|
||||
|
||||
public JobGraph optimize() {
|
||||
// Deep-first traverse nodes from source to sink to merge vertices that can be chained
|
||||
// together.
|
||||
jobGraph.getSourceVertices().forEach(vertex -> {
|
||||
List<JobVertex> verticesToMerge = new ArrayList<>();
|
||||
verticesToMerge.add(vertex);
|
||||
mergeVerticesRecursively(vertex, verticesToMerge);
|
||||
});
|
||||
jobGraph
|
||||
.getSourceVertices()
|
||||
.forEach(
|
||||
vertex -> {
|
||||
List<JobVertex> verticesToMerge = new ArrayList<>();
|
||||
verticesToMerge.add(vertex);
|
||||
mergeVerticesRecursively(vertex, verticesToMerge);
|
||||
});
|
||||
|
||||
List<JobVertex> vertices = mergedVertexMap.values().stream()
|
||||
.map(Pair::getLeft).collect(Collectors.toList());
|
||||
List<JobVertex> vertices =
|
||||
mergedVertexMap.values().stream().map(Pair::getLeft).collect(Collectors.toList());
|
||||
|
||||
return new JobGraph(jobGraph.getJobName(), jobGraph.getJobConfig(), vertices, createEdges());
|
||||
}
|
||||
@@ -65,18 +73,19 @@ public class JobGraphOptimizer {
|
||||
if (outputEdges.isEmpty()) {
|
||||
mergeAndAddVertex(verticesToMerge);
|
||||
} else {
|
||||
outputEdges.forEach(edge -> {
|
||||
JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
|
||||
if (canBeChained(vertex, succeedingVertex, edge)) {
|
||||
verticesToMerge.add(succeedingVertex);
|
||||
mergeVerticesRecursively(succeedingVertex, verticesToMerge);
|
||||
} else {
|
||||
mergeAndAddVertex(verticesToMerge);
|
||||
List<JobVertex> newMergedVertices = new ArrayList<>();
|
||||
newMergedVertices.add(succeedingVertex);
|
||||
mergeVerticesRecursively(succeedingVertex, newMergedVertices);
|
||||
}
|
||||
});
|
||||
outputEdges.forEach(
|
||||
edge -> {
|
||||
JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
|
||||
if (canBeChained(vertex, succeedingVertex, edge)) {
|
||||
verticesToMerge.add(succeedingVertex);
|
||||
mergeVerticesRecursively(succeedingVertex, verticesToMerge);
|
||||
} else {
|
||||
mergeAndAddVertex(verticesToMerge);
|
||||
List<JobVertex> newMergedVertices = new ArrayList<>();
|
||||
newMergedVertices.add(succeedingVertex);
|
||||
mergeVerticesRecursively(succeedingVertex, newMergedVertices);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,25 +98,30 @@ public class JobGraphOptimizer {
|
||||
// no chain
|
||||
mergedVertex = headVertex;
|
||||
} else {
|
||||
List<StreamOperator> operators = verticesToMerge.stream()
|
||||
.map(v -> vertexMap.get(v.getVertexId())
|
||||
.getStreamOperator())
|
||||
.collect(Collectors.toList());
|
||||
List<Map<String, String>> configs = verticesToMerge.stream()
|
||||
.map(v -> vertexMap.get(v.getVertexId()).getConfig())
|
||||
.collect(Collectors.toList());
|
||||
List<StreamOperator> operators =
|
||||
verticesToMerge.stream()
|
||||
.map(v -> vertexMap.get(v.getVertexId()).getStreamOperator())
|
||||
.collect(Collectors.toList());
|
||||
List<Map<String, String>> configs =
|
||||
verticesToMerge.stream()
|
||||
.map(v -> vertexMap.get(v.getVertexId()).getConfig())
|
||||
.collect(Collectors.toList());
|
||||
StreamOperator operator;
|
||||
if (language == Language.JAVA) {
|
||||
operator = ChainedOperator.newChainedOperator(operators, configs);
|
||||
} else {
|
||||
List<PythonOperator> pythonOperators = operators.stream()
|
||||
.map(o -> (PythonOperator) o)
|
||||
.collect(Collectors.toList());
|
||||
List<PythonOperator> pythonOperators =
|
||||
operators.stream().map(o -> (PythonOperator) o).collect(Collectors.toList());
|
||||
operator = new ChainedPythonOperator(pythonOperators, configs);
|
||||
}
|
||||
// chained operator config is placed into `ChainedOperator`.
|
||||
mergedVertex = new JobVertex(headVertex.getVertexId(), headVertex.getParallelism(),
|
||||
headVertex.getVertexType(), operator, new HashMap<>());
|
||||
mergedVertex =
|
||||
new JobVertex(
|
||||
headVertex.getVertexId(),
|
||||
headVertex.getParallelism(),
|
||||
headVertex.getVertexType(),
|
||||
operator,
|
||||
new HashMap<>());
|
||||
}
|
||||
|
||||
mergedVertexMap.put(mergedVertex.getVertexId(), Pair.of(mergedVertex, verticesToMerge));
|
||||
@@ -115,37 +129,39 @@ public class JobGraphOptimizer {
|
||||
|
||||
private List<JobEdge> createEdges() {
|
||||
List<JobEdge> edges = new ArrayList<>();
|
||||
mergedVertexMap.forEach((id, pair) -> {
|
||||
JobVertex mergedVertex = pair.getLeft();
|
||||
List<JobVertex> mergedVertices = pair.getRight();
|
||||
JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
|
||||
// input edge will be set up in input vertices
|
||||
if (outputEdgesMap.containsKey(tailVertex)) {
|
||||
outputEdgesMap.get(tailVertex).forEach(edge -> {
|
||||
Pair<JobVertex, List<JobVertex>> downstreamPair =
|
||||
mergedVertexMap.get(edge.getTargetVertexId());
|
||||
// change ForwardPartition to RoundRobinPartition.
|
||||
Partition partition = changePartition(edge.getPartition());
|
||||
JobEdge newEdge = new JobEdge(
|
||||
mergedVertex.getVertexId(),
|
||||
downstreamPair.getLeft().getVertexId(),
|
||||
partition);
|
||||
edges.add(newEdge);
|
||||
mergedVertexMap.forEach(
|
||||
(id, pair) -> {
|
||||
JobVertex mergedVertex = pair.getLeft();
|
||||
List<JobVertex> mergedVertices = pair.getRight();
|
||||
JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
|
||||
// input edge will be set up in input vertices
|
||||
if (outputEdgesMap.containsKey(tailVertex)) {
|
||||
outputEdgesMap
|
||||
.get(tailVertex)
|
||||
.forEach(
|
||||
edge -> {
|
||||
Pair<JobVertex, List<JobVertex>> downstreamPair =
|
||||
mergedVertexMap.get(edge.getTargetVertexId());
|
||||
// change ForwardPartition to RoundRobinPartition.
|
||||
Partition partition = changePartition(edge.getPartition());
|
||||
JobEdge newEdge =
|
||||
new JobEdge(
|
||||
mergedVertex.getVertexId(),
|
||||
downstreamPair.getLeft().getVertexId(),
|
||||
partition);
|
||||
edges.add(newEdge);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
});
|
||||
return edges;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change ForwardPartition to RoundRobinPartition.
|
||||
*/
|
||||
/** Change ForwardPartition to RoundRobinPartition. */
|
||||
private Partition changePartition(Partition partition) {
|
||||
if (partition instanceof PythonPartition) {
|
||||
PythonPartition pythonPartition = (PythonPartition) partition;
|
||||
if (!pythonPartition.isConstructedFromBinary() &&
|
||||
pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
|
||||
if (!pythonPartition.isConstructedFromBinary()
|
||||
&& pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
|
||||
return PythonPartition.RoundRobinPartition;
|
||||
} else {
|
||||
return partition;
|
||||
@@ -160,11 +176,9 @@ public class JobGraphOptimizer {
|
||||
}
|
||||
|
||||
private boolean canBeChained(
|
||||
JobVertex precedingVertex,
|
||||
JobVertex succeedingVertex,
|
||||
JobEdge edge) {
|
||||
if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1 ||
|
||||
jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
|
||||
JobVertex precedingVertex, JobVertex succeedingVertex, JobEdge edge) {
|
||||
if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1
|
||||
|| jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
|
||||
return false;
|
||||
}
|
||||
if (precedingVertex.getParallelism() != succeedingVertex.getParallelism()) {
|
||||
@@ -183,9 +197,8 @@ public class JobGraphOptimizer {
|
||||
return partition instanceof ForwardPartition;
|
||||
} else {
|
||||
PythonPartition pythonPartition = (PythonPartition) partition;
|
||||
return !pythonPartition.isConstructedFromBinary() &&
|
||||
pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
|
||||
return !pythonPartition.isConstructedFromBinary()
|
||||
&& pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,9 +6,7 @@ import io.ray.streaming.operator.StreamOperator;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Job vertex is a cell node where logic is executed.
|
||||
*/
|
||||
/** Job vertex is a cell node where logic is executed. */
|
||||
public class JobVertex implements Serializable {
|
||||
|
||||
private int vertexId;
|
||||
@@ -71,5 +69,4 @@ public class JobVertex implements Serializable {
|
||||
.add("config", config)
|
||||
.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-3
@@ -1,8 +1,6 @@
|
||||
package io.ray.streaming.jobgraph;
|
||||
|
||||
/**
|
||||
* Different roles for a node.
|
||||
*/
|
||||
/** Different roles for a node. */
|
||||
public enum VertexType {
|
||||
SOURCE,
|
||||
TRANSFORMATION,
|
||||
|
||||
@@ -37,8 +37,7 @@ public class Record<T> implements Serializable {
|
||||
return false;
|
||||
}
|
||||
Record<?> record = (Record<?>) o;
|
||||
return Objects.equals(stream, record.stream) &&
|
||||
Objects.equals(value, record.value);
|
||||
return Objects.equals(stream, record.stream) && Objects.equals(value, record.value);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -50,5 +49,4 @@ public class Record<T> implements Serializable {
|
||||
public String toString() {
|
||||
return value.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+3
-9
@@ -1,20 +1,14 @@
|
||||
package io.ray.streaming.operator;
|
||||
|
||||
/**
|
||||
* Chain strategy for streaming operators. Chained operators are run in the same thread.
|
||||
*/
|
||||
/** Chain strategy for streaming operators. Chained operators are run in the same thread. */
|
||||
public enum ChainStrategy {
|
||||
/**
|
||||
* The operator won't be chained with preceding operators, but maybe chained with succeeding
|
||||
* operators.
|
||||
*/
|
||||
HEAD,
|
||||
/**
|
||||
* Operators will be chained together when possible.
|
||||
*/
|
||||
/** Operators will be chained together when possible. */
|
||||
ALWAYS,
|
||||
/**
|
||||
* The operator won't be chained with any operator.
|
||||
*/
|
||||
/** The operator won't be chained with any operator. */
|
||||
NEVER
|
||||
}
|
||||
|
||||
@@ -25,13 +25,9 @@ public interface Operator extends Serializable {
|
||||
|
||||
ChainStrategy getChainStrategy();
|
||||
|
||||
/**
|
||||
* See {@link Function#saveCheckpoint()}.
|
||||
*/
|
||||
/** See {@link Function#saveCheckpoint()}. */
|
||||
Serializable saveCheckpoint();
|
||||
|
||||
/**
|
||||
* See {@link Function#loadCheckpoint(Serializable)}.
|
||||
*/
|
||||
/** See {@link Function#loadCheckpoint(Serializable)}. */
|
||||
void loadCheckpoint(Serializable checkpointObject);
|
||||
}
|
||||
|
||||
+1
-1
@@ -11,4 +11,4 @@ public interface SourceOperator<T> extends Operator {
|
||||
default OperatorType getOpType() {
|
||||
return OperatorType.SOURCE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+1
-3
@@ -42,9 +42,7 @@ public abstract class StreamOperator<F extends Function> implements Operator {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() {
|
||||
|
||||
}
|
||||
public void finish() {}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
+25
-30
@@ -20,9 +20,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Abstract base class for chained operators.
|
||||
*/
|
||||
/** Abstract base class for chained operators. */
|
||||
public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
|
||||
protected final List<StreamOperator> operators;
|
||||
@@ -31,9 +29,10 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
private final List<Map<String, String>> configs;
|
||||
|
||||
public ChainedOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
Preconditions.checkArgument(operators.size() >= 2,
|
||||
"Need at lease two operators to be chained together");
|
||||
operators.stream().skip(1)
|
||||
Preconditions.checkArgument(
|
||||
operators.size() >= 2, "Need at lease two operators to be chained together");
|
||||
operators.stream()
|
||||
.skip(1)
|
||||
.forEach(operator -> Preconditions.checkArgument(operator instanceof OneInputOperator));
|
||||
this.operators = operators;
|
||||
this.configs = configs;
|
||||
@@ -44,10 +43,11 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
// Dont' call super.open() as we `open` every operator separately.
|
||||
List<ForwardCollector> succeedingCollectors = operators.stream().skip(1)
|
||||
.map(operator -> new ForwardCollector(
|
||||
(OneInputOperator) operator))
|
||||
.collect(Collectors.toList());
|
||||
List<ForwardCollector> succeedingCollectors =
|
||||
operators.stream()
|
||||
.skip(1)
|
||||
.map(operator -> new ForwardCollector((OneInputOperator) operator))
|
||||
.collect(Collectors.toList());
|
||||
for (int i = 0; i < operators.size() - 1; i++) {
|
||||
StreamOperator operator = operators.get(i);
|
||||
List<ForwardCollector> forwardCollectors =
|
||||
@@ -70,8 +70,7 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return operators.stream().map(Operator::getName)
|
||||
.collect(Collectors.joining(" -> ", "[", "]"));
|
||||
return operators.stream().map(Operator::getName).collect(Collectors.joining(" -> ", "[", "]"));
|
||||
}
|
||||
|
||||
public List<StreamOperator> getOperators() {
|
||||
@@ -104,20 +103,21 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
}
|
||||
|
||||
private RuntimeContext createRuntimeContext(RuntimeContext runtimeContext, int index) {
|
||||
return (RuntimeContext) Proxy.newProxyInstance(runtimeContext.getClass().getClassLoader(),
|
||||
new Class[] {RuntimeContext.class},
|
||||
(proxy, method, methodArgs) -> {
|
||||
if (method.getName().equals("getConfig")) {
|
||||
return configs.get(index);
|
||||
} else {
|
||||
return method.invoke(runtimeContext, methodArgs);
|
||||
}
|
||||
});
|
||||
return (RuntimeContext)
|
||||
Proxy.newProxyInstance(
|
||||
runtimeContext.getClass().getClassLoader(),
|
||||
new Class[] {RuntimeContext.class},
|
||||
(proxy, method, methodArgs) -> {
|
||||
if (method.getName().equals("getConfig")) {
|
||||
return configs.get(index);
|
||||
} else {
|
||||
return method.invoke(runtimeContext, methodArgs);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static ChainedOperator newChainedOperator(
|
||||
List<StreamOperator> operators,
|
||||
List<Map<String, String>> configs) {
|
||||
List<StreamOperator> operators, List<Map<String, String>> configs) {
|
||||
switch (operators.get(0).getOpType()) {
|
||||
case SOURCE:
|
||||
return new ChainedSourceOperator(operators, configs);
|
||||
@@ -131,8 +131,7 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
}
|
||||
}
|
||||
|
||||
static class ChainedSourceOperator<T> extends ChainedOperator
|
||||
implements SourceOperator<T> {
|
||||
static class ChainedSourceOperator<T> extends ChainedOperator implements SourceOperator<T> {
|
||||
|
||||
private final SourceOperator<T> sourceOperator;
|
||||
|
||||
@@ -151,11 +150,9 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
public SourceContext<T> getSourceContext() {
|
||||
return sourceOperator.getSourceContext();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class ChainedOneInputOperator<T> extends ChainedOperator
|
||||
implements OneInputOperator<T> {
|
||||
static class ChainedOneInputOperator<T> extends ChainedOperator implements OneInputOperator<T> {
|
||||
|
||||
private final OneInputOperator<T> inputOperator;
|
||||
|
||||
@@ -169,7 +166,6 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
inputOperator.processElement(record);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class ChainedTwoInputOperator<L, R> extends ChainedOperator
|
||||
@@ -187,6 +183,5 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
|
||||
public void processElement(Record<L> record1, Record<R> record2) {
|
||||
inputOperator.processElement(record1, record2);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class FilterOperator<T> extends StreamOperator<FilterFunction<T>> implements
|
||||
OneInputOperator<T> {
|
||||
public class FilterOperator<T> extends StreamOperator<FilterFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public FilterOperator(FilterFunction<T> filterFunction) {
|
||||
super(filterFunction);
|
||||
|
||||
+2
-2
@@ -9,8 +9,8 @@ import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
import java.util.List;
|
||||
|
||||
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>> implements
|
||||
OneInputOperator<T> {
|
||||
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
private CollectionCollector collectionCollector;
|
||||
|
||||
|
||||
+4
-9
@@ -15,12 +15,10 @@ import io.ray.streaming.operator.TwoInputOperator;
|
||||
* @param <K> Type of the data in the join key.
|
||||
* @param <O> Type of the data in the joined stream.
|
||||
*/
|
||||
public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R, O>> implements
|
||||
TwoInputOperator<L, R> {
|
||||
public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R, O>>
|
||||
implements TwoInputOperator<L, R> {
|
||||
|
||||
public JoinOperator() {
|
||||
|
||||
}
|
||||
public JoinOperator() {}
|
||||
|
||||
public JoinOperator(JoinFunction<L, R, O> function) {
|
||||
super(function);
|
||||
@@ -28,13 +26,10 @@ public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R,
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<L> record1, Record<R> record2) {
|
||||
|
||||
}
|
||||
public void processElement(Record<L> record1, Record<R> record2) {}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return OperatorType.TWO_INPUT;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-3
@@ -6,8 +6,8 @@ import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>> implements
|
||||
OneInputOperator<T> {
|
||||
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public KeyByOperator(KeyFunction<T, K> keyFunction) {
|
||||
super(keyFunction);
|
||||
@@ -19,4 +19,3 @@ public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>> imple
|
||||
collect(new KeyRecord<>(key, record.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+2
-2
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>> implements
|
||||
OneInputOperator<T> {
|
||||
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public MapOperator(MapFunction<T, R> mapFunction) {
|
||||
super(mapFunction);
|
||||
|
||||
+2
-3
@@ -12,8 +12,8 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>> implements
|
||||
OneInputOperator<T> {
|
||||
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
private Map<K, T> reduceState;
|
||||
|
||||
@@ -43,5 +43,4 @@ public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>> impl
|
||||
collect(record);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-2
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>> implements
|
||||
OneInputOperator<T> {
|
||||
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>>
|
||||
implements OneInputOperator<T> {
|
||||
|
||||
public SinkOperator(SinkFunction<T> sinkFunction) {
|
||||
super(sinkFunction);
|
||||
|
||||
-2
@@ -61,7 +61,5 @@ public class SourceOperatorImpl<T> extends StreamOperator<SourceFunction<T>>
|
||||
collector.collect(new Record<>(t));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-3
@@ -6,8 +6,7 @@ import io.ray.streaming.message.Record;
|
||||
import io.ray.streaming.operator.OneInputOperator;
|
||||
import io.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class UnionOperator<T> extends StreamOperator<Function> implements
|
||||
OneInputOperator<T> {
|
||||
public class UnionOperator<T> extends StreamOperator<Function> implements OneInputOperator<T> {
|
||||
|
||||
public UnionOperator() {
|
||||
super(Functions.emptyFunction());
|
||||
@@ -17,5 +16,4 @@ public class UnionOperator<T> extends StreamOperator<Function> implements
|
||||
public void processElement(Record<T> record) {
|
||||
collect(record);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+19
-19
@@ -7,14 +7,18 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* Represents a user defined python function.
|
||||
* <p>Python worker can use information in this class to create a function object.</p>
|
||||
* <p>If this object is constructed from serialized python function,
|
||||
* python worker can deserialize it to create python function directly. If this object is
|
||||
* constructed from moduleName and className/functionName, python worker will use `importlib` to
|
||||
* load python function.</p>
|
||||
* <p>If the python data stream api is invoked from python, `function` will be not null.</p>
|
||||
* <p>If the python data stream api is invoked from java, `moduleName` and
|
||||
* `functionName` will be not null.</p>
|
||||
*
|
||||
* <p>Python worker can use information in this class to create a function object.
|
||||
*
|
||||
* <p>If this object is constructed from serialized python function, python worker can deserialize
|
||||
* it to create python function directly. If this object is constructed from moduleName and
|
||||
* className/functionName, python worker will use `importlib` to load python function.
|
||||
*
|
||||
* <p>If the python data stream api is invoked from python, `function` will be not null.
|
||||
*
|
||||
* <p>If the python data stream api is invoked from java, `moduleName` and `functionName` will be
|
||||
* not null.
|
||||
*
|
||||
* <p>
|
||||
*/
|
||||
public class PythonFunction implements Function {
|
||||
@@ -30,9 +34,7 @@ public class PythonFunction implements Function {
|
||||
|
||||
private String functionInterface;
|
||||
|
||||
/**
|
||||
* @param functionInterface function class name in `ray.streaming.function` module.
|
||||
*/
|
||||
/** @param functionInterface function class name in `ray.streaming.function` module. */
|
||||
FunctionInterface(String functionInterface) {
|
||||
this.functionInterface = functionInterface;
|
||||
}
|
||||
@@ -66,13 +68,10 @@ public class PythonFunction implements Function {
|
||||
* Create a {@link PythonFunction} from a moduleName and streaming function name.
|
||||
*
|
||||
* @param moduleName module name of streaming function.
|
||||
* @param functionName function name of streaming function. {@code functionName} is the name
|
||||
* of a
|
||||
* @param functionName function name of streaming function. {@code functionName} is the name of a
|
||||
* python function, or class name of subclass of `ray.streaming.function.`
|
||||
*/
|
||||
public PythonFunction(
|
||||
String moduleName,
|
||||
String functionName) {
|
||||
public PythonFunction(String moduleName, String functionName) {
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(moduleName));
|
||||
Preconditions.checkArgument(StringUtils.isNotBlank(functionName));
|
||||
this.function = null;
|
||||
@@ -110,12 +109,13 @@ public class PythonFunction implements Function {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner = new StringJoiner(", ",
|
||||
PythonFunction.class.getSimpleName() + "[", "]");
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonFunction.class.getSimpleName() + "[", "]");
|
||||
if (function != null) {
|
||||
stringJoiner.add("function=binary function");
|
||||
} else {
|
||||
stringJoiner.add("moduleName='" + moduleName + "'")
|
||||
stringJoiner
|
||||
.add("moduleName='" + moduleName + "'")
|
||||
.add("functionName='" + functionName + "'");
|
||||
}
|
||||
stringJoiner.add("functionInterface='" + functionInterface + "'");
|
||||
|
||||
+10
-10
@@ -12,9 +12,7 @@ import java.util.Map;
|
||||
import java.util.StringJoiner;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Represents a {@link StreamOperator} that wraps python {@link PythonFunction}.
|
||||
*/
|
||||
/** Represents a {@link StreamOperator} that wraps python {@link PythonFunction}. */
|
||||
@SuppressWarnings("unchecked")
|
||||
public class PythonOperator extends StreamOperator {
|
||||
|
||||
@@ -65,8 +63,10 @@ public class PythonOperator extends StreamOperator {
|
||||
StackTraceElement[] trace = Thread.currentThread().getStackTrace();
|
||||
Preconditions.checkState(trace.length >= 2);
|
||||
StackTraceElement traceElement = trace[2];
|
||||
String msg = String.format("Method %s.%s shouldn't be called.",
|
||||
traceElement.getClassName(), traceElement.getMethodName());
|
||||
String msg =
|
||||
String.format(
|
||||
"Method %s.%s shouldn't be called.",
|
||||
traceElement.getClassName(), traceElement.getMethodName());
|
||||
throw new UnsupportedOperationException(msg);
|
||||
}
|
||||
|
||||
@@ -90,13 +90,12 @@ public class PythonOperator extends StreamOperator {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner = new StringJoiner(", ",
|
||||
PythonOperator.class.getSimpleName() + "[", "]");
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonOperator.class.getSimpleName() + "[", "]");
|
||||
if (function != null) {
|
||||
stringJoiner.add("function='" + function + "'");
|
||||
} else {
|
||||
stringJoiner.add("moduleName='" + moduleName + "'")
|
||||
.add("className='" + className + "'");
|
||||
stringJoiner.add("moduleName='" + moduleName + "'").add("className='" + className + "'");
|
||||
}
|
||||
return stringJoiner.toString();
|
||||
}
|
||||
@@ -130,7 +129,8 @@ public class PythonOperator extends StreamOperator {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return operators.stream().map(Operator::getName)
|
||||
return operators.stream()
|
||||
.map(Operator::getName)
|
||||
.collect(Collectors.joining(" -> ", "[", "]"));
|
||||
}
|
||||
|
||||
|
||||
+20
-19
@@ -7,25 +7,26 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* Represents a python partition function.
|
||||
* <p>
|
||||
* Python worker can create a partition object using information in this PythonPartition.
|
||||
* <p>
|
||||
* If this object is constructed from serialized python partition, python worker can deserialize it
|
||||
* to create python partition directly. If this object is constructed from moduleName and
|
||||
*
|
||||
* <p>Python worker can create a partition object using information in this PythonPartition.
|
||||
*
|
||||
* <p>If this object is constructed from serialized python partition, python worker can deserialize
|
||||
* it to create python partition directly. If this object is constructed from moduleName and
|
||||
* className/functionName, python worker will use `importlib` to load python partition function.
|
||||
*
|
||||
* <p>
|
||||
*/
|
||||
public class PythonPartition implements Partition<Object> {
|
||||
|
||||
public static final PythonPartition BroadcastPartition = new PythonPartition(
|
||||
"ray.streaming.partition", "BroadcastPartition");
|
||||
public static final PythonPartition KeyPartition = new PythonPartition(
|
||||
"ray.streaming.partition", "KeyPartition");
|
||||
public static final PythonPartition RoundRobinPartition = new PythonPartition(
|
||||
"ray.streaming.partition", "RoundRobinPartition");
|
||||
public static final PythonPartition BroadcastPartition =
|
||||
new PythonPartition("ray.streaming.partition", "BroadcastPartition");
|
||||
public static final PythonPartition KeyPartition =
|
||||
new PythonPartition("ray.streaming.partition", "KeyPartition");
|
||||
public static final PythonPartition RoundRobinPartition =
|
||||
new PythonPartition("ray.streaming.partition", "RoundRobinPartition");
|
||||
public static final String FORWARD_PARTITION_CLASS = "ForwardPartition";
|
||||
public static final PythonPartition ForwardPartition = new PythonPartition(
|
||||
"ray.streaming.partition", FORWARD_PARTITION_CLASS);
|
||||
public static final PythonPartition ForwardPartition =
|
||||
new PythonPartition("ray.streaming.partition", FORWARD_PARTITION_CLASS);
|
||||
|
||||
private byte[] partition;
|
||||
private String moduleName;
|
||||
@@ -51,8 +52,8 @@ public class PythonPartition implements Partition<Object> {
|
||||
|
||||
@Override
|
||||
public int[] partition(Object record, int numPartition) {
|
||||
String msg = String.format("partition method of %s shouldn't be called.",
|
||||
getClass().getSimpleName());
|
||||
String msg =
|
||||
String.format("partition method of %s shouldn't be called.", getClass().getSimpleName());
|
||||
throw new UnsupportedOperationException(msg);
|
||||
}
|
||||
|
||||
@@ -74,15 +75,15 @@ public class PythonPartition implements Partition<Object> {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringJoiner stringJoiner = new StringJoiner(", ",
|
||||
PythonPartition.class.getSimpleName() + "[", "]");
|
||||
StringJoiner stringJoiner =
|
||||
new StringJoiner(", ", PythonPartition.class.getSimpleName() + "[", "]");
|
||||
if (partition != null) {
|
||||
stringJoiner.add("partition=binary partition");
|
||||
} else {
|
||||
stringJoiner.add("moduleName='" + moduleName + "'")
|
||||
stringJoiner
|
||||
.add("moduleName='" + moduleName + "'")
|
||||
.add("functionName='" + functionName + "'");
|
||||
}
|
||||
return stringJoiner.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+16
-30
@@ -13,14 +13,10 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a stream of data whose transformations will be executed in python.
|
||||
*/
|
||||
/** Represents a stream of data whose transformations will be executed in python. */
|
||||
public class PythonDataStream extends Stream<PythonDataStream, Object> implements PythonStream {
|
||||
|
||||
protected PythonDataStream(
|
||||
StreamingContext streamingContext,
|
||||
PythonOperator pythonOperator) {
|
||||
protected PythonDataStream(StreamingContext streamingContext, PythonOperator pythonOperator) {
|
||||
super(streamingContext, pythonOperator);
|
||||
}
|
||||
|
||||
@@ -36,9 +32,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
}
|
||||
|
||||
public PythonDataStream(
|
||||
PythonDataStream input,
|
||||
PythonOperator pythonOperator,
|
||||
Partition<Object> partition) {
|
||||
PythonDataStream input, PythonOperator pythonOperator, Partition<Object> partition) {
|
||||
super(input, pythonOperator, partition);
|
||||
}
|
||||
|
||||
@@ -57,8 +51,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a map function to this stream.
|
||||
*
|
||||
* @param func The python MapFunction.
|
||||
* @return A new PythonDataStream.
|
||||
* @param func The python MapFunction. Returns A new PythonDataStream.
|
||||
*/
|
||||
public PythonDataStream map(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.MAP_FUNCTION);
|
||||
@@ -72,8 +65,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a flat-map function to this stream.
|
||||
*
|
||||
* @param func The python FlapMapFunction.
|
||||
* @return A new PythonDataStream
|
||||
* @param func The python FlapMapFunction. Returns A new PythonDataStream
|
||||
*/
|
||||
public PythonDataStream flatMap(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.FLAT_MAP_FUNCTION);
|
||||
@@ -87,9 +79,8 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a filter function to this stream.
|
||||
*
|
||||
* @param func The python FilterFunction.
|
||||
* @return A new PythonDataStream that contains only the elements satisfying the given filter
|
||||
* predicate.
|
||||
* @param func The python FilterFunction. Returns A new PythonDataStream that contains only the
|
||||
* elements satisfying the given filter predicate.
|
||||
*/
|
||||
public PythonDataStream filter(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.FILTER_FUNCTION);
|
||||
@@ -101,8 +92,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
* same type with each other.
|
||||
*
|
||||
* @param stream The DataStream to union output with.
|
||||
* @param others The other DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
* @param others The other DataStreams to union output with. Returns A new UnionStream.
|
||||
*/
|
||||
public final PythonDataStream union(PythonDataStream stream, PythonDataStream... others) {
|
||||
List<PythonDataStream> streams = new ArrayList<>();
|
||||
@@ -115,8 +105,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
* Apply union transformations to this stream by merging {@link PythonDataStream} outputs of the
|
||||
* same type with each other.
|
||||
*
|
||||
* @param streams The DataStreams to union output with.
|
||||
* @return A new UnionStream.
|
||||
* @param streams The DataStreams to union output with. Returns A new UnionStream.
|
||||
*/
|
||||
public final PythonDataStream union(List<PythonDataStream> streams) {
|
||||
if (this instanceof PythonUnionStream) {
|
||||
@@ -135,8 +124,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a sink function and get a StreamSink.
|
||||
*
|
||||
* @param func The python SinkFunction.
|
||||
* @return A new StreamSink.
|
||||
* @param func The python SinkFunction. Returns A new StreamSink.
|
||||
*/
|
||||
public PythonStreamSink sink(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.SINK_FUNCTION);
|
||||
@@ -150,8 +138,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a key-by function to this stream.
|
||||
*
|
||||
* @param func the python keyFunction.
|
||||
* @return A new KeyDataStream.
|
||||
* @param func the python keyFunction. Returns A new KeyDataStream.
|
||||
*/
|
||||
public PythonKeyDataStream keyBy(PythonFunction func) {
|
||||
checkPartitionCall();
|
||||
@@ -162,7 +149,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply broadcast to this stream.
|
||||
*
|
||||
* @return This stream.
|
||||
* <p>Returns This stream.
|
||||
*/
|
||||
public PythonDataStream broadcast() {
|
||||
checkPartitionCall();
|
||||
@@ -172,8 +159,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
/**
|
||||
* Apply a partition to this stream.
|
||||
*
|
||||
* @param partition The partitioning strategy.
|
||||
* @return This stream.
|
||||
* @param partition The partitioning strategy. Returns This stream.
|
||||
*/
|
||||
public PythonDataStream partitionBy(PythonPartition partition) {
|
||||
checkPartitionCall();
|
||||
@@ -186,8 +172,9 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
*/
|
||||
private void checkPartitionCall() {
|
||||
if (getInputStream() != null && getInputStream().getLanguage() == Language.JAVA) {
|
||||
throw new RuntimeException("Partition related methods can't be called on a " +
|
||||
"python stream if parent stream is a java stream.");
|
||||
throw new RuntimeException(
|
||||
"Partition related methods can't be called on a "
|
||||
+ "python stream if parent stream is a java stream.");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,5 +191,4 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
|
||||
public Language getLanguage() {
|
||||
return Language.PYTHON;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -8,9 +8,7 @@ import io.ray.streaming.python.PythonFunction.FunctionInterface;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
import io.ray.streaming.python.PythonPartition;
|
||||
|
||||
/**
|
||||
* Represents a python DataStream returned by a key-by operation.
|
||||
*/
|
||||
/** Represents a python DataStream returned by a key-by operation. */
|
||||
@SuppressWarnings("unchecked")
|
||||
public class PythonKeyDataStream extends PythonDataStream implements PythonStream {
|
||||
|
||||
@@ -33,8 +31,7 @@ public class PythonKeyDataStream extends PythonDataStream implements PythonStrea
|
||||
/**
|
||||
* Apply a reduce function to this stream.
|
||||
*
|
||||
* @param func The reduce function.
|
||||
* @return A new DataStream.
|
||||
* @param func The reduce function. Returns A new DataStream.
|
||||
*/
|
||||
public PythonDataStream reduce(PythonFunction func) {
|
||||
func.setFunctionInterface(FunctionInterface.REDUCE_FUNCTION);
|
||||
@@ -51,5 +48,4 @@ public class PythonKeyDataStream extends PythonDataStream implements PythonStrea
|
||||
public KeyDataStream<Object, Object> asJavaStream() {
|
||||
return new KeyDataStream(this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -1,8 +1,4 @@
|
||||
package io.ray.streaming.python.stream;
|
||||
|
||||
/**
|
||||
* A marker interface used to identify all python streams.
|
||||
*/
|
||||
public interface PythonStream {
|
||||
|
||||
}
|
||||
/** A marker interface used to identify all python streams. */
|
||||
public interface PythonStream {}
|
||||
|
||||
+1
-4
@@ -4,9 +4,7 @@ import io.ray.streaming.api.Language;
|
||||
import io.ray.streaming.api.stream.StreamSink;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
|
||||
/**
|
||||
* Represents a sink of the PythonStream.
|
||||
*/
|
||||
/** Represents a sink of the PythonStream. */
|
||||
public class PythonStreamSink extends StreamSink implements PythonStream {
|
||||
|
||||
public PythonStreamSink(PythonDataStream input, PythonOperator sinkOperator) {
|
||||
@@ -18,5 +16,4 @@ public class PythonStreamSink extends StreamSink implements PythonStream {
|
||||
public Language getLanguage() {
|
||||
return Language.PYTHON;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -7,9 +7,7 @@ import io.ray.streaming.python.PythonFunction;
|
||||
import io.ray.streaming.python.PythonFunction.FunctionInterface;
|
||||
import io.ray.streaming.python.PythonOperator;
|
||||
|
||||
/**
|
||||
* Represents a source of the PythonStream.
|
||||
*/
|
||||
/** Represents a source of the PythonStream. */
|
||||
public class PythonStreamSource extends PythonDataStream implements StreamSource {
|
||||
|
||||
private PythonStreamSource(StreamingContext streamingContext, PythonFunction sourceFunction) {
|
||||
@@ -18,10 +16,8 @@ public class PythonStreamSource extends PythonDataStream implements StreamSource
|
||||
}
|
||||
|
||||
public static PythonStreamSource from(
|
||||
StreamingContext streamingContext,
|
||||
PythonFunction sourceFunction) {
|
||||
StreamingContext streamingContext, PythonFunction sourceFunction) {
|
||||
sourceFunction.setFunctionInterface(FunctionInterface.SOURCE_FUNCTION);
|
||||
return new PythonStreamSource(streamingContext, sourceFunction);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+3
-3
@@ -6,6 +6,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents a union DataStream.
|
||||
*
|
||||
* <p>This stream does not create a physical operation, it only affects how upstream data are
|
||||
* connected to downstream data.
|
||||
*/
|
||||
@@ -16,8 +17,7 @@ public class PythonUnionStream extends PythonDataStream {
|
||||
public PythonUnionStream(PythonDataStream input, List<PythonDataStream> others) {
|
||||
// Union stream does not create a physical operation, so we don't have to set partition
|
||||
// function for it.
|
||||
super(input, new PythonOperator(
|
||||
"ray.streaming.operator", "UnionOperator"));
|
||||
super(input, new PythonOperator("ray.streaming.operator", "UnionOperator"));
|
||||
this.unionStreams = new ArrayList<>();
|
||||
others.forEach(this::addStream);
|
||||
}
|
||||
@@ -33,4 +33,4 @@ public class PythonUnionStream extends PythonDataStream {
|
||||
public List<PythonDataStream> getUnionStreams() {
|
||||
return unionStreams;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ public class Config {
|
||||
public static final String READ_TIMEOUT_MS = "read_timeout_ms";
|
||||
public static final String DEFAULT_READ_TIMEOUT_MS = "10";
|
||||
|
||||
|
||||
public static final String STREAMING_RING_BUFFER_CAPACITY = "streaming.ring_buffer_capacity";
|
||||
// write an empty message if there is no data to be written in this
|
||||
// interval.
|
||||
@@ -31,6 +30,4 @@ public class Config {
|
||||
public static final String FLOW_CONTROL_TYPE = "streaming.flow_control_type";
|
||||
public static final String WRITER_CONSUMED_STEP = "streaming.writer.consumed_step";
|
||||
public static final String READER_CONSUMED_STEP = "streaming.reader.consumed_step";
|
||||
|
||||
|
||||
}
|
||||
|
||||
+5
-6
@@ -2,7 +2,6 @@ package io.ray.streaming.api.stream;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.operator.impl.MapOperator;
|
||||
import io.ray.streaming.python.stream.PythonDataStream;
|
||||
@@ -14,8 +13,8 @@ public class StreamTest {
|
||||
|
||||
@Test
|
||||
public void testReferencedDataStream() {
|
||||
DataStream dataStream = new DataStream(StreamingContext.buildContext(),
|
||||
new MapOperator(value -> null));
|
||||
DataStream dataStream =
|
||||
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
|
||||
PythonDataStream pythonDataStream = dataStream.asPythonStream();
|
||||
DataStream javaStream = pythonDataStream.asJavaStream();
|
||||
assertEquals(dataStream.getId(), pythonDataStream.getId());
|
||||
@@ -27,8 +26,8 @@ public class StreamTest {
|
||||
|
||||
@Test
|
||||
public void testReferencedKeyDataStream() {
|
||||
DataStream dataStream = new DataStream(StreamingContext.buildContext(),
|
||||
new MapOperator(value -> null));
|
||||
DataStream dataStream =
|
||||
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
|
||||
KeyDataStream keyDataStream = dataStream.keyBy(value -> null);
|
||||
PythonKeyDataStream pythonKeyDataStream = keyDataStream.asPythonStream();
|
||||
KeyDataStream javaKeyDataStream = pythonKeyDataStream.asJavaStream();
|
||||
@@ -38,4 +37,4 @@ public class StreamTest {
|
||||
assertEquals(keyDataStream.getParallelism(), pythonKeyDataStream.getParallelism());
|
||||
assertEquals(keyDataStream.getParallelism(), javaKeyDataStream.getParallelism());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+6
-8
@@ -33,13 +33,12 @@ public class JobGraphBuilderTest {
|
||||
JobVertex sourceVertex = jobVertexList.get(0);
|
||||
Assert.assertEquals(sinkVertex.getVertexType(), VertexType.SINK);
|
||||
Assert.assertEquals(sourceVertex.getVertexType(), VertexType.SOURCE);
|
||||
|
||||
}
|
||||
|
||||
public JobGraph buildDataSyncJobGraph() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream = DataStreamSource.fromCollection(streamingContext,
|
||||
Lists.newArrayList("a", "b", "c"));
|
||||
DataStream<String> dataStream =
|
||||
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("a", "b", "c"));
|
||||
StreamSink streamSink = dataStream.sink(x -> LOG.info(x));
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
@@ -73,10 +72,9 @@ public class JobGraphBuilderTest {
|
||||
|
||||
public JobGraph buildKeyByJobGraph() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream = DataStreamSource.fromCollection(streamingContext,
|
||||
Lists.newArrayList("1", "2", "3", "4"));
|
||||
StreamSink streamSink = dataStream.keyBy(x -> x)
|
||||
.sink(x -> LOG.info(x));
|
||||
DataStream<String> dataStream =
|
||||
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("1", "2", "3", "4"));
|
||||
StreamSink streamSink = dataStream.keyBy(x -> x).sink(x -> LOG.info(x));
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
JobGraph jobGraph = jobGraphBuilder.build();
|
||||
@@ -92,4 +90,4 @@ public class JobGraphBuilderTest {
|
||||
Assert.assertTrue(diGraph.contains("\"1-SourceOperatorImpl\" -> \"2-KeyByOperator\""));
|
||||
Assert.assertTrue(diGraph.contains("\"2-KeyByOperator\" -> \"3-SinkOperator\""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+15
-15
@@ -2,7 +2,6 @@ package io.ray.streaming.jobgraph;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import io.ray.streaming.api.context.StreamingContext;
|
||||
import io.ray.streaming.api.stream.DataStream;
|
||||
@@ -19,13 +18,14 @@ public class JobGraphOptimizerTest {
|
||||
@Test
|
||||
public void testOptimize() {
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
DataStream<Integer> source1 = DataStreamSource.fromCollection(context,
|
||||
Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 = DataStreamSource.fromCollection(context,
|
||||
Lists.newArrayList("1", "2", "3"));
|
||||
DataStream<String> source3 = DataStreamSource.fromCollection(context,
|
||||
Lists.newArrayList("2", "3", "4"));
|
||||
source1.filter(x -> x > 1)
|
||||
DataStream<Integer> source1 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
|
||||
DataStream<String> source3 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("2", "3", "4"));
|
||||
source1
|
||||
.filter(x -> x > 1)
|
||||
.map(String::valueOf)
|
||||
.union(source2)
|
||||
.join(source3)
|
||||
@@ -44,11 +44,12 @@ public class JobGraphOptimizerTest {
|
||||
@Test
|
||||
public void testOptimizeHybridStream() {
|
||||
StreamingContext context = StreamingContext.buildContext();
|
||||
DataStream<Integer> source1 = DataStreamSource.fromCollection(context,
|
||||
Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 = DataStreamSource.fromCollection(context,
|
||||
Lists.newArrayList("1", "2", "3"));
|
||||
source1.asPythonStream()
|
||||
DataStream<Integer> source1 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
|
||||
DataStream<String> source2 =
|
||||
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
|
||||
source1
|
||||
.asPythonStream()
|
||||
.map(pyFunc(1))
|
||||
.filter(pyFunc(2))
|
||||
.union(source2.asPythonStream().filter(pyFunc(3)).map(pyFunc(4)))
|
||||
@@ -68,5 +69,4 @@ public class JobGraphOptimizerTest {
|
||||
private PythonFunction pyFunc(int number) {
|
||||
return new PythonFunction("module", "func" + number);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user