[Java] Format ray java code (#13056)

This commit is contained in:
chaokunyang
2020-12-29 10:36:16 +08:00
committed by GitHub
parent cc1c2c3dc9
commit d1dd3410c8
422 changed files with 4384 additions and 5035 deletions
@@ -9,5 +9,4 @@ package io.ray.streaming.api.collector;
public interface Collector<T> {
void collect(T value);
}
@@ -9,9 +9,7 @@ import io.ray.streaming.state.keystate.state.MapState;
import io.ray.streaming.state.keystate.state.ValueState;
import java.util.Map;
/**
* Encapsulate the runtime information of a streaming task.
*/
/** Encapsulate the runtime information of a streaming task. */
public interface RuntimeContext {
int getTaskId();
@@ -20,14 +18,10 @@ public interface RuntimeContext {
int getParallelism();
/**
* @return config of current function
*/
/** Returns config of current function */
Map<String, String> getConfig();
/**
* @return config of the job
*/
/** Returns config of the job */
Map<String, String> getJobConfig();
Long getCheckpointId();
@@ -19,28 +19,20 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Encapsulate the context information of a streaming Job.
*/
/** Encapsulate the context information of a streaming Job. */
public class StreamingContext implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(StreamingContext.class);
private transient AtomicInteger idGenerator;
/**
* The sinks of this streaming job.
*/
/** The sinks of this streaming job. */
private List<StreamSink> streamSinks;
/**
* The user custom streaming job configuration.
*/
/** The user custom streaming job configuration. */
private Map<String, String> jobConfig;
/**
* The logic plan.
*/
/** The logic plan. */
private JobGraph jobGraph;
private StreamingContext() {
@@ -53,9 +45,7 @@ public class StreamingContext implements Serializable {
return new StreamingContext();
}
/**
* Construct job DAG, and execute the job.
*/
/** Construct job DAG, and execute the job. */
public void execute(String jobName) {
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(this.streamSinks, jobName);
JobGraph originalJobGraph = jobGraphBuilder.build();
@@ -78,8 +68,8 @@ public class StreamingContext implements Serializable {
ServiceLoader<JobClient> serviceLoader = ServiceLoader.load(JobClient.class);
Iterator<JobClient> iterator = serviceLoader.iterator();
Preconditions.checkArgument(iterator.hasNext(),
"No JobClient implementation has been provided.");
Preconditions.checkArgument(
iterator.hasNext(), "No JobClient implementation has been provided.");
JobClient jobClient = iterator.next();
jobClient.submit(jobGraph, jobConfig);
}
@@ -2,31 +2,27 @@ package io.ray.streaming.api.function;
import java.io.Serializable;
/**
* Interface of streaming functions.
*/
/** Interface of streaming functions. */
public interface Function extends Serializable {
/**
* This method will be called periodically by the framework. You should return a serializable
* object which represents the function state; the framework will serialize this object, save
* it to storage, and load it back during fail-over through
* {@link Function#loadCheckpoint(Serializable)}.
* This method will be called periodically by the framework. You should return a serializable
* object which represents the function state; the framework will serialize this object, save it
* to storage, and load it back during fail-over through {@link
* Function#loadCheckpoint(Serializable)}.
*
* @return A serializable object which represents function state.
* <p>Returns A serializable object which represents function state.
*/
default Serializable saveCheckpoint() {
return null;
}
/**
* This method will be called by the framework when a worker has died and been restarted.
* We will pass the last object you returned in {@link Function#saveCheckpoint()} when
* doing checkpoint; you are responsible for loading this object back into your function.
* This method will be called by the framework when a worker has died and been restarted. We will
* pass the last object you returned in {@link Function#saveCheckpoint()} when doing checkpoint;
* you are responsible for loading this object back into your function.
*
* @param checkpointObject the last object you returned in {@link Function#saveCheckpoint()}
*/
default void loadCheckpoint(Serializable checkpointObject) {
}
default void loadCheckpoint(Serializable checkpointObject) {}
}
@@ -20,5 +20,4 @@ public interface RichFunction extends Function {
* Tear-down method for the user function, called after the last call to the user function.
*/
void close();
}
@@ -14,8 +14,8 @@ public interface FilterFunction<T> extends Function {
/**
* The filter function that evaluates the predicate.
*
* @param value The value to be filtered.
* @return True for values that should be retained, false for values to be filtered out.
* @param value The value to be filtered. Returns True for values that should be retained, false
* for values to be filtered out.
*/
boolean filter(T value) throws Exception;
}
@@ -13,5 +13,4 @@ import io.ray.streaming.api.function.Function;
public interface JoinFunction<T, O, R> extends Function {
R join(T left, O right);
}
@@ -18,6 +18,5 @@ public interface SourceFunction<T> extends Function {
interface SourceContext<T> {
void collect(T element) throws Exception;
}
}
@@ -18,8 +18,7 @@ public class CollectionSourceFunction<T> implements SourceFunction<T> {
}
@Override
public void init(int totalParallel, int currentIndex) {
}
public void init(int totalParallel, int currentIndex) {}
@Override
public void fetch(SourceContext<T> ctx) throws Exception {
@@ -33,7 +32,5 @@ public class CollectionSourceFunction<T> implements SourceFunction<T> {
}
@Override
public void close() {
}
public void close() {}
}
@@ -4,9 +4,7 @@ import io.ray.streaming.api.context.RuntimeContext;
import io.ray.streaming.api.function.Function;
import io.ray.streaming.api.function.RichFunction;
/**
* A util class for {@link Function}
*/
/** A util class for {@link Function} */
public class Functions {
private static class DefaultRichFunction implements RichFunction {
@@ -18,12 +16,10 @@ public class Functions {
}
@Override
public void open(RuntimeContext runtimeContext) {
}
public void open(RuntimeContext runtimeContext) {}
@Override
public void close() {
}
public void close() {}
public Function getFunction() {
return function;
@@ -41,5 +37,4 @@ public class Functions {
public static RichFunction emptyFunction() {
return new DefaultRichFunction(null);
}
}
@@ -15,9 +15,8 @@ public interface Partition<T> extends Function {
* record.
*
* @param record The record.
* @param numPartition num of partitions
* @return IDs of the downstream partitions that should receive the record.
* @param numPartition num of partitions Returns IDs of the downstream partitions that should
* receive the record.
*/
int[] partition(T record, int numPartition);
}
@@ -3,15 +3,12 @@ package io.ray.streaming.api.partition.impl;
import io.ray.streaming.api.partition.Partition;
import java.util.stream.IntStream;
/**
* Broadcast the record to all downstream partitions.
*/
/** Broadcast the record to all downstream partitions. */
public class BroadcastPartition<T> implements Partition<T> {
private int[] partitions = new int[0];
public BroadcastPartition() {
}
public BroadcastPartition() {}
@Override
public int[] partition(T value, int numPartition) {
@@ -20,5 +17,4 @@ public class BroadcastPartition<T> implements Partition<T> {
}
return partitions;
}
}
@@ -22,6 +22,7 @@ import java.util.List;
/**
* Represents a stream of data.
*
* <p>This class defines all the streaming operations.
*
* @param <T> Type of data in the stream.
@@ -33,9 +34,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
}
public DataStream(
StreamingContext streamingContext,
StreamOperator streamOperator,
Partition<T> partition) {
StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
super(streamingContext, streamOperator, partition);
}
@@ -44,9 +43,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
}
public <R> DataStream(
DataStream<R> input,
StreamOperator streamOperator,
Partition<T> partition) {
DataStream<R> input, StreamOperator streamOperator, Partition<T> partition) {
super(input, streamOperator, partition);
}
@@ -62,8 +59,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
* Apply a map function to this stream.
*
* @param mapFunction The map function.
* @param <R> Type of data returned by the map function.
* @return A new DataStream.
* @param <R> Type of data returned by the map function. Returns A new DataStream.
*/
public <R> DataStream<R> map(MapFunction<T, R> mapFunction) {
return new DataStream<>(this, new MapOperator<>(mapFunction));
@@ -73,8 +69,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
* Apply a flat-map function to this stream.
*
* @param flatMapFunction The FlatMapFunction
* @param <R> Type of data returned by the flatmap function.
* @return A new DataStream
* @param <R> Type of data returned by the flatmap function. Returns A new DataStream
*/
public <R> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapFunction) {
return new DataStream<>(this, new FlatMapOperator<>(flatMapFunction));
@@ -89,8 +84,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
* type with each other.
*
* @param stream The DataStream to union output with.
* @param others The other DataStreams to union output with.
* @return A new UnionStream.
* @param others The other DataStreams to union output with. Returns A new UnionStream.
*/
@SafeVarargs
public final DataStream<T> union(DataStream<T> stream, DataStream<T>... others) {
@@ -104,8 +98,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
* Apply union transformations to this stream by merging {@link DataStream} outputs of the same
* type with each other.
*
* @param streams The DataStreams to union output with.
* @return A new UnionStream.
* @param streams The DataStreams to union output with. Returns A new UnionStream.
*/
public final DataStream<T> union(List<DataStream<T>> streams) {
if (this instanceof UnionStream) {
@@ -122,8 +115,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
*
* @param other Another stream.
* @param <O> The type of the other stream data.
* @param <R> The type of the data in the joined stream.
* @return A new JoinStream.
* @param <R> The type of the data in the joined stream. Returns A new JoinStream.
*/
public <O, R> JoinStream<T, O, R> join(DataStream<O> other) {
return new JoinStream<>(this, other);
@@ -137,8 +129,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
/**
* Apply a sink function and get a StreamSink.
*
* @param sinkFunction The sink function.
* @return A new StreamSink.
* @param sinkFunction The sink function. Returns A new StreamSink.
*/
public DataStreamSink<T> sink(SinkFunction<T> sinkFunction) {
return new DataStreamSink<>(this, new SinkOperator<>(sinkFunction));
@@ -148,8 +139,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
* Apply a key-by function to this stream.
*
* @param keyFunction the key function.
* @param <K> The type of the key.
* @return A new KeyDataStream.
* @param <K> The type of the key. Returns A new KeyDataStream.
*/
public <K> KeyDataStream<K, T> keyBy(KeyFunction<T, K> keyFunction) {
checkPartitionCall();
@@ -159,7 +149,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
/**
* Apply broadcast to this stream.
*
* @return This stream.
* <p>Returns This stream.
*/
public DataStream<T> broadcast() {
checkPartitionCall();
@@ -169,8 +159,7 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
/**
* Apply a partition to this stream.
*
* @param partition The partitioning strategy.
* @return This stream.
* @param partition The partitioning strategy. Returns This stream.
*/
public DataStream<T> partitionBy(Partition<T> partition) {
checkPartitionCall();
@@ -183,8 +172,9 @@ public class DataStream<T> extends Stream<DataStream<T>, T> {
*/
private void checkPartitionCall() {
if (getInputStream() != null && getInputStream().getLanguage() == Language.PYTHON) {
throw new RuntimeException("Partition related methods can't be called on a " +
"java stream if parent stream is a python stream.");
throw new RuntimeException(
"Partition related methods can't be called on a "
+ "java stream if parent stream is a python stream.");
}
}
@@ -27,12 +27,10 @@ public class DataStreamSource<T> extends DataStream<T> implements StreamSource<T
*
* @param context Stream context.
* @param values A collection of values.
* @param <T> The type of source data.
* @return A DataStreamSource.
* @param <T> The type of source data. Returns A DataStreamSource.
*/
public static <T> DataStreamSource<T> fromCollection(
StreamingContext context, Collection<T> values) {
return new DataStreamSource<>(context, new CollectionSourceFunction<>(values));
}
}
@@ -25,9 +25,7 @@ public class JoinStream<L, R, O> extends DataStream<L> {
return rightStream;
}
/**
* Apply key-by to the left join stream.
*/
/** Apply key-by to the left join stream. */
public <K> Where<K> where(KeyFunction<L, K> keyFunction) {
return new Where<>(this, keyFunction);
}
@@ -64,7 +62,8 @@ public class JoinStream<L, R, O> extends DataStream<L> {
private KeyFunction<R, K> rightKeyByFunction;
Equal(
JoinStream<L, R, O> joinStream, KeyFunction<L, K> leftKeyByFunction,
JoinStream<L, R, O> joinStream,
KeyFunction<L, K> leftKeyByFunction,
KeyFunction<R, K> rightKeyByFunction) {
this.joinStream = joinStream;
this.leftKeyByFunction = leftKeyByFunction;
@@ -78,5 +77,4 @@ public class JoinStream<L, R, O> extends DataStream<L> {
return (DataStream<O>) joinStream;
}
}
}
@@ -33,8 +33,7 @@ public class KeyDataStream<K, T> extends DataStream<T> {
/**
* Apply a reduce function to this stream.
*
* @param reduceFunction The reduce function.
* @return A new DataStream.
* @param reduceFunction The reduce function. Returns A new DataStream.
*/
public DataStream<T> reduce(ReduceFunction reduceFunction) {
return new DataStream<>(this, new ReduceOperator(reduceFunction));
@@ -45,8 +44,7 @@ public class KeyDataStream<K, T> extends DataStream<T> {
*
* @param aggregateFunction The aggregate function
* @param <A> The type of aggregated intermediate data.
* @param <O> The type of result data.
* @return A new DataStream.
* @param <O> The type of result data. Returns A new DataStream.
*/
public <A, O> DataStream<O> aggregate(AggregateFunction<T, A, O> aggregateFunction) {
return new DataStream<>(this, null);
@@ -60,5 +58,4 @@ public class KeyDataStream<K, T> extends DataStream<T> {
public PythonKeyDataStream asPythonStream() {
return new PythonKeyDataStream(this);
}
}
@@ -19,8 +19,7 @@ import java.util.Map;
* @param <S> Type of stream class
* @param <T> Type of the data in the stream.
*/
public abstract class Stream<S extends Stream<S, T>, T>
implements Serializable {
public abstract class Stream<S extends Stream<S, T>, T> implements Serializable {
private final int id;
private final StreamingContext streamingContext;
@@ -36,14 +35,15 @@ public abstract class Stream<S extends Stream<S, T>, T>
}
public Stream(
StreamingContext streamingContext,
StreamOperator streamOperator,
Partition<T> partition) {
StreamingContext streamingContext, StreamOperator streamOperator, Partition<T> partition) {
this(streamingContext, null, streamOperator, partition);
}
public Stream(Stream inputStream, StreamOperator streamOperator) {
this(inputStream.getStreamingContext(), inputStream, streamOperator,
this(
inputStream.getStreamingContext(),
inputStream,
streamOperator,
getForwardPartition(streamOperator));
}
@@ -87,8 +87,7 @@ public abstract class Stream<S extends Stream<S, T>, T>
case JAVA:
return new ForwardPartition<>();
default:
throw new UnsupportedOperationException(
"Unsupported language " + operator.getLanguage());
throw new UnsupportedOperationException("Unsupported language " + operator.getLanguage());
}
}
@@ -169,18 +168,14 @@ public abstract class Stream<S extends Stream<S, T>, T>
return originalStream;
}
/**
* Set chain strategy for this stream
*/
/** Set chain strategy for this stream */
public S withChainStrategy(ChainStrategy chainStrategy) {
Preconditions.checkArgument(!isProxyStream());
operator.setChainStrategy(chainStrategy);
return self();
}
/**
* Disable chain for this stream
*/
/** Disable chain for this stream */
public S disableChain() {
return withChainStrategy(ChainStrategy.NEVER);
}
@@ -5,6 +5,4 @@ package io.ray.streaming.api.stream;
*
* @param <T> The type of StreamSource data.
*/
public interface StreamSource<T> {
}
public interface StreamSource<T> {}
@@ -6,6 +6,7 @@ import java.util.List;
/**
* Represents a union DataStream.
*
* <p>This stream does not create a physical operation, it only affects how upstream data are
* connected to downstream data.
*
@@ -3,9 +3,7 @@ package io.ray.streaming.client;
import io.ray.streaming.jobgraph.JobGraph;
import java.util.Map;
/**
* Interface of the job client.
*/
/** Interface of the job client. */
public interface JobClient {
/**
@@ -3,9 +3,7 @@ package io.ray.streaming.jobgraph;
import io.ray.streaming.api.partition.Partition;
import java.io.Serializable;
/**
* Job edge is connection and partition rules of upstream and downstream execution nodes.
*/
/** Job edge is connection and partition rules of upstream and downstream execution nodes. */
public class JobEdge implements Serializable {
private int srcVertexId;
@@ -44,7 +42,13 @@ public class JobEdge implements Serializable {
@Override
public String toString() {
return "Edge(" + "from:" + srcVertexId + "-" + targetVertexId + "-" + this.partition.getClass()
return "Edge("
+ "from:"
+ srcVertexId
+ "-"
+ targetVertexId
+ "-"
+ this.partition.getClass()
+ ")";
}
}
@@ -9,9 +9,7 @@ import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Job graph, the logical plan of streaming job.
*/
/** Job graph, the logical plan of streaming job. */
public class JobGraph implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(JobGraph.class);
@@ -30,8 +28,10 @@ public class JobGraph implements Serializable {
}
public JobGraph(
String jobName, Map<String, String> jobConfig,
List<JobVertex> jobVertices, List<JobEdge> jobEdges) {
String jobName,
Map<String, String> jobConfig,
List<JobVertex> jobVertices,
List<JobEdge> jobEdges) {
this.jobName = jobName;
this.jobConfig = jobConfig;
this.jobVertices = jobVertices;
@@ -43,7 +43,7 @@ public class JobGraph implements Serializable {
* Generate a directed graph (made up of a set of vertices connected by edges) from the current
* job graph for simple log printing.
*
* @return Digraph in string type.
* <p>Returns Digraph in string type.
*/
public String generateDigraph() {
StringBuilder digraph = new StringBuilder();
@@ -136,5 +136,4 @@ public class JobGraph implements Serializable {
LOG.info(jobEdge.toString());
}
}
}
@@ -36,8 +36,7 @@ public class JobGraphBuilder {
}
public JobGraphBuilder(
List<StreamSink> streamSinkList, String jobName,
Map<String, String> jobConfig) {
List<StreamSink> streamSinkList, String jobName, Map<String, String> jobConfig) {
this.jobGraph = new JobGraph(jobName, jobConfig);
this.streamSinkList = streamSinkList;
this.edgeIdGenerator = new AtomicInteger(0);
@@ -60,7 +59,8 @@ public class JobGraphBuilder {
stream = stream.getOriginalStream();
}
StreamOperator streamOperator = stream.getOperator();
Preconditions.checkArgument(stream.getLanguage() == streamOperator.getLanguage(),
Preconditions.checkArgument(
stream.getLanguage() == streamOperator.getLanguage(),
"Reference stream should be skipped.");
int vertexId = stream.getId();
int parallelism = stream.getParallelism();
@@ -76,8 +76,8 @@ public class JobGraphBuilder {
} else if (stream instanceof StreamSource) {
jobVertex = new JobVertex(vertexId, parallelism, VertexType.SOURCE, streamOperator, config);
} else if (stream instanceof DataStream || stream instanceof PythonDataStream) {
jobVertex = new JobVertex(
vertexId, parallelism, VertexType.TRANSFORMATION, streamOperator, config);
jobVertex =
new JobVertex(vertexId, parallelism, VertexType.TRANSFORMATION, streamOperator, config);
Stream parentStream = stream.getInputStream();
int inputVertexId = parentStream.getId();
JobEdge jobEdge = new JobEdge(inputVertexId, vertexId, parentStream.getPartition());
@@ -114,5 +114,4 @@ public class JobGraphBuilder {
private int getEdgeId() {
return this.edgeIdGenerator.incrementAndGet();
}
}
@@ -36,24 +36,32 @@ public class JobGraphOptimizer {
public JobGraphOptimizer(JobGraph jobGraph) {
this.jobGraph = jobGraph;
vertexMap = jobGraph.getJobVertices().stream()
.collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
outputEdgesMap = vertexMap.keySet().stream().collect(Collectors.toMap(
id -> vertexMap.get(id), id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
vertexMap =
jobGraph.getJobVertices().stream()
.collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
outputEdgesMap =
vertexMap.keySet().stream()
.collect(
Collectors.toMap(
id -> vertexMap.get(id),
id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
mergedVertexMap = new HashMap<>();
}
public JobGraph optimize() {
// Depth-first traverse nodes from source to sink to merge vertices that can be chained
// together.
jobGraph.getSourceVertices().forEach(vertex -> {
List<JobVertex> verticesToMerge = new ArrayList<>();
verticesToMerge.add(vertex);
mergeVerticesRecursively(vertex, verticesToMerge);
});
jobGraph
.getSourceVertices()
.forEach(
vertex -> {
List<JobVertex> verticesToMerge = new ArrayList<>();
verticesToMerge.add(vertex);
mergeVerticesRecursively(vertex, verticesToMerge);
});
List<JobVertex> vertices = mergedVertexMap.values().stream()
.map(Pair::getLeft).collect(Collectors.toList());
List<JobVertex> vertices =
mergedVertexMap.values().stream().map(Pair::getLeft).collect(Collectors.toList());
return new JobGraph(jobGraph.getJobName(), jobGraph.getJobConfig(), vertices, createEdges());
}
@@ -65,18 +73,19 @@ public class JobGraphOptimizer {
if (outputEdges.isEmpty()) {
mergeAndAddVertex(verticesToMerge);
} else {
outputEdges.forEach(edge -> {
JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
if (canBeChained(vertex, succeedingVertex, edge)) {
verticesToMerge.add(succeedingVertex);
mergeVerticesRecursively(succeedingVertex, verticesToMerge);
} else {
mergeAndAddVertex(verticesToMerge);
List<JobVertex> newMergedVertices = new ArrayList<>();
newMergedVertices.add(succeedingVertex);
mergeVerticesRecursively(succeedingVertex, newMergedVertices);
}
});
outputEdges.forEach(
edge -> {
JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
if (canBeChained(vertex, succeedingVertex, edge)) {
verticesToMerge.add(succeedingVertex);
mergeVerticesRecursively(succeedingVertex, verticesToMerge);
} else {
mergeAndAddVertex(verticesToMerge);
List<JobVertex> newMergedVertices = new ArrayList<>();
newMergedVertices.add(succeedingVertex);
mergeVerticesRecursively(succeedingVertex, newMergedVertices);
}
});
}
}
}
@@ -89,25 +98,30 @@ public class JobGraphOptimizer {
// no chain
mergedVertex = headVertex;
} else {
List<StreamOperator> operators = verticesToMerge.stream()
.map(v -> vertexMap.get(v.getVertexId())
.getStreamOperator())
.collect(Collectors.toList());
List<Map<String, String>> configs = verticesToMerge.stream()
.map(v -> vertexMap.get(v.getVertexId()).getConfig())
.collect(Collectors.toList());
List<StreamOperator> operators =
verticesToMerge.stream()
.map(v -> vertexMap.get(v.getVertexId()).getStreamOperator())
.collect(Collectors.toList());
List<Map<String, String>> configs =
verticesToMerge.stream()
.map(v -> vertexMap.get(v.getVertexId()).getConfig())
.collect(Collectors.toList());
StreamOperator operator;
if (language == Language.JAVA) {
operator = ChainedOperator.newChainedOperator(operators, configs);
} else {
List<PythonOperator> pythonOperators = operators.stream()
.map(o -> (PythonOperator) o)
.collect(Collectors.toList());
List<PythonOperator> pythonOperators =
operators.stream().map(o -> (PythonOperator) o).collect(Collectors.toList());
operator = new ChainedPythonOperator(pythonOperators, configs);
}
// chained operator config is placed into `ChainedOperator`.
mergedVertex = new JobVertex(headVertex.getVertexId(), headVertex.getParallelism(),
headVertex.getVertexType(), operator, new HashMap<>());
mergedVertex =
new JobVertex(
headVertex.getVertexId(),
headVertex.getParallelism(),
headVertex.getVertexType(),
operator,
new HashMap<>());
}
mergedVertexMap.put(mergedVertex.getVertexId(), Pair.of(mergedVertex, verticesToMerge));
@@ -115,37 +129,39 @@ public class JobGraphOptimizer {
private List<JobEdge> createEdges() {
List<JobEdge> edges = new ArrayList<>();
mergedVertexMap.forEach((id, pair) -> {
JobVertex mergedVertex = pair.getLeft();
List<JobVertex> mergedVertices = pair.getRight();
JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
// input edge will be set up in input vertices
if (outputEdgesMap.containsKey(tailVertex)) {
outputEdgesMap.get(tailVertex).forEach(edge -> {
Pair<JobVertex, List<JobVertex>> downstreamPair =
mergedVertexMap.get(edge.getTargetVertexId());
// change ForwardPartition to RoundRobinPartition.
Partition partition = changePartition(edge.getPartition());
JobEdge newEdge = new JobEdge(
mergedVertex.getVertexId(),
downstreamPair.getLeft().getVertexId(),
partition);
edges.add(newEdge);
mergedVertexMap.forEach(
(id, pair) -> {
JobVertex mergedVertex = pair.getLeft();
List<JobVertex> mergedVertices = pair.getRight();
JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
// input edge will be set up in input vertices
if (outputEdgesMap.containsKey(tailVertex)) {
outputEdgesMap
.get(tailVertex)
.forEach(
edge -> {
Pair<JobVertex, List<JobVertex>> downstreamPair =
mergedVertexMap.get(edge.getTargetVertexId());
// change ForwardPartition to RoundRobinPartition.
Partition partition = changePartition(edge.getPartition());
JobEdge newEdge =
new JobEdge(
mergedVertex.getVertexId(),
downstreamPair.getLeft().getVertexId(),
partition);
edges.add(newEdge);
});
}
});
}
});
return edges;
}
/**
* Change ForwardPartition to RoundRobinPartition.
*/
/** Change ForwardPartition to RoundRobinPartition. */
private Partition changePartition(Partition partition) {
if (partition instanceof PythonPartition) {
PythonPartition pythonPartition = (PythonPartition) partition;
if (!pythonPartition.isConstructedFromBinary() &&
pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
if (!pythonPartition.isConstructedFromBinary()
&& pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
return PythonPartition.RoundRobinPartition;
} else {
return partition;
@@ -160,11 +176,9 @@ public class JobGraphOptimizer {
}
private boolean canBeChained(
JobVertex precedingVertex,
JobVertex succeedingVertex,
JobEdge edge) {
if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1 ||
jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
JobVertex precedingVertex, JobVertex succeedingVertex, JobEdge edge) {
if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1
|| jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
return false;
}
if (precedingVertex.getParallelism() != succeedingVertex.getParallelism()) {
@@ -183,9 +197,8 @@ public class JobGraphOptimizer {
return partition instanceof ForwardPartition;
} else {
PythonPartition pythonPartition = (PythonPartition) partition;
return !pythonPartition.isConstructedFromBinary() &&
pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
return !pythonPartition.isConstructedFromBinary()
&& pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
}
}
}
@@ -6,9 +6,7 @@ import io.ray.streaming.operator.StreamOperator;
import java.io.Serializable;
import java.util.Map;
/**
* Job vertex is a cell node where logic is executed.
*/
/** Job vertex is a cell node where logic is executed. */
public class JobVertex implements Serializable {
private int vertexId;
@@ -71,5 +69,4 @@ public class JobVertex implements Serializable {
.add("config", config)
.toString();
}
}
@@ -1,8 +1,6 @@
package io.ray.streaming.jobgraph;
/**
* Different roles for a node.
*/
/** Different roles for a node. */
public enum VertexType {
SOURCE,
TRANSFORMATION,
@@ -37,8 +37,7 @@ public class Record<T> implements Serializable {
return false;
}
Record<?> record = (Record<?>) o;
return Objects.equals(stream, record.stream) &&
Objects.equals(value, record.value);
return Objects.equals(stream, record.stream) && Objects.equals(value, record.value);
}
@Override
@@ -50,5 +49,4 @@ public class Record<T> implements Serializable {
public String toString() {
return value.toString();
}
}
@@ -1,20 +1,14 @@
package io.ray.streaming.operator;
/**
* Chain strategy for streaming operators. Chained operators are run in the same thread.
*/
/** Chain strategy for streaming operators. Chained operators are run in the same thread. */
public enum ChainStrategy {
/**
* The operator won't be chained with preceding operators, but maybe chained with succeeding
* operators.
*/
HEAD,
/**
* Operators will be chained together when possible.
*/
/** Operators will be chained together when possible. */
ALWAYS,
/**
* The operator won't be chained with any operator.
*/
/** The operator won't be chained with any operator. */
NEVER
}
@@ -25,13 +25,9 @@ public interface Operator extends Serializable {
ChainStrategy getChainStrategy();
/**
* See {@link Function#saveCheckpoint()}.
*/
/** See {@link Function#saveCheckpoint()}. */
Serializable saveCheckpoint();
/**
* See {@link Function#loadCheckpoint(Serializable)}.
*/
/** See {@link Function#loadCheckpoint(Serializable)}. */
void loadCheckpoint(Serializable checkpointObject);
}
@@ -11,4 +11,4 @@ public interface SourceOperator<T> extends Operator {
default OperatorType getOpType() {
return OperatorType.SOURCE;
}
}
}
@@ -42,9 +42,7 @@ public abstract class StreamOperator<F extends Function> implements Operator {
}
@Override
public void finish() {
}
public void finish() {}
@Override
public void close() {
@@ -20,9 +20,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Abstract base class for chained operators.
*/
/** Abstract base class for chained operators. */
public abstract class ChainedOperator extends StreamOperator<Function> {
protected final List<StreamOperator> operators;
@@ -31,9 +29,10 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
private final List<Map<String, String>> configs;
public ChainedOperator(List<StreamOperator> operators, List<Map<String, String>> configs) {
Preconditions.checkArgument(operators.size() >= 2,
"Need at lease two operators to be chained together");
operators.stream().skip(1)
Preconditions.checkArgument(
operators.size() >= 2, "Need at lease two operators to be chained together");
operators.stream()
.skip(1)
.forEach(operator -> Preconditions.checkArgument(operator instanceof OneInputOperator));
this.operators = operators;
this.configs = configs;
@@ -44,10 +43,11 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
@Override
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
// Don't call super.open() as we `open` every operator separately.
List<ForwardCollector> succeedingCollectors = operators.stream().skip(1)
.map(operator -> new ForwardCollector(
(OneInputOperator) operator))
.collect(Collectors.toList());
List<ForwardCollector> succeedingCollectors =
operators.stream()
.skip(1)
.map(operator -> new ForwardCollector((OneInputOperator) operator))
.collect(Collectors.toList());
for (int i = 0; i < operators.size() - 1; i++) {
StreamOperator operator = operators.get(i);
List<ForwardCollector> forwardCollectors =
@@ -70,8 +70,7 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
@Override
public String getName() {
return operators.stream().map(Operator::getName)
.collect(Collectors.joining(" -> ", "[", "]"));
return operators.stream().map(Operator::getName).collect(Collectors.joining(" -> ", "[", "]"));
}
public List<StreamOperator> getOperators() {
@@ -104,20 +103,21 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
}
private RuntimeContext createRuntimeContext(RuntimeContext runtimeContext, int index) {
return (RuntimeContext) Proxy.newProxyInstance(runtimeContext.getClass().getClassLoader(),
new Class[] {RuntimeContext.class},
(proxy, method, methodArgs) -> {
if (method.getName().equals("getConfig")) {
return configs.get(index);
} else {
return method.invoke(runtimeContext, methodArgs);
}
});
return (RuntimeContext)
Proxy.newProxyInstance(
runtimeContext.getClass().getClassLoader(),
new Class[] {RuntimeContext.class},
(proxy, method, methodArgs) -> {
if (method.getName().equals("getConfig")) {
return configs.get(index);
} else {
return method.invoke(runtimeContext, methodArgs);
}
});
}
public static ChainedOperator newChainedOperator(
List<StreamOperator> operators,
List<Map<String, String>> configs) {
List<StreamOperator> operators, List<Map<String, String>> configs) {
switch (operators.get(0).getOpType()) {
case SOURCE:
return new ChainedSourceOperator(operators, configs);
@@ -131,8 +131,7 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
}
}
static class ChainedSourceOperator<T> extends ChainedOperator
implements SourceOperator<T> {
static class ChainedSourceOperator<T> extends ChainedOperator implements SourceOperator<T> {
private final SourceOperator<T> sourceOperator;
@@ -151,11 +150,9 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
public SourceContext<T> getSourceContext() {
  // Hand back whatever source context the wrapped source operator exposes.
  final SourceContext<T> delegateContext = sourceOperator.getSourceContext();
  return delegateContext;
}
}
static class ChainedOneInputOperator<T> extends ChainedOperator
implements OneInputOperator<T> {
static class ChainedOneInputOperator<T> extends ChainedOperator implements OneInputOperator<T> {
private final OneInputOperator<T> inputOperator;
@@ -169,7 +166,6 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
public void processElement(Record<T> record) throws Exception {
// Forward the record as-is to inputOperator; any exception it throws propagates to the caller.
inputOperator.processElement(record);
}
}
static class ChainedTwoInputOperator<L, R> extends ChainedOperator
@@ -187,6 +183,5 @@ public abstract class ChainedOperator extends StreamOperator<Function> {
public void processElement(Record<L> record1, Record<R> record2) {
// Forward both records, in the same order, to inputOperator.
inputOperator.processElement(record1, record2);
}
}
}
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
public class FilterOperator<T> extends StreamOperator<FilterFunction<T>> implements
OneInputOperator<T> {
public class FilterOperator<T> extends StreamOperator<FilterFunction<T>>
implements OneInputOperator<T> {
public FilterOperator(FilterFunction<T> filterFunction) {
super(filterFunction);
@@ -9,8 +9,8 @@ import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
import java.util.List;
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>> implements
OneInputOperator<T> {
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>>
implements OneInputOperator<T> {
private CollectionCollector collectionCollector;
@@ -15,12 +15,10 @@ import io.ray.streaming.operator.TwoInputOperator;
* @param <K> Type of the data in the join key.
* @param <O> Type of the data in the joined stream.
*/
public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R, O>> implements
TwoInputOperator<L, R> {
public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R, O>>
implements TwoInputOperator<L, R> {
public JoinOperator() {
}
public JoinOperator() {}
public JoinOperator(JoinFunction<L, R, O> function) {
super(function);
@@ -28,13 +26,10 @@ public class JoinOperator<L, R, K, O> extends StreamOperator<JoinFunction<L, R,
}
@Override
public void processElement(Record<L> record1, Record<R> record2) {
}
public void processElement(Record<L> record1, Record<R> record2) {}
@Override
public OperatorType getOpType() {
// This operator always reports itself as a two-input operator.
return OperatorType.TWO_INPUT;
}
}
@@ -6,8 +6,8 @@ import io.ray.streaming.message.Record;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>> implements
OneInputOperator<T> {
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>>
implements OneInputOperator<T> {
public KeyByOperator(KeyFunction<T, K> keyFunction) {
super(keyFunction);
@@ -19,4 +19,3 @@ public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>> imple
collect(new KeyRecord<>(key, record.getValue()));
}
}
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>> implements
OneInputOperator<T> {
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>>
implements OneInputOperator<T> {
public MapOperator(MapFunction<T, R> mapFunction) {
super(mapFunction);
@@ -12,8 +12,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>> implements
OneInputOperator<T> {
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>>
implements OneInputOperator<T> {
private Map<K, T> reduceState;
@@ -43,5 +43,4 @@ public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>> impl
collect(record);
}
}
}
@@ -5,8 +5,8 @@ import io.ray.streaming.message.Record;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>> implements
OneInputOperator<T> {
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>>
implements OneInputOperator<T> {
public SinkOperator(SinkFunction<T> sinkFunction) {
super(sinkFunction);
@@ -61,7 +61,5 @@ public class SourceOperatorImpl<T> extends StreamOperator<SourceFunction<T>>
collector.collect(new Record<>(t));
}
}
}
}
@@ -6,8 +6,7 @@ import io.ray.streaming.message.Record;
import io.ray.streaming.operator.OneInputOperator;
import io.ray.streaming.operator.StreamOperator;
public class UnionOperator<T> extends StreamOperator<Function> implements
OneInputOperator<T> {
public class UnionOperator<T> extends StreamOperator<Function> implements OneInputOperator<T> {
public UnionOperator() {
super(Functions.emptyFunction());
@@ -17,5 +16,4 @@ public class UnionOperator<T> extends StreamOperator<Function> implements
public void processElement(Record<T> record) {
// Pass the incoming record straight to collect() without modifying it.
collect(record);
}
}
@@ -7,14 +7,18 @@ import org.apache.commons.lang3.StringUtils;
/**
* Represents a user defined python function.
* <p>Python worker can use information in this class to create a function object.</p>
* <p>If this object is constructed from serialized python function,
* python worker can deserialize it to create python function directly. If this object is
* constructed from moduleName and className/functionName, python worker will use `importlib` to
* load python function.</p>
* <p>If the python data stream api is invoked from python, `function` will not be null.</p>
* <p>If the python data stream api is invoked from java, `moduleName` and
* `functionName` will not be null.</p>
*
* <p>Python worker can use information in this class to create a function object.
*
* <p>If this object is constructed from serialized python function, python worker can deserialize
* it to create python function directly. If this object is constructed from moduleName and
* className/functionName, python worker will use `importlib` to load python function.
*
* <p>If the python data stream api is invoked from python, `function` will not be null.
*
* <p>If the python data stream api is invoked from java, `moduleName` and `functionName` will
* not be null.
*
* <p>
*/
public class PythonFunction implements Function {
@@ -30,9 +34,7 @@ public class PythonFunction implements Function {
private String functionInterface;
/**
* @param functionInterface function class name in `ray.streaming.function` module.
*/
/** @param functionInterface function class name in `ray.streaming.function` module. */
FunctionInterface(String functionInterface) {
// Keep the given function class name on this instance.
this.functionInterface = functionInterface;
}
@@ -66,13 +68,10 @@ public class PythonFunction implements Function {
* Create a {@link PythonFunction} from a moduleName and streaming function name.
*
* @param moduleName module name of streaming function.
* @param functionName function name of streaming function. {@code functionName} is the name
* of a
* @param functionName function name of streaming function. {@code functionName} is the name of a
* python function, or class name of subclass of `ray.streaming.function.`
*/
public PythonFunction(
String moduleName,
String functionName) {
public PythonFunction(String moduleName, String functionName) {
Preconditions.checkArgument(StringUtils.isNotBlank(moduleName));
Preconditions.checkArgument(StringUtils.isNotBlank(functionName));
this.function = null;
@@ -110,12 +109,13 @@ public class PythonFunction implements Function {
@Override
public String toString() {
StringJoiner stringJoiner = new StringJoiner(", ",
PythonFunction.class.getSimpleName() + "[", "]");
StringJoiner stringJoiner =
new StringJoiner(", ", PythonFunction.class.getSimpleName() + "[", "]");
if (function != null) {
stringJoiner.add("function=binary function");
} else {
stringJoiner.add("moduleName='" + moduleName + "'")
stringJoiner
.add("moduleName='" + moduleName + "'")
.add("functionName='" + functionName + "'");
}
stringJoiner.add("functionInterface='" + functionInterface + "'");
@@ -12,9 +12,7 @@ import java.util.Map;
import java.util.StringJoiner;
import java.util.stream.Collectors;
/**
* Represents a {@link StreamOperator} that wraps python {@link PythonFunction}.
*/
/** Represents a {@link StreamOperator} that wraps python {@link PythonFunction}. */
@SuppressWarnings("unchecked")
public class PythonOperator extends StreamOperator {
@@ -65,8 +63,10 @@ public class PythonOperator extends StreamOperator {
StackTraceElement[] trace = Thread.currentThread().getStackTrace();
Preconditions.checkState(trace.length >= 2);
StackTraceElement traceElement = trace[2];
String msg = String.format("Method %s.%s shouldn't be called.",
traceElement.getClassName(), traceElement.getMethodName());
String msg =
String.format(
"Method %s.%s shouldn't be called.",
traceElement.getClassName(), traceElement.getMethodName());
throw new UnsupportedOperationException(msg);
}
@@ -90,13 +90,12 @@ public class PythonOperator extends StreamOperator {
@Override
public String toString() {
StringJoiner stringJoiner = new StringJoiner(", ",
PythonOperator.class.getSimpleName() + "[", "]");
StringJoiner stringJoiner =
new StringJoiner(", ", PythonOperator.class.getSimpleName() + "[", "]");
if (function != null) {
stringJoiner.add("function='" + function + "'");
} else {
stringJoiner.add("moduleName='" + moduleName + "'")
.add("className='" + className + "'");
stringJoiner.add("moduleName='" + moduleName + "'").add("className='" + className + "'");
}
return stringJoiner.toString();
}
@@ -130,7 +129,8 @@ public class PythonOperator extends StreamOperator {
@Override
public String getName() {
return operators.stream().map(Operator::getName)
return operators.stream()
.map(Operator::getName)
.collect(Collectors.joining(" -> ", "[", "]"));
}
@@ -7,25 +7,26 @@ import org.apache.commons.lang3.StringUtils;
/**
* Represents a python partition function.
* <p>
* Python worker can create a partition object using information in this PythonPartition.
* <p>
* If this object is constructed from serialized python partition, python worker can deserialize it
* to create python partition directly. If this object is constructed from moduleName and
*
* <p>Python worker can create a partition object using information in this PythonPartition.
*
* <p>If this object is constructed from serialized python partition, python worker can deserialize
* it to create python partition directly. If this object is constructed from moduleName and
* className/functionName, python worker will use `importlib` to load python partition function.
*
* <p>
*/
public class PythonPartition implements Partition<Object> {
public static final PythonPartition BroadcastPartition = new PythonPartition(
"ray.streaming.partition", "BroadcastPartition");
public static final PythonPartition KeyPartition = new PythonPartition(
"ray.streaming.partition", "KeyPartition");
public static final PythonPartition RoundRobinPartition = new PythonPartition(
"ray.streaming.partition", "RoundRobinPartition");
public static final PythonPartition BroadcastPartition =
new PythonPartition("ray.streaming.partition", "BroadcastPartition");
public static final PythonPartition KeyPartition =
new PythonPartition("ray.streaming.partition", "KeyPartition");
public static final PythonPartition RoundRobinPartition =
new PythonPartition("ray.streaming.partition", "RoundRobinPartition");
public static final String FORWARD_PARTITION_CLASS = "ForwardPartition";
public static final PythonPartition ForwardPartition = new PythonPartition(
"ray.streaming.partition", FORWARD_PARTITION_CLASS);
public static final PythonPartition ForwardPartition =
new PythonPartition("ray.streaming.partition", FORWARD_PARTITION_CLASS);
private byte[] partition;
private String moduleName;
@@ -51,8 +52,8 @@ public class PythonPartition implements Partition<Object> {
@Override
public int[] partition(Object record, int numPartition) {
String msg = String.format("partition method of %s shouldn't be called.",
getClass().getSimpleName());
String msg =
String.format("partition method of %s shouldn't be called.", getClass().getSimpleName());
throw new UnsupportedOperationException(msg);
}
@@ -74,15 +75,15 @@ public class PythonPartition implements Partition<Object> {
@Override
public String toString() {
StringJoiner stringJoiner = new StringJoiner(", ",
PythonPartition.class.getSimpleName() + "[", "]");
StringJoiner stringJoiner =
new StringJoiner(", ", PythonPartition.class.getSimpleName() + "[", "]");
if (partition != null) {
stringJoiner.add("partition=binary partition");
} else {
stringJoiner.add("moduleName='" + moduleName + "'")
stringJoiner
.add("moduleName='" + moduleName + "'")
.add("functionName='" + functionName + "'");
}
return stringJoiner.toString();
}
}
@@ -13,14 +13,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Represents a stream of data whose transformations will be executed in python.
*/
/** Represents a stream of data whose transformations will be executed in python. */
public class PythonDataStream extends Stream<PythonDataStream, Object> implements PythonStream {
protected PythonDataStream(
StreamingContext streamingContext,
PythonOperator pythonOperator) {
protected PythonDataStream(StreamingContext streamingContext, PythonOperator pythonOperator) {
super(streamingContext, pythonOperator);
}
@@ -36,9 +32,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
}
public PythonDataStream(
PythonDataStream input,
PythonOperator pythonOperator,
Partition<Object> partition) {
PythonDataStream input, PythonOperator pythonOperator, Partition<Object> partition) {
super(input, pythonOperator, partition);
}
@@ -57,8 +51,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a map function to this stream.
*
* @param func The python MapFunction.
* @return A new PythonDataStream.
* @param func The python MapFunction.
* @return A new PythonDataStream.
*/
public PythonDataStream map(PythonFunction func) {
func.setFunctionInterface(FunctionInterface.MAP_FUNCTION);
@@ -72,8 +65,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a flat-map function to this stream.
*
* @param func The python FlatMapFunction.
* @return A new PythonDataStream
* @param func The python FlatMapFunction.
* @return A new PythonDataStream
*/
public PythonDataStream flatMap(PythonFunction func) {
func.setFunctionInterface(FunctionInterface.FLAT_MAP_FUNCTION);
@@ -87,9 +79,8 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a filter function to this stream.
*
* @param func The python FilterFunction.
* @return A new PythonDataStream that contains only the elements satisfying the given filter
* predicate.
* @param func The python FilterFunction.
* @return A new PythonDataStream that contains only the elements satisfying the given filter
*     predicate.
*/
public PythonDataStream filter(PythonFunction func) {
func.setFunctionInterface(FunctionInterface.FILTER_FUNCTION);
@@ -101,8 +92,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
* same type with each other.
*
* @param stream The DataStream to union output with.
* @param others The other DataStreams to union output with.
* @return A new UnionStream.
* @param others The other DataStreams to union output with.
* @return A new UnionStream.
*/
public final PythonDataStream union(PythonDataStream stream, PythonDataStream... others) {
List<PythonDataStream> streams = new ArrayList<>();
@@ -115,8 +105,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
* Apply union transformations to this stream by merging {@link PythonDataStream} outputs of the
* same type with each other.
*
* @param streams The DataStreams to union output with.
* @return A new UnionStream.
* @param streams The DataStreams to union output with.
* @return A new UnionStream.
*/
public final PythonDataStream union(List<PythonDataStream> streams) {
if (this instanceof PythonUnionStream) {
@@ -135,8 +124,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a sink function and get a StreamSink.
*
* @param func The python SinkFunction.
* @return A new StreamSink.
* @param func The python SinkFunction.
* @return A new StreamSink.
*/
public PythonStreamSink sink(PythonFunction func) {
func.setFunctionInterface(FunctionInterface.SINK_FUNCTION);
@@ -150,8 +138,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a key-by function to this stream.
*
* @param func the python keyFunction.
* @return A new KeyDataStream.
* @param func the python keyFunction.
* @return A new KeyDataStream.
*/
public PythonKeyDataStream keyBy(PythonFunction func) {
checkPartitionCall();
@@ -162,7 +149,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply broadcast to this stream.
*
* @return This stream.
* @return This stream.
*/
public PythonDataStream broadcast() {
checkPartitionCall();
@@ -172,8 +159,7 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
/**
* Apply a partition to this stream.
*
* @param partition The partitioning strategy.
* @return This stream.
* @param partition The partitioning strategy.
* @return This stream.
*/
public PythonDataStream partitionBy(PythonPartition partition) {
checkPartitionCall();
@@ -186,8 +172,9 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
*/
private void checkPartitionCall() {
if (getInputStream() != null && getInputStream().getLanguage() == Language.JAVA) {
throw new RuntimeException("Partition related methods can't be called on a " +
"python stream if parent stream is a java stream.");
throw new RuntimeException(
"Partition related methods can't be called on a "
+ "python stream if parent stream is a java stream.");
}
}
@@ -204,5 +191,4 @@ public class PythonDataStream extends Stream<PythonDataStream, Object> implement
public Language getLanguage() {
// This stream always reports Python as its language.
return Language.PYTHON;
}
}
@@ -8,9 +8,7 @@ import io.ray.streaming.python.PythonFunction.FunctionInterface;
import io.ray.streaming.python.PythonOperator;
import io.ray.streaming.python.PythonPartition;
/**
* Represents a python DataStream returned by a key-by operation.
*/
/** Represents a python DataStream returned by a key-by operation. */
@SuppressWarnings("unchecked")
public class PythonKeyDataStream extends PythonDataStream implements PythonStream {
@@ -33,8 +31,7 @@ public class PythonKeyDataStream extends PythonDataStream implements PythonStrea
/**
* Apply a reduce function to this stream.
*
* @param func The reduce function.
* @return A new DataStream.
* @param func The reduce function.
* @return A new DataStream.
*/
public PythonDataStream reduce(PythonFunction func) {
func.setFunctionInterface(FunctionInterface.REDUCE_FUNCTION);
@@ -51,5 +48,4 @@ public class PythonKeyDataStream extends PythonDataStream implements PythonStrea
public KeyDataStream<Object, Object> asJavaStream() {
  // Build a KeyDataStream from this stream; the constructor is invoked on the raw type.
  KeyDataStream<Object, Object> javaStream = new KeyDataStream(this);
  return javaStream;
}
}
@@ -1,8 +1,4 @@
package io.ray.streaming.python.stream;
/**
* A marker interface used to identify all python streams.
*/
public interface PythonStream {
}
/** A marker interface used to identify all python streams. */
public interface PythonStream {}
@@ -4,9 +4,7 @@ import io.ray.streaming.api.Language;
import io.ray.streaming.api.stream.StreamSink;
import io.ray.streaming.python.PythonOperator;
/**
* Represents a sink of the PythonStream.
*/
/** Represents a sink of the PythonStream. */
public class PythonStreamSink extends StreamSink implements PythonStream {
public PythonStreamSink(PythonDataStream input, PythonOperator sinkOperator) {
@@ -18,5 +16,4 @@ public class PythonStreamSink extends StreamSink implements PythonStream {
public Language getLanguage() {
// This sink always reports Python as its language.
return Language.PYTHON;
}
}
@@ -7,9 +7,7 @@ import io.ray.streaming.python.PythonFunction;
import io.ray.streaming.python.PythonFunction.FunctionInterface;
import io.ray.streaming.python.PythonOperator;
/**
* Represents a source of the PythonStream.
*/
/** Represents a source of the PythonStream. */
public class PythonStreamSource extends PythonDataStream implements StreamSource {
private PythonStreamSource(StreamingContext streamingContext, PythonFunction sourceFunction) {
@@ -18,10 +16,8 @@ public class PythonStreamSource extends PythonDataStream implements StreamSource
}
public static PythonStreamSource from(
StreamingContext streamingContext,
PythonFunction sourceFunction) {
StreamingContext streamingContext, PythonFunction sourceFunction) {
sourceFunction.setFunctionInterface(FunctionInterface.SOURCE_FUNCTION);
return new PythonStreamSource(streamingContext, sourceFunction);
}
}
@@ -6,6 +6,7 @@ import java.util.List;
/**
* Represents a union DataStream.
*
* <p>This stream does not create a physical operation, it only affects how upstream data are
* connected to downstream data.
*/
@@ -16,8 +17,7 @@ public class PythonUnionStream extends PythonDataStream {
public PythonUnionStream(PythonDataStream input, List<PythonDataStream> others) {
// Union stream does not create a physical operation, so we don't have to set partition
// function for it.
super(input, new PythonOperator(
"ray.streaming.operator", "UnionOperator"));
super(input, new PythonOperator("ray.streaming.operator", "UnionOperator"));
this.unionStreams = new ArrayList<>();
others.forEach(this::addStream);
}
@@ -33,4 +33,4 @@ public class PythonUnionStream extends PythonDataStream {
public List<PythonDataStream> getUnionStreams() {
// Returns the unionStreams field itself; no copy is made here.
return unionStreams;
}
}
}
@@ -18,7 +18,6 @@ public class Config {
public static final String READ_TIMEOUT_MS = "read_timeout_ms";
public static final String DEFAULT_READ_TIMEOUT_MS = "10";
public static final String STREAMING_RING_BUFFER_CAPACITY = "streaming.ring_buffer_capacity";
// write an empty message if there is no data to be written in this
// interval.
@@ -31,6 +30,4 @@ public class Config {
public static final String FLOW_CONTROL_TYPE = "streaming.flow_control_type";
public static final String WRITER_CONSUMED_STEP = "streaming.writer.consumed_step";
public static final String READER_CONSUMED_STEP = "streaming.reader.consumed_step";
}
@@ -2,7 +2,6 @@ package io.ray.streaming.api.stream;
import static org.testng.Assert.assertEquals;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.operator.impl.MapOperator;
import io.ray.streaming.python.stream.PythonDataStream;
@@ -14,8 +13,8 @@ public class StreamTest {
@Test
public void testReferencedDataStream() {
DataStream dataStream = new DataStream(StreamingContext.buildContext(),
new MapOperator(value -> null));
DataStream dataStream =
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
PythonDataStream pythonDataStream = dataStream.asPythonStream();
DataStream javaStream = pythonDataStream.asJavaStream();
assertEquals(dataStream.getId(), pythonDataStream.getId());
@@ -27,8 +26,8 @@ public class StreamTest {
@Test
public void testReferencedKeyDataStream() {
DataStream dataStream = new DataStream(StreamingContext.buildContext(),
new MapOperator(value -> null));
DataStream dataStream =
new DataStream(StreamingContext.buildContext(), new MapOperator(value -> null));
KeyDataStream keyDataStream = dataStream.keyBy(value -> null);
PythonKeyDataStream pythonKeyDataStream = keyDataStream.asPythonStream();
KeyDataStream javaKeyDataStream = pythonKeyDataStream.asJavaStream();
@@ -38,4 +37,4 @@ public class StreamTest {
assertEquals(keyDataStream.getParallelism(), pythonKeyDataStream.getParallelism());
assertEquals(keyDataStream.getParallelism(), javaKeyDataStream.getParallelism());
}
}
}
@@ -33,13 +33,12 @@ public class JobGraphBuilderTest {
JobVertex sourceVertex = jobVertexList.get(0);
Assert.assertEquals(sinkVertex.getVertexType(), VertexType.SINK);
Assert.assertEquals(sourceVertex.getVertexType(), VertexType.SOURCE);
}
public JobGraph buildDataSyncJobGraph() {
StreamingContext streamingContext = StreamingContext.buildContext();
DataStream<String> dataStream = DataStreamSource.fromCollection(streamingContext,
Lists.newArrayList("a", "b", "c"));
DataStream<String> dataStream =
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("a", "b", "c"));
StreamSink streamSink = dataStream.sink(x -> LOG.info(x));
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
@@ -73,10 +72,9 @@ public class JobGraphBuilderTest {
public JobGraph buildKeyByJobGraph() {
StreamingContext streamingContext = StreamingContext.buildContext();
DataStream<String> dataStream = DataStreamSource.fromCollection(streamingContext,
Lists.newArrayList("1", "2", "3", "4"));
StreamSink streamSink = dataStream.keyBy(x -> x)
.sink(x -> LOG.info(x));
DataStream<String> dataStream =
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("1", "2", "3", "4"));
StreamSink streamSink = dataStream.keyBy(x -> x).sink(x -> LOG.info(x));
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(Lists.newArrayList(streamSink));
JobGraph jobGraph = jobGraphBuilder.build();
@@ -92,4 +90,4 @@ public class JobGraphBuilderTest {
Assert.assertTrue(diGraph.contains("\"1-SourceOperatorImpl\" -> \"2-KeyByOperator\""));
Assert.assertTrue(diGraph.contains("\"2-KeyByOperator\" -> \"3-SinkOperator\""));
}
}
}
@@ -2,7 +2,6 @@ package io.ray.streaming.jobgraph;
import static org.testng.Assert.assertEquals;
import com.google.common.collect.Lists;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.api.stream.DataStream;
@@ -19,13 +18,14 @@ public class JobGraphOptimizerTest {
@Test
public void testOptimize() {
StreamingContext context = StreamingContext.buildContext();
DataStream<Integer> source1 = DataStreamSource.fromCollection(context,
Lists.newArrayList(1, 2, 3));
DataStream<String> source2 = DataStreamSource.fromCollection(context,
Lists.newArrayList("1", "2", "3"));
DataStream<String> source3 = DataStreamSource.fromCollection(context,
Lists.newArrayList("2", "3", "4"));
source1.filter(x -> x > 1)
DataStream<Integer> source1 =
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
DataStream<String> source2 =
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
DataStream<String> source3 =
DataStreamSource.fromCollection(context, Lists.newArrayList("2", "3", "4"));
source1
.filter(x -> x > 1)
.map(String::valueOf)
.union(source2)
.join(source3)
@@ -44,11 +44,12 @@ public class JobGraphOptimizerTest {
@Test
public void testOptimizeHybridStream() {
StreamingContext context = StreamingContext.buildContext();
DataStream<Integer> source1 = DataStreamSource.fromCollection(context,
Lists.newArrayList(1, 2, 3));
DataStream<String> source2 = DataStreamSource.fromCollection(context,
Lists.newArrayList("1", "2", "3"));
source1.asPythonStream()
DataStream<Integer> source1 =
DataStreamSource.fromCollection(context, Lists.newArrayList(1, 2, 3));
DataStream<String> source2 =
DataStreamSource.fromCollection(context, Lists.newArrayList("1", "2", "3"));
source1
.asPythonStream()
.map(pyFunc(1))
.filter(pyFunc(2))
.union(source2.asPythonStream().filter(pyFunc(3)).map(pyFunc(4)))
@@ -68,5 +69,4 @@ public class JobGraphOptimizerTest {
private PythonFunction pyFunc(int number) {
  // The function is looked up in "module" under the name "func<number>".
  final String functionName = "func" + number;
  return new PythonFunction("module", functionName);
}
}
}